EPUBChop 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/EPUBChop.gemspec +2 -2
- data/bin/epubchop +9 -6
- data/lib/EPUBChop/chop.rb +35 -8
- data/lib/EPUBChop/version.rb +1 -1
- data/lib/EPUBChop/zip/entry.rb +19 -7
- data/spec/epubchop_spec.rb +16 -0
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 310ed2a66e86fba757f244581f9cfbe4cbf69094
|
4
|
+
data.tar.gz: 4243b1aae661b31a47d3c78d2e027717f1c6d5a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b4afb0cc16991ced9a95117a45d974a1b58797a574eaed765fb17009dbf4b1df725fe4e0d347939fbdf5dec8316d36b03c82471b6af0478993766c7216d5022
|
7
|
+
data.tar.gz: 63959397ad7d9589e0c0ee4b466ec46360eb9372e43d3fe4068c4515ae56f83ef24ca2175c212a7b66d51b4a8c86251c7b34ae493bb8cdbc23e76feaa56bbc6c
|
data/EPUBChop.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_runtime_dependency "epubinfo_with_toc"
|
25
|
-
spec.add_runtime_dependency "rubyzip", "~> 1.
|
24
|
+
spec.add_runtime_dependency "epubinfo_with_toc", "0.5.6"
|
25
|
+
spec.add_runtime_dependency "rubyzip", "~> 1.1.3"
|
26
26
|
spec.add_runtime_dependency "nokogiri"
|
27
27
|
end
|
data/bin/epubchop
CHANGED
@@ -35,14 +35,17 @@ begin
|
|
35
35
|
chop_by = options[:chop]
|
36
36
|
verbose = options[:verbose]
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
38
|
+
Dir.glob(filename).each do |f|
|
39
|
+
puts "loading EPUB #{f}"
|
40
|
+
b=EPUBChop.get(f, :chop_by => chop_by.to_sym, :verbose => verbose)
|
41
|
+
puts 'chopping EPUB'
|
42
|
+
c=b.chop({:base => base.to_s, :words => words, :text => text})
|
43
|
+
puts 'rebuilding EPUB'
|
44
|
+
FileUtils.move(c, "chopped_#{File.basename(f)}")
|
45
|
+
end
|
44
46
|
|
45
47
|
rescue Exception => e
|
46
48
|
puts "An error occured\n#{e.message}"
|
49
|
+
puts e.backtrace.join("/n")
|
47
50
|
exit 1
|
48
51
|
end
|
data/lib/EPUBChop/chop.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'epubinfo'
|
4
4
|
require 'tempfile'
|
@@ -39,7 +39,7 @@ module EPUBChop
|
|
39
39
|
|
40
40
|
|
41
41
|
return rebuild_epub_from_tmp_dir(extract_dir)
|
42
|
-
rescue Zip::
|
42
|
+
rescue Zip::Error => e
|
43
43
|
raise RuntimeError, "Error processing EPUB #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
|
44
44
|
rescue Exception => e
|
45
45
|
puts e.backtrace.join("\n")
|
@@ -80,18 +80,19 @@ module EPUBChop
|
|
80
80
|
|
81
81
|
else
|
82
82
|
#noinspection RubyResolve
|
83
|
-
resource = Nokogiri::HTML(@book.table_of_contents.resources[filename]) do |config|
|
84
|
-
#resource = Nokogiri::HTML.parse(@book.table_of_contents.resources[filename], 'UTF-8') do |config|
|
83
|
+
resource = Nokogiri::HTML(@book.table_of_contents.resources[filename].force_encoding('UTF-8')) do |config|
|
85
84
|
config.noblanks.nonet
|
86
85
|
end
|
87
|
-
resource.encoding = 'UTF-8'
|
88
86
|
|
87
|
+
# resource.encoding = 'UTF-8'
|
89
88
|
resource = chop_file(resource, processed_file_size)
|
90
89
|
|
91
90
|
#persist page
|
91
|
+
save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::AS_XHTML
|
92
92
|
File.open("#{extract_dir}/#{filename}", 'w:UTF-8') do |f|
|
93
|
-
#
|
94
|
-
f.puts resource.serialize(:encoding => 'UTF-8', :save_with =>
|
93
|
+
#f.puts resource.serialize(:encoding => 'ISO-8859-1', :save_with => save_options)
|
94
|
+
f.puts resource.serialize(:encoding => 'UTF-8', :save_with => save_options)
|
95
|
+
#f.puts resource.serialize(:encoding => resource.encoding, :save_with => save_options)
|
95
96
|
end
|
96
97
|
|
97
98
|
end
|
@@ -150,12 +151,31 @@ module EPUBChop
|
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
154
|
+
meta = Nokogiri::XML::Node.new('meta', resource)
|
155
|
+
meta['http-equiv'] = "Content-Type"
|
156
|
+
meta['content'] = "text/html; charset=UTF-8"
|
157
|
+
|
158
|
+
meta_charset = Nokogiri::XML::Node.new('meta', resource)
|
159
|
+
meta_charset['charset'] = 'UTF-8'
|
160
|
+
|
161
|
+
resource.css('head').first << meta
|
162
|
+
resource.css('head').first << meta_charset
|
163
|
+
|
153
164
|
resource
|
154
165
|
end
|
155
166
|
|
156
167
|
|
157
168
|
def rebuild_epub_from_tmp_dir(extract_dir)
|
158
169
|
#zip new ebook
|
170
|
+
|
171
|
+
Zip.setup do |z|
|
172
|
+
z.write_zip64_support = false
|
173
|
+
z.on_exists_proc = true
|
174
|
+
z.continue_on_exists_proc = true
|
175
|
+
z.unicode_names = true
|
176
|
+
z.default_compression = Zlib::BEST_COMPRESSION
|
177
|
+
end
|
178
|
+
|
159
179
|
new_ebook_name = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
|
160
180
|
new_ebook_name_path = new_ebook_name.path
|
161
181
|
new_ebook_name_path.gsub!('-', '')
|
@@ -166,7 +186,14 @@ module EPUBChop
|
|
166
186
|
|
167
187
|
#mimetype should be the first entry and must be stored uncompressed; otherwise FIDO will not know that this is an EPUB
|
168
188
|
mimetype = epub_files.delete("#{extract_dir}/mimetype")
|
169
|
-
mimetype_entry = Zip::Entry.new(zipfile,
|
189
|
+
mimetype_entry = Zip::Entry.new(zipfile, #@zipfile
|
190
|
+
mimetype.sub("#{extract_dir}/", ''), #@name
|
191
|
+
'', #@comment
|
192
|
+
'', #@extra
|
193
|
+
0, #@compressed_size
|
194
|
+
0, #@crc
|
195
|
+
Zip::Entry::STORED) #@compression_method
|
196
|
+
|
170
197
|
zipfile.add(mimetype_entry, mimetype) unless mimetype.nil?
|
171
198
|
|
172
199
|
#all the other files
|
data/lib/EPUBChop/version.rb
CHANGED
data/lib/EPUBChop/zip/entry.rb
CHANGED
@@ -1,10 +1,22 @@
|
|
1
|
-
# Monkey patching bug in rubyzip
|
2
|
-
|
1
|
+
# Monkey patching a bug in rubyzip; cannot reproduce this on the main branch
|
2
|
+
require 'zip/version'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
if Zip::VERSION.eql?('1.1.3')
|
5
|
+
module Zip
|
6
|
+
class Entry
|
7
|
+
alias_method :old_write_to_zip_output_stream, :write_to_zip_output_stream
|
8
|
+
|
9
|
+
def write_to_zip_output_stream(zip_output_stream) #:nodoc:all
|
10
|
+
if @ftype == :directory
|
11
|
+
zip_output_stream.put_next_entry(self, nil, nil, ::Zip::Entry::STORED)
|
12
|
+
elsif @filepath
|
13
|
+
zip_output_stream.put_next_entry(self, nil, nil, self.compression_method || ::Zip::Entry::DEFLATED )
|
14
|
+
get_input_stream { |is| ::Zip::IOExtras.copy_stream(zip_output_stream, is) }
|
15
|
+
else
|
16
|
+
zip_output_stream.copy_raw_entry(self)
|
17
|
+
end
|
8
18
|
end
|
19
|
+
|
20
|
+
end
|
9
21
|
end
|
10
|
-
end
|
22
|
+
end
|
data/spec/epubchop_spec.rb
CHANGED
@@ -30,4 +30,20 @@ describe 'EPUBChop' do
|
|
30
30
|
deviation.should < 5
|
31
31
|
end
|
32
32
|
|
33
|
+
context 'created epub file' do
|
34
|
+
before(:all) do
|
35
|
+
b = EPUBChop.get('./spec/epub/Verne_20000_West_pg11393.epub', {:base => :percentage, :words => 10})
|
36
|
+
@chopped = b.chop()
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should contain a minetype file' do
|
40
|
+
f= File.open(@chopped, 'rb')
|
41
|
+
first_100_bytes = f.read(100)
|
42
|
+
f.close
|
43
|
+
|
44
|
+
first_100_bytes.should match(/mimetypeapplication\/epub\+zip/)
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
33
49
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: EPUBChop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -56,14 +56,14 @@ dependencies:
|
|
56
56
|
name: epubinfo_with_toc
|
57
57
|
version_requirements: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - '
|
59
|
+
- - '='
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.5.6
|
62
62
|
requirement: !ruby/object:Gem::Requirement
|
63
63
|
requirements:
|
64
|
-
- - '
|
64
|
+
- - '='
|
65
65
|
- !ruby/object:Gem::Version
|
66
|
-
version:
|
66
|
+
version: 0.5.6
|
67
67
|
prerelease: false
|
68
68
|
type: :runtime
|
69
69
|
- !ruby/object:Gem::Dependency
|
@@ -72,12 +72,12 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ~>
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 1.1.3
|
76
76
|
requirement: !ruby/object:Gem::Requirement
|
77
77
|
requirements:
|
78
78
|
- - ~>
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version:
|
80
|
+
version: 1.1.3
|
81
81
|
prerelease: false
|
82
82
|
type: :runtime
|
83
83
|
- !ruby/object:Gem::Dependency
|