EPUBChop 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/EPUBChop.gemspec +2 -2
- data/bin/epubchop +9 -6
- data/lib/EPUBChop/chop.rb +35 -8
- data/lib/EPUBChop/version.rb +1 -1
- data/lib/EPUBChop/zip/entry.rb +19 -7
- data/spec/epubchop_spec.rb +16 -0
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 310ed2a66e86fba757f244581f9cfbe4cbf69094
|
4
|
+
data.tar.gz: 4243b1aae661b31a47d3c78d2e027717f1c6d5a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b4afb0cc16991ced9a95117a45d974a1b58797a574eaed765fb17009dbf4b1df725fe4e0d347939fbdf5dec8316d36b03c82471b6af0478993766c7216d5022
|
7
|
+
data.tar.gz: 63959397ad7d9589e0c0ee4b466ec46360eb9372e43d3fe4068c4515ae56f83ef24ca2175c212a7b66d51b4a8c86251c7b34ae493bb8cdbc23e76feaa56bbc6c
|
data/EPUBChop.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_runtime_dependency "epubinfo_with_toc"
|
25
|
-
spec.add_runtime_dependency "rubyzip", "~> 1.
|
24
|
+
spec.add_runtime_dependency "epubinfo_with_toc", "0.5.6"
|
25
|
+
spec.add_runtime_dependency "rubyzip", "~> 1.1.3"
|
26
26
|
spec.add_runtime_dependency "nokogiri"
|
27
27
|
end
|
data/bin/epubchop
CHANGED
@@ -35,14 +35,17 @@ begin
|
|
35
35
|
chop_by = options[:chop]
|
36
36
|
verbose = options[:verbose]
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
38
|
+
Dir.glob(filename).each do |f|
|
39
|
+
puts "loading EPUB #{f}"
|
40
|
+
b=EPUBChop.get(f, :chop_by => chop_by.to_sym, :verbose => verbose)
|
41
|
+
puts 'chopping EPUB'
|
42
|
+
c=b.chop({:base => base.to_s, :words => words, :text => text})
|
43
|
+
puts 'rebuilding EPUB'
|
44
|
+
FileUtils.move(c, "chopped_#{File.basename(f)}")
|
45
|
+
end
|
44
46
|
|
45
47
|
rescue Exception => e
|
46
48
|
puts "An error occured\n#{e.message}"
|
49
|
+
puts e.backtrace.join("/n")
|
47
50
|
exit 1
|
48
51
|
end
|
data/lib/EPUBChop/chop.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'epubinfo'
|
4
4
|
require 'tempfile'
|
@@ -39,7 +39,7 @@ module EPUBChop
|
|
39
39
|
|
40
40
|
|
41
41
|
return rebuild_epub_from_tmp_dir(extract_dir)
|
42
|
-
rescue Zip::
|
42
|
+
rescue Zip::Error => e
|
43
43
|
raise RuntimeError, "Error processing EPUB #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
|
44
44
|
rescue Exception => e
|
45
45
|
puts e.backtrace.join("\n")
|
@@ -80,18 +80,19 @@ module EPUBChop
|
|
80
80
|
|
81
81
|
else
|
82
82
|
#noinspection RubyResolve
|
83
|
-
resource = Nokogiri::HTML(@book.table_of_contents.resources[filename]) do |config|
|
84
|
-
#resource = Nokogiri::HTML.parse(@book.table_of_contents.resources[filename], 'UTF-8') do |config|
|
83
|
+
resource = Nokogiri::HTML(@book.table_of_contents.resources[filename].force_encoding('UTF-8')) do |config|
|
85
84
|
config.noblanks.nonet
|
86
85
|
end
|
87
|
-
resource.encoding = 'UTF-8'
|
88
86
|
|
87
|
+
# resource.encoding = 'UTF-8'
|
89
88
|
resource = chop_file(resource, processed_file_size)
|
90
89
|
|
91
90
|
#persist page
|
91
|
+
save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::AS_XHTML
|
92
92
|
File.open("#{extract_dir}/#{filename}", 'w:UTF-8') do |f|
|
93
|
-
#
|
94
|
-
f.puts resource.serialize(:encoding => 'UTF-8', :save_with =>
|
93
|
+
#f.puts resource.serialize(:encoding => 'ISO-8859-1', :save_with => save_options)
|
94
|
+
f.puts resource.serialize(:encoding => 'UTF-8', :save_with => save_options)
|
95
|
+
#f.puts resource.serialize(:encoding => resource.encoding, :save_with => save_options)
|
95
96
|
end
|
96
97
|
|
97
98
|
end
|
@@ -150,12 +151,31 @@ module EPUBChop
|
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
154
|
+
meta = Nokogiri::XML::Node.new('meta', resource)
|
155
|
+
meta['http-equiv'] = "Content-Type"
|
156
|
+
meta['content'] = "text/html; charset=UTF-8"
|
157
|
+
|
158
|
+
meta_charset = Nokogiri::XML::Node.new('meta', resource)
|
159
|
+
meta_charset['charset'] = 'UTF-8'
|
160
|
+
|
161
|
+
resource.css('head').first << meta
|
162
|
+
resource.css('head').first << meta_charset
|
163
|
+
|
153
164
|
resource
|
154
165
|
end
|
155
166
|
|
156
167
|
|
157
168
|
def rebuild_epub_from_tmp_dir(extract_dir)
|
158
169
|
#zip new ebook
|
170
|
+
|
171
|
+
Zip.setup do |z|
|
172
|
+
z.write_zip64_support = false
|
173
|
+
z.on_exists_proc = true
|
174
|
+
z.continue_on_exists_proc = true
|
175
|
+
z.unicode_names = true
|
176
|
+
z.default_compression = Zlib::BEST_COMPRESSION
|
177
|
+
end
|
178
|
+
|
159
179
|
new_ebook_name = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
|
160
180
|
new_ebook_name_path = new_ebook_name.path
|
161
181
|
new_ebook_name_path.gsub!('-', '')
|
@@ -166,7 +186,14 @@ module EPUBChop
|
|
166
186
|
|
167
187
|
#mimetype should be the first entry and should not be compressed. Otherwise FIDO will not know that this is an EPUB
|
168
188
|
mimetype = epub_files.delete("#{extract_dir}/mimetype")
|
169
|
-
mimetype_entry = Zip::Entry.new(zipfile,
|
189
|
+
mimetype_entry = Zip::Entry.new(zipfile, #@zipfile
|
190
|
+
mimetype.sub("#{extract_dir}/", ''), #@name
|
191
|
+
'', #@comment
|
192
|
+
'', #@extra
|
193
|
+
0, #@compressed_size
|
194
|
+
0, #@crc
|
195
|
+
Zip::Entry::STORED) #@compression_method
|
196
|
+
|
170
197
|
zipfile.add(mimetype_entry, mimetype) unless mimetype.nil?
|
171
198
|
|
172
199
|
#all the other files
|
data/lib/EPUBChop/version.rb
CHANGED
data/lib/EPUBChop/zip/entry.rb
CHANGED
@@ -1,10 +1,22 @@
|
|
1
|
-
# Monkey patching bug in rubyzip
|
2
|
-
|
1
|
+
# Monkey patching a bug in rubyzip; cannot reproduce this on the main branch
|
2
|
+
require 'zip/version'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
if Zip::VERSION.eql?('1.1.3')
|
5
|
+
module Zip
|
6
|
+
class Entry
|
7
|
+
alias_method :old_write_to_zip_output_stream, :write_to_zip_output_stream
|
8
|
+
|
9
|
+
def write_to_zip_output_stream(zip_output_stream) #:nodoc:all
|
10
|
+
if @ftype == :directory
|
11
|
+
zip_output_stream.put_next_entry(self, nil, nil, ::Zip::Entry::STORED)
|
12
|
+
elsif @filepath
|
13
|
+
zip_output_stream.put_next_entry(self, nil, nil, self.compression_method || ::Zip::Entry::DEFLATED )
|
14
|
+
get_input_stream { |is| ::Zip::IOExtras.copy_stream(zip_output_stream, is) }
|
15
|
+
else
|
16
|
+
zip_output_stream.copy_raw_entry(self)
|
17
|
+
end
|
8
18
|
end
|
19
|
+
|
20
|
+
end
|
9
21
|
end
|
10
|
-
end
|
22
|
+
end
|
data/spec/epubchop_spec.rb
CHANGED
@@ -30,4 +30,20 @@ describe 'EPUBChop' do
|
|
30
30
|
deviation.should < 5
|
31
31
|
end
|
32
32
|
|
33
|
+
context 'created epub file' do
|
34
|
+
before(:all) do
|
35
|
+
b = EPUBChop.get('./spec/epub/Verne_20000_West_pg11393.epub', {:base => :percentage, :words => 10})
|
36
|
+
@chopped = b.chop()
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should contain a minetype file' do
|
40
|
+
f= File.open(@chopped, 'rb')
|
41
|
+
first_100_bytes = f.read(100)
|
42
|
+
f.close
|
43
|
+
|
44
|
+
first_100_bytes.should match(/mimetypeapplication\/epub\+zip/)
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
33
49
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: EPUBChop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mehmet Celik
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -56,14 +56,14 @@ dependencies:
|
|
56
56
|
name: epubinfo_with_toc
|
57
57
|
version_requirements: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - '
|
59
|
+
- - '='
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 0.5.6
|
62
62
|
requirement: !ruby/object:Gem::Requirement
|
63
63
|
requirements:
|
64
|
-
- - '
|
64
|
+
- - '='
|
65
65
|
- !ruby/object:Gem::Version
|
66
|
-
version:
|
66
|
+
version: 0.5.6
|
67
67
|
prerelease: false
|
68
68
|
type: :runtime
|
69
69
|
- !ruby/object:Gem::Dependency
|
@@ -72,12 +72,12 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ~>
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 1.1.3
|
76
76
|
requirement: !ruby/object:Gem::Requirement
|
77
77
|
requirements:
|
78
78
|
- - ~>
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version:
|
80
|
+
version: 1.1.3
|
81
81
|
prerelease: false
|
82
82
|
type: :runtime
|
83
83
|
- !ruby/object:Gem::Dependency
|