EPUBChop 0.1.1 → 0.1.3

This diff shows the changes between two publicly released versions of the package, as those versions appear in the supported public registry they were published to. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eae1dc4152536744c7d2a51ee3231526632c742b
4
- data.tar.gz: de5b13b1bfe346629bf89ff4c9c7764db3cb1746
3
+ metadata.gz: 310ed2a66e86fba757f244581f9cfbe4cbf69094
4
+ data.tar.gz: 4243b1aae661b31a47d3c78d2e027717f1c6d5a6
5
5
  SHA512:
6
- metadata.gz: 410cec8dcdda2bd1aeff728fbb22d87c12d797ca805d19eab81ee8f1152b9b8be52fb78e11cd9ca942e6544fe70c32f9ecd378fc09d06299a0c5a2b7221adb8e
7
- data.tar.gz: ae763206822c42f8df9887c8ec9df52bf9e7bfc97c419a243ae9435990c82819ddb17df2c8b37cd0d0e65d8310329bdd6a12221f8c738e5357ee89b6d2bf8ab7
6
+ metadata.gz: 4b4afb0cc16991ced9a95117a45d974a1b58797a574eaed765fb17009dbf4b1df725fe4e0d347939fbdf5dec8316d36b03c82471b6af0478993766c7216d5022
7
+ data.tar.gz: 63959397ad7d9589e0c0ee4b466ec46360eb9372e43d3fe4068c4515ae56f83ef24ca2175c212a7b66d51b4a8c86251c7b34ae493bb8cdbc23e76feaa56bbc6c
data/EPUBChop.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
23
  spec.add_development_dependency "rspec"
24
- spec.add_runtime_dependency "epubinfo_with_toc"
25
- spec.add_runtime_dependency "rubyzip", "~> 1.0"
24
+ spec.add_runtime_dependency "epubinfo_with_toc", "0.5.6"
25
+ spec.add_runtime_dependency "rubyzip", "~> 1.1.3"
26
26
  spec.add_runtime_dependency "nokogiri"
27
27
  end
data/bin/epubchop CHANGED
@@ -35,14 +35,17 @@ begin
35
35
  chop_by = options[:chop]
36
36
  verbose = options[:verbose]
37
37
 
38
- puts "loading EPUB #{filename}"
39
- b=EPUBChop.get(filename, :chop_by => chop_by.to_sym, :verbose => verbose)
40
- puts 'chopping EPUB'
41
- c=b.chop({:base => base.to_s, :words => words, :text => text})
42
- puts 'rebuilding EPUB'
43
- FileUtils.move(c, "chopped_#{File.basename(filename)}")
38
+ Dir.glob(filename).each do |f|
39
+ puts "loading EPUB #{f}"
40
+ b=EPUBChop.get(f, :chop_by => chop_by.to_sym, :verbose => verbose)
41
+ puts 'chopping EPUB'
42
+ c=b.chop({:base => base.to_s, :words => words, :text => text})
43
+ puts 'rebuilding EPUB'
44
+ FileUtils.move(c, "chopped_#{File.basename(f)}")
45
+ end
44
46
 
45
47
  rescue Exception => e
46
48
  puts "An error occured\n#{e.message}"
49
+ puts e.backtrace.join("/n")
47
50
  exit 1
48
51
  end
data/lib/EPUBChop/chop.rb CHANGED
@@ -1,4 +1,4 @@
1
- #encoding: UTF-8
1
+ # coding: utf-8
2
2
  require 'nokogiri'
3
3
  require 'epubinfo'
4
4
  require 'tempfile'
@@ -39,7 +39,7 @@ module EPUBChop
39
39
 
40
40
 
41
41
  return rebuild_epub_from_tmp_dir(extract_dir)
42
- rescue Zip::ZipError => e
42
+ rescue Zip::Error => e
43
43
  raise RuntimeError, "Error processing EPUB #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
44
44
  rescue Exception => e
45
45
  puts e.backtrace.join("\n")
@@ -80,18 +80,19 @@ module EPUBChop
80
80
 
81
81
  else
82
82
  #noinspection RubyResolve
83
- resource = Nokogiri::HTML(@book.table_of_contents.resources[filename]) do |config|
84
- #resource = Nokogiri::HTML.parse(@book.table_of_contents.resources[filename], 'UTF-8') do |config|
83
+ resource = Nokogiri::HTML(@book.table_of_contents.resources[filename].force_encoding('UTF-8')) do |config|
85
84
  config.noblanks.nonet
86
85
  end
87
- resource.encoding = 'UTF-8'
88
86
 
87
+ # resource.encoding = 'UTF-8'
89
88
  resource = chop_file(resource, processed_file_size)
90
89
 
91
90
  #persist page
91
+ save_options = Nokogiri::XML::Node::SaveOptions::FORMAT | Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::AS_XHTML
92
92
  File.open("#{extract_dir}/#{filename}", 'w:UTF-8') do |f|
93
- # f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
94
- f.puts resource.serialize(:encoding => 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
93
+ #f.puts resource.serialize(:encoding => 'ISO-8859-1', :save_with => save_options)
94
+ f.puts resource.serialize(:encoding => 'UTF-8', :save_with => save_options)
95
+ #f.puts resource.serialize(:encoding => resource.encoding, :save_with => save_options)
95
96
  end
96
97
 
97
98
  end
@@ -150,12 +151,31 @@ module EPUBChop
150
151
  end
151
152
  end
152
153
 
154
+ meta = Nokogiri::XML::Node.new('meta', resource)
155
+ meta['http-equiv'] = "Content-Type"
156
+ meta['content'] = "text/html; charset=UTF-8"
157
+
158
+ meta_charset = Nokogiri::XML::Node.new('meta', resource)
159
+ meta_charset['charset'] = 'UTF-8'
160
+
161
+ resource.css('head').first << meta
162
+ resource.css('head').first << meta_charset
163
+
153
164
  resource
154
165
  end
155
166
 
156
167
 
157
168
  def rebuild_epub_from_tmp_dir(extract_dir)
158
169
  #zip new ebook
170
+
171
+ Zip.setup do |z|
172
+ z.write_zip64_support = false
173
+ z.on_exists_proc = true
174
+ z.continue_on_exists_proc = true
175
+ z.unicode_names = true
176
+ z.default_compression = Zlib::BEST_COMPRESSION
177
+ end
178
+
159
179
  new_ebook_name = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
160
180
  new_ebook_name_path = new_ebook_name.path
161
181
  new_ebook_name_path.gsub!('-', '')
@@ -166,7 +186,14 @@ module EPUBChop
166
186
 
167
187
  #minetype should be the first entry and should not be zipped. Else FIDO will not know that this is an EPUB
168
188
  mimetype = epub_files.delete("#{extract_dir}/mimetype")
169
- mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0, 0, Zip::Entry::STORED)
189
+ mimetype_entry = Zip::Entry.new(zipfile, #@zipfile
190
+ mimetype.sub("#{extract_dir}/", ''), #@name
191
+ '', #@comment
192
+ '', #@extra
193
+ 0, #@compressed_size
194
+ 0, #@crc
195
+ Zip::Entry::STORED) #@compression_method
196
+
170
197
  zipfile.add(mimetype_entry, mimetype) unless mimetype.nil?
171
198
 
172
199
  #all the other files
@@ -1,3 +1,3 @@
1
1
  module EPUBChop
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.3"
3
3
  end
@@ -1,10 +1,22 @@
1
- # Monkey patching bug in rubyzip, currently fixed on master, but not yet released
2
- # and EPUBChop brings in the 1.0 release.
1
+ # Monkey patching bug in rubyzip can not reproduce this on the main branch
2
+ require 'zip/version'
3
3
 
4
- module Zip
5
- class Entry
6
- private
7
- def prep_zip64_extra(for_local_header)
4
+ if Zip::VERSION.eql?('1.1.3')
5
+ module Zip
6
+ class Entry
7
+ alias_method :old_write_to_zip_output_stream, :write_to_zip_output_stream
8
+
9
+ def write_to_zip_output_stream(zip_output_stream) #:nodoc:all
10
+ if @ftype == :directory
11
+ zip_output_stream.put_next_entry(self, nil, nil, ::Zip::Entry::STORED)
12
+ elsif @filepath
13
+ zip_output_stream.put_next_entry(self, nil, nil, self.compression_method || ::Zip::Entry::DEFLATED )
14
+ get_input_stream { |is| ::Zip::IOExtras.copy_stream(zip_output_stream, is) }
15
+ else
16
+ zip_output_stream.copy_raw_entry(self)
17
+ end
8
18
  end
19
+
20
+ end
9
21
  end
10
- end
22
+ end
@@ -30,4 +30,20 @@ describe 'EPUBChop' do
30
30
  deviation.should < 5
31
31
  end
32
32
 
33
+ context 'created epub file' do
34
+ before(:all) do
35
+ b = EPUBChop.get('./spec/epub/Verne_20000_West_pg11393.epub', {:base => :percentage, :words => 10})
36
+ @chopped = b.chop()
37
+ end
38
+
39
+ it 'should contain a minetype file' do
40
+ f= File.open(@chopped, 'rb')
41
+ first_100_bytes = f.read(100)
42
+ f.close
43
+
44
+ first_100_bytes.should match(/mimetypeapplication\/epub\+zip/)
45
+ end
46
+
47
+ end
48
+
33
49
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: EPUBChop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-26 00:00:00.000000000 Z
11
+ date: 2014-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -56,14 +56,14 @@ dependencies:
56
56
  name: epubinfo_with_toc
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - '='
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: 0.5.6
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - '>='
64
+ - - '='
65
65
  - !ruby/object:Gem::Version
66
- version: '0'
66
+ version: 0.5.6
67
67
  prerelease: false
68
68
  type: :runtime
69
69
  - !ruby/object:Gem::Dependency
@@ -72,12 +72,12 @@ dependencies:
72
72
  requirements:
73
73
  - - ~>
74
74
  - !ruby/object:Gem::Version
75
- version: '1.0'
75
+ version: 1.1.3
76
76
  requirement: !ruby/object:Gem::Requirement
77
77
  requirements:
78
78
  - - ~>
79
79
  - !ruby/object:Gem::Version
80
- version: '1.0'
80
+ version: 1.1.3
81
81
  prerelease: false
82
82
  type: :runtime
83
83
  - !ruby/object:Gem::Dependency