EPUBChop 0.0.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZDdjMWZlMTIyOTJmMjJiN2I0OGQxYTUzMjA5MTlkNzZhZjcwMDQ3MQ==
5
- data.tar.gz: !binary |-
6
- ZjdjY2JiNzkwY2RiZTIxMDMyM2M4YTU1MjY0OGMwOTZiYjI4NmY4Mw==
2
+ SHA1:
3
+ metadata.gz: 2ff6270a266184b41507e29e43994de705c56357
4
+ data.tar.gz: a257fd9d69ba6eb453c936626b509cc40596adf5
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZjJkYTdmNDI4OGFkNWUxZDRlZjlkZWQ1NDZkYTA3MWE2MmQ3YWMwYmZjZjkz
10
- NDZiZGVjMGEwNWY5ZjgwZjA2ZjQ4OTM4MDBhMGRlNWQ3ODk5Njc1ODhlY2U2
11
- ZmVjNmEwNGYwYTQ1ZGZiNDc0Nzc4ODliZWI0N2Y1MjcxNDQzMGY=
12
- data.tar.gz: !binary |-
13
- ZTNlY2E2OTA4NTNkM2I0YWRjMDc1YTkxMWU5YjgyMDY4YWM0ODE2MGVjZjQ5
14
- OGI2YmNjNWFmZjA1ZTI1OGViZjI0N2UyNjJiOTQ3MmRhOTUyOTYwZDIwNDc2
15
- NGQ0ZWU1N2RhZDBmNGMyOGZlZWM1NzIwMDgzMGM3Y2FkZjdhYWI=
6
+ metadata.gz: 5b8d5d76d9aabb81e9f0afe92d2c0b3aa058b6be46ce9eb3dc6c61f15eceaaaa49a27a696a2063b6bc9158ddb4992be0020ba1f7072ff9b90c2cbdf73b286527
7
+ data.tar.gz: 47742a21f5173c931b35f82df4c4fa5ea2f23e2b040073a9f575f38cab819c34a100b3bf3f1dcdd6e05d88ebe3c44445c2dfadb5e378d6f509332371caa00577
data/bin/epubchop CHANGED
@@ -19,6 +19,7 @@ BANNER
19
19
  opt :line1, "Text that is shown on line 1 of the chopped pages", :type => :string, :default => 'Continue reading?'
20
20
  opt :line2, "Text that is shown on line 2 of the chopped pages", :type => :string, :default => 'Go to your local library or buy the book.'
21
21
  opt :chop, "Follow the SPINE or the NCX of the ePub", :type => :string, :default => 'spine'
22
+ opt :verbose, "more loging true/false", :type => :boolean, :default => false
22
23
  end
23
24
 
24
25
  Trollop::die "need an EPUB file name" if ARGV.empty?
@@ -32,9 +33,10 @@ begin
32
33
  text << options[:line1] if options.has_key?(:line1)
33
34
  text << options[:line2] if options.has_key?(:line2)
34
35
  chop_by = options[:chop]
36
+ verbose = options[:verbose]
35
37
 
36
- puts 'loading EPUB'
37
- b=EPUBChop.get(filename, :chop_by => chop_by.to_sym)
38
+ puts "loading EPUB #{filename}"
39
+ b=EPUBChop.get(filename, :chop_by => chop_by.to_sym, :verbose => verbose)
38
40
  puts 'chopping EPUB'
39
41
  c=b.chop({:base => base.to_s, :words => words, :text => text})
40
42
  puts 'rebuilding EPUB'
data/lib/EPUBChop/chop.rb CHANGED
@@ -1,3 +1,4 @@
1
+ #encoding: UTF-8
1
2
  require 'nokogiri'
2
3
  require 'epubinfo'
3
4
  require 'tempfile'
@@ -38,12 +39,10 @@ module EPUBChop
38
39
 
39
40
  return rebuild_epub_from_tmp_dir(extract_dir)
40
41
  rescue Zip::ZipError => e
41
- raise RuntimeError, "Error processing EPUB. #{e.message}"
42
+ raise RuntimeError, "Error processing EPUB #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
42
43
  rescue Exception => e
43
- puts "Chopping went wrong. #{e.message}"
44
- puts e.backtrace
45
-
46
- return nil
44
+ puts e.backtrace.join("\n")
45
+ raise RuntimeError, "Chopping went wrong for #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
47
46
  ensure
48
47
  FileUtils.remove_entry_secure(extract_dir)
49
48
  end
@@ -80,50 +79,18 @@ module EPUBChop
80
79
 
81
80
  else
82
81
  #noinspection RubyResolve
83
- resource = Nokogiri::XML(@book.table_of_contents.resources[filename]) do |config|
82
+ resource = Nokogiri::HTML(@book.table_of_contents.resources[filename]) do |config|
83
+ #resource = Nokogiri::HTML.parse(@book.table_of_contents.resources[filename], 'UTF-8') do |config|
84
84
  config.noblanks.nonet
85
85
  end
86
- resource.css('script').remove
87
- resource.css('style').remove
88
- resource_text = resource.at_css('body').text.split[0..processed_file_size]
89
- #resource_text_length = resource_text.length
90
-
91
- # get a string that can be found
92
- data = nil
93
- window_begin = default_window_begin = 5
94
- window_end = 0
95
- while data.nil?
96
- look_for = resource_text[(processed_file_size - window_begin)..(processed_file_size - window_end)]
97
-
98
- if look_for.nil?
99
- window_begin = default_window_begin += 5
100
- window_end = 0
101
- else
102
- data = resource.at_css("*:contains('#{look_for.join(' ')}')")
103
- window_begin -= 1
104
- window_end += 1
105
-
106
- if window_begin == window_end
107
- window_begin = default_window_begin += 5
108
- window_end = 0
109
- end
110
- end
111
- end
86
+ resource.encoding = 'UTF-8'
112
87
 
113
- #limit on found string
114
- if data
115
- next_data = data.next_element
116
- while next_data
117
- in_resource = resource.css(next_data.css_path)
118
- in_resource.remove
119
-
120
- next_data = data.nil? || data.next_element.to_s.length == 1 ? nil : data.next_element
121
- end
122
- end
88
+ resource = chop_file(resource, processed_file_size)
123
89
 
124
90
  #persist page
125
- File.open("#{extract_dir}/#{filename}", 'w') do |f|
126
- f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
91
+ File.open("#{extract_dir}/#{filename}", 'w:UTF-8') do |f|
92
+ # f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
93
+ f.puts resource.serialize(:encoding => 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
127
94
  end
128
95
 
129
96
  end
@@ -131,6 +98,61 @@ module EPUBChop
131
98
  end
132
99
  end
133
100
 
101
+ def chop_file(resource, processed_file_size)
102
+ #TODO: get a better algorithm to determine where to chop
103
+ return resource if resource.nil?
104
+
105
+ resource.css('script').remove
106
+ resource.css('style').remove
107
+ resource_text = resource.at_css('body').text.split[0..processed_file_size]
108
+
109
+ # get a string that can be found
110
+ data = nil
111
+ window_begin = default_window_begin = 5
112
+ window_end = 0
113
+ while data.nil?
114
+ puts "data window:#{(processed_file_size - window_begin)}..#{(processed_file_size - window_end)}" if @verbose
115
+ processed_window_begin = processed_file_size - window_begin
116
+ processed_window_end = processed_file_size - window_end
117
+
118
+ processed_window_begin = 0 if processed_window_begin < 0
119
+ processed_window_end = processed_file_size
120
+
121
+ look_for = resource_text[processed_window_begin..processed_window_end]
122
+
123
+ if look_for.nil?
124
+ window_begin = default_window_begin += 5
125
+ window_end = 0
126
+ else
127
+ look_for.map! {|m| m.gsub("'", "\'")}
128
+ data = resource.at_css("p:contains(\"#{look_for.join(' ')}\")")
129
+ data = resource.at_css("body:contains(\"#{look_for.join(' ')}\")") if data.nil?
130
+
131
+ window_begin -= 1
132
+ window_end += 1
133
+
134
+ if window_begin == window_end
135
+ window_begin = default_window_begin += 5
136
+ window_end = 0
137
+ end
138
+ end
139
+ end
140
+
141
+ #limit on found string
142
+ if data
143
+ next_data = data.next_element
144
+ while next_data
145
+ in_resource = resource.css(next_data.css_path)
146
+ in_resource.remove
147
+
148
+ next_data = data.nil? || data.next_element.to_s.length == 1 ? nil : data.next_element
149
+ end
150
+ end
151
+
152
+ resource
153
+ end
154
+
155
+
134
156
  def rebuild_epub_from_tmp_dir(extract_dir)
135
157
  #zip new ebook
136
158
  new_ebook_name = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
@@ -143,7 +165,7 @@ module EPUBChop
143
165
 
144
166
  #minetype should be the first entry and should not be zipped. Else FIDO will not know that this is an EPUB
145
167
  mimetype = epub_files.delete("#{extract_dir}/mimetype")
146
- mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0,0, Zip::Entry::STORED)
168
+ mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0, 0, Zip::Entry::STORED)
147
169
  zipfile.add(mimetype_entry, mimetype) unless mimetype.nil?
148
170
 
149
171
  #all the other files
@@ -164,13 +186,14 @@ module EPUBChop
164
186
 
165
187
  #noinspection RubyInstanceMethodNamingConvention
166
188
  def remove_unused_images_from_tmp_dir(extract_dir)
167
- puts 'removing unused media'
189
+ puts 'removing unused media' if @verbose
168
190
  not_to_be_deleted_images = []
169
- all_images = @book.table_of_contents.resources.images.map {|i| i[:uri]}
191
+ all_images = @book.table_of_contents.resources.images.map { |i| i[:uri] }
170
192
  @book.table_of_contents.resources.html.each do |resource|
171
193
  file = Nokogiri::HTML(File.read("#{extract_dir}/#{resource[:uri]}"))
172
194
 
173
195
  all_images.each do |image|
196
+ next if image.nil?
174
197
  i = image.split('/').last
175
198
  data = file.at_css("img[src$='#{i}']")
176
199
 
@@ -182,7 +205,8 @@ module EPUBChop
182
205
 
183
206
  to_be_deleted_images = (all_images - not_to_be_deleted_images)
184
207
  to_be_deleted_images.each do |image|
185
- puts "\t\tremoving #{image}"
208
+ next if image.nil?
209
+ puts "\t\tremoving #{image}" if @verbose
186
210
  File.delete("#{extract_dir}/#{image}") if File.exists?("#{extract_dir}/#{image}")
187
211
  end
188
212
 
@@ -202,13 +226,14 @@ module EPUBChop
202
226
  end
203
227
 
204
228
  @chop_by = options[:chop_by] || :spine
229
+ @verbose = options[:verbose] || false
205
230
  end
206
231
 
207
232
  def empty_file_with_cover(filename)
208
233
  number_of_subdirectories = filename.split('/').size - 1
209
234
 
210
235
  cover_path = ''
211
- number_of_subdirectories.times{ cover_path += '../'}
236
+ number_of_subdirectories.times { cover_path += '../' }
212
237
 
213
238
  cover_path += @book.cover && @book.cover.exists? ? @book.cover.exists?.to_s : ''
214
239
 
@@ -233,7 +258,7 @@ module EPUBChop
233
258
  </div>
234
259
 
235
260
  <div style='padding-top:10px;'>
236
- <h3>#{CGI.escape_html(@book.titles.first ? @book.titles.first : '' )}</h3>
261
+ <h3>#{CGI.escape_html(@book.titles.first ? @book.titles.first : '')}</h3>
237
262
  </div>
238
263
 
239
264
  <div>
@@ -259,7 +284,7 @@ DATA
259
284
  resource_word_count = {}
260
285
  if @book
261
286
  resources = @book.table_of_contents.resources.to_a
262
- chop_by = @chop_by.eql?(:ncx) ? @book.table_of_contents.resources.ncx : @book.table_of_contents.resources.spine
287
+ chop_by = @chop_by.eql?(:ncx) ? @book.table_of_contents.resources.ncx : @book.table_of_contents.resources.spine
263
288
 
264
289
  chop_by.each do |resource|
265
290
  raw = Nokogiri::HTML(@book.table_of_contents.resources[resource[:uri]]) do |config|
@@ -1,3 +1,3 @@
1
1
  module EPUBChop
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.10"
3
3
  end
metadata CHANGED
@@ -1,99 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: EPUBChop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-19 00:00:00.000000000 Z
11
+ date: 2014-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- requirement: !ruby/object:Gem::Requirement
15
+ version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.3'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - ~>
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.3'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
29
34
  requirement: !ruby/object:Gem::Requirement
30
35
  requirements:
31
- - - ! '>='
36
+ - - '>='
32
37
  - !ruby/object:Gem::Version
33
38
  version: '0'
34
- type: :development
35
39
  prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
36
43
  version_requirements: !ruby/object:Gem::Requirement
37
44
  requirements:
38
- - - ! '>='
45
+ - - '>='
39
46
  - !ruby/object:Gem::Version
40
47
  version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rspec
43
48
  requirement: !ruby/object:Gem::Requirement
44
49
  requirements:
45
- - - ! '>='
50
+ - - '>='
46
51
  - !ruby/object:Gem::Version
47
52
  version: '0'
48
- type: :development
49
53
  prerelease: false
54
+ type: :development
55
+ - !ruby/object:Gem::Dependency
56
+ name: epubinfo_with_toc
50
57
  version_requirements: !ruby/object:Gem::Requirement
51
58
  requirements:
52
- - - ! '>='
59
+ - - '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: epubinfo_with_toc
57
62
  requirement: !ruby/object:Gem::Requirement
58
63
  requirements:
59
- - - ! '>='
64
+ - - '>='
60
65
  - !ruby/object:Gem::Version
61
66
  version: '0'
62
- type: :runtime
63
67
  prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ! '>='
67
- - !ruby/object:Gem::Version
68
- version: '0'
68
+ type: :runtime
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rubyzip
71
- requirement: !ruby/object:Gem::Requirement
71
+ version_requirements: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ~>
74
74
  - !ruby/object:Gem::Version
75
75
  version: '1.0'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
76
+ requirement: !ruby/object:Gem::Requirement
79
77
  requirements:
80
78
  - - ~>
81
79
  - !ruby/object:Gem::Version
82
80
  version: '1.0'
81
+ prerelease: false
82
+ type: :runtime
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: nokogiri
85
- requirement: !ruby/object:Gem::Requirement
85
+ version_requirements: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ! '>='
87
+ - - '>='
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
- type: :runtime
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
90
+ requirement: !ruby/object:Gem::Requirement
93
91
  requirements:
94
- - - ! '>='
92
+ - - '>='
95
93
  - !ruby/object:Gem::Version
96
94
  version: '0'
95
+ prerelease: false
96
+ type: :runtime
97
97
  description: Create EPUB previews
98
98
  email:
99
99
  - mehmet@celik.be
@@ -122,24 +122,24 @@ homepage: https://github.com/mehmetc/EPUBChop
122
122
  licenses:
123
123
  - MIT
124
124
  metadata: {}
125
- post_install_message:
125
+ post_install_message:
126
126
  rdoc_options: []
127
127
  require_paths:
128
128
  - lib
129
129
  required_ruby_version: !ruby/object:Gem::Requirement
130
130
  requirements:
131
- - - ! '>='
131
+ - - '>='
132
132
  - !ruby/object:Gem::Version
133
133
  version: '0'
134
134
  required_rubygems_version: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - ! '>='
136
+ - - '>='
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.1.10
142
- signing_key:
140
+ rubyforge_project:
141
+ rubygems_version: 2.2.2
142
+ signing_key:
143
143
  specification_version: 4
144
144
  summary: Removes unwanted content from an EPUB
145
145
  test_files: