EPUBChop 0.0.7 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- ZDdjMWZlMTIyOTJmMjJiN2I0OGQxYTUzMjA5MTlkNzZhZjcwMDQ3MQ==
5
- data.tar.gz: !binary |-
6
- ZjdjY2JiNzkwY2RiZTIxMDMyM2M4YTU1MjY0OGMwOTZiYjI4NmY4Mw==
2
+ SHA1:
3
+ metadata.gz: 2ff6270a266184b41507e29e43994de705c56357
4
+ data.tar.gz: a257fd9d69ba6eb453c936626b509cc40596adf5
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZjJkYTdmNDI4OGFkNWUxZDRlZjlkZWQ1NDZkYTA3MWE2MmQ3YWMwYmZjZjkz
10
- NDZiZGVjMGEwNWY5ZjgwZjA2ZjQ4OTM4MDBhMGRlNWQ3ODk5Njc1ODhlY2U2
11
- ZmVjNmEwNGYwYTQ1ZGZiNDc0Nzc4ODliZWI0N2Y1MjcxNDQzMGY=
12
- data.tar.gz: !binary |-
13
- ZTNlY2E2OTA4NTNkM2I0YWRjMDc1YTkxMWU5YjgyMDY4YWM0ODE2MGVjZjQ5
14
- OGI2YmNjNWFmZjA1ZTI1OGViZjI0N2UyNjJiOTQ3MmRhOTUyOTYwZDIwNDc2
15
- NGQ0ZWU1N2RhZDBmNGMyOGZlZWM1NzIwMDgzMGM3Y2FkZjdhYWI=
6
+ metadata.gz: 5b8d5d76d9aabb81e9f0afe92d2c0b3aa058b6be46ce9eb3dc6c61f15eceaaaa49a27a696a2063b6bc9158ddb4992be0020ba1f7072ff9b90c2cbdf73b286527
7
+ data.tar.gz: 47742a21f5173c931b35f82df4c4fa5ea2f23e2b040073a9f575f38cab819c34a100b3bf3f1dcdd6e05d88ebe3c44445c2dfadb5e378d6f509332371caa00577
data/bin/epubchop CHANGED
@@ -19,6 +19,7 @@ BANNER
19
19
  opt :line1, "Text that is shown on line 1 of the chopped pages", :type => :string, :default => 'Continue reading?'
20
20
  opt :line2, "Text that is shown on line 2 of the chopped pages", :type => :string, :default => 'Go to your local library or buy the book.'
21
21
  opt :chop, "Follow the SPINE or the NCX of the ePub", :type => :string, :default => 'spine'
22
+ opt :verbose, "more loging true/false", :type => :boolean, :default => false
22
23
  end
23
24
 
24
25
  Trollop::die "need an EPUB file name" if ARGV.empty?
@@ -32,9 +33,10 @@ begin
32
33
  text << options[:line1] if options.has_key?(:line1)
33
34
  text << options[:line2] if options.has_key?(:line2)
34
35
  chop_by = options[:chop]
36
+ verbose = options[:verbose]
35
37
 
36
- puts 'loading EPUB'
37
- b=EPUBChop.get(filename, :chop_by => chop_by.to_sym)
38
+ puts "loading EPUB #{filename}"
39
+ b=EPUBChop.get(filename, :chop_by => chop_by.to_sym, :verbose => verbose)
38
40
  puts 'chopping EPUB'
39
41
  c=b.chop({:base => base.to_s, :words => words, :text => text})
40
42
  puts 'rebuilding EPUB'
data/lib/EPUBChop/chop.rb CHANGED
@@ -1,3 +1,4 @@
1
+ #encoding: UTF-8
1
2
  require 'nokogiri'
2
3
  require 'epubinfo'
3
4
  require 'tempfile'
@@ -38,12 +39,10 @@ module EPUBChop
38
39
 
39
40
  return rebuild_epub_from_tmp_dir(extract_dir)
40
41
  rescue Zip::ZipError => e
41
- raise RuntimeError, "Error processing EPUB. #{e.message}"
42
+ raise RuntimeError, "Error processing EPUB #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
42
43
  rescue Exception => e
43
- puts "Chopping went wrong. #{e.message}"
44
- puts e.backtrace
45
-
46
- return nil
44
+ puts e.backtrace.join("\n")
45
+ raise RuntimeError, "Chopping went wrong for #{@book.table_of_contents.parser.path}.\n #{e.message}", e.backtrace
47
46
  ensure
48
47
  FileUtils.remove_entry_secure(extract_dir)
49
48
  end
@@ -80,50 +79,18 @@ module EPUBChop
80
79
 
81
80
  else
82
81
  #noinspection RubyResolve
83
- resource = Nokogiri::XML(@book.table_of_contents.resources[filename]) do |config|
82
+ resource = Nokogiri::HTML(@book.table_of_contents.resources[filename]) do |config|
83
+ #resource = Nokogiri::HTML.parse(@book.table_of_contents.resources[filename], 'UTF-8') do |config|
84
84
  config.noblanks.nonet
85
85
  end
86
- resource.css('script').remove
87
- resource.css('style').remove
88
- resource_text = resource.at_css('body').text.split[0..processed_file_size]
89
- #resource_text_length = resource_text.length
90
-
91
- # get a string that can be found
92
- data = nil
93
- window_begin = default_window_begin = 5
94
- window_end = 0
95
- while data.nil?
96
- look_for = resource_text[(processed_file_size - window_begin)..(processed_file_size - window_end)]
97
-
98
- if look_for.nil?
99
- window_begin = default_window_begin += 5
100
- window_end = 0
101
- else
102
- data = resource.at_css("*:contains('#{look_for.join(' ')}')")
103
- window_begin -= 1
104
- window_end += 1
105
-
106
- if window_begin == window_end
107
- window_begin = default_window_begin += 5
108
- window_end = 0
109
- end
110
- end
111
- end
86
+ resource.encoding = 'UTF-8'
112
87
 
113
- #limit on found string
114
- if data
115
- next_data = data.next_element
116
- while next_data
117
- in_resource = resource.css(next_data.css_path)
118
- in_resource.remove
119
-
120
- next_data = data.nil? || data.next_element.to_s.length == 1 ? nil : data.next_element
121
- end
122
- end
88
+ resource = chop_file(resource, processed_file_size)
123
89
 
124
90
  #persist page
125
- File.open("#{extract_dir}/#{filename}", 'w') do |f|
126
- f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
91
+ File.open("#{extract_dir}/#{filename}", 'w:UTF-8') do |f|
92
+ # f.puts resource.to_xml(:save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
93
+ f.puts resource.serialize(:encoding => 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::NO_DECLARATION)
127
94
  end
128
95
 
129
96
  end
@@ -131,6 +98,61 @@ module EPUBChop
131
98
  end
132
99
  end
133
100
 
101
+ def chop_file(resource, processed_file_size)
102
+ #TODO: get a better algorithm to determine where to chop
103
+ return resource if resource.nil?
104
+
105
+ resource.css('script').remove
106
+ resource.css('style').remove
107
+ resource_text = resource.at_css('body').text.split[0..processed_file_size]
108
+
109
+ # get a string that can be found
110
+ data = nil
111
+ window_begin = default_window_begin = 5
112
+ window_end = 0
113
+ while data.nil?
114
+ puts "data window:#{(processed_file_size - window_begin)}..#{(processed_file_size - window_end)}" if @verbose
115
+ processed_window_begin = processed_file_size - window_begin
116
+ processed_window_end = processed_file_size - window_end
117
+
118
+ processed_window_begin = 0 if processed_window_begin < 0
119
+ processed_window_end = processed_file_size
120
+
121
+ look_for = resource_text[processed_window_begin..processed_window_end]
122
+
123
+ if look_for.nil?
124
+ window_begin = default_window_begin += 5
125
+ window_end = 0
126
+ else
127
+ look_for.map! {|m| m.gsub("'", "\'")}
128
+ data = resource.at_css("p:contains(\"#{look_for.join(' ')}\")")
129
+ data = resource.at_css("body:contains(\"#{look_for.join(' ')}\")") if data.nil?
130
+
131
+ window_begin -= 1
132
+ window_end += 1
133
+
134
+ if window_begin == window_end
135
+ window_begin = default_window_begin += 5
136
+ window_end = 0
137
+ end
138
+ end
139
+ end
140
+
141
+ #limit on found string
142
+ if data
143
+ next_data = data.next_element
144
+ while next_data
145
+ in_resource = resource.css(next_data.css_path)
146
+ in_resource.remove
147
+
148
+ next_data = data.nil? || data.next_element.to_s.length == 1 ? nil : data.next_element
149
+ end
150
+ end
151
+
152
+ resource
153
+ end
154
+
155
+
134
156
  def rebuild_epub_from_tmp_dir(extract_dir)
135
157
  #zip new ebook
136
158
  new_ebook_name = Tempfile.new(['epub', '.epub'], Dir.tmpdir)
@@ -143,7 +165,7 @@ module EPUBChop
143
165
 
144
166
  #minetype should be the first entry and should not be zipped. Else FIDO will not know that this is an EPUB
145
167
  mimetype = epub_files.delete("#{extract_dir}/mimetype")
146
- mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0,0, Zip::Entry::STORED)
168
+ mimetype_entry = Zip::Entry.new(zipfile, mimetype.sub("#{extract_dir}/", ''), '', '', 0, 0, Zip::Entry::STORED)
147
169
  zipfile.add(mimetype_entry, mimetype) unless mimetype.nil?
148
170
 
149
171
  #all the other files
@@ -164,13 +186,14 @@ module EPUBChop
164
186
 
165
187
  #noinspection RubyInstanceMethodNamingConvention
166
188
  def remove_unused_images_from_tmp_dir(extract_dir)
167
- puts 'removing unused media'
189
+ puts 'removing unused media' if @verbose
168
190
  not_to_be_deleted_images = []
169
- all_images = @book.table_of_contents.resources.images.map {|i| i[:uri]}
191
+ all_images = @book.table_of_contents.resources.images.map { |i| i[:uri] }
170
192
  @book.table_of_contents.resources.html.each do |resource|
171
193
  file = Nokogiri::HTML(File.read("#{extract_dir}/#{resource[:uri]}"))
172
194
 
173
195
  all_images.each do |image|
196
+ next if image.nil?
174
197
  i = image.split('/').last
175
198
  data = file.at_css("img[src$='#{i}']")
176
199
 
@@ -182,7 +205,8 @@ module EPUBChop
182
205
 
183
206
  to_be_deleted_images = (all_images - not_to_be_deleted_images)
184
207
  to_be_deleted_images.each do |image|
185
- puts "\t\tremoving #{image}"
208
+ next if image.nil?
209
+ puts "\t\tremoving #{image}" if @verbose
186
210
  File.delete("#{extract_dir}/#{image}") if File.exists?("#{extract_dir}/#{image}")
187
211
  end
188
212
 
@@ -202,13 +226,14 @@ module EPUBChop
202
226
  end
203
227
 
204
228
  @chop_by = options[:chop_by] || :spine
229
+ @verbose = options[:verbose] || false
205
230
  end
206
231
 
207
232
  def empty_file_with_cover(filename)
208
233
  number_of_subdirectories = filename.split('/').size - 1
209
234
 
210
235
  cover_path = ''
211
- number_of_subdirectories.times{ cover_path += '../'}
236
+ number_of_subdirectories.times { cover_path += '../' }
212
237
 
213
238
  cover_path += @book.cover && @book.cover.exists? ? @book.cover.exists?.to_s : ''
214
239
 
@@ -233,7 +258,7 @@ module EPUBChop
233
258
  </div>
234
259
 
235
260
  <div style='padding-top:10px;'>
236
- <h3>#{CGI.escape_html(@book.titles.first ? @book.titles.first : '' )}</h3>
261
+ <h3>#{CGI.escape_html(@book.titles.first ? @book.titles.first : '')}</h3>
237
262
  </div>
238
263
 
239
264
  <div>
@@ -259,7 +284,7 @@ DATA
259
284
  resource_word_count = {}
260
285
  if @book
261
286
  resources = @book.table_of_contents.resources.to_a
262
- chop_by = @chop_by.eql?(:ncx) ? @book.table_of_contents.resources.ncx : @book.table_of_contents.resources.spine
287
+ chop_by = @chop_by.eql?(:ncx) ? @book.table_of_contents.resources.ncx : @book.table_of_contents.resources.spine
263
288
 
264
289
  chop_by.each do |resource|
265
290
  raw = Nokogiri::HTML(@book.table_of_contents.resources[resource[:uri]]) do |config|
@@ -1,3 +1,3 @@
1
1
  module EPUBChop
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.10"
3
3
  end
metadata CHANGED
@@ -1,99 +1,99 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: EPUBChop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mehmet Celik
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-19 00:00:00.000000000 Z
11
+ date: 2014-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- requirement: !ruby/object:Gem::Requirement
15
+ version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.3'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - ~>
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.3'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
29
34
  requirement: !ruby/object:Gem::Requirement
30
35
  requirements:
31
- - - ! '>='
36
+ - - '>='
32
37
  - !ruby/object:Gem::Version
33
38
  version: '0'
34
- type: :development
35
39
  prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
36
43
  version_requirements: !ruby/object:Gem::Requirement
37
44
  requirements:
38
- - - ! '>='
45
+ - - '>='
39
46
  - !ruby/object:Gem::Version
40
47
  version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rspec
43
48
  requirement: !ruby/object:Gem::Requirement
44
49
  requirements:
45
- - - ! '>='
50
+ - - '>='
46
51
  - !ruby/object:Gem::Version
47
52
  version: '0'
48
- type: :development
49
53
  prerelease: false
54
+ type: :development
55
+ - !ruby/object:Gem::Dependency
56
+ name: epubinfo_with_toc
50
57
  version_requirements: !ruby/object:Gem::Requirement
51
58
  requirements:
52
- - - ! '>='
59
+ - - '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: epubinfo_with_toc
57
62
  requirement: !ruby/object:Gem::Requirement
58
63
  requirements:
59
- - - ! '>='
64
+ - - '>='
60
65
  - !ruby/object:Gem::Version
61
66
  version: '0'
62
- type: :runtime
63
67
  prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ! '>='
67
- - !ruby/object:Gem::Version
68
- version: '0'
68
+ type: :runtime
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rubyzip
71
- requirement: !ruby/object:Gem::Requirement
71
+ version_requirements: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ~>
74
74
  - !ruby/object:Gem::Version
75
75
  version: '1.0'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
76
+ requirement: !ruby/object:Gem::Requirement
79
77
  requirements:
80
78
  - - ~>
81
79
  - !ruby/object:Gem::Version
82
80
  version: '1.0'
81
+ prerelease: false
82
+ type: :runtime
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: nokogiri
85
- requirement: !ruby/object:Gem::Requirement
85
+ version_requirements: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ! '>='
87
+ - - '>='
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
- type: :runtime
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
90
+ requirement: !ruby/object:Gem::Requirement
93
91
  requirements:
94
- - - ! '>='
92
+ - - '>='
95
93
  - !ruby/object:Gem::Version
96
94
  version: '0'
95
+ prerelease: false
96
+ type: :runtime
97
97
  description: Create EPUB previews
98
98
  email:
99
99
  - mehmet@celik.be
@@ -122,24 +122,24 @@ homepage: https://github.com/mehmetc/EPUBChop
122
122
  licenses:
123
123
  - MIT
124
124
  metadata: {}
125
- post_install_message:
125
+ post_install_message:
126
126
  rdoc_options: []
127
127
  require_paths:
128
128
  - lib
129
129
  required_ruby_version: !ruby/object:Gem::Requirement
130
130
  requirements:
131
- - - ! '>='
131
+ - - '>='
132
132
  - !ruby/object:Gem::Version
133
133
  version: '0'
134
134
  required_rubygems_version: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - ! '>='
136
+ - - '>='
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.1.10
142
- signing_key:
140
+ rubyforge_project:
141
+ rubygems_version: 2.2.2
142
+ signing_key:
143
143
  specification_version: 4
144
144
  summary: Removes unwanted content from an EPUB
145
145
  test_files: