epub-parser 0.4.5 → 0.4.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25b4d1e1300bee0055f6bf79b79c7c3404e1a18752a558160ef4978c38cd1e46
4
- data.tar.gz: e467ba867b6fd5e9959e35bdec9c896f2c361f7cceb93fcb6c42539f55213ecc
3
+ metadata.gz: 2373a741e554fee584d9b3562e655a33e34420c6dd335ffc32f041da5507c1d5
4
+ data.tar.gz: 2224f8bf83deed854f8181f791bf0e5e58b0feaa40e39a5d94b754acef6f841c
5
5
  SHA512:
6
- metadata.gz: c1af1a66faccb0812659841be637d51e0e70cbf41d2fd3672287dded4bf147adfa131eba6b4297eadb108d4aa7c6f1414ad932861b3963ff6cab9a235d4b9b78
7
- data.tar.gz: 6561f63c30009f4e333e99189966aeeceb6c8c190b541d2c0b12711c565a8cb51bcaa1ee37d3623806693981250739371b9006a9e7d8776343b94b5f47c4f65e
6
+ metadata.gz: 5c9eeed9058f10661e27e42ad1366d831ebdebe06879b80c8e44114d0ea74de8f2b15e2909d9602a7deacc351ef1074cf37c5a2e7f4592a141ef2b336474efeb
7
+ data.tar.gz: c2b8c9f85ed82c2720816b80d393884be55c258084b89723adc84cdf490d6e721926df931ff660bb0d06234650018f09f8ef85604cacae5f4b0575c6c691168f
data/.gitlab-ci.yml CHANGED
@@ -58,6 +58,16 @@ test:2.7:
58
58
  paths:
59
59
  - deps
60
60
 
61
+ test:3.0:
62
+ image: ruby:3.0-rc
63
+ script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
64
+ except:
65
+ - tags
66
+ cache:
67
+ key: ruby:3.0
68
+ paths:
69
+ - deps
70
+
61
71
  pages:
62
72
  stage: deploy
63
73
  image: ruby:2.7
data/CHANGELOG.adoc CHANGED
@@ -1,8 +1,17 @@
1
1
  = CHANGELOG
2
2
 
3
+ == 0.4.7
4
+
5
+ * Remove version specification from Nokogiri to migrate to Ruby 3.1
6
+
7
+ == 0.4.6
8
+
9
+ * [BUG FIX]Prevent epubinfo tool from raising an exception when there are no nav elements
10
+ * Tiny modification on Zip archive manipulation
11
+
3
12
  == 0.4.5
4
13
 
5
- [BUG FIX]Handle the case EPUB path is a Pathname
14
+ * [BUG FIX]Handle the case EPUB path is a Pathname
6
15
 
7
16
  == 0.4.4
8
17
 
data/README.adoc CHANGED
@@ -176,9 +176,18 @@ If you find other gems, please tell me or request a pull request.
176
176
 
177
177
  == RECENT CHANGES
178
178
 
179
+ === 0.4.7
180
+
181
+ * Remove version specification from Nokogiri to migrate to Ruby 3.1
182
+
183
+ === 0.4.6
184
+
185
+ * [BUG FIX]Prevent epubinfo tool from raising an exception when there are no nav elements
186
+ * Tiny modification on Zip archive manipulation
187
+
179
188
  === 0.4.5
180
189
 
181
- [BUG FIX]Handle the case EPUB path is a Pathname
190
+ * [BUG FIX]Handle the case EPUB path is a Pathname
182
191
 
183
192
  === 0.4.4
184
193
 
data/bin/epubinfo CHANGED
@@ -65,7 +65,8 @@ data.merge!(book.metadata.to_h)
65
65
  data['modified'] = book.modified
66
66
  data['unique identifier'] = book.metadata.unique_identifier
67
67
  data['epub version'] = book.package.version
68
- data["navigations"] = book.manifest.navs.first&.content_document.navigations.collect(&:type).join(", ")
68
+ nav = book.manifest.navs.first
69
+ data["navigations"] = nav ? nav.book.manifest.navs.first&.content_document&.navigations&.collect(&:type)&.join(", ") : []
69
70
 
70
71
  data.each_pair do |(key, value)|
71
72
  data[key] = value.respond_to?(:join) ? value.join(", ") : value.to_s
@@ -99,12 +100,17 @@ if options[:format] == :line
99
100
  if options[key]
100
101
  puts ""
101
102
  puts "=== #{name} ==="
102
- nav = book.manifest.nav.content_document.navigations.find {|nav| nav.type == key.to_s.sub("-", "_")}
103
- if nav
104
- nav.traverse do |item, depth|
105
- text = item.text || "(No heading)"
106
- text += "(#{item.types.sort.join(', ')})" unless item.types.empty?
107
- puts "#{' ' * depth}#{text}"
103
+ nav_item = book.manifest.nav
104
+ if nav_item
105
+ nav = nav_item.content_document.navigations.find {|nav| nav.type == key.to_s.sub("-", "_")}
106
+ if nav
107
+ nav.traverse do |item, depth|
108
+ text = item.text || "(No heading)"
109
+ text += "(#{item.types.sort.join(', ')})" unless item.types.empty?
110
+ puts "#{' ' * depth}#{text}"
111
+ end
112
+ else
113
+ puts "(No #{name})"
108
114
  end
109
115
  else
110
116
  puts "(No #{name})"
data/bin/epubtotext ADDED
@@ -0,0 +1,39 @@
1
+ require "optparse"
2
+ require "epub/parser"
3
+
4
+ using EPUB::Parser::XMLDocument::Refinements
5
+
6
+ def main(argv)
7
+ option_parser = OptionParser.new {|opt|
8
+ opt.version = EPUB::Parser::VERSION
9
+ opt.banner = <<EOB
10
+ Extracts text from EPUB and output
11
+
12
+ Usage: #{opt.program_name} EPUBFILE
13
+ EOB
14
+ }
15
+ options = option_parser.getopts(argv)
16
+ path = argv.shift
17
+ raise "Specify EPUBFILE" unless path
18
+ EPUB::Parser.parse(path).each_page_on_spine do |page|
19
+ unless page.xhtml?
20
+ $stderr.puts "Cannot parse non-XHTML document(#{page.media_type}): #{page.entry_name}"
21
+ next
22
+ end
23
+ doc = EPUB::Parser::XMLDocument.new(page.read)
24
+ body = doc.each_element_by_xpath("//xhtml:body", EPUB::NAMESPACES).first
25
+ unless body
26
+ $stderr.puts "body element doesn't exist in #{page.entry_name}"
27
+ next
28
+ end
29
+ puts body.content
30
+ # handle spaces
31
+ # handle img@alt
32
+ end
33
+ rescue => err
34
+ $stderr.puts "Error: #{err}"
35
+ $stderr.puts
36
+ abort option_parser.help
37
+ end
38
+
39
+ main(ARGV)
data/docs/Home.adoc CHANGED
@@ -118,7 +118,7 @@ ret == book # => true; this API is not good I feel... Welcome suggestion!
118
118
 
119
119
  ==== Switching XML Library
120
120
 
121
- EPUB Parser tries to load https://gitlab.com/yorickpeterse/oga[Oga] a fast XML parser at first. If Oga is not available, then it tries https://www.nokogiri.org/[Nokogiri], a Ruby bindings for http://xmlsoft.org/[Libxml2] and http://xmlsoft.org/XSLT/[Libxslt] and more. If both are not available, it fallbacks to https://ruby-doc.org/stdlib-2.5.3/libdoc/rexml/rdoc/index.html[REXML], a standard-bundled library. You can also specify REXML explicitly:
121
+ EPUB Parser first tries to load https://www.nokogiri.org/[Nokogiri], Ruby bindings for http://xmlsoft.org/[Libxml2], http://xmlsoft.org/XSLT/[Libxslt] and more. If Nokogiri is not available, it then tries https://gitlab.com/yorickpeterse/oga[Oga], a fast XML parser. If neither is available, it falls back to https://github.com/ruby/rexml/[REXML], a standard-bundled library. You can also specify REXML explicitly:
122
122
 
123
123
  ----
124
124
  EPUB::Parser::XMLDocument.backend = :REXML
data/epub-parser.gemspec CHANGED
@@ -40,10 +40,12 @@ Gem::Specification.new do |s|
40
40
  s.add_development_dependency 'pretty_backtrace'
41
41
  s.add_development_dependency 'epub-maker'
42
42
  s.add_development_dependency 'asciidoctor'
43
- s.add_development_dependency 'nokogiri', '>= 1.6.0', '< 1.11'
43
+ s.add_development_dependency 'nokogiri'
44
44
  s.add_development_dependency 'oga', '>= 2.16'
45
+ s.add_development_dependency 'packnga'
45
46
 
46
47
  s.add_runtime_dependency 'archive-zip'
48
+ s.add_runtime_dependency 'rexml'
47
49
  s.add_runtime_dependency 'addressable', '>= 2.3.5'
48
50
  s.add_runtime_dependency 'rchardet', '>= 1.6.1'
49
51
  s.add_runtime_dependency 'epub-cfi'
@@ -44,7 +44,7 @@ module EPUB
44
44
  end
45
45
  end
46
46
 
47
- raise NoEntry
47
+ raise NoEntry, "Entry not found: #{path_name}"
48
48
  else
49
49
  open {|container| container.read(path_name)}
50
50
  end
@@ -45,7 +45,7 @@ module EPUB
45
45
 
46
46
  def read(container_path, path_name)
47
47
  open(container_path) {|container|
48
- container.read(path_name)
48
+ container.read(path_name.to_s)
49
49
  }
50
50
  end
51
51
 
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.4.5"
3
+ VERSION = "0.4.6"
4
4
  end
5
5
  end
@@ -8,7 +8,7 @@ module EPUB
8
8
 
9
9
  class << self
10
10
  def search_text(package, word, **options)
11
- new(package).search_text(word, options)
11
+ new(package).search_text(word, **options)
12
12
  end
13
13
 
14
14
  def search_element(package, css: nil, xpath: nil, namespaces: {})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epub-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - KITAITI Makoto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-29 00:00:00.000000000 Z
11
+ date: 2021-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -226,20 +226,14 @@ dependencies:
226
226
  requirements:
227
227
  - - ">="
228
228
  - !ruby/object:Gem::Version
229
- version: 1.6.0
230
- - - "<"
231
- - !ruby/object:Gem::Version
232
- version: '1.11'
229
+ version: '0'
233
230
  type: :development
234
231
  prerelease: false
235
232
  version_requirements: !ruby/object:Gem::Requirement
236
233
  requirements:
237
234
  - - ">="
238
235
  - !ruby/object:Gem::Version
239
- version: 1.6.0
240
- - - "<"
241
- - !ruby/object:Gem::Version
242
- version: '1.11'
236
+ version: '0'
243
237
  - !ruby/object:Gem::Dependency
244
238
  name: oga
245
239
  requirement: !ruby/object:Gem::Requirement
@@ -254,6 +248,20 @@ dependencies:
254
248
  - - ">="
255
249
  - !ruby/object:Gem::Version
256
250
  version: '2.16'
251
+ - !ruby/object:Gem::Dependency
252
+ name: packnga
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - ">="
256
+ - !ruby/object:Gem::Version
257
+ version: '0'
258
+ type: :development
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - ">="
263
+ - !ruby/object:Gem::Version
264
+ version: '0'
257
265
  - !ruby/object:Gem::Dependency
258
266
  name: archive-zip
259
267
  requirement: !ruby/object:Gem::Requirement
@@ -268,6 +276,20 @@ dependencies:
268
276
  - - ">="
269
277
  - !ruby/object:Gem::Version
270
278
  version: '0'
279
+ - !ruby/object:Gem::Dependency
280
+ name: rexml
281
+ requirement: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - ">="
284
+ - !ruby/object:Gem::Version
285
+ version: '0'
286
+ type: :runtime
287
+ prerelease: false
288
+ version_requirements: !ruby/object:Gem::Requirement
289
+ requirements:
290
+ - - ">="
291
+ - !ruby/object:Gem::Version
292
+ version: '0'
271
293
  - !ruby/object:Gem::Dependency
272
294
  name: addressable
273
295
  requirement: !ruby/object:Gem::Requirement
@@ -317,6 +339,7 @@ executables:
317
339
  - epub-cover
318
340
  - epub-open
319
341
  - epubinfo
342
+ - epubtotext
320
343
  extensions: []
321
344
  extra_rdoc_files: []
322
345
  files:
@@ -332,6 +355,7 @@ files:
332
355
  - bin/epub-cover
333
356
  - bin/epub-open
334
357
  - bin/epubinfo
358
+ - bin/epubtotext
335
359
  - docs/AggregateContentsFromWeb.markdown
336
360
  - docs/EpubCover.adoc
337
361
  - docs/EpubOpen.markdown
@@ -448,7 +472,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
448
472
  - !ruby/object:Gem::Version
449
473
  version: '0'
450
474
  requirements: []
451
- rubygems_version: 3.1.2
475
+ rubygems_version: 3.3.3
452
476
  signing_key:
453
477
  specification_version: 4
454
478
  summary: EPUB 3 Parser