epub-parser 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25b4d1e1300bee0055f6bf79b79c7c3404e1a18752a558160ef4978c38cd1e46
4
- data.tar.gz: e467ba867b6fd5e9959e35bdec9c896f2c361f7cceb93fcb6c42539f55213ecc
3
+ metadata.gz: 2373a741e554fee584d9b3562e655a33e34420c6dd335ffc32f041da5507c1d5
4
+ data.tar.gz: 2224f8bf83deed854f8181f791bf0e5e58b0feaa40e39a5d94b754acef6f841c
5
5
  SHA512:
6
- metadata.gz: c1af1a66faccb0812659841be637d51e0e70cbf41d2fd3672287dded4bf147adfa131eba6b4297eadb108d4aa7c6f1414ad932861b3963ff6cab9a235d4b9b78
7
- data.tar.gz: 6561f63c30009f4e333e99189966aeeceb6c8c190b541d2c0b12711c565a8cb51bcaa1ee37d3623806693981250739371b9006a9e7d8776343b94b5f47c4f65e
6
+ metadata.gz: 5c9eeed9058f10661e27e42ad1366d831ebdebe06879b80c8e44114d0ea74de8f2b15e2909d9602a7deacc351ef1074cf37c5a2e7f4592a141ef2b336474efeb
7
+ data.tar.gz: c2b8c9f85ed82c2720816b80d393884be55c258084b89723adc84cdf490d6e721926df931ff660bb0d06234650018f09f8ef85604cacae5f4b0575c6c691168f
data/.gitlab-ci.yml CHANGED
@@ -58,6 +58,16 @@ test:2.7:
58
58
  paths:
59
59
  - deps
60
60
 
61
+ test:3.0:
62
+ image: ruby:3.0-rc
63
+ script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
64
+ except:
65
+ - tags
66
+ cache:
67
+ key: ruby:3.0
68
+ paths:
69
+ - deps
70
+
61
71
  pages:
62
72
  stage: deploy
63
73
  image: ruby:2.7
data/CHANGELOG.adoc CHANGED
@@ -1,8 +1,17 @@
1
1
  = CHANGELOG
2
2
 
3
+ == 0.4.7
4
+
5
+ * Remove version specification from Nokogiri to migrate to Ruby 3.1
6
+
7
+ == 0.4.6
8
+
9
+ * [BUG FIX]Prevent epubinfo tool from raising an exception when there are no nav elements
10
+ * Tiny modification on Zip archive manipulation
11
+
3
12
  == 0.4.5
4
13
 
5
- [BUG FIX]Handle the case EPUB path is a Pathname
14
+ * [BUG FIX]Handle the case EPUB path is a Pathname
6
15
 
7
16
  == 0.4.4
8
17
 
data/README.adoc CHANGED
@@ -176,9 +176,18 @@ If you find other gems, please tell me or request a pull request.
176
176
 
177
177
  == RECENT CHANGES
178
178
 
179
+ === 0.4.7
180
+
181
+ * Remove version specification from Nokogiri to migrate to Ruby 3.1
182
+
183
+ === 0.4.6
184
+
185
+ * [BUG FIX]Prevent epubinfo tool from raising an exception when there are no nav elements
186
+ * Tiny modification on Zip archive manipulation
187
+
179
188
  === 0.4.5
180
189
 
181
- [BUG FIX]Handle the case EPUB path is a Pathname
190
+ * [BUG FIX]Handle the case EPUB path is a Pathname
182
191
 
183
192
  === 0.4.4
184
193
 
data/bin/epubinfo CHANGED
@@ -65,7 +65,8 @@ data.merge!(book.metadata.to_h)
65
65
  data['modified'] = book.modified
66
66
  data['unique identifier'] = book.metadata.unique_identifier
67
67
  data['epub version'] = book.package.version
68
- data["navigations"] = book.manifest.navs.first&.content_document.navigations.collect(&:type).join(", ")
68
+ nav = book.manifest.navs.first
69
+ data["navigations"] = nav ? nav.book.manifest.navs.first&.content_document&.navigations&.collect(&:type)&.join(", ") : []
69
70
 
70
71
  data.each_pair do |(key, value)|
71
72
  data[key] = value.respond_to?(:join) ? value.join(", ") : value.to_s
@@ -99,12 +100,17 @@ if options[:format] == :line
99
100
  if options[key]
100
101
  puts ""
101
102
  puts "=== #{name} ==="
102
- nav = book.manifest.nav.content_document.navigations.find {|nav| nav.type == key.to_s.sub("-", "_")}
103
- if nav
104
- nav.traverse do |item, depth|
105
- text = item.text || "(No heading)"
106
- text += "(#{item.types.sort.join(', ')})" unless item.types.empty?
107
- puts "#{' ' * depth}#{text}"
103
+ nav_item = book.manifest.nav
104
+ if nav_item
105
+ nav = nav_item.content_document.navigations.find {|nav| nav.type == key.to_s.sub("-", "_")}
106
+ if nav
107
+ nav.traverse do |item, depth|
108
+ text = item.text || "(No heading)"
109
+ text += "(#{item.types.sort.join(', ')})" unless item.types.empty?
110
+ puts "#{' ' * depth}#{text}"
111
+ end
112
+ else
113
+ puts "(No #{name})"
108
114
  end
109
115
  else
110
116
  puts "(No #{name})"
data/bin/epubtotext ADDED
@@ -0,0 +1,39 @@
1
+ require "optparse"
2
+ require "epub/parser"
3
+
4
+ using EPUB::Parser::XMLDocument::Refinements
5
+
6
+ def main(argv)
7
+ option_parser = OptionParser.new {|opt|
8
+ opt.version = EPUB::Parser::VERSION
9
+ opt.banner = <<EOB
10
+ Extracts text from EPUB and output
11
+
12
+ Usage: #{opt.program_name} EPUBFILE
13
+ EOB
14
+ }
15
+ options = option_parser.getopts(argv)
16
+ path = argv.shift
17
+ raise "Specify EPUBFILE" unless path
18
+ EPUB::Parser.parse(path).each_page_on_spine do |page|
19
+ unless page.xhtml?
20
+ $stderr.puts "Cannot parse non-XHTML document(#{page.media_type}): #{page.entry_name}"
21
+ next
22
+ end
23
+ doc = EPUB::Parser::XMLDocument.new(page.read)
24
+ body = doc.each_element_by_xpath("//xhtml:body", EPUB::NAMESPACES).first
25
+ unless body
26
+ $stderr.puts "body element doesn't exist in #{page.entry_name}"
27
+ next
28
+ end
29
+ puts body.content
30
+ # handle spaces
31
+ # handle img@alt
32
+ end
33
+ rescue => err
34
+ $stderr.puts "Error: #{err}"
35
+ $stderr.puts
36
+ abort option_parser.help
37
+ end
38
+
39
+ main(ARGV)
data/docs/Home.adoc CHANGED
@@ -118,7 +118,7 @@ ret == book # => true; this API is not good I feel... Welcome suggestion!
118
118
 
119
119
  ==== Switching XML Library
120
120
 
121
- EPUB Parser tries to load https://gitlab.com/yorickpeterse/oga[Oga] a fast XML parser at first. If Oga is not available, then it tries https://www.nokogiri.org/[Nokogiri], a Ruby bindings for http://xmlsoft.org/[Libxml2] and http://xmlsoft.org/XSLT/[Libxslt] and more. If both are not available, it fallbacks to https://ruby-doc.org/stdlib-2.5.3/libdoc/rexml/rdoc/index.html[REXML], a standard-bundled library. You can also specify REXML explicitly:
121
+ EPUB Parser first tries to load https://www.nokogiri.org/[Nokogiri], Ruby bindings for http://xmlsoft.org/[Libxml2] and http://xmlsoft.org/XSLT/[Libxslt] and more. If Nokogiri is not available, it then tries https://gitlab.com/yorickpeterse/oga[Oga], a fast XML parser. If neither is available, it falls back to https://github.com/ruby/rexml/[REXML], a standard-bundled library. You can also specify REXML explicitly:
122
122
 
123
123
  ----
124
124
  EPUB::Parser::XMLDocument.backend = :REXML
data/epub-parser.gemspec CHANGED
@@ -40,10 +40,12 @@ Gem::Specification.new do |s|
40
40
  s.add_development_dependency 'pretty_backtrace'
41
41
  s.add_development_dependency 'epub-maker'
42
42
  s.add_development_dependency 'asciidoctor'
43
- s.add_development_dependency 'nokogiri', '>= 1.6.0', '< 1.11'
43
+ s.add_development_dependency 'nokogiri'
44
44
  s.add_development_dependency 'oga', '>= 2.16'
45
+ s.add_development_dependency 'packnga'
45
46
 
46
47
  s.add_runtime_dependency 'archive-zip'
48
+ s.add_runtime_dependency 'rexml'
47
49
  s.add_runtime_dependency 'addressable', '>= 2.3.5'
48
50
  s.add_runtime_dependency 'rchardet', '>= 1.6.1'
49
51
  s.add_runtime_dependency 'epub-cfi'
@@ -44,7 +44,7 @@ module EPUB
44
44
  end
45
45
  end
46
46
 
47
- raise NoEntry
47
+ raise NoEntry, "Entry not found: #{path_name}"
48
48
  else
49
49
  open {|container| container.read(path_name)}
50
50
  end
@@ -45,7 +45,7 @@ module EPUB
45
45
 
46
46
  def read(container_path, path_name)
47
47
  open(container_path) {|container|
48
- container.read(path_name)
48
+ container.read(path_name.to_s)
49
49
  }
50
50
  end
51
51
 
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.4.5"
3
+ VERSION = "0.4.6"
4
4
  end
5
5
  end
@@ -8,7 +8,7 @@ module EPUB
8
8
 
9
9
  class << self
10
10
  def search_text(package, word, **options)
11
- new(package).search_text(word, options)
11
+ new(package).search_text(word, **options)
12
12
  end
13
13
 
14
14
  def search_element(package, css: nil, xpath: nil, namespaces: {})
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epub-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - KITAITI Makoto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-29 00:00:00.000000000 Z
11
+ date: 2021-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -226,20 +226,14 @@ dependencies:
226
226
  requirements:
227
227
  - - ">="
228
228
  - !ruby/object:Gem::Version
229
- version: 1.6.0
230
- - - "<"
231
- - !ruby/object:Gem::Version
232
- version: '1.11'
229
+ version: '0'
233
230
  type: :development
234
231
  prerelease: false
235
232
  version_requirements: !ruby/object:Gem::Requirement
236
233
  requirements:
237
234
  - - ">="
238
235
  - !ruby/object:Gem::Version
239
- version: 1.6.0
240
- - - "<"
241
- - !ruby/object:Gem::Version
242
- version: '1.11'
236
+ version: '0'
243
237
  - !ruby/object:Gem::Dependency
244
238
  name: oga
245
239
  requirement: !ruby/object:Gem::Requirement
@@ -254,6 +248,20 @@ dependencies:
254
248
  - - ">="
255
249
  - !ruby/object:Gem::Version
256
250
  version: '2.16'
251
+ - !ruby/object:Gem::Dependency
252
+ name: packnga
253
+ requirement: !ruby/object:Gem::Requirement
254
+ requirements:
255
+ - - ">="
256
+ - !ruby/object:Gem::Version
257
+ version: '0'
258
+ type: :development
259
+ prerelease: false
260
+ version_requirements: !ruby/object:Gem::Requirement
261
+ requirements:
262
+ - - ">="
263
+ - !ruby/object:Gem::Version
264
+ version: '0'
257
265
  - !ruby/object:Gem::Dependency
258
266
  name: archive-zip
259
267
  requirement: !ruby/object:Gem::Requirement
@@ -268,6 +276,20 @@ dependencies:
268
276
  - - ">="
269
277
  - !ruby/object:Gem::Version
270
278
  version: '0'
279
+ - !ruby/object:Gem::Dependency
280
+ name: rexml
281
+ requirement: !ruby/object:Gem::Requirement
282
+ requirements:
283
+ - - ">="
284
+ - !ruby/object:Gem::Version
285
+ version: '0'
286
+ type: :runtime
287
+ prerelease: false
288
+ version_requirements: !ruby/object:Gem::Requirement
289
+ requirements:
290
+ - - ">="
291
+ - !ruby/object:Gem::Version
292
+ version: '0'
271
293
  - !ruby/object:Gem::Dependency
272
294
  name: addressable
273
295
  requirement: !ruby/object:Gem::Requirement
@@ -317,6 +339,7 @@ executables:
317
339
  - epub-cover
318
340
  - epub-open
319
341
  - epubinfo
342
+ - epubtotext
320
343
  extensions: []
321
344
  extra_rdoc_files: []
322
345
  files:
@@ -332,6 +355,7 @@ files:
332
355
  - bin/epub-cover
333
356
  - bin/epub-open
334
357
  - bin/epubinfo
358
+ - bin/epubtotext
335
359
  - docs/AggregateContentsFromWeb.markdown
336
360
  - docs/EpubCover.adoc
337
361
  - docs/EpubOpen.markdown
@@ -448,7 +472,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
448
472
  - !ruby/object:Gem::Version
449
473
  version: '0'
450
474
  requirements: []
451
- rubygems_version: 3.1.2
475
+ rubygems_version: 3.3.3
452
476
  signing_key:
453
477
  specification_version: 4
454
478
  summary: EPUB 3 Parser