epub-parser 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +10 -0
- data/CHANGELOG.adoc +10 -1
- data/README.adoc +10 -1
- data/bin/epubinfo +13 -7
- data/bin/epubtotext +39 -0
- data/docs/Home.adoc +1 -1
- data/epub-parser.gemspec +3 -1
- data/lib/epub/ocf/physical_container/archive_zip.rb +1 -1
- data/lib/epub/ocf/physical_container.rb +1 -1
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/searcher/publication.rb +1 -1
- metadata +35 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2373a741e554fee584d9b3562e655a33e34420c6dd335ffc32f041da5507c1d5
|
4
|
+
data.tar.gz: 2224f8bf83deed854f8181f791bf0e5e58b0feaa40e39a5d94b754acef6f841c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c9eeed9058f10661e27e42ad1366d831ebdebe06879b80c8e44114d0ea74de8f2b15e2909d9602a7deacc351ef1074cf37c5a2e7f4592a141ef2b336474efeb
|
7
|
+
data.tar.gz: c2b8c9f85ed82c2720816b80d393884be55c258084b89723adc84cdf490d6e721926df931ff660bb0d06234650018f09f8ef85604cacae5f4b0575c6c691168f
|
data/.gitlab-ci.yml
CHANGED
@@ -58,6 +58,16 @@ test:2.7:
|
|
58
58
|
paths:
|
59
59
|
- deps
|
60
60
|
|
61
|
+
test:3.0:
|
62
|
+
image: ruby:3.0-rc
|
63
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
64
|
+
except:
|
65
|
+
- tags
|
66
|
+
cache:
|
67
|
+
key: ruby:3.0
|
68
|
+
paths:
|
69
|
+
- deps
|
70
|
+
|
61
71
|
pages:
|
62
72
|
stage: deploy
|
63
73
|
image: ruby:2.7
|
data/CHANGELOG.adoc
CHANGED
@@ -1,8 +1,17 @@
|
|
1
1
|
= CHANGELOG
|
2
2
|
|
3
|
+
== 0.4.7
|
4
|
+
|
5
|
+
* Remove version specification from Nokogiri to migrate to Ruby 3.1
|
6
|
+
|
7
|
+
== 0.4.6
|
8
|
+
|
9
|
+
* [BUG FIX]Prevent epubinfo tool from raising an exception when there are no nav elements
|
10
|
+
* Tiny modification on Zip archive manipulation
|
11
|
+
|
3
12
|
== 0.4.5
|
4
13
|
|
5
|
-
[BUG FIX]Handle the case EPUB path is a Pathname
|
14
|
+
* [BUG FIX]Handle the case EPUB path is a Pathname
|
6
15
|
|
7
16
|
== 0.4.4
|
8
17
|
|
data/README.adoc
CHANGED
@@ -176,9 +176,18 @@ If you find other gems, please tell me or request a pull request.
|
|
176
176
|
|
177
177
|
== RECENT CHANGES
|
178
178
|
|
179
|
+
=== 0.4.7
|
180
|
+
|
181
|
+
* Remove version specification from Nokogiri to migrate to Ruby 3.1
|
182
|
+
|
183
|
+
=== 0.4.6
|
184
|
+
|
185
|
+
* [BUG FIX]Prevent epubinfo tool from raising an exception when there are no nav elements
|
186
|
+
* Tiny modification on Zip archive manipulation
|
187
|
+
|
179
188
|
=== 0.4.5
|
180
189
|
|
181
|
-
[BUG FIX]Handle the case EPUB path is a Pathname
|
190
|
+
* [BUG FIX]Handle the case EPUB path is a Pathname
|
182
191
|
|
183
192
|
=== 0.4.4
|
184
193
|
|
data/bin/epubinfo
CHANGED
@@ -65,7 +65,8 @@ data.merge!(book.metadata.to_h)
|
|
65
65
|
data['modified'] = book.modified
|
66
66
|
data['unique identifier'] = book.metadata.unique_identifier
|
67
67
|
data['epub version'] = book.package.version
|
68
|
-
|
68
|
+
nav = book.manifest.navs.first
|
69
|
+
data["navigations"] = nav ? nav.book.manifest.navs.first&.content_document&.navigations&.collect(&:type)&.join(", ") : []
|
69
70
|
|
70
71
|
data.each_pair do |(key, value)|
|
71
72
|
data[key] = value.respond_to?(:join) ? value.join(", ") : value.to_s
|
@@ -99,12 +100,17 @@ if options[:format] == :line
|
|
99
100
|
if options[key]
|
100
101
|
puts ""
|
101
102
|
puts "=== #{name} ==="
|
102
|
-
|
103
|
-
if
|
104
|
-
nav.
|
105
|
-
|
106
|
-
|
107
|
-
|
103
|
+
nav_item = book.manifest.nav
|
104
|
+
if nav_item
|
105
|
+
nav = nav_item.content_document.navigations.find {|nav| nav.type == key.to_s.sub("-", "_")}
|
106
|
+
if nav
|
107
|
+
nav.traverse do |item, depth|
|
108
|
+
text = item.text || "(No heading)"
|
109
|
+
text += "(#{item.types.sort.join(', ')})" unless item.types.empty?
|
110
|
+
puts "#{' ' * depth}#{text}"
|
111
|
+
end
|
112
|
+
else
|
113
|
+
puts "(No #{name})"
|
108
114
|
end
|
109
115
|
else
|
110
116
|
puts "(No #{name})"
|
data/bin/epubtotext
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require "optparse"
|
2
|
+
require "epub/parser"
|
3
|
+
|
4
|
+
using EPUB::Parser::XMLDocument::Refinements
|
5
|
+
|
6
|
+
def main(argv)
|
7
|
+
option_parser = OptionParser.new {|opt|
|
8
|
+
opt.version = EPUB::Parser::VERSION
|
9
|
+
opt.banner = <<EOB
|
10
|
+
Extracts text from EPUB and output
|
11
|
+
|
12
|
+
Usage: #{opt.program_name} EPUBFILE
|
13
|
+
EOB
|
14
|
+
}
|
15
|
+
options = option_parser.getopts(argv)
|
16
|
+
path = argv.shift
|
17
|
+
raise "Specify EPUBFILE" unless path
|
18
|
+
EPUB::Parser.parse(path).each_page_on_spine do |page|
|
19
|
+
unless page.xhtml?
|
20
|
+
$stderr.puts "Cannot parse non-XHTML document(#{page.media_type}): #{page.entry_name}"
|
21
|
+
next
|
22
|
+
end
|
23
|
+
doc = EPUB::Parser::XMLDocument.new(page.read)
|
24
|
+
body = doc.each_element_by_xpath("//xhtml:body", EPUB::NAMESPACES).first
|
25
|
+
unless body
|
26
|
+
$stderr.puts "body element doesn't exist in #{page.entry_name}"
|
27
|
+
next
|
28
|
+
end
|
29
|
+
puts body.content
|
30
|
+
# handle spaces
|
31
|
+
# handle img@alt
|
32
|
+
end
|
33
|
+
rescue => err
|
34
|
+
$stderr.puts "Error: #{err}"
|
35
|
+
$stderr.puts
|
36
|
+
abort option_parser.help
|
37
|
+
end
|
38
|
+
|
39
|
+
main(ARGV)
|
data/docs/Home.adoc
CHANGED
@@ -118,7 +118,7 @@ ret == book # => true; this API is not good I feel... Welcome suggestion!
|
|
118
118
|
|
119
119
|
==== Switching XML Library
|
120
120
|
|
121
|
-
EPUB Parser tries to load https://
|
121
|
+
EPUB Parser first tries to load https://www.nokogiri.org/[Nokogiri], Ruby bindings for http://xmlsoft.org/[Libxml2], http://xmlsoft.org/XSLT/[Libxslt] and more. If Nokogiri is not available, it then tries https://gitlab.com/yorickpeterse/oga[Oga], a fast XML parser. If neither is available, it falls back to https://github.com/ruby/rexml/[REXML], a standard-bundled library. You can also specify REXML explicitly:
|
122
122
|
|
123
123
|
----
|
124
124
|
EPUB::Parser::XMLDocument.backend = :REXML
|
data/epub-parser.gemspec
CHANGED
@@ -40,10 +40,12 @@ Gem::Specification.new do |s|
|
|
40
40
|
s.add_development_dependency 'pretty_backtrace'
|
41
41
|
s.add_development_dependency 'epub-maker'
|
42
42
|
s.add_development_dependency 'asciidoctor'
|
43
|
-
s.add_development_dependency 'nokogiri'
|
43
|
+
s.add_development_dependency 'nokogiri'
|
44
44
|
s.add_development_dependency 'oga', '>= 2.16'
|
45
|
+
s.add_development_dependency 'packnga'
|
45
46
|
|
46
47
|
s.add_runtime_dependency 'archive-zip'
|
48
|
+
s.add_runtime_dependency 'rexml'
|
47
49
|
s.add_runtime_dependency 'addressable', '>= 2.3.5'
|
48
50
|
s.add_runtime_dependency 'rchardet', '>= 1.6.1'
|
49
51
|
s.add_runtime_dependency 'epub-cfi'
|
data/lib/epub/parser/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -226,20 +226,14 @@ dependencies:
|
|
226
226
|
requirements:
|
227
227
|
- - ">="
|
228
228
|
- !ruby/object:Gem::Version
|
229
|
-
version:
|
230
|
-
- - "<"
|
231
|
-
- !ruby/object:Gem::Version
|
232
|
-
version: '1.11'
|
229
|
+
version: '0'
|
233
230
|
type: :development
|
234
231
|
prerelease: false
|
235
232
|
version_requirements: !ruby/object:Gem::Requirement
|
236
233
|
requirements:
|
237
234
|
- - ">="
|
238
235
|
- !ruby/object:Gem::Version
|
239
|
-
version:
|
240
|
-
- - "<"
|
241
|
-
- !ruby/object:Gem::Version
|
242
|
-
version: '1.11'
|
236
|
+
version: '0'
|
243
237
|
- !ruby/object:Gem::Dependency
|
244
238
|
name: oga
|
245
239
|
requirement: !ruby/object:Gem::Requirement
|
@@ -254,6 +248,20 @@ dependencies:
|
|
254
248
|
- - ">="
|
255
249
|
- !ruby/object:Gem::Version
|
256
250
|
version: '2.16'
|
251
|
+
- !ruby/object:Gem::Dependency
|
252
|
+
name: packnga
|
253
|
+
requirement: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - ">="
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: '0'
|
258
|
+
type: :development
|
259
|
+
prerelease: false
|
260
|
+
version_requirements: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - ">="
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: '0'
|
257
265
|
- !ruby/object:Gem::Dependency
|
258
266
|
name: archive-zip
|
259
267
|
requirement: !ruby/object:Gem::Requirement
|
@@ -268,6 +276,20 @@ dependencies:
|
|
268
276
|
- - ">="
|
269
277
|
- !ruby/object:Gem::Version
|
270
278
|
version: '0'
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: rexml
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - ">="
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '0'
|
286
|
+
type: :runtime
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - ">="
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: '0'
|
271
293
|
- !ruby/object:Gem::Dependency
|
272
294
|
name: addressable
|
273
295
|
requirement: !ruby/object:Gem::Requirement
|
@@ -317,6 +339,7 @@ executables:
|
|
317
339
|
- epub-cover
|
318
340
|
- epub-open
|
319
341
|
- epubinfo
|
342
|
+
- epubtotext
|
320
343
|
extensions: []
|
321
344
|
extra_rdoc_files: []
|
322
345
|
files:
|
@@ -332,6 +355,7 @@ files:
|
|
332
355
|
- bin/epub-cover
|
333
356
|
- bin/epub-open
|
334
357
|
- bin/epubinfo
|
358
|
+
- bin/epubtotext
|
335
359
|
- docs/AggregateContentsFromWeb.markdown
|
336
360
|
- docs/EpubCover.adoc
|
337
361
|
- docs/EpubOpen.markdown
|
@@ -448,7 +472,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
448
472
|
- !ruby/object:Gem::Version
|
449
473
|
version: '0'
|
450
474
|
requirements: []
|
451
|
-
rubygems_version: 3.
|
475
|
+
rubygems_version: 3.3.3
|
452
476
|
signing_key:
|
453
477
|
specification_version: 4
|
454
478
|
summary: EPUB 3 Parser
|