epub-parser 0.4.5 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +10 -0
- data/CHANGELOG.adoc +10 -1
- data/README.adoc +10 -1
- data/bin/epubinfo +13 -7
- data/bin/epubtotext +39 -0
- data/docs/Home.adoc +1 -1
- data/epub-parser.gemspec +3 -1
- data/lib/epub/ocf/physical_container/archive_zip.rb +1 -1
- data/lib/epub/ocf/physical_container.rb +1 -1
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/searcher/publication.rb +1 -1
- metadata +35 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2373a741e554fee584d9b3562e655a33e34420c6dd335ffc32f041da5507c1d5
|
4
|
+
data.tar.gz: 2224f8bf83deed854f8181f791bf0e5e58b0feaa40e39a5d94b754acef6f841c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c9eeed9058f10661e27e42ad1366d831ebdebe06879b80c8e44114d0ea74de8f2b15e2909d9602a7deacc351ef1074cf37c5a2e7f4592a141ef2b336474efeb
|
7
|
+
data.tar.gz: c2b8c9f85ed82c2720816b80d393884be55c258084b89723adc84cdf490d6e721926df931ff660bb0d06234650018f09f8ef85604cacae5f4b0575c6c691168f
|
data/.gitlab-ci.yml
CHANGED
@@ -58,6 +58,16 @@ test:2.7:
|
|
58
58
|
paths:
|
59
59
|
- deps
|
60
60
|
|
61
|
+
test:3.0:
|
62
|
+
image: ruby:3.0-rc
|
63
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
64
|
+
except:
|
65
|
+
- tags
|
66
|
+
cache:
|
67
|
+
key: ruby:3.0
|
68
|
+
paths:
|
69
|
+
- deps
|
70
|
+
|
61
71
|
pages:
|
62
72
|
stage: deploy
|
63
73
|
image: ruby:2.7
|
data/CHANGELOG.adoc
CHANGED
@@ -1,8 +1,17 @@
|
|
1
1
|
= CHANGELOG
|
2
2
|
|
3
|
+
== 0.4.7
|
4
|
+
|
5
|
+
* Remove version specification from Nokogiri to migrate to Ruby 3.1
|
6
|
+
|
7
|
+
== 0.4.6
|
8
|
+
|
9
|
+
* [BUG FIX] Prevent the epubinfo tool from raising an exception when there are no nav elements
|
10
|
+
* Tiny modification on Zip archive manipulation
|
11
|
+
|
3
12
|
== 0.4.5
|
4
13
|
|
5
|
-
[BUG FIX]Handle the case EPUB path is a Pathname
|
14
|
+
* [BUG FIX]Handle the case EPUB path is a Pathname
|
6
15
|
|
7
16
|
== 0.4.4
|
8
17
|
|
data/README.adoc
CHANGED
@@ -176,9 +176,18 @@ If you find other gems, please tell me or request a pull request.
|
|
176
176
|
|
177
177
|
== RECENT CHANGES
|
178
178
|
|
179
|
+
=== 0.4.7
|
180
|
+
|
181
|
+
* Remove version specification from Nokogiri to migrate to Ruby 3.1
|
182
|
+
|
183
|
+
=== 0.4.6
|
184
|
+
|
185
|
+
* [BUG FIX] Prevent the epubinfo tool from raising an exception when there are no nav elements
|
186
|
+
* Tiny modification on Zip archive manipulation
|
187
|
+
|
179
188
|
=== 0.4.5
|
180
189
|
|
181
|
-
[BUG FIX]Handle the case EPUB path is a Pathname
|
190
|
+
* [BUG FIX]Handle the case EPUB path is a Pathname
|
182
191
|
|
183
192
|
=== 0.4.4
|
184
193
|
|
data/bin/epubinfo
CHANGED
@@ -65,7 +65,8 @@ data.merge!(book.metadata.to_h)
|
|
65
65
|
data['modified'] = book.modified
|
66
66
|
data['unique identifier'] = book.metadata.unique_identifier
|
67
67
|
data['epub version'] = book.package.version
|
68
|
-
|
68
|
+
nav = book.manifest.navs.first
|
69
|
+
data["navigations"] = nav ? nav.book.manifest.navs.first&.content_document&.navigations&.collect(&:type)&.join(", ") : []
|
69
70
|
|
70
71
|
data.each_pair do |(key, value)|
|
71
72
|
data[key] = value.respond_to?(:join) ? value.join(", ") : value.to_s
|
@@ -99,12 +100,17 @@ if options[:format] == :line
|
|
99
100
|
if options[key]
|
100
101
|
puts ""
|
101
102
|
puts "=== #{name} ==="
|
102
|
-
|
103
|
-
if
|
104
|
-
nav.
|
105
|
-
|
106
|
-
|
107
|
-
|
103
|
+
nav_item = book.manifest.nav
|
104
|
+
if nav_item
|
105
|
+
nav = nav_item.content_document.navigations.find {|nav| nav.type == key.to_s.sub("-", "_")}
|
106
|
+
if nav
|
107
|
+
nav.traverse do |item, depth|
|
108
|
+
text = item.text || "(No heading)"
|
109
|
+
text += "(#{item.types.sort.join(', ')})" unless item.types.empty?
|
110
|
+
puts "#{' ' * depth}#{text}"
|
111
|
+
end
|
112
|
+
else
|
113
|
+
puts "(No #{name})"
|
108
114
|
end
|
109
115
|
else
|
110
116
|
puts "(No #{name})"
|
data/bin/epubtotext
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require "optparse"
|
2
|
+
require "epub/parser"
|
3
|
+
|
4
|
+
using EPUB::Parser::XMLDocument::Refinements
|
5
|
+
|
6
|
+
def main(argv)
|
7
|
+
option_parser = OptionParser.new {|opt|
|
8
|
+
opt.version = EPUB::Parser::VERSION
|
9
|
+
opt.banner = <<EOB
|
10
|
+
Extracts text from EPUB and output
|
11
|
+
|
12
|
+
Usage: #{opt.program_name} EPUBFILE
|
13
|
+
EOB
|
14
|
+
}
|
15
|
+
options = option_parser.getopts(argv)
|
16
|
+
path = argv.shift
|
17
|
+
raise "Specify EPUBFILE" unless path
|
18
|
+
EPUB::Parser.parse(path).each_page_on_spine do |page|
|
19
|
+
unless page.xhtml?
|
20
|
+
$stderr.puts "Cannot parse non-XHTML document(#{page.media_type}): #{page.entry_name}"
|
21
|
+
next
|
22
|
+
end
|
23
|
+
doc = EPUB::Parser::XMLDocument.new(page.read)
|
24
|
+
body = doc.each_element_by_xpath("//xhtml:body", EPUB::NAMESPACES).first
|
25
|
+
unless body
|
26
|
+
$stderr.puts "body element doesn't exist in #{page.entry_name}"
|
27
|
+
next
|
28
|
+
end
|
29
|
+
puts body.content
|
30
|
+
# handle spaces
|
31
|
+
# handle img@alt
|
32
|
+
end
|
33
|
+
rescue => err
|
34
|
+
$stderr.puts "Error: #{err}"
|
35
|
+
$stderr.puts
|
36
|
+
abort option_parser.help
|
37
|
+
end
|
38
|
+
|
39
|
+
main(ARGV)
|
data/docs/Home.adoc
CHANGED
@@ -118,7 +118,7 @@ ret == book # => true; this API is not good I feel... Welcome suggestion!
|
|
118
118
|
|
119
119
|
==== Switching XML Library
|
120
120
|
|
121
|
-
EPUB Parser tries to load https://
|
121
|
+
EPUB Parser first tries to load https://www.nokogiri.org/[Nokogiri], Ruby bindings for http://xmlsoft.org/[Libxml2], http://xmlsoft.org/XSLT/[Libxslt] and more. If Nokogiri is not available, it then tries https://gitlab.com/yorickpeterse/oga[Oga], a fast XML parser. If neither is available, it falls back to https://github.com/ruby/rexml/[REXML], a standard-bundled library. You can also specify REXML explicitly:
|
122
122
|
|
123
123
|
----
|
124
124
|
EPUB::Parser::XMLDocument.backend = :REXML
|
data/epub-parser.gemspec
CHANGED
@@ -40,10 +40,12 @@ Gem::Specification.new do |s|
|
|
40
40
|
s.add_development_dependency 'pretty_backtrace'
|
41
41
|
s.add_development_dependency 'epub-maker'
|
42
42
|
s.add_development_dependency 'asciidoctor'
|
43
|
-
s.add_development_dependency 'nokogiri'
|
43
|
+
s.add_development_dependency 'nokogiri'
|
44
44
|
s.add_development_dependency 'oga', '>= 2.16'
|
45
|
+
s.add_development_dependency 'packnga'
|
45
46
|
|
46
47
|
s.add_runtime_dependency 'archive-zip'
|
48
|
+
s.add_runtime_dependency 'rexml'
|
47
49
|
s.add_runtime_dependency 'addressable', '>= 2.3.5'
|
48
50
|
s.add_runtime_dependency 'rchardet', '>= 1.6.1'
|
49
51
|
s.add_runtime_dependency 'epub-cfi'
|
data/lib/epub/parser/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -226,20 +226,14 @@ dependencies:
|
|
226
226
|
requirements:
|
227
227
|
- - ">="
|
228
228
|
- !ruby/object:Gem::Version
|
229
|
-
version:
|
230
|
-
- - "<"
|
231
|
-
- !ruby/object:Gem::Version
|
232
|
-
version: '1.11'
|
229
|
+
version: '0'
|
233
230
|
type: :development
|
234
231
|
prerelease: false
|
235
232
|
version_requirements: !ruby/object:Gem::Requirement
|
236
233
|
requirements:
|
237
234
|
- - ">="
|
238
235
|
- !ruby/object:Gem::Version
|
239
|
-
version:
|
240
|
-
- - "<"
|
241
|
-
- !ruby/object:Gem::Version
|
242
|
-
version: '1.11'
|
236
|
+
version: '0'
|
243
237
|
- !ruby/object:Gem::Dependency
|
244
238
|
name: oga
|
245
239
|
requirement: !ruby/object:Gem::Requirement
|
@@ -254,6 +248,20 @@ dependencies:
|
|
254
248
|
- - ">="
|
255
249
|
- !ruby/object:Gem::Version
|
256
250
|
version: '2.16'
|
251
|
+
- !ruby/object:Gem::Dependency
|
252
|
+
name: packnga
|
253
|
+
requirement: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - ">="
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: '0'
|
258
|
+
type: :development
|
259
|
+
prerelease: false
|
260
|
+
version_requirements: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - ">="
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: '0'
|
257
265
|
- !ruby/object:Gem::Dependency
|
258
266
|
name: archive-zip
|
259
267
|
requirement: !ruby/object:Gem::Requirement
|
@@ -268,6 +276,20 @@ dependencies:
|
|
268
276
|
- - ">="
|
269
277
|
- !ruby/object:Gem::Version
|
270
278
|
version: '0'
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: rexml
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - ">="
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '0'
|
286
|
+
type: :runtime
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - ">="
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: '0'
|
271
293
|
- !ruby/object:Gem::Dependency
|
272
294
|
name: addressable
|
273
295
|
requirement: !ruby/object:Gem::Requirement
|
@@ -317,6 +339,7 @@ executables:
|
|
317
339
|
- epub-cover
|
318
340
|
- epub-open
|
319
341
|
- epubinfo
|
342
|
+
- epubtotext
|
320
343
|
extensions: []
|
321
344
|
extra_rdoc_files: []
|
322
345
|
files:
|
@@ -332,6 +355,7 @@ files:
|
|
332
355
|
- bin/epub-cover
|
333
356
|
- bin/epub-open
|
334
357
|
- bin/epubinfo
|
358
|
+
- bin/epubtotext
|
335
359
|
- docs/AggregateContentsFromWeb.markdown
|
336
360
|
- docs/EpubCover.adoc
|
337
361
|
- docs/EpubOpen.markdown
|
@@ -448,7 +472,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
448
472
|
- !ruby/object:Gem::Version
|
449
473
|
version: '0'
|
450
474
|
requirements: []
|
451
|
-
rubygems_version: 3.
|
475
|
+
rubygems_version: 3.3.3
|
452
476
|
signing_key:
|
453
477
|
specification_version: 4
|
454
478
|
summary: EPUB 3 Parser
|