epub-parser 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +16 -6
- data/CHANGELOG.adoc +7 -0
- data/README.adoc +7 -12
- data/Rakefile +2 -1
- data/bin/epub-cover +5 -3
- data/bin/epub-open +1 -1
- data/bin/epubinfo +6 -3
- data/docs/Home.adoc +2 -2
- data/lib/epub/parser.rb +14 -14
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/parser/xml_document/refinements/rexml.rb +8 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0d2b5eaf4a6fef5b3d223872db25abb9a087c3ba4e3aabc8dae4f5e854313e42
|
|
4
|
+
data.tar.gz: 94f742b2babd8fb916ff2ac38af7e6989fdb7ffad4da0be72ffefbb0015464d4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6f79e57d8876ddaad3a43ac633e09e313926272e9d028b33075decb2ca6180c22ca16698d311be958834f83c01c67ce976e53e34a63ef05fa6f326e47a994222
|
|
7
|
+
data.tar.gz: f21296e38086c227e4cb2423e3961e80099404b43a9a13327ddd350e1642152ff19bb08f23d4b21b4bc4873d2fb0bc9ed20cd8febfbbcb6f51e4c4cafb5a51dd
|
data/.gitlab-ci.yml
CHANGED
|
@@ -7,7 +7,7 @@ before_script:
|
|
|
7
7
|
|
|
8
8
|
test:2.3:
|
|
9
9
|
image: ruby:2.3
|
|
10
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
|
10
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
|
11
11
|
except:
|
|
12
12
|
- tags
|
|
13
13
|
cache:
|
|
@@ -17,7 +17,7 @@ test:2.3:
|
|
|
17
17
|
|
|
18
18
|
test:2.4:
|
|
19
19
|
image: ruby:2.4
|
|
20
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
|
20
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
|
21
21
|
except:
|
|
22
22
|
- tags
|
|
23
23
|
cache:
|
|
@@ -27,7 +27,7 @@ test:2.4:
|
|
|
27
27
|
|
|
28
28
|
test:2.5:
|
|
29
29
|
image: ruby:2.5
|
|
30
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
|
30
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
|
31
31
|
except:
|
|
32
32
|
- tags
|
|
33
33
|
artifacts:
|
|
@@ -40,7 +40,7 @@ test:2.5:
|
|
|
40
40
|
|
|
41
41
|
test:2.6:
|
|
42
42
|
image: ruby:2.6
|
|
43
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
|
43
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
|
44
44
|
except:
|
|
45
45
|
- tags
|
|
46
46
|
cache:
|
|
@@ -48,9 +48,19 @@ test:2.6:
|
|
|
48
48
|
paths:
|
|
49
49
|
- deps
|
|
50
50
|
|
|
51
|
+
test:2.7:
|
|
52
|
+
image: ruby:2.7
|
|
53
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
|
54
|
+
except:
|
|
55
|
+
- tags
|
|
56
|
+
cache:
|
|
57
|
+
key: ruby:2.7
|
|
58
|
+
paths:
|
|
59
|
+
- deps
|
|
60
|
+
|
|
51
61
|
pages:
|
|
52
62
|
stage: deploy
|
|
53
|
-
image: ruby:2.
|
|
63
|
+
image: ruby:2.7
|
|
54
64
|
dependencies:
|
|
55
65
|
- test:2.6
|
|
56
66
|
script:
|
|
@@ -63,6 +73,6 @@ pages:
|
|
|
63
73
|
only:
|
|
64
74
|
- master
|
|
65
75
|
cache:
|
|
66
|
-
key: ruby:2.
|
|
76
|
+
key: ruby:2.7
|
|
67
77
|
paths:
|
|
68
78
|
- deps
|
data/CHANGELOG.adoc
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
= CHANGELOG
|
|
2
2
|
|
|
3
|
+
== 0.4.2
|
|
4
|
+
|
|
5
|
+
* [BUG FIX]Fix a bug where epub-cover refers to an out-of-scope variable
|
|
6
|
+
|
|
7
|
+
* [BUG FIX]Fix a bug where the `epubinfo` command with the `--words` or `--chars` option causes a load error if Nokogiri is not available in the environment
|
|
8
|
+
* [BUG FIX]Fix a bug where REXML::Element#content ignores descendant nodes
|
|
9
|
+
|
|
3
10
|
== 0.4.1
|
|
4
11
|
|
|
5
12
|
* Add Oga backend for XML document
|
data/README.adoc
CHANGED
|
@@ -174,6 +174,13 @@ If you find other gems, please tell me or request a pull request.
|
|
|
174
174
|
|
|
175
175
|
== RECENT CHANGES
|
|
176
176
|
|
|
177
|
+
=== 0.4.2
|
|
178
|
+
|
|
179
|
+
* [BUG FIX]Fix a bug where epub-cover refers to an out-of-scope variable
|
|
180
|
+
|
|
181
|
+
* [BUG FIX]Fix a bug where the `epubinfo` command with the `--words` or `--chars` option causes a load error if Nokogiri is not available in the environment
|
|
182
|
+
* [BUG FIX]Fix a bug where REXML::Element#content ignores descendant nodes
|
|
183
|
+
|
|
177
184
|
=== 0.4.1
|
|
178
185
|
|
|
179
186
|
* Add Oga backend for XML document
|
|
@@ -182,18 +189,6 @@ If you find other gems, please tell me or request a pull request.
|
|
|
182
189
|
|
|
183
190
|
* [BUG FIX]Make epub:type a Set
|
|
184
191
|
|
|
185
|
-
=== 0.3.9
|
|
186
|
-
|
|
187
|
-
* [BUG FIX]Set {EPUB::Metadata::DCMES#lang} properly from xml:lang attribute
|
|
188
|
-
* Change default XML backend from REXML to Nokogiri
|
|
189
|
-
|
|
190
|
-
=== 0.3.8
|
|
191
|
-
|
|
192
|
-
* [REFACTORING]Add {EPUB::Parser::NokogiriAttributeWithPrefix} and use `Nokogiri::XML::Node#attribute_with_prefix` instead of `EPUB::Parser::Utils#extract_attribute`
|
|
193
|
-
* Set default value for detect_encoding argument for {EPUB::Publication::Package::Manifest::Item#read} to false
|
|
194
|
-
* Make XML library switchable between REXML and Nokogiri
|
|
195
|
-
* Make REXML a default XML backend
|
|
196
|
-
|
|
197
192
|
See {file:CHANGELOG.adoc} for older changelogs and details.
|
|
198
193
|
|
|
199
194
|
== TODOS
|
data/Rakefile
CHANGED
|
@@ -3,7 +3,6 @@ require 'rake/testtask'
|
|
|
3
3
|
require 'rubygems/tasks'
|
|
4
4
|
require 'yard'
|
|
5
5
|
require 'rdoc/task'
|
|
6
|
-
require 'epub/parser/version'
|
|
7
6
|
require 'archive/zip'
|
|
8
7
|
require 'epub/maker'
|
|
9
8
|
require "tmpdir"
|
|
@@ -19,6 +18,8 @@ namespace :test do
|
|
|
19
18
|
|
|
20
19
|
file "test/fixtures/book.epub" => "test/fixtures/book" do |task|
|
|
21
20
|
EPUB::Maker.archive task.source
|
|
21
|
+
# We cannot include "CASE-SENSITIVE.xhtml" in the Git repository because
|
|
22
|
+
# macOS removes it or case-sensitive.xhtml from the file system.
|
|
22
23
|
small_file = File.read("#{task.source}/OPS/case-sensitive.xhtml")
|
|
23
24
|
Dir.mktmpdir do |dir|
|
|
24
25
|
upcase_file_path = File.join(dir, "CASE-SENSITIVE.xhtml")
|
data/bin/epub-cover
CHANGED
|
@@ -32,7 +32,7 @@ EOB
|
|
|
32
32
|
end
|
|
33
33
|
error "output not a directory" if options["output"] && !File.directory?(options["output"])
|
|
34
34
|
cover_image = EPUB::Parser.parse(path).cover_image
|
|
35
|
-
error "cover image not found" unless cover_image
|
|
35
|
+
error "cover image not found", option_parser.program_name, option_parser.help unless cover_image
|
|
36
36
|
path = File.basename(cover_image.href.to_s)
|
|
37
37
|
path = File.join(options["output"], path) if options["output"]
|
|
38
38
|
File.write path, cover_image.read
|
|
@@ -41,10 +41,12 @@ EOB
|
|
|
41
41
|
$stderr.puts ""
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
-
def error(message)
|
|
44
|
+
def error(message, program_name, help)
|
|
45
45
|
$stderr.puts "Error: #{message}"
|
|
46
46
|
$stderr.puts ""
|
|
47
|
-
$stderr.puts
|
|
47
|
+
$stderr.puts program_name
|
|
48
|
+
$stderr.puts "=" * program_name.length
|
|
49
|
+
$stderr.puts help
|
|
48
50
|
abort
|
|
49
51
|
end
|
|
50
52
|
|
data/bin/epub-open
CHANGED
data/bin/epubinfo
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
require 'optparse'
|
|
2
2
|
require 'epub/parser'
|
|
3
3
|
|
|
4
|
+
using EPUB::Parser::XMLDocument::Refinements
|
|
5
|
+
|
|
4
6
|
options = {:format => :line}
|
|
5
7
|
opt = OptionParser.new do |opt|
|
|
6
8
|
opt.banner = <<EOB
|
|
@@ -51,12 +53,13 @@ end
|
|
|
51
53
|
|
|
52
54
|
counts = {:chars => 0, :words => 0}
|
|
53
55
|
if options[:words] or options[:chars]
|
|
56
|
+
namespaces = {"xhtml" => "http://www.w3.org/1999/xhtml"}
|
|
54
57
|
book.resources.select(&:xhtml?).each do |xhtml|
|
|
55
58
|
begin
|
|
56
|
-
doc = xhtml.
|
|
57
|
-
body = doc.
|
|
58
|
-
content = body.content
|
|
59
|
+
doc = EPUB::Parser::XMLDocument.new(xhtml.read)
|
|
60
|
+
body = doc.each_element_by_xpath('//xhtml:body', namespaces).first
|
|
59
61
|
if body
|
|
62
|
+
content = body.content
|
|
60
63
|
counts[:words] += content.scan(/\S+/).length
|
|
61
64
|
counts[:chars] += content.gsub(/\r|\n/, '').length
|
|
62
65
|
end
|
data/docs/Home.adoc
CHANGED
|
@@ -89,7 +89,7 @@ end
|
|
|
89
89
|
|
|
90
90
|
book = EPUB::Parser.parse(
|
|
91
91
|
'uploaded-book.epub',
|
|
92
|
-
:
|
|
92
|
+
class: YourBook # *************** pass YourBook class
|
|
93
93
|
)
|
|
94
94
|
book.instance_of? YourBook # => true
|
|
95
95
|
book.required = 'value for required field'
|
|
@@ -110,7 +110,7 @@ You are also able to find YourBook object for the first:
|
|
|
110
110
|
book = YourBook.find params[:id]
|
|
111
111
|
ret = EPUB::Parser.parse(
|
|
112
112
|
'uploaded-book.epub',
|
|
113
|
-
:
|
|
113
|
+
book: book # ******************* pass your book instance
|
|
114
114
|
) # => book
|
|
115
115
|
ret == book # => true; this API is not good I feel... Welcome suggestion!
|
|
116
116
|
# do something with your book
|
data/lib/epub/parser.rb
CHANGED
|
@@ -12,14 +12,14 @@ module EPUB
|
|
|
12
12
|
#
|
|
13
13
|
# @example
|
|
14
14
|
# class MyBook
|
|
15
|
-
# include EPUB
|
|
15
|
+
# include EPUB::Book::Feature
|
|
16
16
|
# end
|
|
17
17
|
# book = MyBook.new
|
|
18
|
-
# parsed_book = EPUB::Parser.parse('path/to/book.epub', :
|
|
18
|
+
# parsed_book = EPUB::Parser.parse('path/to/book.epub', book: book) # => #<MyBook:0x000000019760e8 @epub_file=..>
|
|
19
19
|
# parsed_book.equal? book # => true
|
|
20
20
|
#
|
|
21
21
|
# @example
|
|
22
|
-
# book = EPUB::Parser.parse('path/to/book.epub', :
|
|
22
|
+
# book = EPUB::Parser.parse('path/to/book.epub', class: MyBook) # => #<MyBook:0x000000019b0568 @epub_file=...>
|
|
23
23
|
# book.instance_of? MyBook # => true
|
|
24
24
|
#
|
|
25
25
|
# @param [String] filepath
|
|
@@ -35,21 +35,21 @@ module EPUB
|
|
|
35
35
|
# When option :book passed, returns the same object whose attributes about EPUB are set.
|
|
36
36
|
# When option :class passed, returns the instance of the class.
|
|
37
37
|
# Otherwise returns {EPUB::Book} object.
|
|
38
|
-
def parse(filepath, **options)
|
|
39
|
-
new(filepath, options).parse
|
|
38
|
+
def parse(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
|
|
39
|
+
new(filepath, container_adapter: container_adapter, book: book, initialize_with: initialize_with, **options).parse
|
|
40
40
|
end
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
-
def initialize(filepath, **options)
|
|
44
|
-
path_is_uri = (
|
|
45
|
-
|
|
43
|
+
def initialize(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
|
|
44
|
+
path_is_uri = (container_adapter == EPUB::OCF::PhysicalContainer::UnpackedURI or
|
|
45
|
+
container_adapter == :UnpackedURI or
|
|
46
46
|
EPUB::OCF::PhysicalContainer.adapter == EPUB::OCF::PhysicalContainer::UnpackedURI)
|
|
47
47
|
|
|
48
48
|
raise "File #{filepath} not found" if
|
|
49
49
|
!path_is_uri and !File.exist?(filepath)
|
|
50
50
|
|
|
51
51
|
@filepath = path_is_uri ? filepath : File.realpath(filepath)
|
|
52
|
-
@book = create_book(options)
|
|
52
|
+
@book = create_book(book: book, initialize_with: initialize_with, **options)
|
|
53
53
|
if path_is_uri
|
|
54
54
|
@book.container_adapter = :UnpackedURI
|
|
55
55
|
elsif File.directory? @filepath
|
|
@@ -77,13 +77,13 @@ module EPUB
|
|
|
77
77
|
|
|
78
78
|
private
|
|
79
79
|
|
|
80
|
-
def create_book(params)
|
|
80
|
+
def create_book(book: nil, initialize_with: nil, **params)
|
|
81
81
|
case
|
|
82
|
-
when
|
|
83
|
-
|
|
82
|
+
when book
|
|
83
|
+
book
|
|
84
84
|
when params[:class]
|
|
85
|
-
if
|
|
86
|
-
params[:class].new
|
|
85
|
+
if initialize_with
|
|
86
|
+
params[:class].new initialize_with
|
|
87
87
|
else
|
|
88
88
|
params[:class].new
|
|
89
89
|
end
|
data/lib/epub/parser/version.rb
CHANGED
|
@@ -27,7 +27,14 @@ module EPUB
|
|
|
27
27
|
alias namespace_uri namespace
|
|
28
28
|
|
|
29
29
|
def content
|
|
30
|
-
|
|
30
|
+
each_child.inject("") {|text, node|
|
|
31
|
+
case node.node_type
|
|
32
|
+
when :document, :element
|
|
33
|
+
text << node.content
|
|
34
|
+
when :text
|
|
35
|
+
text << node.value
|
|
36
|
+
end
|
|
37
|
+
}
|
|
31
38
|
end
|
|
32
39
|
end
|
|
33
40
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: epub-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.1
|
|
4
|
+
version: 0.4.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- KITAITI Makoto
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2020-01-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -447,7 +447,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
447
447
|
- !ruby/object:Gem::Version
|
|
448
448
|
version: '0'
|
|
449
449
|
requirements: []
|
|
450
|
-
rubygems_version: 3.
|
|
450
|
+
rubygems_version: 3.1.2
|
|
451
451
|
signing_key:
|
|
452
452
|
specification_version: 4
|
|
453
453
|
summary: EPUB 3 Parser
|