epub-parser 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +16 -6
- data/CHANGELOG.adoc +7 -0
- data/README.adoc +7 -12
- data/Rakefile +2 -1
- data/bin/epub-cover +5 -3
- data/bin/epub-open +1 -1
- data/bin/epubinfo +6 -3
- data/docs/Home.adoc +2 -2
- data/lib/epub/parser.rb +14 -14
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/parser/xml_document/refinements/rexml.rb +8 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d2b5eaf4a6fef5b3d223872db25abb9a087c3ba4e3aabc8dae4f5e854313e42
|
4
|
+
data.tar.gz: 94f742b2babd8fb916ff2ac38af7e6989fdb7ffad4da0be72ffefbb0015464d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f79e57d8876ddaad3a43ac633e09e313926272e9d028b33075decb2ca6180c22ca16698d311be958834f83c01c67ce976e53e34a63ef05fa6f326e47a994222
|
7
|
+
data.tar.gz: f21296e38086c227e4cb2423e3961e80099404b43a9a13327ddd350e1642152ff19bb08f23d4b21b4bc4873d2fb0bc9ed20cd8febfbbcb6f51e4c4cafb5a51dd
|
data/.gitlab-ci.yml
CHANGED
@@ -7,7 +7,7 @@ before_script:
|
|
7
7
|
|
8
8
|
test:2.3:
|
9
9
|
image: ruby:2.3
|
10
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
10
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
11
11
|
except:
|
12
12
|
- tags
|
13
13
|
cache:
|
@@ -17,7 +17,7 @@ test:2.3:
|
|
17
17
|
|
18
18
|
test:2.4:
|
19
19
|
image: ruby:2.4
|
20
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
20
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
21
21
|
except:
|
22
22
|
- tags
|
23
23
|
cache:
|
@@ -27,7 +27,7 @@ test:2.4:
|
|
27
27
|
|
28
28
|
test:2.5:
|
29
29
|
image: ruby:2.5
|
30
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
30
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
31
31
|
except:
|
32
32
|
- tags
|
33
33
|
artifacts:
|
@@ -40,7 +40,7 @@ test:2.5:
|
|
40
40
|
|
41
41
|
test:2.6:
|
42
42
|
image: ruby:2.6
|
43
|
-
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri
|
43
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
44
44
|
except:
|
45
45
|
- tags
|
46
46
|
cache:
|
@@ -48,9 +48,19 @@ test:2.6:
|
|
48
48
|
paths:
|
49
49
|
- deps
|
50
50
|
|
51
|
+
test:2.7:
|
52
|
+
image: ruby:2.7
|
53
|
+
script: bundle exec rake test XML_BACKEND=REXML 2>/dev/null && bundle exec rake test XML_BACKEND=Oga 2>/dev/null && bundle exec rake test XML_BACKEND=Nokogiri 2>/dev/null
|
54
|
+
except:
|
55
|
+
- tags
|
56
|
+
cache:
|
57
|
+
key: ruby:2.7
|
58
|
+
paths:
|
59
|
+
- deps
|
60
|
+
|
51
61
|
pages:
|
52
62
|
stage: deploy
|
53
|
-
image: ruby:2.
|
63
|
+
image: ruby:2.7
|
54
64
|
dependencies:
|
55
65
|
- test:2.6
|
56
66
|
script:
|
@@ -63,6 +73,6 @@ pages:
|
|
63
73
|
only:
|
64
74
|
- master
|
65
75
|
cache:
|
66
|
-
key: ruby:2.
|
76
|
+
key: ruby:2.7
|
67
77
|
paths:
|
68
78
|
- deps
|
data/CHANGELOG.adoc
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
= CHANGELOG
|
2
2
|
|
3
|
+
== 0.4.2
|
4
|
+
|
5
|
+
* [BUG FIX]Fix a bug where `epub-cover` refers to an out-of-scope variable
|
6
|
+
|
7
|
+
* [BUG FIX]Fix a bug where the `epubinfo` command with the `--words` or `--chars` option causes a load error if Nokogiri is not installed in the environment
|
8
|
+
* [BUG FIX]Fix a bug where REXML::Element#content ignores descendant nodes
|
9
|
+
|
3
10
|
== 0.4.1
|
4
11
|
|
5
12
|
* Add Oga backend for XML document
|
data/README.adoc
CHANGED
@@ -174,6 +174,13 @@ If you find other gems, please tell me or request a pull request.
|
|
174
174
|
|
175
175
|
== RECENT CHANGES
|
176
176
|
|
177
|
+
=== 0.4.2
|
178
|
+
|
179
|
+
* [BUG FIX]Fix a bug where `epub-cover` refers to an out-of-scope variable
|
180
|
+
|
181
|
+
* [BUG FIX]Fix a bug where the `epubinfo` command with the `--words` or `--chars` option causes a load error if Nokogiri is not installed in the environment
|
182
|
+
* [BUG FIX]Fix a bug where REXML::Element#content ignores descendant nodes
|
183
|
+
|
177
184
|
=== 0.4.1
|
178
185
|
|
179
186
|
* Add Oga backend for XML document
|
@@ -182,18 +189,6 @@ If you find other gems, please tell me or request a pull request.
|
|
182
189
|
|
183
190
|
* [BUG FIX]Make epub:type a Set
|
184
191
|
|
185
|
-
=== 0.3.9
|
186
|
-
|
187
|
-
* [BUG FIX]Set {EPUB::Metadata::DCMES#lang} properly from xml:lang attribute
|
188
|
-
* Change default XML backend from REXML to Nokogiri
|
189
|
-
|
190
|
-
=== 0.3.8
|
191
|
-
|
192
|
-
* [REFACTORING]Add {EPUB::Parser::NokogiriAttributeWithPrefix} and use `Nokogiri::XML::Node#attribute_with_prefix` instead of `EPUB::Parser::Utils#extract_attribute`
|
193
|
-
* Set default value for detect_encoding argument for {EPUB::Publication::Package::Manifest::Item#read} to false
|
194
|
-
* Make XML library switchable between REXML and Nokogiri
|
195
|
-
* Make REXML a default XML backend
|
196
|
-
|
197
192
|
See {file:CHANGELOG.adoc} for older changelogs and details.
|
198
193
|
|
199
194
|
== TODOS
|
data/Rakefile
CHANGED
@@ -3,7 +3,6 @@ require 'rake/testtask'
|
|
3
3
|
require 'rubygems/tasks'
|
4
4
|
require 'yard'
|
5
5
|
require 'rdoc/task'
|
6
|
-
require 'epub/parser/version'
|
7
6
|
require 'archive/zip'
|
8
7
|
require 'epub/maker'
|
9
8
|
require "tmpdir"
|
@@ -19,6 +18,8 @@ namespace :test do
|
|
19
18
|
|
20
19
|
file "test/fixtures/book.epub" => "test/fixtures/book" do |task|
|
21
20
|
EPUB::Maker.archive task.source
|
21
|
+
# We cannot include "CASE-SENSITIVE.xhtml" in the Git repository because
|
22
|
+
# macOS removes either it or case-sensitive.xhtml from the file system.
|
22
23
|
small_file = File.read("#{task.source}/OPS/case-sensitive.xhtml")
|
23
24
|
Dir.mktmpdir do |dir|
|
24
25
|
upcase_file_path = File.join(dir, "CASE-SENSITIVE.xhtml")
|
data/bin/epub-cover
CHANGED
@@ -32,7 +32,7 @@ EOB
|
|
32
32
|
end
|
33
33
|
error "output not a directory" if options["output"] && !File.directory?(options["output"])
|
34
34
|
cover_image = EPUB::Parser.parse(path).cover_image
|
35
|
-
error "cover image not found" unless cover_image
|
35
|
+
error "cover image not found", option_parser.program_name, option_parser.help unless cover_image
|
36
36
|
path = File.basename(cover_image.href.to_s)
|
37
37
|
path = File.join(options["output"], path) if options["output"]
|
38
38
|
File.write path, cover_image.read
|
@@ -41,10 +41,12 @@ EOB
|
|
41
41
|
$stderr.puts ""
|
42
42
|
end
|
43
43
|
|
44
|
-
def error(message)
|
44
|
+
def error(message, program_name, help)
|
45
45
|
$stderr.puts "Error: #{message}"
|
46
46
|
$stderr.puts ""
|
47
|
-
$stderr.puts
|
47
|
+
$stderr.puts program_name
|
48
|
+
$stderr.puts "=" * program_name.length
|
49
|
+
$stderr.puts help
|
48
50
|
abort
|
49
51
|
end
|
50
52
|
|
data/bin/epub-open
CHANGED
data/bin/epubinfo
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'optparse'
|
2
2
|
require 'epub/parser'
|
3
3
|
|
4
|
+
using EPUB::Parser::XMLDocument::Refinements
|
5
|
+
|
4
6
|
options = {:format => :line}
|
5
7
|
opt = OptionParser.new do |opt|
|
6
8
|
opt.banner = <<EOB
|
@@ -51,12 +53,13 @@ end
|
|
51
53
|
|
52
54
|
counts = {:chars => 0, :words => 0}
|
53
55
|
if options[:words] or options[:chars]
|
56
|
+
namespaces = {"xhtml" => "http://www.w3.org/1999/xhtml"}
|
54
57
|
book.resources.select(&:xhtml?).each do |xhtml|
|
55
58
|
begin
|
56
|
-
doc = xhtml.
|
57
|
-
body = doc.
|
58
|
-
content = body.content
|
59
|
+
doc = EPUB::Parser::XMLDocument.new(xhtml.read)
|
60
|
+
body = doc.each_element_by_xpath('//xhtml:body', namespaces).first
|
59
61
|
if body
|
62
|
+
content = body.content
|
60
63
|
counts[:words] += content.scan(/\S+/).length
|
61
64
|
counts[:chars] += content.gsub(/\r|\n/, '').length
|
62
65
|
end
|
data/docs/Home.adoc
CHANGED
@@ -89,7 +89,7 @@ end
|
|
89
89
|
|
90
90
|
book = EPUB::Parser.parse(
|
91
91
|
'uploaded-book.epub',
|
92
|
-
:
|
92
|
+
class: YourBook # *************** pass YourBook class
|
93
93
|
)
|
94
94
|
book.instance_of? YourBook # => true
|
95
95
|
book.required = 'value for required field'
|
@@ -110,7 +110,7 @@ You are also able to find YourBook object for the first:
|
|
110
110
|
book = YourBook.find params[:id]
|
111
111
|
ret = EPUB::Parser.parse(
|
112
112
|
'uploaded-book.epub',
|
113
|
-
:
|
113
|
+
book: book # ******************* pass your book instance
|
114
114
|
) # => book
|
115
115
|
ret == book # => true; I feel this API is not good... Suggestions are welcome!
|
116
116
|
# do something with your book
|
data/lib/epub/parser.rb
CHANGED
@@ -12,14 +12,14 @@ module EPUB
|
|
12
12
|
#
|
13
13
|
# @example
|
14
14
|
# class MyBook
|
15
|
-
# include EPUB
|
15
|
+
# include EPUB::Book::Feature
|
16
16
|
# end
|
17
17
|
# book = MyBook.new
|
18
|
-
# parsed_book = EPUB::Parser.parse('path/to/book.epub', :
|
18
|
+
# parsed_book = EPUB::Parser.parse('path/to/book.epub', book: book) # => #<MyBook:0x000000019760e8 @epub_file=..>
|
19
19
|
# parsed_book.equal? book # => true
|
20
20
|
#
|
21
21
|
# @example
|
22
|
-
# book = EPUB::Parser.parse('path/to/book.epub', :
|
22
|
+
# book = EPUB::Parser.parse('path/to/book.epub', class: MyBook) # => #<MyBook:0x000000019b0568 @epub_file=...>
|
23
23
|
# book.instance_of? MyBook # => true
|
24
24
|
#
|
25
25
|
# @param [String] filepath
|
@@ -35,21 +35,21 @@ module EPUB
|
|
35
35
|
# When the :book option is passed, returns that same object with its EPUB-related attributes set.
|
36
36
|
# When the :class option is passed, returns an instance of that class.
|
37
37
|
# Otherwise returns an {EPUB::Book} object.
|
38
|
-
def parse(filepath, **options)
|
39
|
-
new(filepath, options).parse
|
38
|
+
def parse(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
|
39
|
+
new(filepath, container_adapter: container_adapter, book: book, initialize_with: initialize_with, **options).parse
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
def initialize(filepath, **options)
|
44
|
-
path_is_uri = (
|
45
|
-
|
43
|
+
def initialize(filepath, container_adapter: nil, book: nil, initialize_with: nil, **options)
|
44
|
+
path_is_uri = (container_adapter == EPUB::OCF::PhysicalContainer::UnpackedURI or
|
45
|
+
container_adapter == :UnpackedURI or
|
46
46
|
EPUB::OCF::PhysicalContainer.adapter == EPUB::OCF::PhysicalContainer::UnpackedURI)
|
47
47
|
|
48
48
|
raise "File #{filepath} not found" if
|
49
49
|
!path_is_uri and !File.exist?(filepath)
|
50
50
|
|
51
51
|
@filepath = path_is_uri ? filepath : File.realpath(filepath)
|
52
|
-
@book = create_book(options)
|
52
|
+
@book = create_book(book: book, initialize_with: initialize_with, **options)
|
53
53
|
if path_is_uri
|
54
54
|
@book.container_adapter = :UnpackedURI
|
55
55
|
elsif File.directory? @filepath
|
@@ -77,13 +77,13 @@ module EPUB
|
|
77
77
|
|
78
78
|
private
|
79
79
|
|
80
|
-
def create_book(params)
|
80
|
+
def create_book(book: nil, initialize_with: nil, **params)
|
81
81
|
case
|
82
|
-
when
|
83
|
-
|
82
|
+
when book
|
83
|
+
book
|
84
84
|
when params[:class]
|
85
|
-
if
|
86
|
-
params[:class].new
|
85
|
+
if initialize_with
|
86
|
+
params[:class].new initialize_with
|
87
87
|
else
|
88
88
|
params[:class].new
|
89
89
|
end
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/parser/xml_document/refinements/rexml.rb
CHANGED
@@ -27,7 +27,14 @@ module EPUB
|
|
27
27
|
alias namespace_uri namespace
|
28
28
|
|
29
29
|
def content
|
30
|
-
|
30
|
+
each_child.inject("") {|text, node|
|
31
|
+
case node.node_type
|
32
|
+
when :document, :element
|
33
|
+
text << node.content
|
34
|
+
when :text
|
35
|
+
text << node.value
|
36
|
+
end
|
37
|
+
}
|
31
38
|
end
|
32
39
|
end
|
33
40
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -447,7 +447,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
447
447
|
- !ruby/object:Gem::Version
|
448
448
|
version: '0'
|
449
449
|
requirements: []
|
450
|
-
rubygems_version: 3.
|
450
|
+
rubygems_version: 3.1.2
|
451
451
|
signing_key:
|
452
452
|
specification_version: 4
|
453
453
|
summary: EPUB 3 Parser
|