epub-parser 0.3.7 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +1 -1
- data/CHANGELOG.adoc +7 -0
- data/README.adoc +9 -11
- data/Rakefile +30 -7
- data/docs/Home.adoc +8 -2
- data/epub-parser.gemspec +2 -2
- data/lib/epub/content_document/xhtml.rb +2 -2
- data/lib/epub/parser.rb +1 -2
- data/lib/epub/parser/content_document.rb +28 -24
- data/lib/epub/parser/metadata.rb +9 -8
- data/lib/epub/parser/ocf.rb +9 -9
- data/lib/epub/parser/publication.rb +24 -25
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/parser/xml_document.rb +77 -0
- data/lib/epub/publication/package.rb +1 -0
- data/lib/epub/publication/package/manifest.rb +2 -4
- data/lib/epub/searcher/publication.rb +17 -8
- data/lib/epub/searcher/xhtml.rb +9 -7
- data/test/helper.rb +1 -0
- data/test/test_ocf_physical_container.rb +12 -8
- data/test/test_parser_ocf.rb +2 -7
- data/test/test_parser_publication.rb +1 -9
- data/test/test_searcher.rb +1 -1
- metadata +16 -16
- data/lib/epub/parser/utils.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 02fa97ea55de70030b58276b77bfbba26d43f5f99a77c7bffb86aec8b6afaf9e
|
4
|
+
data.tar.gz: c07fe68a8715101082628bb10fee8f3869d3874467aeba8598df89e79a72e442
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c03dcea199c65a84f1c184bbfa07c6d42583167ab26398e3159ec421e2c3205b57e39200d006f22f7f3fbf6dbb7567a7b9424d309123e724dc59ce584ee790e
|
7
|
+
data.tar.gz: a04003670af41618c26ecb7acd1ec0cda0063961b40c3b5fa0c9d6de21e2c7462eaac7205d6a577df69a05056f04f85ec8cf9044262e2d6bd9c24933d2ca4c04
|
data/.gitlab-ci.yml
CHANGED
data/CHANGELOG.adoc
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
= CHANGELOG
|
2
2
|
|
3
|
+
== 0.3.8
|
4
|
+
|
5
|
+
* [REFACTORING]Add {EPUB::Parser::NokogiriAttributeWithPrefix} and use `Nokogiri::XML::Node#attribute_with_prefix` instead of `EPUB::Parser::Utils#extract_attribute`
|
6
|
+
* Set default value for detect_encoding argument for {EPUB::Publication::Package::Manifest::Item#read} to false
|
7
|
+
* Make XML library switchable between REXML and Nokogiri
|
8
|
+
* Make REXML a default XML backend
|
9
|
+
|
3
10
|
== 0.3.7
|
4
11
|
|
5
12
|
* Strip leading and trailing white spaces from identifiers. See http://www.idpf.org/epub/31/spec/epub-packages.html#sec-opf-dcidentifier for details.
|
data/README.adoc
CHANGED
@@ -3,7 +3,6 @@
|
|
3
3
|
= {doctitle}
|
4
4
|
|
5
5
|
image:https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg[link="https://gitlab.com/KitaitiMakoto/epub-parser/commits/master", title="pipeline status"]
|
6
|
-
image:https://gemnasium.com/KitaitiMakoto/epub-parser.png[link="https://gitlab.com/KitaitiMakoto/epub-parser/commits/master",title="Dependency Status"]
|
7
6
|
image:https://badge.fury.io/rb/epub-parser.svg[link="https://gemnasium.com/KitaitiMakoto/epub-parser",title="Gem Version"]
|
8
7
|
image:https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/coverage.svg[link="https://kitaitimakoto.gitlab.io/epub-parser/coverage/",title="coverage report"]
|
9
8
|
|
@@ -160,8 +159,6 @@ Then documentation will be available in `doc` directory.
|
|
160
159
|
== REQUIREMENTS
|
161
160
|
|
162
161
|
* Ruby 2.3.0 or later
|
163
|
-
* `patch` command to install Nokogiri
|
164
|
-
* C compiler to compile Nokogiri
|
165
162
|
|
166
163
|
== SIMILAR EFFORTS
|
167
164
|
|
@@ -177,6 +174,13 @@ If you find other gems, please tell me or request a pull request.
|
|
177
174
|
|
178
175
|
== RECENT CHANGES
|
179
176
|
|
177
|
+
=== 0.3.8
|
178
|
+
|
179
|
+
* [REFACTORING]Add {EPUB::Parser::NokogiriAttributeWithPrefix} and use `Nokogiri::XML::Node#attribute_with_prefix` instead of `EPUB::Parser::Utils#extract_attribute`
|
180
|
+
* Set default value for detect_encoding argument for {EPUB::Publication::Package::Manifest::Item#read} to false
|
181
|
+
* Make XML library switchable between REXML and Nokogiri
|
182
|
+
* Make REXML a default XML backend
|
183
|
+
|
180
184
|
=== 0.3.7
|
181
185
|
|
182
186
|
* Strip leading and trailing white spaces from identifiers
|
@@ -189,25 +193,18 @@ If you find other gems, please tell me or request a pull request.
|
|
189
193
|
* [BUG FIX]Ignore fragment when find item by relative IRI
|
190
194
|
* Disable https://github.com/ko1/pretty_backtrace[PrettyBacktrace] by default
|
191
195
|
|
192
|
-
=== 0.3.5
|
193
|
-
|
194
|
-
* [BUG FIX]Fix a bug that {EPUB::ContentDocument::Navigation::Item#item} is `nil` when `href` includes double dots(`..`)(Thanks https://gitlab.com/aelkiss[aelkiss]!)
|
195
|
-
|
196
196
|
See {file:CHANGELOG.adoc} for older changelogs and details.
|
197
197
|
|
198
198
|
== TODOS
|
199
199
|
|
200
200
|
* Consider implementing an IRI feature instead of using Addressable
|
201
|
-
* EPUB 3.
|
202
|
-
* EPUB 3.1
|
201
|
+
* EPUB 3.2
|
203
202
|
* Help features for `epub-open` tool
|
204
203
|
* Vocabulary Association Mechanisms
|
205
204
|
* Implementing navigation document and so on
|
206
205
|
* Media Overlays
|
207
206
|
* Content Document
|
208
207
|
* Digital Signature
|
209
|
-
* Using SAX on parsing
|
210
|
-
* Abstraction of XML parser(making it possible to use REXML, standard bundled XML library of Ruby)
|
211
208
|
* Handle encodings other than UTF-8
|
212
209
|
|
213
210
|
== DONE
|
@@ -221,6 +218,7 @@ See {file:CHANGELOG.adoc} for older changelogs and details.
|
|
221
218
|
* Archive library abstraction
|
222
219
|
* Extracting and organizing common behavior from some classes to modules
|
223
220
|
* Multiple rootfiles
|
221
|
+
* Abstraction of XML parser(making it possible to use REXML, standard bundled XML library of Ruby)
|
224
222
|
|
225
223
|
== LICENSE
|
226
224
|
|
data/Rakefile
CHANGED
@@ -4,7 +4,8 @@ require 'rubygems/tasks'
|
|
4
4
|
require 'yard'
|
5
5
|
require 'rdoc/task'
|
6
6
|
require 'epub/parser/version'
|
7
|
-
require '
|
7
|
+
require 'archive/zip'
|
8
|
+
require 'stringio'
|
8
9
|
require 'epub/maker'
|
9
10
|
|
10
11
|
task :default => :test
|
@@ -21,16 +22,38 @@ namespace :test do
|
|
21
22
|
input_dir = 'test/fixtures/book'
|
22
23
|
EPUB::Maker.archive input_dir
|
23
24
|
small_file = File.read("#{input_dir}/OPS/case-sensitive.xhtml")
|
24
|
-
|
25
|
-
|
25
|
+
File.open "#{input_dir}.epub" do |archive_in|
|
26
|
+
File.open "#{input_dir}.epub.tmp", "w" do |archive_out|
|
27
|
+
Archive::Zip.open archive_in, :r do |z_in|
|
28
|
+
Archive::Zip.open archive_out, :w do |z_out|
|
29
|
+
z_in.each do |entry|
|
30
|
+
z_out << entry
|
31
|
+
end
|
32
|
+
entry = Archive::Zip::Entry::File.new("OPS/CASE-SENSITIVE.xhtml")
|
33
|
+
entry.file_data = StringIO.new(small_file.sub('small file name', 'LARGE FILE NAME'))
|
34
|
+
z_out << entry
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
26
38
|
end
|
39
|
+
File.rename "#{input_dir}.epub.tmp", "#{input_dir}.epub"
|
27
40
|
end
|
28
41
|
|
29
|
-
|
30
|
-
|
31
|
-
task.
|
32
|
-
|
42
|
+
# TODO: Test with both REXML and Nokogiri in testing framework
|
43
|
+
%w[REXML Nokogiri].each do |xml_backend|
|
44
|
+
task "set_xml_backend_#{xml_backend.downcase}" do
|
45
|
+
ENV["EPUB_PARSER_XML_BACKEND"] = xml_backend
|
46
|
+
end
|
47
|
+
|
48
|
+
Rake::TestTask.new "test_with_#{xml_backend.downcase}" do |task|
|
49
|
+
task.test_files = FileList['test/**/test_*.rb']
|
50
|
+
task.warning = true
|
51
|
+
task.options = '--no-show-detail-immediately --verbose'
|
52
|
+
EPUB::Parser::XMLDocument.backend = xml_backend
|
53
|
+
end
|
54
|
+
task "test_with_#{xml_backend.downcase}" => "set_xml_backend_#{xml_backend.downcase}"
|
33
55
|
end
|
56
|
+
task :test => [:test_with_rexml, :test_with_nokogiri]
|
34
57
|
end
|
35
58
|
|
36
59
|
task :doc => 'doc:default'
|
data/docs/Home.adoc
CHANGED
@@ -117,6 +117,14 @@ ret == book # => true; this API is not good I feel... Welcome suggestion!
|
|
117
117
|
# do something with your book
|
118
118
|
----
|
119
119
|
|
120
|
+
==== Switching XML Library
|
121
|
+
|
122
|
+
EPUB Parser uses https://ruby-doc.org/stdlib-2.5.3/libdoc/rexml/rdoc/index.html[REXML], a standard-bundled library, by default. You can use https://www.nokogiri.org/[Nokogiri], a Ruby binding for http://xmlsoft.org/[Libxml2], http://xmlsoft.org/XSLT/[Libxslt] and more, if you have already installed the Nokogiri gem with RubyGems or Bundler.
|
123
|
+
|
124
|
+
----
|
125
|
+
EPUB::Parser::XMLDocument.backend = :Nokogiri
|
126
|
+
----
|
127
|
+
|
120
128
|
==== Switching ZIP library
|
121
129
|
|
122
130
|
EPUB Parser uses https://github.com/javanthropus/archive-zip[Archive::Zip], a pure Ruby ZIP library, by default. You can use https://bitbucket.org/winebarrel/zip-ruby/wiki/Home[Zip/Ruby], a Ruby binding for https://libzip.org/[libzip], if you have already installed the Zip/Ruby gem with RubyGems or Bundler.
|
@@ -197,8 +205,6 @@ Then documentation will be available in `doc` directory.
|
|
197
205
|
== Requirements
|
198
206
|
|
199
207
|
* Ruby 2.2.0 or later
|
200
|
-
* `patch` command to install Nokogiri
|
201
|
-
* C compiler to compile Zip/Ruby and Nokogiri
|
202
208
|
|
203
209
|
== History
|
204
210
|
|
data/epub-parser.gemspec
CHANGED
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
|
27
27
|
s.add_development_dependency 'rake'
|
28
28
|
s.add_development_dependency 'rubygems-tasks'
|
29
|
-
s.add_development_dependency 'zipruby'
|
29
|
+
s.add_development_dependency 'zipruby' unless RUBY_PLATFORM.match /mingw/
|
30
30
|
s.add_development_dependency 'pry'
|
31
31
|
s.add_development_dependency 'pry-doc'
|
32
32
|
s.add_development_dependency 'test-unit'
|
@@ -39,9 +39,9 @@ Gem::Specification.new do |s|
|
|
39
39
|
s.add_development_dependency 'pretty_backtrace'
|
40
40
|
s.add_development_dependency 'epub-maker'
|
41
41
|
s.add_development_dependency 'asciidoctor'
|
42
|
+
s.add_development_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
|
42
43
|
|
43
44
|
s.add_runtime_dependency 'archive-zip'
|
44
|
-
s.add_runtime_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
|
45
45
|
s.add_runtime_dependency 'addressable', '>= 2.3.5'
|
46
46
|
s.add_runtime_dependency 'rchardet', '>= 1.6.1'
|
47
47
|
s.add_runtime_dependency 'epub-cfi'
|
@@ -18,7 +18,7 @@ module EPUB
|
|
18
18
|
# @return [String] Returns the value of title element.
|
19
19
|
# If none, returns empty string
|
20
20
|
def title
|
21
|
-
title_elem =
|
21
|
+
title_elem = rexml.get_elements('.//title').first
|
22
22
|
if title_elem
|
23
23
|
title_elem.text
|
24
24
|
else
|
@@ -29,12 +29,12 @@ module EPUB
|
|
29
29
|
|
30
30
|
# @return [REXML::Document] content as REXML::Document object
|
31
31
|
def rexml
|
32
|
-
require 'rexml/document'
|
33
32
|
@rexml ||= REXML::Document.new(raw_document)
|
34
33
|
end
|
35
34
|
|
36
35
|
# @return [Nokogiri::XML::Document] content as Nokogiri::XML::Document object
|
37
36
|
def nokogiri
|
37
|
+
require 'nokogiri'
|
38
38
|
@nokogiri ||= Nokogiri.XML(raw_document)
|
39
39
|
end
|
40
40
|
end
|
data/lib/epub/parser.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'epub'
|
2
2
|
require 'epub/constants'
|
3
3
|
require 'epub/book'
|
4
|
-
require 'nokogiri'
|
5
4
|
|
6
5
|
module EPUB
|
7
6
|
class Parser
|
@@ -96,7 +95,7 @@ module EPUB
|
|
96
95
|
end
|
97
96
|
|
98
97
|
require 'epub/parser/version'
|
99
|
-
require 'epub/parser/
|
98
|
+
require 'epub/parser/xml_document'
|
100
99
|
require 'epub/parser/ocf'
|
101
100
|
require 'epub/parser/publication'
|
102
101
|
require 'epub/parser/content_document'
|
@@ -1,12 +1,11 @@
|
|
1
1
|
require 'epub/content_document'
|
2
2
|
require 'epub/constants'
|
3
|
-
require 'epub/parser/
|
4
|
-
require 'nokogiri'
|
3
|
+
require 'epub/parser/xml_document'
|
5
4
|
|
6
5
|
module EPUB
|
7
6
|
class Parser
|
8
7
|
class ContentDocument
|
9
|
-
|
8
|
+
using XMLDocument::Refinements
|
10
9
|
|
11
10
|
# @param [EPUB::Publication::Package::Manifest::Item] item
|
12
11
|
def initialize(item)
|
@@ -28,7 +27,7 @@ module EPUB
|
|
28
27
|
end
|
29
28
|
return content_document if content_document.nil?
|
30
29
|
content_document.item = @item
|
31
|
-
document =
|
30
|
+
document = XMLDocument.new(@item.read)
|
32
31
|
# parse_content_document(document)
|
33
32
|
if @item.nav?
|
34
33
|
content_document.navigations = parse_navigations(document)
|
@@ -36,70 +35,75 @@ module EPUB
|
|
36
35
|
content_document
|
37
36
|
end
|
38
37
|
|
39
|
-
# @param [Nokogiri::HTML::Document] document HTML document or element including nav
|
38
|
+
# @param [XMLDocument, REXML::Document, Nokogiri::HTML::Document] document HTML document or element including nav
|
40
39
|
# @return [Array<EPUB::ContentDocument::Navigation::Nav>] navs array of Nav object
|
41
40
|
def parse_navigations(document)
|
42
|
-
document.
|
41
|
+
document.each_element_by_xpath('/xhtml:html/xhtml:body//xhtml:nav', EPUB::NAMESPACES).collect {|elem| parse_navigation elem}
|
43
42
|
end
|
44
43
|
|
45
|
-
# @param [Nokogiri::XML::Element] element nav element
|
44
|
+
# @param [REXML::Element, Nokogiri::XML::Element] element nav element
|
46
45
|
# @return [EPUB::ContentDocument::Navigation::Nav] nav Nav object
|
47
46
|
def parse_navigation(element)
|
48
47
|
nav = EPUB::ContentDocument::Navigation::Navigation.new
|
49
48
|
nav.text = find_heading(element)
|
50
|
-
hidden =
|
49
|
+
hidden = element.attribute_with_prefix('hidden')
|
51
50
|
nav.hidden = hidden.nil? ? nil : true
|
52
|
-
nav.type =
|
53
|
-
element.
|
51
|
+
nav.type = element.attribute_with_prefix('type', 'epub')
|
52
|
+
element.each_element_by_xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES).map do |elem|
|
54
53
|
nav.items << parse_navigation_item(elem)
|
55
54
|
end
|
56
55
|
|
57
56
|
nav
|
58
57
|
end
|
59
58
|
|
60
|
-
# @param [Nokogiri::XML::Element] element li element
|
59
|
+
# @param [REXML::Element, Nokogiri::XML::Element] element li element
|
61
60
|
def parse_navigation_item(element)
|
62
61
|
item = EPUB::ContentDocument::Navigation::Item.new
|
63
|
-
a_or_span = element.
|
62
|
+
a_or_span = element.each_element_by_xpath('./xhtml:a[1]|xhtml:span[1]', EPUB::NAMESPACES).first
|
64
63
|
return a_or_span if a_or_span.nil?
|
65
64
|
|
66
|
-
item.text = a_or_span.
|
65
|
+
item.text = a_or_span.content
|
67
66
|
if a_or_span.name == 'a'
|
68
67
|
if item.text.empty?
|
69
|
-
embedded_content = a_or_span.
|
68
|
+
embedded_content = a_or_span.each_element_by_xpath('./xhtml:audio[1]|xhtml:canvas[1]|xhtml:embed[1]|xhtml:iframe[1]|xhtml:img[1]|xhtml:math[1]|xhtml:object[1]|xhtml:svg[1]|xhtml:video[1]', EPUB::NAMESPACES).first
|
70
69
|
unless embedded_content.nil?
|
71
70
|
case embedded_content.name
|
72
71
|
when 'audio', 'canvas', 'embed', 'iframe'
|
73
|
-
item.text =
|
72
|
+
item.text = embedded_content.attribute_with_prefix('name') || embedded_content.attribute_with_prefix('srcdoc')
|
74
73
|
when 'img'
|
75
|
-
item.text =
|
74
|
+
item.text = embedded_content.attribute_with_prefix('alt')
|
76
75
|
when 'math', 'object'
|
77
|
-
item.text =
|
76
|
+
item.text = embedded_content.attribute_with_prefix('name')
|
78
77
|
when 'svg', 'video'
|
79
78
|
else
|
80
79
|
end
|
81
80
|
end
|
82
|
-
item.text =
|
81
|
+
item.text = a_or_span.attribute_with_prefix('title').to_s if item.text.nil? || item.text.empty?
|
83
82
|
end
|
84
|
-
item.href =
|
83
|
+
item.href = a_or_span.attribute_with_prefix('href')
|
85
84
|
item.item = @item.find_item_by_relative_iri(item.href)
|
86
85
|
end
|
87
|
-
item.items = element.
|
86
|
+
item.items = element.each_element_by_xpath('./xhtml:ol[1]/xhtml:li', EPUB::NAMESPACES).map {|li| parse_navigation_item(li)}
|
88
87
|
|
89
88
|
item
|
90
89
|
end
|
91
90
|
|
92
91
|
private
|
93
92
|
|
94
|
-
# @param [Nokogiri::XML::Element] element nav element
|
93
|
+
# @param [REXML::Element, Nokogiri::XML::Element] element nav element
|
95
94
|
# @return [String] heading heading text
|
96
95
|
def find_heading(element)
|
97
|
-
heading = element.
|
96
|
+
heading = element.each_element_by_xpath('./xhtml:h1|xhtml:h2|xhtml:h3|xhtml:h4|xhtml:h5|xhtml:h6|xhtml:hgroup', EPUB::NAMESPACES).first
|
98
97
|
|
99
98
|
return nil if heading.nil?
|
100
|
-
return heading.
|
99
|
+
return heading.content unless heading.name == 'hgroup'
|
101
100
|
|
102
|
-
(heading
|
101
|
+
(heading.each_element_by_xpath(".//xhtml:h1", EPUB::NAMESPACES) ||
|
102
|
+
heading.each_element_by_xpath(".//xhtml:h2", EPUB::NAMESPACES) ||
|
103
|
+
heading.each_element_by_xpath(".//xhtml:h3", EPUB::NAMESPACES) ||
|
104
|
+
heading.each_element_by_xpath(".//xhtml:h4", EPUB::NAMESPACES) ||
|
105
|
+
heading.each_element_by_xpath(".//xhtml:h5", EPUB::NAMESPACES) ||
|
106
|
+
heading.each_element_by_xpath(".//xhtml:h6", EPUB::NAMESPACES)).first.content
|
103
107
|
end
|
104
108
|
end
|
105
109
|
end
|
data/lib/epub/parser/metadata.rb
CHANGED
@@ -1,23 +1,24 @@
|
|
1
1
|
module EPUB
|
2
2
|
class Parser
|
3
3
|
module Metadata
|
4
|
+
using XMLDocument::Refinements
|
5
|
+
|
4
6
|
def parse_metadata(elem, unique_identifier_id, default_namespace)
|
5
7
|
metadata = EPUB::Publication::Package::Metadata.new
|
6
8
|
id_map = {}
|
7
9
|
|
8
10
|
default_namespace_uri = EPUB::NAMESPACES[default_namespace]
|
9
|
-
elem.
|
10
|
-
namespace_uri = child.namespace && child.namespace.href
|
11
|
+
elem.each_element do |child|
|
11
12
|
elem_name = child.name
|
12
13
|
|
13
14
|
model =
|
14
|
-
case namespace_uri
|
15
|
+
case child.namespace_uri
|
15
16
|
when EPUB::NAMESPACES['dc']
|
16
17
|
case elem_name
|
17
18
|
when 'identifier'
|
18
19
|
identifier = build_model(child, :Identifier, ['id'])
|
19
20
|
metadata.identifiers << identifier
|
20
|
-
identifier.scheme =
|
21
|
+
identifier.scheme = child.attribute_with_prefix('scheme', 'opf')
|
21
22
|
identifier
|
22
23
|
when 'title'
|
23
24
|
title = build_model(child, :Title)
|
@@ -44,8 +45,8 @@ module EPUB
|
|
44
45
|
when 'link'
|
45
46
|
link = build_model(child, :Link, %w[id media-type])
|
46
47
|
metadata.links << link
|
47
|
-
link.href =
|
48
|
-
link.rel = Set.new(
|
48
|
+
link.href = child.attribute_with_prefix('href')
|
49
|
+
link.rel = Set.new(child.attribute_with_prefix('rel').split(/\s+/))
|
49
50
|
link
|
50
51
|
else
|
51
52
|
build_unsupported_model(child)
|
@@ -65,7 +66,7 @@ module EPUB
|
|
65
66
|
id_map[model.id] = {refinee: model}
|
66
67
|
end
|
67
68
|
|
68
|
-
refines =
|
69
|
+
refines = child.attribute_with_prefix('refines')
|
69
70
|
if refines && refines.start_with?('#')
|
70
71
|
id = refines[1..-1]
|
71
72
|
id_map[id] ||= {}
|
@@ -87,7 +88,7 @@ module EPUB
|
|
87
88
|
model = EPUB::Metadata.const_get(klass).new
|
88
89
|
attributes.each do |attr|
|
89
90
|
writer_name = (attr == "content") ? "meta_content=" : "#{attr.gsub('-', '_')}="
|
90
|
-
model.__send__ writer_name,
|
91
|
+
model.__send__ writer_name, elem.attribute_with_prefix(attr)
|
91
92
|
end
|
92
93
|
model.content = elem.content unless klass == :Link
|
93
94
|
model.content.strip! if klass == :Identifier
|
data/lib/epub/parser/ocf.rb
CHANGED
@@ -2,12 +2,12 @@ require 'epub/constants'
|
|
2
2
|
require 'epub/ocf'
|
3
3
|
require 'epub/ocf/physical_container'
|
4
4
|
require 'epub/parser/metadata'
|
5
|
-
require
|
5
|
+
require "epub/parser/xml_document"
|
6
6
|
|
7
7
|
module EPUB
|
8
8
|
class Parser
|
9
9
|
class OCF
|
10
|
-
|
10
|
+
using XMLDocument::Refinements
|
11
11
|
include Metadata
|
12
12
|
|
13
13
|
DIRECTORY = 'META-INF'
|
@@ -37,11 +37,11 @@ module EPUB
|
|
37
37
|
|
38
38
|
def parse_container(xml)
|
39
39
|
container = EPUB::OCF::Container.new
|
40
|
-
doc =
|
41
|
-
doc.
|
40
|
+
doc = XMLDocument.new(xml)
|
41
|
+
doc.each_element_by_xpath "/ocf:container/ocf:rootfiles/ocf:rootfile", EPUB::NAMESPACES do |elem|
|
42
42
|
rootfile = EPUB::OCF::Container::Rootfile.new
|
43
|
-
rootfile.full_path = Addressable::URI.parse(
|
44
|
-
rootfile.media_type =
|
43
|
+
rootfile.full_path = Addressable::URI.parse(elem.attribute_with_prefix('full-path'))
|
44
|
+
rootfile.media_type = elem.attribute_with_prefix('media-type')
|
45
45
|
container.rootfiles << rootfile
|
46
46
|
end
|
47
47
|
|
@@ -59,14 +59,14 @@ module EPUB
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def parse_metadata(content)
|
62
|
-
doc =
|
62
|
+
doc = XMLDocument.new(content)
|
63
63
|
unless multiple_rendition_metadata?(doc)
|
64
64
|
warn "Not implemented: #{self.class}##{__method__}" if $VERBOSE
|
65
65
|
metadata = EPUB::OCF::UnknownFormatMetadata.new
|
66
66
|
metadata.content = content
|
67
67
|
return metadata
|
68
68
|
end
|
69
|
-
super(doc.root, doc.root
|
69
|
+
super(doc.root, doc.root.attribute_with_prefix('unique-identifier'), 'metadata')
|
70
70
|
end
|
71
71
|
|
72
72
|
def parse_rights(content)
|
@@ -82,7 +82,7 @@ module EPUB
|
|
82
82
|
def multiple_rendition_metadata?(doc)
|
83
83
|
doc.root &&
|
84
84
|
doc.root.name == 'metadata' &&
|
85
|
-
doc.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
|
85
|
+
doc.root.namespaces['xmlns'] == EPUB::NAMESPACES['metadata']
|
86
86
|
end
|
87
87
|
end
|
88
88
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'strscan'
|
2
|
-
require 'nokogiri'
|
3
2
|
require 'epub/publication'
|
4
3
|
require 'epub/constants'
|
5
4
|
require 'epub/parser/metadata'
|
@@ -7,7 +6,7 @@ require 'epub/parser/metadata'
|
|
7
6
|
module EPUB
|
8
7
|
class Parser
|
9
8
|
class Publication
|
10
|
-
|
9
|
+
using XMLDocument::Refinements
|
11
10
|
include Metadata
|
12
11
|
|
13
12
|
class << self
|
@@ -19,7 +18,7 @@ module EPUB
|
|
19
18
|
end
|
20
19
|
|
21
20
|
def initialize(opf)
|
22
|
-
@doc =
|
21
|
+
@doc = XMLDocument.new(opf)
|
23
22
|
end
|
24
23
|
|
25
24
|
def parse
|
@@ -36,33 +35,33 @@ module EPUB
|
|
36
35
|
package = EPUB::Publication::Package.new
|
37
36
|
elem = doc.root
|
38
37
|
%w[version xml:lang dir id].each do |attr|
|
39
|
-
package.__send__ "#{attr.gsub(/\:/, '_')}=",
|
38
|
+
package.__send__ "#{attr.gsub(/\:/, '_')}=", elem.attribute_with_prefix(attr)
|
40
39
|
end
|
41
|
-
package.prefix = parse_prefix(
|
40
|
+
package.prefix = parse_prefix(elem.attribute_with_prefix('prefix'))
|
42
41
|
EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout if package.prefix.key? EPUB::Publication::FixedLayout::PREFIX_KEY
|
43
42
|
|
44
43
|
package
|
45
44
|
end
|
46
45
|
|
47
46
|
def parse_metadata(doc)
|
48
|
-
super(doc.
|
47
|
+
super(doc.each_element_by_xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first, doc.root.attribute_with_prefix('unique-identifier'), 'opf')
|
49
48
|
end
|
50
49
|
|
51
50
|
def parse_manifest(doc)
|
52
51
|
manifest = EPUB::Publication::Package::Manifest.new
|
53
|
-
elem = doc.
|
54
|
-
manifest.id =
|
52
|
+
elem = doc.each_element_by_xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
|
53
|
+
manifest.id = elem.attribute_with_prefix('id')
|
55
54
|
|
56
55
|
fallback_map = {}
|
57
|
-
elem.
|
56
|
+
elem.each_element_by_xpath('./opf:item', EPUB::NAMESPACES).each do |e|
|
58
57
|
item = EPUB::Publication::Package::Manifest::Item.new
|
59
58
|
%w[id media-type media-overlay].each do |attr|
|
60
|
-
item.__send__ "#{attr.gsub(/-/, '_')}=",
|
59
|
+
item.__send__ "#{attr.gsub(/-/, '_')}=", e.attribute_with_prefix(attr)
|
61
60
|
end
|
62
|
-
item.href =
|
63
|
-
fallback =
|
61
|
+
item.href = e.attribute_with_prefix('href')
|
62
|
+
fallback = e.attribute_with_prefix('fallback')
|
64
63
|
fallback_map[fallback] = item if fallback
|
65
|
-
properties =
|
64
|
+
properties = e.attribute_with_prefix('properties')
|
66
65
|
item.properties = properties.split(' ') if properties
|
67
66
|
manifest << item
|
68
67
|
end
|
@@ -75,18 +74,18 @@ module EPUB
|
|
75
74
|
|
76
75
|
def parse_spine(doc)
|
77
76
|
spine = EPUB::Publication::Package::Spine.new
|
78
|
-
elem = doc.
|
77
|
+
elem = doc.each_element_by_xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
|
79
78
|
%w[id toc page-progression-direction].each do |attr|
|
80
|
-
spine.__send__ "#{attr.gsub(/-/, '_')}=",
|
79
|
+
spine.__send__ "#{attr.gsub(/-/, '_')}=", elem.attribute_with_prefix(attr)
|
81
80
|
end
|
82
81
|
|
83
|
-
elem.
|
82
|
+
elem.each_element_by_xpath('./opf:itemref', EPUB::NAMESPACES).each do |e|
|
84
83
|
itemref = EPUB::Publication::Package::Spine::Itemref.new
|
85
84
|
%w[idref id].each do |attr|
|
86
|
-
itemref.__send__ "#{attr}=",
|
85
|
+
itemref.__send__ "#{attr}=", e.attribute_with_prefix(attr)
|
87
86
|
end
|
88
|
-
itemref.linear = (
|
89
|
-
properties =
|
87
|
+
itemref.linear = (e.attribute_with_prefix('linear') != 'no')
|
88
|
+
properties = e.attribute_with_prefix('properties')
|
90
89
|
itemref.properties = properties.split(' ') if properties
|
91
90
|
spine << itemref
|
92
91
|
end
|
@@ -96,12 +95,12 @@ module EPUB
|
|
96
95
|
|
97
96
|
def parse_guide(doc)
|
98
97
|
guide = EPUB::Publication::Package::Guide.new
|
99
|
-
doc.
|
98
|
+
doc.each_element_by_xpath '/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES do |ref|
|
100
99
|
reference = EPUB::Publication::Package::Guide::Reference.new
|
101
100
|
%w[type title].each do |attr|
|
102
|
-
reference.__send__ "#{attr}=",
|
101
|
+
reference.__send__ "#{attr}=", ref.attribute_with_prefix(attr)
|
103
102
|
end
|
104
|
-
reference.href =
|
103
|
+
reference.href = ref.attribute_with_prefix('href')
|
105
104
|
guide << reference
|
106
105
|
end
|
107
106
|
|
@@ -110,10 +109,10 @@ module EPUB
|
|
110
109
|
|
111
110
|
def parse_bindings(doc, handler_map)
|
112
111
|
bindings = EPUB::Publication::Package::Bindings.new
|
113
|
-
doc.
|
112
|
+
doc.each_element_by_xpath '/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES do |elem|
|
114
113
|
media_type = EPUB::Publication::Package::Bindings::MediaType.new
|
115
|
-
media_type.media_type =
|
116
|
-
media_type.handler = handler_map[
|
114
|
+
media_type.media_type = elem.attribute_with_prefix('media-type')
|
115
|
+
media_type.handler = handler_map[elem.attribute_with_prefix('handler')]
|
117
116
|
bindings << media_type
|
118
117
|
end
|
119
118
|
|
data/lib/epub/parser/version.rb
CHANGED
@@ -0,0 +1,77 @@
|
|
1
|
+
require "rexml/document"
|
2
|
+
begin
|
3
|
+
require "nokogiri"
|
4
|
+
rescue LoadError
|
5
|
+
end
|
6
|
+
|
7
|
+
module EPUB
|
8
|
+
class Parser
|
9
|
+
class XMLDocument
|
10
|
+
class << self
|
11
|
+
attr_accessor :backend
|
12
|
+
|
13
|
+
def new(xml)
|
14
|
+
if backend == :Nokogiri
|
15
|
+
Nokogiri.XML(xml)
|
16
|
+
else
|
17
|
+
REXML::Document.new(xml)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
module Refinements
|
23
|
+
[REXML::Element, REXML::Text].each do |klass|
|
24
|
+
refine klass do
|
25
|
+
%i[document element text].each do |type|
|
26
|
+
define_method "#{type}?" do
|
27
|
+
node_type == type
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
refine REXML::Element do
|
34
|
+
def each_element_by_xpath(xpath, namespaces = nil, &block)
|
35
|
+
REXML::XPath.each self, xpath, namespaces, &block
|
36
|
+
end
|
37
|
+
|
38
|
+
def attribute_with_prefix(name, prefix = nil)
|
39
|
+
attribute(name, EPUB::NAMESPACES[prefix])&.value
|
40
|
+
end
|
41
|
+
|
42
|
+
alias namespace_uri namespace
|
43
|
+
|
44
|
+
def content
|
45
|
+
texts.join
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
refine REXML::Text do
|
50
|
+
alias content value
|
51
|
+
end
|
52
|
+
|
53
|
+
if const_defined? :Nokogiri
|
54
|
+
refine Nokogiri::XML::Node do
|
55
|
+
def each_element_by_xpath(xpath, namespaces = nil, &block)
|
56
|
+
xpath(xpath, namespaces).each &block
|
57
|
+
end
|
58
|
+
|
59
|
+
def attribute_with_prefix(name, prefix = nil)
|
60
|
+
attribute_with_ns(name, EPUB::NAMESPACES[prefix])&.value
|
61
|
+
end
|
62
|
+
|
63
|
+
def each_element(xpath = nil, &block)
|
64
|
+
element_children.each(&block)
|
65
|
+
end
|
66
|
+
|
67
|
+
alias elements element_children
|
68
|
+
|
69
|
+
def namespace_uri
|
70
|
+
namespace.href
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -136,14 +136,12 @@ module EPUB
|
|
136
136
|
|
137
137
|
# Read content from EPUB archive
|
138
138
|
#
|
139
|
-
# @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value
|
139
|
+
# @param detect_encoding [Boolean] Whether #read tries auto-detection of character encoding. The default value is +false+.
|
140
140
|
# @return [String] Content with encoding:
|
141
141
|
# US-ASCII when the content is not in text format, such as images.
|
142
142
|
# UTF-8 when the content is in text format and +detect_encoding+ is +false+.
|
143
143
|
# auto-detected encoding when the content is in text format and +detect_encoding+ is +true+.
|
144
|
-
def read(detect_encoding:
|
145
|
-
warn "[#{self.class}##{__method__}]Autodetection of character encoding is deprecated. Pass keyword argument detect_encoding with true explicitly." if detect_encoding
|
146
|
-
|
144
|
+
def read(detect_encoding: false)
|
147
145
|
raw_content = manifest.package.book.container_adapter.read(manifest.package.book.epub_file, entry_name)
|
148
146
|
|
149
147
|
unless media_type.start_with?('text/') or
|
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'epub/publication'
|
2
|
+
require "epub/parser/xml_document"
|
2
3
|
|
3
4
|
module EPUB
|
4
5
|
module Searcher
|
5
6
|
class Publication
|
7
|
+
using Parser::XMLDocument::Refinements
|
8
|
+
|
6
9
|
class << self
|
7
10
|
def search_text(package, word, **options)
|
8
11
|
new(package).search_text(word, options)
|
@@ -28,7 +31,7 @@ module EPUB
|
|
28
31
|
spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
|
29
32
|
spine.each_itemref.with_index do |itemref, index|
|
30
33
|
itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
|
31
|
-
XHTML::ALGORITHMS[algorithm].search_text(
|
34
|
+
XHTML::ALGORITHMS[algorithm].search_text(Parser::XMLDocument.new(itemref.item.read), word).each do |sub_result|
|
32
35
|
results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
|
33
36
|
end
|
34
37
|
end
|
@@ -38,7 +41,7 @@ module EPUB
|
|
38
41
|
|
39
42
|
# @todo: Refactoring
|
40
43
|
# @return [Array<Hash>] An array of search results. Each result is composed of:
|
41
|
-
# :element: [Nokogiri::XML::ELement] Found element
|
44
|
+
# :element: [REXML::Element, Nokogiri::XML::Element] Found element
|
42
45
|
# :itemref: [EPUB::Publication::Package::Spine::Itemref] Itemref that element's document belongs to
|
43
46
|
# :location: [EPUB::CFI::Location] CFI that indicates the element
|
44
47
|
# :package: [EPUB::Publication::Package] Package that the element belongs to
|
@@ -55,10 +58,15 @@ module EPUB
|
|
55
58
|
path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
|
56
59
|
content_document = itemref.item.content_document
|
57
60
|
next unless content_document
|
58
|
-
doc = content_document.nokogiri
|
59
61
|
elems = if xpath
|
60
|
-
doc.
|
62
|
+
doc = Parser::XMLDocument.new(content_document.read)
|
63
|
+
doc.each_element_by_xpath(xpath, namespaces)
|
61
64
|
else
|
65
|
+
begin
|
66
|
+
doc = content_document.nokogiri
|
67
|
+
rescue LoadError
|
68
|
+
raise "#{self.class}##{__method__} with `css` argument requires Nokogiri gem for now. Install Nokogiri and then try again."
|
69
|
+
end
|
62
70
|
doc.css(css)
|
63
71
|
end
|
64
72
|
elems.each do |elem|
|
@@ -96,13 +104,13 @@ module EPUB
|
|
96
104
|
current_node = doc.root
|
97
105
|
path_in_doc.steps.each do |step|
|
98
106
|
if step.element?
|
99
|
-
current_node = current_node.
|
107
|
+
current_node = current_node.elements.to_a[step.value / 2 - 1]
|
100
108
|
else
|
101
109
|
element_index = (step.value - 1) / 2 - 1
|
102
110
|
if element_index == -1
|
103
111
|
current_node = current_node.children.first
|
104
112
|
else
|
105
|
-
prev = current_node.
|
113
|
+
prev = current_node.elements.to_a[element_index]
|
106
114
|
break unless prev
|
107
115
|
current_node = prev.next_sibling
|
108
116
|
break unless current_node
|
@@ -120,8 +128,9 @@ module EPUB
|
|
120
128
|
def find_path(elem)
|
121
129
|
steps = []
|
122
130
|
until elem.parent.document?
|
123
|
-
index = elem.parent.
|
124
|
-
|
131
|
+
index = elem.parent.elements.to_a.index(elem)
|
132
|
+
id_attr = elem.attribute_with_prefix("id")
|
133
|
+
assertion = id_attr ? EPUB::CFI::IDAssertion.new(id_attr) : nil
|
125
134
|
steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
126
135
|
elem = elem.parent
|
127
136
|
end
|
data/lib/epub/searcher/xhtml.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
require 'epub'
|
2
|
-
require 'epub/parser/
|
2
|
+
require 'epub/parser/xml_document'
|
3
3
|
|
4
4
|
module EPUB
|
5
5
|
module Searcher
|
6
6
|
class XHTML
|
7
|
+
using Parser::XMLDocument::Refinements
|
8
|
+
|
7
9
|
ALGORITHMS = {}
|
8
10
|
|
9
11
|
class << self
|
10
|
-
# @param element [Nokogiri::XML::Element, Nokogiri::XML::Document]
|
12
|
+
# @param element [REXML::Element, REXML::Document, Nokogiri::XML::Element, Nokogiri::XML::Document]
|
11
13
|
# @param word [String]
|
12
14
|
# @return [Array<Result>]
|
13
15
|
def search_text(element, word)
|
@@ -21,7 +23,7 @@ module EPUB
|
|
21
23
|
end
|
22
24
|
|
23
25
|
class Restricted < self
|
24
|
-
# @param element [Nokogiri::XML::Element]
|
26
|
+
# @param element [REXML::Element, Nokogiri::XML::Element]
|
25
27
|
# @return [Array<Result>]
|
26
28
|
def search_text(word, element=nil)
|
27
29
|
results = []
|
@@ -29,9 +31,9 @@ module EPUB
|
|
29
31
|
elem_index = 0
|
30
32
|
(element || @element).children.each do |child|
|
31
33
|
if child.element?
|
32
|
-
child_step = Result::Step.new(:element, elem_index, {:name => child.name, :id =>
|
34
|
+
child_step = Result::Step.new(:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')})
|
33
35
|
if child.name == 'img'
|
34
|
-
if
|
36
|
+
if child.attribute_with_prefix('alt').index(word)
|
35
37
|
results << Result.new([child_step], nil, nil)
|
36
38
|
end
|
37
39
|
else
|
@@ -76,10 +78,10 @@ module EPUB
|
|
76
78
|
elem_index = 0
|
77
79
|
element.children.each do |child|
|
78
80
|
if child.element?
|
79
|
-
child_step = [:element, elem_index, {:name => child.name, :id =>
|
81
|
+
child_step = [:element, elem_index, {:name => child.name, :id => child.attribute_with_prefix('id')}]
|
80
82
|
elem_index += 1
|
81
83
|
if child.name == 'img'
|
82
|
-
alt =
|
84
|
+
alt = child.attribute_with_prefix('alt')
|
83
85
|
next if alt.nil? || alt.empty?
|
84
86
|
indices[content.length] = [child_step]
|
85
87
|
content << alt
|
data/test/helper.rb
CHANGED
@@ -43,15 +43,19 @@ class TestOCFPhysicalContainer < Test::Unit::TestCase
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
46
|
+
begin
|
47
|
+
require 'epub/ocf/physical_container/zipruby'
|
48
|
+
class TestZipruby < self
|
49
|
+
include ConcreteContainer
|
50
|
+
|
51
|
+
def setup
|
52
|
+
super
|
53
|
+
@class = EPUB::OCF::PhysicalContainer::Zipruby
|
54
|
+
@container = @class.new(@container_path)
|
55
|
+
end
|
54
56
|
end
|
57
|
+
rescue LoadError
|
58
|
+
warn "Skip TestOPFPhysicalContainer::TestZipRuby"
|
55
59
|
end
|
56
60
|
|
57
61
|
class TestUnpackedDirectory < self
|
data/test/test_parser_ocf.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require File.expand_path 'helper', File.dirname(__FILE__)
|
3
|
-
require 'zipruby'
|
4
3
|
|
5
4
|
class TestParserOCF < Test::Unit::TestCase
|
6
5
|
def setup
|
@@ -8,12 +7,8 @@ class TestParserOCF < Test::Unit::TestCase
|
|
8
7
|
EPUB::OCF::PhysicalContainer.open(file) {|container|
|
9
8
|
@parser = EPUB::Parser::OCF.new(container)
|
10
9
|
}
|
11
|
-
@container_xml =
|
12
|
-
|
13
|
-
}
|
14
|
-
@metadata_xml = Zip::Archive.open(file) {|archive|
|
15
|
-
archive.fopen('META-INF/metadata.xml').read
|
16
|
-
}
|
10
|
+
@container_xml = File.read("test/fixtures/book/META-INF/container.xml")
|
11
|
+
@metadata_xml = File.read("test/fixtures/book/META-INF/metadata.xml")
|
17
12
|
end
|
18
13
|
|
19
14
|
def test_parsed_container_has_two_rootfiles
|
@@ -1,22 +1,14 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require File.expand_path 'helper', File.dirname(__FILE__)
|
3
|
-
require 'zipruby'
|
4
3
|
|
5
4
|
class TestParserPublication < Test::Unit::TestCase
|
6
5
|
def setup
|
7
|
-
|
8
|
-
rootfile = 'OPS/ルートファイル.opf'
|
9
|
-
@zip = Zip::Archive.open(file)
|
10
|
-
opf = @zip.fopen(rootfile).read
|
6
|
+
opf = File.read("test/fixtures/book/OPS/ルートファイル.opf")
|
11
7
|
@opf = Nokogiri.XML(opf)
|
12
8
|
@parser = EPUB::Parser::Publication.new(opf)
|
13
9
|
@package = @parser.parse_package(@opf)
|
14
10
|
end
|
15
11
|
|
16
|
-
def teardown
|
17
|
-
@zip.close
|
18
|
-
end
|
19
|
-
|
20
12
|
def test_parse_package
|
21
13
|
assert_equal '3.0', @package.version
|
22
14
|
end
|
data/test/test_searcher.rb
CHANGED
@@ -9,7 +9,7 @@ class TestSearcher < Test::Unit::TestCase
|
|
9
9
|
super
|
10
10
|
opf_path = File.expand_path('../fixtures/book/OPS/ルートファイル.opf', __FILE__)
|
11
11
|
nav_path = File.expand_path('../fixtures/book/OPS/nav.xhtml', __FILE__)
|
12
|
-
@package = EPUB::Parser::Publication.new(
|
12
|
+
@package = EPUB::Parser::Publication.new(File.read(opf_path)).parse
|
13
13
|
@package.spine.each_itemref do |itemref|
|
14
14
|
stub(itemref.item).read {
|
15
15
|
itemref.idref == 'nav' ? File.read(nav_path) : '<html></html>'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -221,39 +221,39 @@ dependencies:
|
|
221
221
|
- !ruby/object:Gem::Version
|
222
222
|
version: '0'
|
223
223
|
- !ruby/object:Gem::Dependency
|
224
|
-
name:
|
224
|
+
name: nokogiri
|
225
225
|
requirement: !ruby/object:Gem::Requirement
|
226
226
|
requirements:
|
227
227
|
- - ">="
|
228
228
|
- !ruby/object:Gem::Version
|
229
|
-
version:
|
230
|
-
|
229
|
+
version: 1.6.0
|
230
|
+
- - "<"
|
231
|
+
- !ruby/object:Gem::Version
|
232
|
+
version: '1.9'
|
233
|
+
type: :development
|
231
234
|
prerelease: false
|
232
235
|
version_requirements: !ruby/object:Gem::Requirement
|
233
236
|
requirements:
|
234
237
|
- - ">="
|
235
238
|
- !ruby/object:Gem::Version
|
236
|
-
version:
|
239
|
+
version: 1.6.0
|
240
|
+
- - "<"
|
241
|
+
- !ruby/object:Gem::Version
|
242
|
+
version: '1.9'
|
237
243
|
- !ruby/object:Gem::Dependency
|
238
|
-
name:
|
244
|
+
name: archive-zip
|
239
245
|
requirement: !ruby/object:Gem::Requirement
|
240
246
|
requirements:
|
241
247
|
- - ">="
|
242
248
|
- !ruby/object:Gem::Version
|
243
|
-
version:
|
244
|
-
- - "<"
|
245
|
-
- !ruby/object:Gem::Version
|
246
|
-
version: '1.9'
|
249
|
+
version: '0'
|
247
250
|
type: :runtime
|
248
251
|
prerelease: false
|
249
252
|
version_requirements: !ruby/object:Gem::Requirement
|
250
253
|
requirements:
|
251
254
|
- - ">="
|
252
255
|
- !ruby/object:Gem::Version
|
253
|
-
version:
|
254
|
-
- - "<"
|
255
|
-
- !ruby/object:Gem::Version
|
256
|
-
version: '1.9'
|
256
|
+
version: '0'
|
257
257
|
- !ruby/object:Gem::Dependency
|
258
258
|
name: addressable
|
259
259
|
requirement: !ruby/object:Gem::Requirement
|
@@ -362,8 +362,8 @@ files:
|
|
362
362
|
- lib/epub/parser/metadata.rb
|
363
363
|
- lib/epub/parser/ocf.rb
|
364
364
|
- lib/epub/parser/publication.rb
|
365
|
-
- lib/epub/parser/utils.rb
|
366
365
|
- lib/epub/parser/version.rb
|
366
|
+
- lib/epub/parser/xml_document.rb
|
367
367
|
- lib/epub/publication.rb
|
368
368
|
- lib/epub/publication/fixed_layout.rb
|
369
369
|
- lib/epub/publication/package.rb
|
data/lib/epub/parser/utils.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module EPUB
|
2
|
-
class Parser
|
3
|
-
module Utils
|
4
|
-
# Extract the value of attribute of element
|
5
|
-
#
|
6
|
-
# @todo Refinement Nokogiri::XML::Node instead of use this method after Ruby 2.0 becomes popular
|
7
|
-
#
|
8
|
-
# @param [Nokogiri::XML::Element] element
|
9
|
-
# @param [String] name name of attribute excluding namespace prefix
|
10
|
-
# @param [String, nil] prefix XML namespace prefix in {EPUB::NAMESPACES} keys
|
11
|
-
# @return [String] value of attribute when the attribute exists
|
12
|
-
# @return nil when the attribute doesn't exist
|
13
|
-
def extract_attribute(element, name, prefix=nil)
|
14
|
-
attr = element.attribute_with_ns(name, EPUB::NAMESPACES[prefix])
|
15
|
-
attr.nil? ? nil : attr.value
|
16
|
-
end
|
17
|
-
module_function :extract_attribute
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|