epub-parser 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +19 -0
- data/.travis.yml +1 -1
- data/CHANGELOG.markdown +6 -0
- data/Gemfile +4 -0
- data/README.markdown +8 -7
- data/docs/Home.markdown +1 -1
- data/docs/Searcher.markdown +44 -29
- data/examples/find-elements-and-cfis.rb +0 -4
- data/lib/epub/content_document/navigation.rb +2 -0
- data/lib/epub/ocf/physical_container.rb +9 -1
- data/lib/epub/ocf/physical_container/unpacked_directory.rb +1 -3
- data/lib/epub/ocf/physical_container/unpacked_uri.rb +1 -3
- data/lib/epub/ocf/physical_container/zipruby.rb +2 -6
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +1 -0
- data/lib/epub/searcher.rb +6 -2
- data/lib/epub/searcher/publication.rb +57 -7
- data/lib/epub/searcher/result.rb +9 -20
- data/lib/epub/searcher/xhtml.rb +5 -5
- data/test/helper.rb +1 -0
- data/test/test_parser_cfi.rb +16 -1
- data/test/test_searcher.rb +58 -22
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84aac696d82b7eb55bf80b1448685ac4c210abdd
|
4
|
+
data.tar.gz: a32f0e557b928502e0825cbb6b2d808f0fc9dd1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a28c163db08c10c57f733d6ffc9a68a768b30a5fe84439bff1fbb03941448c822eb916f3338016b1cd8f2a96954f69a775be0f65bb629231767dfb995318934
|
7
|
+
data.tar.gz: f5535095748701b940555ca424f3488b4e7b007c39d6e2688cf16d00394a9e03c2c6f12290c820ca15aee2a11ce0ed43e5745225507c6baab9c2a7e33bf513ab
|
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
before_script:
|
2
|
+
- apt-get update -qq && apt-get install -y zip
|
3
|
+
- ruby -v
|
4
|
+
- which ruby
|
5
|
+
- gem install bundler --no-document
|
6
|
+
- bundle install --jobs=$(nproc) "${FLAGS[@]}"
|
7
|
+
|
8
|
+
test:2.2:
|
9
|
+
image: ruby:2.2
|
10
|
+
script:
|
11
|
+
- bundle exec rake test
|
12
|
+
|
13
|
+
test:2.3:
|
14
|
+
image: ruby:2.3
|
15
|
+
script: bundle exec rake test
|
16
|
+
|
17
|
+
test:2.4:
|
18
|
+
image: ruby:2.4
|
19
|
+
script: bundle exec rake test
|
data/.travis.yml
CHANGED
data/CHANGELOG.markdown
CHANGED
data/Gemfile
CHANGED
data/README.markdown
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
EPUB Parser
|
2
2
|
===========
|
3
3
|
[Build Status](http://travis-ci.org/KitaitiMakoto/epub-parser)
|
4
|
+
[Build Status (GitLab CI)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
4
5
|
[Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser)
|
5
6
|
[Gem Version](http://badge.fury.io/rb/epub-parser)
|
6
7
|
|
@@ -31,7 +32,7 @@ USAGE
|
|
31
32
|
|
32
33
|
See the documentation in {file:docs/Home.markdown} or the [API Documentation][rubydoc] for more info.
|
33
34
|
|
34
|
-
[rubydoc]: http://rubydoc.info/gems/epub-parser
|
35
|
+
[rubydoc]: http://www.rubydoc.info/gems/epub-parser
|
35
36
|
|
36
37
|
### `epubinfo` command-line tool
|
37
38
|
|
@@ -114,7 +115,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
|
|
114
115
|
|
115
116
|
Or, you can generate the documentation with the yardoc command, too:
|
116
117
|
|
117
|
-
$ git clone https://
|
118
|
+
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
118
119
|
$ cd epub-parser
|
119
120
|
$ bundle install --path=deps
|
120
121
|
$ bundle exec rake doc:yard
|
@@ -151,6 +152,11 @@ If you find other gems, please tell me or request a pull request.
|
|
151
152
|
RECENT CHANGES
|
152
153
|
--------------
|
153
154
|
|
155
|
+
### 0.2.8
|
156
|
+
|
157
|
+
* Change Searcher API: #search -> #search_text
|
158
|
+
* Add Searcher.search_element
|
159
|
+
|
154
160
|
### 0.2.7
|
155
161
|
|
156
162
|
* Add `EPUB::Metadata#children`
|
@@ -169,11 +175,6 @@ RECENT CHANGES
|
|
169
175
|
|
170
176
|
[multi-rendition]: http://www.idpf.org/epub/renditions/multiple/
|
171
177
|
|
172
|
-
### 0.2.5
|
173
|
-
|
174
|
-
* [BUG FIX]Don't load Zip/Ruby if unneccessary
|
175
|
-
* Add `EPUB::CFI::PhysicalContainer.find_adapter`
|
176
|
-
|
177
178
|
See {file:CHANGELOG.markdown} for older changelogs and details.
|
178
179
|
|
179
180
|
TODOS
|
data/docs/Home.markdown
CHANGED
@@ -122,7 +122,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
|
|
122
122
|
|
123
123
|
Or, you can generate the documentation with the yardoc command, too:
|
124
124
|
|
125
|
-
$ git clone https://
|
125
|
+
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
126
126
|
$ cd epub-parser
|
127
127
|
$ bundle install --path=deps
|
128
128
|
$ bundle exec rake doc:yard
|
data/docs/Searcher.markdown
CHANGED
@@ -8,37 +8,37 @@ Searcher
|
|
8
8
|
Example
|
9
9
|
-------
|
10
10
|
|
11
|
-
epub = EPUB::Parser.parse('childrens-literature
|
11
|
+
epub = EPUB::Parser.parse('childrens-literature.epub')
|
12
12
|
search_word = 'INTRODUCTORY'
|
13
|
-
results = EPUB::Searcher.
|
14
|
-
# => [#<EPUB::Searcher::Result:
|
15
|
-
# @end_steps=[#<EPUB::Searcher::Result::Step:
|
13
|
+
results = EPUB::Searcher.search_text(epub, search_word)
|
14
|
+
# => [#<EPUB::Searcher::Result:0x007f80ccde9528
|
15
|
+
# @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9730 @index=12, @info={}, @type=:character>],
|
16
16
|
# @parent_steps=
|
17
|
-
# [#<EPUB::Searcher::Result::Step:
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# @start_steps=[#<EPUB::Searcher::Result::Step:
|
28
|
-
# #<EPUB::Searcher::Result:
|
29
|
-
# @end_steps=[#<EPUB::Searcher::Result::Step:
|
17
|
+
# [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
|
18
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccf3d3e8 @index=1, @info={:id=>nil}, @type=:itemref>,
|
19
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9e88 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
|
20
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9e38 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
|
21
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9de8 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
|
22
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9d98 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
|
23
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9d48 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
|
24
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9ca8 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
|
25
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9c08 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
|
26
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9bb8 @index=0, @info={}, @type=:text>],
|
27
|
+
# @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9af0 @index=0, @info={}, @type=:character>]>,
|
28
|
+
# #<EPUB::Searcher::Result:0x007f80ccebcb30
|
29
|
+
# @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebcdb0 @index=12, @info={}, @type=:character>],
|
30
30
|
# @parent_steps=
|
31
|
-
# [#<EPUB::Searcher::Result::Step:
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
# @start_steps=[#<EPUB::Searcher::Result::Step:
|
39
|
-
puts results.collect(&:
|
40
|
-
# /6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12
|
41
|
-
# /6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12
|
31
|
+
# [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
|
32
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde94b0 @index=2, @info={:id=>nil}, @type=:itemref>,
|
33
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd328 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
|
34
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd2d8 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
|
35
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd260 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
|
36
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd210 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
|
37
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd198 @index=0, @info={}, @type=:text>],
|
38
|
+
# @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebd0d0 @index=0, @info={}, @type=:character>]>]
|
39
|
+
puts results.collect(&:to_cfi).collect(&:to_fragment)
|
40
|
+
# epubcfi(/6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12)
|
41
|
+
# epubcfi(/6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12)
|
42
42
|
# => nil
|
43
43
|
|
44
44
|
Search result
|
@@ -91,4 +91,19 @@ because the words 'search' and 'word' are not in the same element.
|
|
91
91
|
|
92
92
|
To use the restricted searcher, specify the `algorithm` option for the `search_text` method:
|
93
93
|
|
94
|
-
results = EPUB::Searcher.
|
94
|
+
results = EPUB::Searcher.search_text(epub, search_word, algorithm: :restricted)
|
95
|
+
|
96
|
+
Element Searcher
|
97
|
+
----------------
|
98
|
+
|
99
|
+
You can search XHTML elements by CSS selector or XPath.
|
100
|
+
|
101
|
+
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
102
|
+
# => ["epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313])",
|
103
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/2[np-315])",
|
104
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317])",
|
105
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6)",
|
106
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319])",
|
107
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319]/4/2)",
|
108
|
+
# :
|
109
|
+
# :
|
@@ -23,10 +23,6 @@ def main(argv)
|
|
23
23
|
|
24
24
|
epub = EPUB::Parser.parse(epub_path)
|
25
25
|
epub.package.spine.each_itemref.with_index do |itemref, i|
|
26
|
-
itemref_step = {
|
27
|
-
:step => (i + 1) * 2,
|
28
|
-
:id => itemref.id
|
29
|
-
}
|
30
26
|
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
|
31
27
|
itemref_step = EPUB::CFI::Step.new((i + 1) * 2, assertion)
|
32
28
|
path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
|
@@ -7,7 +7,15 @@ module EPUB
|
|
7
7
|
class OCF
|
8
8
|
# @todo: Make thread safe
|
9
9
|
class PhysicalContainer
|
10
|
-
class NoEntry < StandardError
|
10
|
+
class NoEntry < StandardError
|
11
|
+
class << self
|
12
|
+
def from_error(error)
|
13
|
+
no_entry = new(error.message)
|
14
|
+
no_entry.set_backtrace error.backtrace
|
15
|
+
no_entry
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
11
19
|
|
12
20
|
@adapter = ArchiveZip
|
13
21
|
|
@@ -9,9 +9,7 @@ module EPUB
|
|
9
9
|
def read(path_name)
|
10
10
|
::File.read(::File.join(@container_path, path_name))
|
11
11
|
rescue ::Errno::ENOENT => error
|
12
|
-
|
13
|
-
no_entry.set_backtrace error.backtrace
|
14
|
-
raise no_entry
|
12
|
+
raise NoEntry.from_error(error)
|
15
13
|
end
|
16
14
|
end
|
17
15
|
end
|
@@ -20,9 +20,7 @@ module EPUB
|
|
20
20
|
def read(path_name)
|
21
21
|
(@container_path + path_name).read
|
22
22
|
rescue ::OpenURI::HTTPError => error
|
23
|
-
|
24
|
-
no_entry.set_backtrace error.backtrace
|
25
|
-
raise no_entry
|
23
|
+
raise NoEntry.from_error(error)
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
@@ -11,9 +11,7 @@ module EPUB
|
|
11
11
|
@archive = archive
|
12
12
|
yield self
|
13
13
|
rescue ::Zip::Error => error
|
14
|
-
|
15
|
-
no_entry.set_backtrace error.backtrace
|
16
|
-
raise no_entry
|
14
|
+
raise NoEntry.from_error(error)
|
17
15
|
ensure
|
18
16
|
@archive = nil
|
19
17
|
end
|
@@ -28,9 +26,7 @@ module EPUB
|
|
28
26
|
open {|container| container.read(path_name)}
|
29
27
|
end
|
30
28
|
rescue ::Zip::Error => error
|
31
|
-
|
32
|
-
no_entry.set_backtrace error.backtrace
|
33
|
-
raise no_entry
|
29
|
+
raise NoEntry.from_error(error)
|
34
30
|
ensure
|
35
31
|
@archive = nil
|
36
32
|
end
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/searcher.rb
CHANGED
@@ -5,8 +5,12 @@ require 'epub/searcher/xhtml'
|
|
5
5
|
module EPUB
|
6
6
|
module Searcher
|
7
7
|
class << self
|
8
|
-
def
|
9
|
-
Publication.
|
8
|
+
def search_text(epub, word, **options)
|
9
|
+
Publication.search_text(epub.package, word, options)
|
10
|
+
end
|
11
|
+
|
12
|
+
def search_element(epub, css: nil, xpath: nil, namespaces: {})
|
13
|
+
Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
|
10
14
|
end
|
11
15
|
end
|
12
16
|
end
|
@@ -4,29 +4,79 @@ module EPUB
|
|
4
4
|
module Searcher
|
5
5
|
class Publication
|
6
6
|
class << self
|
7
|
-
def
|
8
|
-
new(
|
7
|
+
def search_text(package, word, **options)
|
8
|
+
new(package).search_text(word, options)
|
9
|
+
end
|
10
|
+
|
11
|
+
def search_element(package, css: nil, xpath: nil, namespaces: {})
|
12
|
+
new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
|
9
13
|
end
|
10
14
|
end
|
11
15
|
|
12
|
-
def initialize(
|
13
|
-
@
|
16
|
+
def initialize(package)
|
17
|
+
@package = package
|
14
18
|
end
|
15
19
|
|
16
|
-
def
|
20
|
+
def search_text(word, algorithm: :seamless)
|
17
21
|
results = []
|
18
22
|
|
19
|
-
spine = package.spine
|
23
|
+
spine = @package.spine
|
20
24
|
spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
|
21
25
|
spine.each_itemref.with_index do |itemref, index|
|
22
26
|
itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
|
23
|
-
XHTML::ALGORITHMS[algorithm].
|
27
|
+
XHTML::ALGORITHMS[algorithm].search_text(Nokogiri.XML(itemref.item.read), word).each do |sub_result|
|
24
28
|
results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
28
32
|
results
|
29
33
|
end
|
34
|
+
|
35
|
+
# @todo: Refactoring
|
36
|
+
def search_element(css: nil, xpath: nil, namespaces: {})
|
37
|
+
raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
|
38
|
+
|
39
|
+
namespaces = EPUB::NAMESPACES.merge(namespaces)
|
40
|
+
results = []
|
41
|
+
|
42
|
+
spine_step = EPUB::CFI::Step.new(EPUB::Publication::Package::CONTENT_MODELS.index(:spine) * 2)
|
43
|
+
@package.spine.each_itemref.with_index do |itemref, index|
|
44
|
+
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
|
45
|
+
itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
46
|
+
path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
|
47
|
+
content_document = itemref.item.content_document
|
48
|
+
next unless content_document
|
49
|
+
doc = content_document.nokogiri
|
50
|
+
elems = if xpath
|
51
|
+
doc.xpath(xpath, namespaces)
|
52
|
+
else
|
53
|
+
doc.css(css)
|
54
|
+
end
|
55
|
+
elems.each do |elem|
|
56
|
+
path = find_path(elem)
|
57
|
+
results << {
|
58
|
+
location: EPUB::CFI::Location.new([path_to_itemref, path]),
|
59
|
+
package: @package,
|
60
|
+
element: elem
|
61
|
+
}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
results
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def find_path(elem)
|
71
|
+
steps = []
|
72
|
+
until elem.parent.document?
|
73
|
+
index = elem.parent.element_children.index(elem)
|
74
|
+
assertion = elem["id"] ? EPUB::CFI::IDAssertion.new(elem["id"]) : nil
|
75
|
+
steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
76
|
+
elem = elem.parent
|
77
|
+
end
|
78
|
+
EPUB::CFI::Path.new(steps)
|
79
|
+
end
|
30
80
|
end
|
31
81
|
end
|
32
82
|
end
|
data/lib/epub/searcher/result.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'epub/cfi'
|
2
|
+
require 'epub/parser/cfi'
|
3
|
+
|
1
4
|
module EPUB
|
2
5
|
module Searcher
|
3
6
|
class Result
|
@@ -9,7 +12,7 @@ module EPUB
|
|
9
12
|
# # Note that c here is not included in the first element of returned value.
|
10
13
|
# @param steps1 [Array<Step>, Array<Array>]
|
11
14
|
# @param steps2 [Array<Step>, Array<Array>]
|
12
|
-
# @return [Array<Array<Array>>]
|
15
|
+
# @return [Array<Array<Array>>] Three arrays:
|
13
16
|
# 1. "intersection" of +steps1+ and +steps2+. "intersection" here is not used in the mathematical sense
|
14
17
|
# 2. remaining steps of +steps1+
|
15
18
|
# 3. remaining steps of +steps2+
|
@@ -41,25 +44,11 @@ module EPUB
|
|
41
44
|
@parent_steps, @start_steps, @end_steps = parent_steps, start_steps, end_steps
|
42
45
|
end
|
43
46
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
when :element
|
48
|
-
path + '/%s*[%d]' % [with_xmlns ? 'xhtml:' : nil, step.index + 1]
|
49
|
-
when :text
|
50
|
-
path + '/text()[%s]' % [step.index + 1]
|
51
|
-
else
|
52
|
-
path
|
53
|
-
end
|
54
|
-
}
|
55
|
-
|
56
|
-
[xpath, @start_steps.last.index]
|
57
|
-
end
|
58
|
-
|
59
|
-
def to_cfi_s
|
60
|
-
[@parent_steps, @start_steps, @end_steps].collect {|steps|
|
61
|
-
steps ? steps.collect(&:to_cfi_s).join : nil
|
47
|
+
def to_cfi
|
48
|
+
str = [@parent_steps, @start_steps, @end_steps].collect {|steps|
|
49
|
+
steps ? steps.collect(&:to_cfi).join : nil
|
62
50
|
}.compact.join(',')
|
51
|
+
EPUB::CFI(str)
|
63
52
|
end
|
64
53
|
|
65
54
|
def ==(other)
|
@@ -80,7 +69,7 @@ module EPUB
|
|
80
69
|
self.info == other.info
|
81
70
|
end
|
82
71
|
|
83
|
-
def
|
72
|
+
def to_cfi
|
84
73
|
case type
|
85
74
|
when :element
|
86
75
|
'/%d%s' % [(index + 1) * 2, id_assertion]
|
data/lib/epub/searcher/xhtml.rb
CHANGED
@@ -10,8 +10,8 @@ module EPUB
|
|
10
10
|
# @param element [Nokogiri::XML::Element, Nokogiri::XML::Document]
|
11
11
|
# @param word [String]
|
12
12
|
# @return [Array<Result>]
|
13
|
-
def
|
14
|
-
new(element.respond_to?(:root) ? element.root : element).
|
13
|
+
def search_text(element, word)
|
14
|
+
new(element.respond_to?(:root) ? element.root : element).search_text(word)
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -23,7 +23,7 @@ module EPUB
|
|
23
23
|
class Restricted < self
|
24
24
|
# @param element [Nokogiri::XML::Element]
|
25
25
|
# @return [Array<Result>]
|
26
|
-
def
|
26
|
+
def search_text(word, element=nil)
|
27
27
|
results = []
|
28
28
|
|
29
29
|
elem_index = 0
|
@@ -35,7 +35,7 @@ module EPUB
|
|
35
35
|
results << Result.new([child_step], nil, nil)
|
36
36
|
end
|
37
37
|
else
|
38
|
-
|
38
|
+
search_text(word, child).each do |sub_result|
|
39
39
|
results << Result.new([child_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
|
40
40
|
end
|
41
41
|
end
|
@@ -62,7 +62,7 @@ module EPUB
|
|
62
62
|
@indices = nil
|
63
63
|
end
|
64
64
|
|
65
|
-
def
|
65
|
+
def search_text(word)
|
66
66
|
unless @indices
|
67
67
|
@indices, @content = build_indices(@element)
|
68
68
|
end
|
data/test/helper.rb
CHANGED
data/test/test_parser_cfi.rb
CHANGED
@@ -18,6 +18,7 @@ class TestParserCFI < Test::Unit::TestCase
|
|
18
18
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[,y])',
|
19
19
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[;s=b])',
|
20
20
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[yyy;s=b])',
|
21
|
+
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[^(;s=b])',
|
21
22
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2[;s=b])',
|
22
23
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:10)',
|
23
24
|
'epubcfi(/6/4[chap01ref]!/4[body01]/16[svgimg])',
|
@@ -25,7 +26,8 @@ class TestParserCFI < Test::Unit::TestCase
|
|
25
26
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:0)',
|
26
27
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3)',
|
27
28
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05],/2/1:1,/3:4)',
|
28
|
-
'epubcfi(/6,:1,:3)'
|
29
|
+
'epubcfi(/6,:1,:3)',
|
30
|
+
'epubcfi(/6/4[chap01ref]!/4[body01]/10[mov01]~23.5@5.75:97.6)'
|
29
31
|
].reduce({}) {|data, cfi|
|
30
32
|
data[cfi] = cfi
|
31
33
|
data
|
@@ -35,4 +37,17 @@ class TestParserCFI < Test::Unit::TestCase
|
|
35
37
|
@parser.parse(cfi)
|
36
38
|
end
|
37
39
|
end
|
40
|
+
|
41
|
+
data([
|
42
|
+
'/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[(;s=b]',
|
43
|
+
'/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[);s=b]'
|
44
|
+
].reduce({}) {|data, cfi|
|
45
|
+
data[cfi] = cfi
|
46
|
+
data
|
47
|
+
})
|
48
|
+
def test_raise_error_on_parsing_invalid_cfi(cfi)
|
49
|
+
assert_raise Racc::ParseError do
|
50
|
+
EPUB::CFI(cfi)
|
51
|
+
end
|
52
|
+
end
|
38
53
|
end
|
data/test/test_searcher.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require_relative 'helper'
|
3
3
|
require 'epub/searcher'
|
4
|
+
require 'epub/parser/cfi'
|
4
5
|
|
5
6
|
class TestSearcher < Test::Unit::TestCase
|
6
7
|
class TestPublication < self
|
@@ -17,7 +18,7 @@ class TestSearcher < Test::Unit::TestCase
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def test_no_result
|
20
|
-
assert_empty EPUB::Searcher::Publication.
|
21
|
+
assert_empty EPUB::Searcher::Publication.search_text(@package, 'no result')
|
21
22
|
end
|
22
23
|
|
23
24
|
def test_simple
|
@@ -26,13 +27,53 @@ class TestSearcher < Test::Unit::TestCase
|
|
26
27
|
[[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 0, {:name => 'head', :id => nil}], [:element, 0, {:name => 'title', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]],
|
27
28
|
[[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 1, {:name => 'body', :id => nil}], [:element, 0, {:name => 'div', :id => nil}], [:element, 0, {:name => 'nav', :id => 'idid'}], [:element, 0, {:name => 'hgroup', :id => nil}], [:element, 1, {:name => 'h1', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]]
|
28
29
|
]),
|
29
|
-
EPUB::Searcher::Publication.
|
30
|
+
EPUB::Searcher::Publication.search_text(@package, 'Content')
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_search_element_xpath_without_namespaces
|
35
|
+
assert_equal(
|
36
|
+
[
|
37
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
|
38
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
|
39
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
|
40
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
|
41
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
|
42
|
+
],
|
43
|
+
EPUB::Searcher::Publication.search_element(@package, xpath: './/xhtml:a').collect {|result| result[:location]}.map(&:to_fragment)
|
44
|
+
)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_search_element_xpath_with_namespaces
|
48
|
+
assert_equal(
|
49
|
+
[
|
50
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
|
51
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
|
52
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
|
53
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
|
54
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
|
55
|
+
],
|
56
|
+
EPUB::Searcher::Publication.search_element(@package, xpath: './/customnamespace:a', namespaces: {'customnamespace' => 'http://www.w3.org/1999/xhtml'}).collect {|result| result[:location]}.map(&:to_fragment)
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_search_element_css_selector
|
61
|
+
assert_equal(
|
62
|
+
[
|
63
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/2)",
|
64
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4)",
|
65
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/2)",
|
66
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/4)",
|
67
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/6)",
|
68
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/8)"
|
69
|
+
],
|
70
|
+
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
30
71
|
)
|
31
72
|
end
|
32
73
|
|
33
74
|
class TesetResult < self
|
34
|
-
def
|
35
|
-
assert_equal '/6/2!/4/2/2[idid]/2/4/1,:9,:16', EPUB::Searcher::Publication.
|
75
|
+
def test_to_cfi
|
76
|
+
assert_equal 'epubcfi(/6/2!/4/2/2[idid]/2/4/1,:9,:16)', EPUB::Searcher::Publication.search_text(@package, 'Content').last.to_cfi.to_fragment
|
36
77
|
end
|
37
78
|
end
|
38
79
|
end
|
@@ -48,35 +89,35 @@ class TestSearcher < Test::Unit::TestCase
|
|
48
89
|
|
49
90
|
module TestSearch
|
50
91
|
def test_no_result
|
51
|
-
assert_empty @searcher.
|
92
|
+
assert_empty @searcher.search_text(@h1, 'no result')
|
52
93
|
end
|
53
94
|
|
54
95
|
def test_simple
|
55
|
-
assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.
|
96
|
+
assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.search_text(@h1, 'Content')
|
56
97
|
end
|
57
98
|
|
58
99
|
def test_multiple_text_result
|
59
|
-
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.
|
100
|
+
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.search_text(@h1, 'o')
|
60
101
|
end
|
61
102
|
|
62
103
|
def test_text_after_element
|
63
104
|
elem = Nokogiri.XML('<root><elem>inner</elem>after</root>')
|
64
105
|
|
65
|
-
assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.
|
106
|
+
assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.search_text(elem, 'after')
|
66
107
|
end
|
67
108
|
|
68
109
|
def test_entity_reference
|
69
110
|
elem = Nokogiri.XML('<root>before<after</root>')
|
70
111
|
|
71
|
-
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.
|
112
|
+
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.search_text(elem, '<')
|
72
113
|
end
|
73
114
|
|
74
115
|
def test_nested_result
|
75
|
-
assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.
|
116
|
+
assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.search_text(@nav, '第二節')
|
76
117
|
end
|
77
118
|
|
78
119
|
def test_img
|
79
|
-
assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.
|
120
|
+
assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.search_text(@nav, '第三節')
|
80
121
|
end
|
81
122
|
end
|
82
123
|
|
@@ -99,27 +140,22 @@ class TestSearcher < Test::Unit::TestCase
|
|
99
140
|
|
100
141
|
def test_seamless
|
101
142
|
elem = Nokogiri.XML('<root>This <em>includes</em> a child element.</root>')
|
102
|
-
assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.
|
143
|
+
assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.search_text(elem, 'This includes a child element.')
|
103
144
|
end
|
104
145
|
end
|
105
146
|
|
106
147
|
class TestResult < self
|
107
148
|
def setup
|
108
149
|
super
|
109
|
-
@result = EPUB::Searcher::XHTML::Restricted.
|
110
|
-
end
|
111
|
-
|
112
|
-
def test_to_xpath_and_offset
|
113
|
-
assert_equal ['./*[2]/*[1]/*[1]/*[2]/*[2]/*[2]/*[2]/*[1]/text()[1]', 0], @result.to_xpath_and_offset
|
114
|
-
assert_equal ['./xhtml:*[2]/xhtml:*[1]/xhtml:*[1]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[1]/text()[1]', 0], @result.to_xpath_and_offset(true)
|
150
|
+
@result = EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第二節').first
|
115
151
|
end
|
116
152
|
|
117
|
-
def
|
118
|
-
assert_equal '/4/2/2[idid]/4/4/4/4/2/1,:0,:3', @result.
|
153
|
+
def test_to_cfi
|
154
|
+
assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/4/2/1,:0,:3)', @result.to_cfi.to_fragment
|
119
155
|
end
|
120
156
|
|
121
|
-
def
|
122
|
-
assert_equal '/4/2/2[idid]/4/4/4/6/2/2', EPUB::Searcher::XHTML::Restricted.
|
157
|
+
def test_to_cfi_img
|
158
|
+
assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/6/2/2)', EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第三節').first.to_cfi.to_fragment
|
123
159
|
end
|
124
160
|
end
|
125
161
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -301,6 +301,7 @@ extra_rdoc_files: []
|
|
301
301
|
files:
|
302
302
|
- ".gemtest"
|
303
303
|
- ".gitignore"
|
304
|
+
- ".gitlab-ci.yml"
|
304
305
|
- ".gitmodules"
|
305
306
|
- ".travis.yml"
|
306
307
|
- ".yardopts"
|
@@ -419,7 +420,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
419
420
|
version: '0'
|
420
421
|
requirements: []
|
421
422
|
rubyforge_project:
|
422
|
-
rubygems_version: 2.
|
423
|
+
rubygems_version: 2.6.8
|
423
424
|
signing_key:
|
424
425
|
specification_version: 4
|
425
426
|
summary: EPUB 3 Parser
|