epub-parser 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +19 -0
- data/.travis.yml +1 -1
- data/CHANGELOG.markdown +6 -0
- data/Gemfile +4 -0
- data/README.markdown +8 -7
- data/docs/Home.markdown +1 -1
- data/docs/Searcher.markdown +44 -29
- data/examples/find-elements-and-cfis.rb +0 -4
- data/lib/epub/content_document/navigation.rb +2 -0
- data/lib/epub/ocf/physical_container.rb +9 -1
- data/lib/epub/ocf/physical_container/unpacked_directory.rb +1 -3
- data/lib/epub/ocf/physical_container/unpacked_uri.rb +1 -3
- data/lib/epub/ocf/physical_container/zipruby.rb +2 -6
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +1 -0
- data/lib/epub/searcher.rb +6 -2
- data/lib/epub/searcher/publication.rb +57 -7
- data/lib/epub/searcher/result.rb +9 -20
- data/lib/epub/searcher/xhtml.rb +5 -5
- data/test/helper.rb +1 -0
- data/test/test_parser_cfi.rb +16 -1
- data/test/test_searcher.rb +58 -22
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84aac696d82b7eb55bf80b1448685ac4c210abdd
|
4
|
+
data.tar.gz: a32f0e557b928502e0825cbb6b2d808f0fc9dd1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a28c163db08c10c57f733d6ffc9a68a768b30a5fe84439bff1fbb03941448c822eb916f3338016b1cd8f2a96954f69a775be0f65bb629231767dfb995318934
|
7
|
+
data.tar.gz: f5535095748701b940555ca424f3488b4e7b007c39d6e2688cf16d00394a9e03c2c6f12290c820ca15aee2a11ce0ed43e5745225507c6baab9c2a7e33bf513ab
|
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
before_script:
|
2
|
+
- apt-get update -qq && apt-get install -y zip
|
3
|
+
- ruby -v
|
4
|
+
- which ruby
|
5
|
+
- gem install bundler --no-document
|
6
|
+
- bundle install --jobs=$(nproc) "${FLAGS[@]}"
|
7
|
+
|
8
|
+
test:2.2:
|
9
|
+
image: ruby:2.2
|
10
|
+
script:
|
11
|
+
- bundle exec rake test
|
12
|
+
|
13
|
+
test:2.3:
|
14
|
+
image: ruby:2.3
|
15
|
+
script: bundle exec rake test
|
16
|
+
|
17
|
+
test:2.4:
|
18
|
+
image: ruby:2.4
|
19
|
+
script: bundle exec rake test
|
data/.travis.yml
CHANGED
data/CHANGELOG.markdown
CHANGED
data/Gemfile
CHANGED
data/README.markdown
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
EPUB Parser
|
2
2
|
===========
|
3
3
|
[![Build Status](https://secure.travis-ci.org/KitaitiMakoto/epub-parser.png?branch=master)](http://travis-ci.org/KitaitiMakoto/epub-parser)
|
4
|
+
[![build status](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
4
5
|
[![Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser.png)](https://gemnasium.com/KitaitiMakoto/epub-parser)
|
5
6
|
[![Gem Version](https://badge.fury.io/rb/epub-parser.svg)](http://badge.fury.io/rb/epub-parser)
|
6
7
|
|
@@ -31,7 +32,7 @@ USAGE
|
|
31
32
|
|
32
33
|
See document's {file:docs/Home.markdown} or [API Documentation][rubydoc] for more info.
|
33
34
|
|
34
|
-
[rubydoc]: http://rubydoc.info/gems/epub-parser
|
35
|
+
[rubydoc]: http://www.rubydoc.info/gems/epub-parser
|
35
36
|
|
36
37
|
### `epubinfo` command-line tool
|
37
38
|
|
@@ -114,7 +115,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
|
|
114
115
|
|
115
116
|
Or, generating by yardoc command is possible, too:
|
116
117
|
|
117
|
-
$ git clone https://
|
118
|
+
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
118
119
|
$ cd epub-parser
|
119
120
|
$ bundle install --path=deps
|
120
121
|
$ bundle exec rake doc:yard
|
@@ -151,6 +152,11 @@ If you find other gems, please tell me or request a pull request.
|
|
151
152
|
RECENT CHANGES
|
152
153
|
--------------
|
153
154
|
|
155
|
+
### 0.2.8
|
156
|
+
|
157
|
+
* Change Searcher API: #search -> #search_text
|
158
|
+
* Add Searcher.search_element
|
159
|
+
|
154
160
|
### 0.2.7
|
155
161
|
|
156
162
|
* Add `EPUB::Metadata#children`
|
@@ -169,11 +175,6 @@ RECENT CHANGES
|
|
169
175
|
|
170
176
|
[multi-rendition]: http://www.idpf.org/epub/renditions/multiple/
|
171
177
|
|
172
|
-
### 0.2.5
|
173
|
-
|
174
|
-
* [BUG FIX]Don't load Zip/Ruby if unneccessary
|
175
|
-
* Add `EPUB::CFI::PhysicalContainer.find_adapter`
|
176
|
-
|
177
178
|
See {file:CHANGELOG.markdown} for older changelogs and details.
|
178
179
|
|
179
180
|
TODOS
|
data/docs/Home.markdown
CHANGED
@@ -122,7 +122,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
|
|
122
122
|
|
123
123
|
Or, generating by yardoc command is possible, too:
|
124
124
|
|
125
|
-
$ git clone https://
|
125
|
+
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
126
126
|
$ cd epub-parser
|
127
127
|
$ bundle install --path=deps
|
128
128
|
$ bundle exec rake doc:yard
|
data/docs/Searcher.markdown
CHANGED
@@ -8,37 +8,37 @@ Searcher
|
|
8
8
|
Example
|
9
9
|
-------
|
10
10
|
|
11
|
-
epub = EPUB::Parser.parse('childrens-literature
|
11
|
+
epub = EPUB::Parser.parse('childrens-literature.epub')
|
12
12
|
search_word = 'INTRODUCTORY'
|
13
|
-
results = EPUB::Searcher.
|
14
|
-
# => [#<EPUB::Searcher::Result:
|
15
|
-
# @end_steps=[#<EPUB::Searcher::Result::Step:
|
13
|
+
results = EPUB::Searcher.search_text(epub, search_word)
|
14
|
+
# => [#<EPUB::Searcher::Result:0x007f80ccde9528
|
15
|
+
# @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9730 @index=12, @info={}, @type=:character>],
|
16
16
|
# @parent_steps=
|
17
|
-
# [#<EPUB::Searcher::Result::Step:
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# @start_steps=[#<EPUB::Searcher::Result::Step:
|
28
|
-
# #<EPUB::Searcher::Result:
|
29
|
-
# @end_steps=[#<EPUB::Searcher::Result::Step:
|
17
|
+
# [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
|
18
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccf3d3e8 @index=1, @info={:id=>nil}, @type=:itemref>,
|
19
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9e88 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
|
20
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9e38 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
|
21
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9de8 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
|
22
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9d98 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
|
23
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9d48 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
|
24
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9ca8 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
|
25
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9c08 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
|
26
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9bb8 @index=0, @info={}, @type=:text>],
|
27
|
+
# @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9af0 @index=0, @info={}, @type=:character>]>,
|
28
|
+
# #<EPUB::Searcher::Result:0x007f80ccebcb30
|
29
|
+
# @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebcdb0 @index=12, @info={}, @type=:character>],
|
30
30
|
# @parent_steps=
|
31
|
-
# [#<EPUB::Searcher::Result::Step:
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
# @start_steps=[#<EPUB::Searcher::Result::Step:
|
39
|
-
puts results.collect(&:
|
40
|
-
# /6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12
|
41
|
-
# /6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12
|
31
|
+
# [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
|
32
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde94b0 @index=2, @info={:id=>nil}, @type=:itemref>,
|
33
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd328 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
|
34
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd2d8 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
|
35
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd260 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
|
36
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd210 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
|
37
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd198 @index=0, @info={}, @type=:text>],
|
38
|
+
# @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebd0d0 @index=0, @info={}, @type=:character>]>]
|
39
|
+
puts results.collect(&:to_cfi).collect(&:to_fragment)
|
40
|
+
# epubcfi(/6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12)
|
41
|
+
# epubcfi(/6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12)
|
42
42
|
# => nil
|
43
43
|
|
44
44
|
Search result
|
@@ -91,4 +91,19 @@ because the words 'search' and 'word' are not in the same element.
|
|
91
91
|
|
92
92
|
To use restricted searcher, specify `algorithm` option for `search_text` method:
|
93
93
|
|
94
|
-
results = EPUB::Searcher.
|
94
|
+
results = EPUB::Searcher.search_text(epub, search_word, algorithm: :restricted)
|
95
|
+
|
96
|
+
Element Searcher
|
97
|
+
----------------
|
98
|
+
|
99
|
+
You can search XHTML elements by CSS selector or XPath.
|
100
|
+
|
101
|
+
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
102
|
+
# => ["epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313])",
|
103
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/2[np-315])",
|
104
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317])",
|
105
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6)",
|
106
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319])",
|
107
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319]/4/2)",
|
108
|
+
# :
|
109
|
+
# :
|
@@ -23,10 +23,6 @@ def main(argv)
|
|
23
23
|
|
24
24
|
epub = EPUB::Parser.parse(epub_path)
|
25
25
|
epub.package.spine.each_itemref.with_index do |itemref, i|
|
26
|
-
itemref_step = {
|
27
|
-
:step => (i + 1) * 2,
|
28
|
-
:id => itemref.id
|
29
|
-
}
|
30
26
|
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
|
31
27
|
itemref_step = EPUB::CFI::Step.new((i + 1) * 2, assertion)
|
32
28
|
path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
|
@@ -7,7 +7,15 @@ module EPUB
|
|
7
7
|
class OCF
|
8
8
|
# @todo: Make thread safe
|
9
9
|
class PhysicalContainer
|
10
|
-
class NoEntry < StandardError
|
10
|
+
class NoEntry < StandardError
|
11
|
+
class << self
|
12
|
+
def from_error(error)
|
13
|
+
no_entry = new(error.message)
|
14
|
+
no_entry.set_backtrace error.backtrace
|
15
|
+
no_entry
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
11
19
|
|
12
20
|
@adapter = ArchiveZip
|
13
21
|
|
@@ -9,9 +9,7 @@ module EPUB
|
|
9
9
|
def read(path_name)
|
10
10
|
::File.read(::File.join(@container_path, path_name))
|
11
11
|
rescue ::Errno::ENOENT => error
|
12
|
-
|
13
|
-
no_entry.set_backtrace error.backtrace
|
14
|
-
raise no_entry
|
12
|
+
raise NoEntry.from_error(error)
|
15
13
|
end
|
16
14
|
end
|
17
15
|
end
|
@@ -20,9 +20,7 @@ module EPUB
|
|
20
20
|
def read(path_name)
|
21
21
|
(@container_path + path_name).read
|
22
22
|
rescue ::OpenURI::HTTPError => error
|
23
|
-
|
24
|
-
no_entry.set_backtrace error.backtrace
|
25
|
-
raise no_entry
|
23
|
+
raise NoEntry.from_error(error)
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
@@ -11,9 +11,7 @@ module EPUB
|
|
11
11
|
@archive = archive
|
12
12
|
yield self
|
13
13
|
rescue ::Zip::Error => error
|
14
|
-
|
15
|
-
no_entry.set_backtrace error.backtrace
|
16
|
-
raise no_entry
|
14
|
+
raise NoEntry.from_error(error)
|
17
15
|
ensure
|
18
16
|
@archive = nil
|
19
17
|
end
|
@@ -28,9 +26,7 @@ module EPUB
|
|
28
26
|
open {|container| container.read(path_name)}
|
29
27
|
end
|
30
28
|
rescue ::Zip::Error => error
|
31
|
-
|
32
|
-
no_entry.set_backtrace error.backtrace
|
33
|
-
raise no_entry
|
29
|
+
raise NoEntry.from_error(error)
|
34
30
|
ensure
|
35
31
|
@archive = nil
|
36
32
|
end
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/searcher.rb
CHANGED
@@ -5,8 +5,12 @@ require 'epub/searcher/xhtml'
|
|
5
5
|
module EPUB
|
6
6
|
module Searcher
|
7
7
|
class << self
|
8
|
-
def
|
9
|
-
Publication.
|
8
|
+
def search_text(epub, word, **options)
|
9
|
+
Publication.search_text(epub.package, word, options)
|
10
|
+
end
|
11
|
+
|
12
|
+
def search_element(epub, css: nil, xpath: nil, namespaces: {})
|
13
|
+
Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
|
10
14
|
end
|
11
15
|
end
|
12
16
|
end
|
@@ -4,29 +4,79 @@ module EPUB
|
|
4
4
|
module Searcher
|
5
5
|
class Publication
|
6
6
|
class << self
|
7
|
-
def
|
8
|
-
new(
|
7
|
+
def search_text(package, word, **options)
|
8
|
+
new(package).search_text(word, options)
|
9
|
+
end
|
10
|
+
|
11
|
+
def search_element(package, css: nil, xpath: nil, namespaces: {})
|
12
|
+
new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
|
9
13
|
end
|
10
14
|
end
|
11
15
|
|
12
|
-
def initialize(
|
13
|
-
@
|
16
|
+
def initialize(package)
|
17
|
+
@package = package
|
14
18
|
end
|
15
19
|
|
16
|
-
def
|
20
|
+
def search_text(word, algorithm: :seamless)
|
17
21
|
results = []
|
18
22
|
|
19
|
-
spine = package.spine
|
23
|
+
spine = @package.spine
|
20
24
|
spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
|
21
25
|
spine.each_itemref.with_index do |itemref, index|
|
22
26
|
itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
|
23
|
-
XHTML::ALGORITHMS[algorithm].
|
27
|
+
XHTML::ALGORITHMS[algorithm].search_text(Nokogiri.XML(itemref.item.read), word).each do |sub_result|
|
24
28
|
results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
28
32
|
results
|
29
33
|
end
|
34
|
+
|
35
|
+
# @todo: Refactoring
|
36
|
+
def search_element(css: nil, xpath: nil, namespaces: {})
|
37
|
+
raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
|
38
|
+
|
39
|
+
namespaces = EPUB::NAMESPACES.merge(namespaces)
|
40
|
+
results = []
|
41
|
+
|
42
|
+
spine_step = EPUB::CFI::Step.new(EPUB::Publication::Package::CONTENT_MODELS.index(:spine) * 2)
|
43
|
+
@package.spine.each_itemref.with_index do |itemref, index|
|
44
|
+
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
|
45
|
+
itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
46
|
+
path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
|
47
|
+
content_document = itemref.item.content_document
|
48
|
+
next unless content_document
|
49
|
+
doc = content_document.nokogiri
|
50
|
+
elems = if xpath
|
51
|
+
doc.xpath(xpath, namespaces)
|
52
|
+
else
|
53
|
+
doc.css(css)
|
54
|
+
end
|
55
|
+
elems.each do |elem|
|
56
|
+
path = find_path(elem)
|
57
|
+
results << {
|
58
|
+
location: EPUB::CFI::Location.new([path_to_itemref, path]),
|
59
|
+
package: @package,
|
60
|
+
element: elem
|
61
|
+
}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
results
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def find_path(elem)
|
71
|
+
steps = []
|
72
|
+
until elem.parent.document?
|
73
|
+
index = elem.parent.element_children.index(elem)
|
74
|
+
assertion = elem["id"] ? EPUB::CFI::IDAssertion.new(elem["id"]) : nil
|
75
|
+
steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
76
|
+
elem = elem.parent
|
77
|
+
end
|
78
|
+
EPUB::CFI::Path.new(steps)
|
79
|
+
end
|
30
80
|
end
|
31
81
|
end
|
32
82
|
end
|
data/lib/epub/searcher/result.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'epub/cfi'
|
2
|
+
require 'epub/parser/cfi'
|
3
|
+
|
1
4
|
module EPUB
|
2
5
|
module Searcher
|
3
6
|
class Result
|
@@ -9,7 +12,7 @@ module EPUB
|
|
9
12
|
# # Note that c here is not included in the first element of returned value.
|
10
13
|
# @param steps1 [Array<Step>, Array<Array>]
|
11
14
|
# @param steps2 [Array<Step>, Array<Array>]
|
12
|
-
# @return [Array<Array<Array>>]
|
15
|
+
# @return [Array<Array<Array>>] Three arrays:
|
13
16
|
# 1. "intersection" of +steps1+ and +steps2+. "intersection" here is not the term used in mathematics
|
14
17
|
# 2. remaining steps of +steps1+
|
15
18
|
# 3. remaining steps of +steps2+
|
@@ -41,25 +44,11 @@ module EPUB
|
|
41
44
|
@parent_steps, @start_steps, @end_steps = parent_steps, start_steps, end_steps
|
42
45
|
end
|
43
46
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
when :element
|
48
|
-
path + '/%s*[%d]' % [with_xmlns ? 'xhtml:' : nil, step.index + 1]
|
49
|
-
when :text
|
50
|
-
path + '/text()[%s]' % [step.index + 1]
|
51
|
-
else
|
52
|
-
path
|
53
|
-
end
|
54
|
-
}
|
55
|
-
|
56
|
-
[xpath, @start_steps.last.index]
|
57
|
-
end
|
58
|
-
|
59
|
-
def to_cfi_s
|
60
|
-
[@parent_steps, @start_steps, @end_steps].collect {|steps|
|
61
|
-
steps ? steps.collect(&:to_cfi_s).join : nil
|
47
|
+
def to_cfi
|
48
|
+
str = [@parent_steps, @start_steps, @end_steps].collect {|steps|
|
49
|
+
steps ? steps.collect(&:to_cfi).join : nil
|
62
50
|
}.compact.join(',')
|
51
|
+
EPUB::CFI(str)
|
63
52
|
end
|
64
53
|
|
65
54
|
def ==(other)
|
@@ -80,7 +69,7 @@ module EPUB
|
|
80
69
|
self.info == other.info
|
81
70
|
end
|
82
71
|
|
83
|
-
def
|
72
|
+
def to_cfi
|
84
73
|
case type
|
85
74
|
when :element
|
86
75
|
'/%d%s' % [(index + 1) * 2, id_assertion]
|
data/lib/epub/searcher/xhtml.rb
CHANGED
@@ -10,8 +10,8 @@ module EPUB
|
|
10
10
|
# @param element [Nokogiri::XML::Element, Nokogiri::XML::Document]
|
11
11
|
# @param word [String]
|
12
12
|
# @return [Array<Result>]
|
13
|
-
def
|
14
|
-
new(element.respond_to?(:root) ? element.root : element).
|
13
|
+
def search_text(element, word)
|
14
|
+
new(element.respond_to?(:root) ? element.root : element).search_text(word)
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -23,7 +23,7 @@ module EPUB
|
|
23
23
|
class Restricted < self
|
24
24
|
# @param element [Nokogiri::XML::Element]
|
25
25
|
# @return [Array<Result>]
|
26
|
-
def
|
26
|
+
def search_text(word, element=nil)
|
27
27
|
results = []
|
28
28
|
|
29
29
|
elem_index = 0
|
@@ -35,7 +35,7 @@ module EPUB
|
|
35
35
|
results << Result.new([child_step], nil, nil)
|
36
36
|
end
|
37
37
|
else
|
38
|
-
|
38
|
+
search_text(word, child).each do |sub_result|
|
39
39
|
results << Result.new([child_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
|
40
40
|
end
|
41
41
|
end
|
@@ -62,7 +62,7 @@ module EPUB
|
|
62
62
|
@indices = nil
|
63
63
|
end
|
64
64
|
|
65
|
-
def
|
65
|
+
def search_text(word)
|
66
66
|
unless @indices
|
67
67
|
@indices, @content = build_indices(@element)
|
68
68
|
end
|
data/test/helper.rb
CHANGED
data/test/test_parser_cfi.rb
CHANGED
@@ -18,6 +18,7 @@ class TestParserCFI < Test::Unit::TestCase
|
|
18
18
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[,y])',
|
19
19
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[;s=b])',
|
20
20
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[yyy;s=b])',
|
21
|
+
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[^(;s=b])',
|
21
22
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2[;s=b])',
|
22
23
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:10)',
|
23
24
|
'epubcfi(/6/4[chap01ref]!/4[body01]/16[svgimg])',
|
@@ -25,7 +26,8 @@ class TestParserCFI < Test::Unit::TestCase
|
|
25
26
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:0)',
|
26
27
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3)',
|
27
28
|
'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05],/2/1:1,/3:4)',
|
28
|
-
'epubcfi(/6,:1,:3)'
|
29
|
+
'epubcfi(/6,:1,:3)',
|
30
|
+
'epubcfi(/6/4[chap01ref]!/4[body01]/10[mov01]~23.5@5.75:97.6)'
|
29
31
|
].reduce({}) {|data, cfi|
|
30
32
|
data[cfi] = cfi
|
31
33
|
data
|
@@ -35,4 +37,17 @@ class TestParserCFI < Test::Unit::TestCase
|
|
35
37
|
@parser.parse(cfi)
|
36
38
|
end
|
37
39
|
end
|
40
|
+
|
41
|
+
data([
|
42
|
+
'/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[(;s=b]',
|
43
|
+
'/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[);s=b]'
|
44
|
+
].reduce({}) {|data, cfi|
|
45
|
+
data[cfi] = cfi
|
46
|
+
data
|
47
|
+
})
|
48
|
+
def test_raise_error_on_parsing_invalid_cfi(cfi)
|
49
|
+
assert_raise Racc::ParseError do
|
50
|
+
EPUB::CFI(cfi)
|
51
|
+
end
|
52
|
+
end
|
38
53
|
end
|
data/test/test_searcher.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require_relative 'helper'
|
3
3
|
require 'epub/searcher'
|
4
|
+
require 'epub/parser/cfi'
|
4
5
|
|
5
6
|
class TestSearcher < Test::Unit::TestCase
|
6
7
|
class TestPublication < self
|
@@ -17,7 +18,7 @@ class TestSearcher < Test::Unit::TestCase
|
|
17
18
|
end
|
18
19
|
|
19
20
|
def test_no_result
|
20
|
-
assert_empty EPUB::Searcher::Publication.
|
21
|
+
assert_empty EPUB::Searcher::Publication.search_text(@package, 'no result')
|
21
22
|
end
|
22
23
|
|
23
24
|
def test_simple
|
@@ -26,13 +27,53 @@ class TestSearcher < Test::Unit::TestCase
|
|
26
27
|
[[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 0, {:name => 'head', :id => nil}], [:element, 0, {:name => 'title', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]],
|
27
28
|
[[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 1, {:name => 'body', :id => nil}], [:element, 0, {:name => 'div', :id => nil}], [:element, 0, {:name => 'nav', :id => 'idid'}], [:element, 0, {:name => 'hgroup', :id => nil}], [:element, 1, {:name => 'h1', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]]
|
28
29
|
]),
|
29
|
-
EPUB::Searcher::Publication.
|
30
|
+
EPUB::Searcher::Publication.search_text(@package, 'Content')
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_search_element_xpath_without_namespaces
|
35
|
+
assert_equal(
|
36
|
+
[
|
37
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
|
38
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
|
39
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
|
40
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
|
41
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
|
42
|
+
],
|
43
|
+
EPUB::Searcher::Publication.search_element(@package, xpath: './/xhtml:a').collect {|result| result[:location]}.map(&:to_fragment)
|
44
|
+
)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_search_element_xpath_with_namespaces
|
48
|
+
assert_equal(
|
49
|
+
[
|
50
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
|
51
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
|
52
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
|
53
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
|
54
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
|
55
|
+
],
|
56
|
+
EPUB::Searcher::Publication.search_element(@package, xpath: './/customnamespace:a', namespaces: {'customnamespace' => 'http://www.w3.org/1999/xhtml'}).collect {|result| result[:location]}.map(&:to_fragment)
|
57
|
+
)
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_search_element_css_selector
|
61
|
+
assert_equal(
|
62
|
+
[
|
63
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/2)",
|
64
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4)",
|
65
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/2)",
|
66
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/4)",
|
67
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/6)",
|
68
|
+
"epubcfi(/4/2!/4/2/2[idid]/4/4/4/8)"
|
69
|
+
],
|
70
|
+
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
30
71
|
)
|
31
72
|
end
|
32
73
|
|
33
74
|
class TesetResult < self
|
34
|
-
def
|
35
|
-
assert_equal '/6/2!/4/2/2[idid]/2/4/1,:9,:16', EPUB::Searcher::Publication.
|
75
|
+
def test_to_cfi
|
76
|
+
assert_equal 'epubcfi(/6/2!/4/2/2[idid]/2/4/1,:9,:16)', EPUB::Searcher::Publication.search_text(@package, 'Content').last.to_cfi.to_fragment
|
36
77
|
end
|
37
78
|
end
|
38
79
|
end
|
@@ -48,35 +89,35 @@ class TestSearcher < Test::Unit::TestCase
|
|
48
89
|
|
49
90
|
module TestSearch
|
50
91
|
def test_no_result
|
51
|
-
assert_empty @searcher.
|
92
|
+
assert_empty @searcher.search_text(@h1, 'no result')
|
52
93
|
end
|
53
94
|
|
54
95
|
def test_simple
|
55
|
-
assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.
|
96
|
+
assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.search_text(@h1, 'Content')
|
56
97
|
end
|
57
98
|
|
58
99
|
def test_multiple_text_result
|
59
|
-
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.
|
100
|
+
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.search_text(@h1, 'o')
|
60
101
|
end
|
61
102
|
|
62
103
|
def test_text_after_element
|
63
104
|
elem = Nokogiri.XML('<root><elem>inner</elem>after</root>')
|
64
105
|
|
65
|
-
assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.
|
106
|
+
assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.search_text(elem, 'after')
|
66
107
|
end
|
67
108
|
|
68
109
|
def test_entity_reference
|
69
110
|
elem = Nokogiri.XML('<root>before<after</root>')
|
70
111
|
|
71
|
-
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.
|
112
|
+
assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.search_text(elem, '<')
|
72
113
|
end
|
73
114
|
|
74
115
|
def test_nested_result
|
75
|
-
assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.
|
116
|
+
assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.search_text(@nav, '第二節')
|
76
117
|
end
|
77
118
|
|
78
119
|
def test_img
|
79
|
-
assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.
|
120
|
+
assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.search_text(@nav, '第三節')
|
80
121
|
end
|
81
122
|
end
|
82
123
|
|
@@ -99,27 +140,22 @@ class TestSearcher < Test::Unit::TestCase
|
|
99
140
|
|
100
141
|
def test_seamless
|
101
142
|
elem = Nokogiri.XML('<root>This <em>includes</em> a child element.</root>')
|
102
|
-
assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.
|
143
|
+
assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.search_text(elem, 'This includes a child element.')
|
103
144
|
end
|
104
145
|
end
|
105
146
|
|
106
147
|
class TestResult < self
|
107
148
|
def setup
|
108
149
|
super
|
109
|
-
@result = EPUB::Searcher::XHTML::Restricted.
|
110
|
-
end
|
111
|
-
|
112
|
-
def test_to_xpath_and_offset
|
113
|
-
assert_equal ['./*[2]/*[1]/*[1]/*[2]/*[2]/*[2]/*[2]/*[1]/text()[1]', 0], @result.to_xpath_and_offset
|
114
|
-
assert_equal ['./xhtml:*[2]/xhtml:*[1]/xhtml:*[1]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[1]/text()[1]', 0], @result.to_xpath_and_offset(true)
|
150
|
+
@result = EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第二節').first
|
115
151
|
end
|
116
152
|
|
117
|
-
def
|
118
|
-
assert_equal '/4/2/2[idid]/4/4/4/4/2/1,:0,:3', @result.
|
153
|
+
def test_to_cfi
|
154
|
+
assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/4/2/1,:0,:3)', @result.to_cfi.to_fragment
|
119
155
|
end
|
120
156
|
|
121
|
-
def
|
122
|
-
assert_equal '/4/2/2[idid]/4/4/4/6/2/2', EPUB::Searcher::XHTML::Restricted.
|
157
|
+
def test_to_cfi_img
|
158
|
+
assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/6/2/2)', EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第三節').first.to_cfi.to_fragment
|
123
159
|
end
|
124
160
|
end
|
125
161
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -301,6 +301,7 @@ extra_rdoc_files: []
|
|
301
301
|
files:
|
302
302
|
- ".gemtest"
|
303
303
|
- ".gitignore"
|
304
|
+
- ".gitlab-ci.yml"
|
304
305
|
- ".gitmodules"
|
305
306
|
- ".travis.yml"
|
306
307
|
- ".yardopts"
|
@@ -419,7 +420,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
419
420
|
version: '0'
|
420
421
|
requirements: []
|
421
422
|
rubyforge_project:
|
422
|
-
rubygems_version: 2.
|
423
|
+
rubygems_version: 2.6.8
|
423
424
|
signing_key:
|
424
425
|
specification_version: 4
|
425
426
|
summary: EPUB 3 Parser
|