epub-parser 0.2.8 → 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.markdown +6 -0
- data/README.markdown +7 -2
- data/epub-parser.gemspec +0 -1
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/searcher.rb +4 -0
- data/lib/epub/searcher/publication.rb +50 -1
- data/test/test_searcher.rb +16 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2347178e16da38f1e13e9886ef7e54ba0357288
|
4
|
+
data.tar.gz: f5b32b2028baca3868b4a7a63925c6a6c446511b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 268416ae749188d638b04fb912dec3b2204f0a721de59072f58f812c365f5339c698769bba021f3547638090178fdd6da593e7f3e04b4e30c7d2e17637099313
|
7
|
+
data.tar.gz: dbf6ca7dafed30e6a950f737f2bec8b96588199a4cac14fd9d9e78398698a80eadcbf8f0e627dcd07fade31b4d133ccfe8fc49c97bbd8e05c170dea741f57692
|
data/CHANGELOG.markdown
CHANGED
data/README.markdown
CHANGED
@@ -138,8 +138,8 @@ REQUIREMENTS
|
|
138
138
|
* `patch` command to install Nokogiri
|
139
139
|
* C compiler to compile Nokogiri
|
140
140
|
|
141
|
-
|
142
|
-
|
141
|
+
Similar Efforts
|
142
|
+
---------------
|
143
143
|
* [gepub](https://github.com/skoji/gepub) - a generic EPUB library for Ruby
|
144
144
|
* [epubinfo](https://github.com/chdorner/epubinfo) - Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
|
145
145
|
* [ReVIEW](https://github.com/kmuto/review) - ReVIEW is a easy-to-use digital publishing system for books and ebooks.
|
@@ -152,6 +152,11 @@ If you find other gems, please tell me or request a pull request.
|
|
152
152
|
RECENT CHANGES
|
153
153
|
--------------
|
154
154
|
|
155
|
+
### 0.2.9
|
156
|
+
|
157
|
+
* Fix a bug that `Searcher.search_element` returns wrong CFI
|
158
|
+
* Add `Searcher.search_by_cfi`
|
159
|
+
|
155
160
|
### 0.2.8
|
156
161
|
|
157
162
|
* Change Searcher API: #search -> #search_text
|
data/epub-parser.gemspec
CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.test_files = s.files & Dir['{test,spec,features}/**/*.{rb,feature}']
|
25
25
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
26
26
|
s.require_paths = ["lib"]
|
27
|
-
s.has_rdoc = 'yard'
|
28
27
|
|
29
28
|
s.add_development_dependency 'rake'
|
30
29
|
s.add_development_dependency 'rubygems-tasks'
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/searcher.rb
CHANGED
@@ -12,6 +12,10 @@ module EPUB
|
|
12
12
|
def search_element(epub, css: nil, xpath: nil, namespaces: {})
|
13
13
|
Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
|
14
14
|
end
|
15
|
+
|
16
|
+
def search_by_cfi(epub, cfi)
|
17
|
+
Publication.search_by_cfi(epub.package, cfi)
|
18
|
+
end
|
15
19
|
end
|
16
20
|
end
|
17
21
|
end
|
@@ -11,6 +11,10 @@ module EPUB
|
|
11
11
|
def search_element(package, css: nil, xpath: nil, namespaces: {})
|
12
12
|
new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
|
13
13
|
end
|
14
|
+
|
15
|
+
def search_by_cfi(package, cfi)
|
16
|
+
new(package).search_by_cfi(cfi)
|
17
|
+
end
|
14
18
|
end
|
15
19
|
|
16
20
|
def initialize(package)
|
@@ -33,13 +37,17 @@ module EPUB
|
|
33
37
|
end
|
34
38
|
|
35
39
|
# @todo: Refactoring
|
40
|
+
# @return [Array<Hash>] An array of search results. Each result is composed of:
|
41
|
+
# :element: [Nokogiri::XML::Element] Found element
|
42
|
+
# :location: [EPUB::CFI::Location] CFI that indicates the element
|
43
|
+
# :package: [EPUB::Publication::Package] Package that the element belongs to
|
36
44
|
def search_element(css: nil, xpath: nil, namespaces: {})
|
37
45
|
raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
|
38
46
|
|
39
47
|
namespaces = EPUB::NAMESPACES.merge(namespaces)
|
40
48
|
results = []
|
41
49
|
|
42
|
-
spine_step = EPUB::CFI::Step.new(EPUB::Publication::Package::CONTENT_MODELS.index(:spine) * 2)
|
50
|
+
spine_step = EPUB::CFI::Step.new((EPUB::Publication::Package::CONTENT_MODELS.index(:spine) + 1) * 2)
|
43
51
|
@package.spine.each_itemref.with_index do |itemref, index|
|
44
52
|
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
|
45
53
|
itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
@@ -65,6 +73,47 @@ module EPUB
|
|
65
73
|
results
|
66
74
|
end
|
67
75
|
|
76
|
+
# @note Currently can handle only location CFI without offset
|
77
|
+
# @todo Use XHTML module
|
78
|
+
# @todo Handle CFI with offset
|
79
|
+
# @todo Handle range CFI
|
80
|
+
# @param [EPUB::CFI] cfi
|
81
|
+
# @return [Array] Path in EPUB Rendition
|
82
|
+
def search_by_cfi(cfi)
|
83
|
+
# steal from pirka's find_item_and_element
|
84
|
+
path_in_package = cfi.paths.first
|
85
|
+
spine = @package.spine
|
86
|
+
model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
|
87
|
+
raise NotImplementedError, "Currently, #{__method__} supports spine only(#{cfi})" unless model == spine
|
88
|
+
raise ArgumentError, "Cannot identify <itemref>'s child" if path_in_package.steps.length > 2
|
89
|
+
|
90
|
+
step_to_itemref = path_in_package.steps[1]
|
91
|
+
itemref = spine.itemrefs[step_to_itemref.value / 2 - 1]
|
92
|
+
|
93
|
+
doc = itemref.item.content_document.nokogiri
|
94
|
+
path_in_doc = cfi.paths[1]
|
95
|
+
current_node = doc.root
|
96
|
+
path_in_doc.steps.each do |step|
|
97
|
+
if step.element?
|
98
|
+
current_node = current_node.element_children[step.value / 2 - 1]
|
99
|
+
else
|
100
|
+
element_index = (step.value - 1) / 2 - 1
|
101
|
+
if element_index == -1
|
102
|
+
current_node = current_node.children.first
|
103
|
+
else
|
104
|
+
prev = current_node.element_children[element_index]
|
105
|
+
break unless prev
|
106
|
+
current_node = prev.next_sibling
|
107
|
+
break unless current_node
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
raise NotImplementedError, "Currently, #{__method__} doesn't support deeper DOM tree such as including <iframe>" if cfi.paths[2]
|
113
|
+
|
114
|
+
[itemref, current_node]
|
115
|
+
end
|
116
|
+
|
68
117
|
private
|
69
118
|
|
70
119
|
def find_path(elem)
|
data/test/test_searcher.rb
CHANGED
@@ -34,11 +34,11 @@ class TestSearcher < Test::Unit::TestCase
|
|
34
34
|
def test_search_element_xpath_without_namespaces
|
35
35
|
assert_equal(
|
36
36
|
[
|
37
|
-
"epubcfi(/
|
38
|
-
"epubcfi(/
|
39
|
-
"epubcfi(/
|
40
|
-
"epubcfi(/
|
41
|
-
"epubcfi(/
|
37
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/2/2)",
|
38
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/2/2)",
|
39
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/4/2)",
|
40
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/6/2)",
|
41
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/8/2)"
|
42
42
|
],
|
43
43
|
EPUB::Searcher::Publication.search_element(@package, xpath: './/xhtml:a').collect {|result| result[:location]}.map(&:to_fragment)
|
44
44
|
)
|
@@ -47,11 +47,11 @@ class TestSearcher < Test::Unit::TestCase
|
|
47
47
|
def test_search_element_xpath_with_namespaces
|
48
48
|
assert_equal(
|
49
49
|
[
|
50
|
-
"epubcfi(/
|
51
|
-
"epubcfi(/
|
52
|
-
"epubcfi(/
|
53
|
-
"epubcfi(/
|
54
|
-
"epubcfi(/
|
50
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/2/2)",
|
51
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/2/2)",
|
52
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/4/2)",
|
53
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/6/2)",
|
54
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/8/2)"
|
55
55
|
],
|
56
56
|
EPUB::Searcher::Publication.search_element(@package, xpath: './/customnamespace:a', namespaces: {'customnamespace' => 'http://www.w3.org/1999/xhtml'}).collect {|result| result[:location]}.map(&:to_fragment)
|
57
57
|
)
|
@@ -60,12 +60,12 @@ class TestSearcher < Test::Unit::TestCase
|
|
60
60
|
def test_search_element_css_selector
|
61
61
|
assert_equal(
|
62
62
|
[
|
63
|
-
"epubcfi(/
|
64
|
-
"epubcfi(/
|
65
|
-
"epubcfi(/
|
66
|
-
"epubcfi(/
|
67
|
-
"epubcfi(/
|
68
|
-
"epubcfi(/
|
63
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/2)",
|
64
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4)",
|
65
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/2)",
|
66
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/4)",
|
67
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/6)",
|
68
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/8)"
|
69
69
|
],
|
70
70
|
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
71
71
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|