epub-parser 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.markdown +6 -0
- data/README.markdown +7 -2
- data/epub-parser.gemspec +0 -1
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/searcher.rb +4 -0
- data/lib/epub/searcher/publication.rb +50 -1
- data/test/test_searcher.rb +16 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2347178e16da38f1e13e9886ef7e54ba0357288
|
4
|
+
data.tar.gz: f5b32b2028baca3868b4a7a63925c6a6c446511b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 268416ae749188d638b04fb912dec3b2204f0a721de59072f58f812c365f5339c698769bba021f3547638090178fdd6da593e7f3e04b4e30c7d2e17637099313
|
7
|
+
data.tar.gz: dbf6ca7dafed30e6a950f737f2bec8b96588199a4cac14fd9d9e78398698a80eadcbf8f0e627dcd07fade31b4d133ccfe8fc49c97bbd8e05c170dea741f57692
|
data/CHANGELOG.markdown
CHANGED
data/README.markdown
CHANGED
@@ -138,8 +138,8 @@ REQUIREMENTS
|
|
138
138
|
* `patch` command to install Nokogiri
|
139
139
|
* C compiler to compile Nokogiri
|
140
140
|
|
141
|
-
|
142
|
-
|
141
|
+
Similar Efforts
|
142
|
+
---------------
|
143
143
|
* [gepub](https://github.com/skoji/gepub) - a generic EPUB library for Ruby
|
144
144
|
* [epubinfo](https://github.com/chdorner/epubinfo) - Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
|
145
145
|
* [ReVIEW](https://github.com/kmuto/review) - ReVIEW is an easy-to-use digital publishing system for books and ebooks.
|
@@ -152,6 +152,11 @@ If you find other gems, please tell me or request a pull request.
|
|
152
152
|
RECENT CHANGES
|
153
153
|
--------------
|
154
154
|
|
155
|
+
### 0.2.9
|
156
|
+
|
157
|
+
* Fix a bug that `Searcher.search_element` returns wrong CFI
|
158
|
+
* Add `Searcher.search_by_cfi`
|
159
|
+
|
155
160
|
### 0.2.8
|
156
161
|
|
157
162
|
* Change Searcher API: #search -> #search_text
|
data/epub-parser.gemspec
CHANGED
@@ -24,7 +24,6 @@ Gem::Specification.new do |s|
|
|
24
24
|
s.test_files = s.files & Dir['{test,spec,features}/**/*.{rb,feature}']
|
25
25
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
26
26
|
s.require_paths = ["lib"]
|
27
|
-
s.has_rdoc = 'yard'
|
28
27
|
|
29
28
|
s.add_development_dependency 'rake'
|
30
29
|
s.add_development_dependency 'rubygems-tasks'
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/searcher.rb
CHANGED
@@ -12,6 +12,10 @@ module EPUB
|
|
12
12
|
def search_element(epub, css: nil, xpath: nil, namespaces: {})
|
13
13
|
Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
|
14
14
|
end
|
15
|
+
|
16
|
+
def search_by_cfi(epub, cfi)
|
17
|
+
Publication.search_by_cfi(epub.package, cfi)
|
18
|
+
end
|
15
19
|
end
|
16
20
|
end
|
17
21
|
end
|
@@ -11,6 +11,10 @@ module EPUB
|
|
11
11
|
def search_element(package, css: nil, xpath: nil, namespaces: {})
|
12
12
|
new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
|
13
13
|
end
|
14
|
+
|
15
|
+
def search_by_cfi(package, cfi)
|
16
|
+
new(package).search_by_cfi(cfi)
|
17
|
+
end
|
14
18
|
end
|
15
19
|
|
16
20
|
def initialize(package)
|
@@ -33,13 +37,17 @@ module EPUB
|
|
33
37
|
end
|
34
38
|
|
35
39
|
# @todo: Refactoring
|
40
|
+
# @return [Array<Hash>] An array of search results. Each result is composed of:
|
41
|
+
# :element: [Nokogiri::XML::Element] Found element
|
42
|
+
# :location: [EPUB::CFI::Location] CFI that indicates the element
|
43
|
+
# :package: [EPUB::Publication::Package] Package that the element belongs to
|
36
44
|
def search_element(css: nil, xpath: nil, namespaces: {})
|
37
45
|
raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
|
38
46
|
|
39
47
|
namespaces = EPUB::NAMESPACES.merge(namespaces)
|
40
48
|
results = []
|
41
49
|
|
42
|
-
spine_step = EPUB::CFI::Step.new(EPUB::Publication::Package::CONTENT_MODELS.index(:spine) * 2)
|
50
|
+
spine_step = EPUB::CFI::Step.new((EPUB::Publication::Package::CONTENT_MODELS.index(:spine) + 1) * 2)
|
43
51
|
@package.spine.each_itemref.with_index do |itemref, index|
|
44
52
|
assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
|
45
53
|
itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
|
@@ -65,6 +73,47 @@ module EPUB
|
|
65
73
|
results
|
66
74
|
end
|
67
75
|
|
76
|
+
# @note Currently can handle only location CFI without offset
|
77
|
+
# @todo Use XHTML module
|
78
|
+
# @todo Handle CFI with offset
|
79
|
+
# @todo Handle range CFI
|
80
|
+
# @param [EPUB::CFI] cfi
|
81
|
+
# @return [Array] Path in EPUB Rendition
|
82
|
+
def search_by_cfi(cfi)
|
83
|
+
# stolen from pirka's find_item_and_element
|
84
|
+
path_in_package = cfi.paths.first
|
85
|
+
spine = @package.spine
|
86
|
+
model = [@package.metadata, @package.manifest, spine, @package.guide, @package.bindings].compact[path_in_package.steps.first.value / 2 - 1]
|
87
|
+
raise NotImplementedError, "Currently, #{__method__} supports spine only(#{cfi})" unless model == spine
|
88
|
+
raise ArgumentError, "Cannot identify <itemref>'s child" if path_in_package.steps.length > 2
|
89
|
+
|
90
|
+
step_to_itemref = path_in_package.steps[1]
|
91
|
+
itemref = spine.itemrefs[step_to_itemref.value / 2 - 1]
|
92
|
+
|
93
|
+
doc = itemref.item.content_document.nokogiri
|
94
|
+
path_in_doc = cfi.paths[1]
|
95
|
+
current_node = doc.root
|
96
|
+
path_in_doc.steps.each do |step|
|
97
|
+
if step.element?
|
98
|
+
current_node = current_node.element_children[step.value / 2 - 1]
|
99
|
+
else
|
100
|
+
element_index = (step.value - 1) / 2 - 1
|
101
|
+
if element_index == -1
|
102
|
+
current_node = current_node.children.first
|
103
|
+
else
|
104
|
+
prev = current_node.element_children[element_index]
|
105
|
+
break unless prev
|
106
|
+
current_node = prev.next_sibling
|
107
|
+
break unless current_node
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
raise NotImplementedError, "Currently, #{__method__} doesn't support deeper DOM tree such as including <iframe>" if cfi.paths[2]
|
113
|
+
|
114
|
+
[itemref, current_node]
|
115
|
+
end
|
116
|
+
|
68
117
|
private
|
69
118
|
|
70
119
|
def find_path(elem)
|
data/test/test_searcher.rb
CHANGED
@@ -34,11 +34,11 @@ class TestSearcher < Test::Unit::TestCase
|
|
34
34
|
def test_search_element_xpath_without_namespaces
|
35
35
|
assert_equal(
|
36
36
|
[
|
37
|
-
"epubcfi(/
|
38
|
-
"epubcfi(/
|
39
|
-
"epubcfi(/
|
40
|
-
"epubcfi(/
|
41
|
-
"epubcfi(/
|
37
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/2/2)",
|
38
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/2/2)",
|
39
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/4/2)",
|
40
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/6/2)",
|
41
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/8/2)"
|
42
42
|
],
|
43
43
|
EPUB::Searcher::Publication.search_element(@package, xpath: './/xhtml:a').collect {|result| result[:location]}.map(&:to_fragment)
|
44
44
|
)
|
@@ -47,11 +47,11 @@ class TestSearcher < Test::Unit::TestCase
|
|
47
47
|
def test_search_element_xpath_with_namespaces
|
48
48
|
assert_equal(
|
49
49
|
[
|
50
|
-
"epubcfi(/
|
51
|
-
"epubcfi(/
|
52
|
-
"epubcfi(/
|
53
|
-
"epubcfi(/
|
54
|
-
"epubcfi(/
|
50
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/2/2)",
|
51
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/2/2)",
|
52
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/4/2)",
|
53
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/6/2)",
|
54
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/8/2)"
|
55
55
|
],
|
56
56
|
EPUB::Searcher::Publication.search_element(@package, xpath: './/customnamespace:a', namespaces: {'customnamespace' => 'http://www.w3.org/1999/xhtml'}).collect {|result| result[:location]}.map(&:to_fragment)
|
57
57
|
)
|
@@ -60,12 +60,12 @@ class TestSearcher < Test::Unit::TestCase
|
|
60
60
|
def test_search_element_css_selector
|
61
61
|
assert_equal(
|
62
62
|
[
|
63
|
-
"epubcfi(/
|
64
|
-
"epubcfi(/
|
65
|
-
"epubcfi(/
|
66
|
-
"epubcfi(/
|
67
|
-
"epubcfi(/
|
68
|
-
"epubcfi(/
|
63
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/2)",
|
64
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4)",
|
65
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/2)",
|
66
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/4)",
|
67
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/6)",
|
68
|
+
"epubcfi(/6/2!/4/2/2[idid]/4/4/4/8)"
|
69
69
|
],
|
70
70
|
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
71
71
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|