epub-parser 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e92165c76652996a441e9996bb9bdee8bb0e7b04
4
- data.tar.gz: c7a62b70d282f9b8343c0b850db21d7f078961d9
3
+ metadata.gz: 84aac696d82b7eb55bf80b1448685ac4c210abdd
4
+ data.tar.gz: a32f0e557b928502e0825cbb6b2d808f0fc9dd1d
5
5
  SHA512:
6
- metadata.gz: f56160b8148faf112e6ff8941a380223f1ce4d0ad114b2375f8e9793fe4eb0d7d9e18dda540f36c1ce702c90c7791006f5fa65c270498d5d6e5b341862e6aa37
7
- data.tar.gz: f139ae34069e6bcfe85d23ae5b4db32cecd99ae90cfc1fbe12ba1b2541f31d93329a6215cf099074b0fc0a3b03996ea6fca2508ad9b3165f27522be5fb8c759e
6
+ metadata.gz: 5a28c163db08c10c57f733d6ffc9a68a768b30a5fe84439bff1fbb03941448c822eb916f3338016b1cd8f2a96954f69a775be0f65bb629231767dfb995318934
7
+ data.tar.gz: f5535095748701b940555ca424f3488b4e7b007c39d6e2688cf16d00394a9e03c2c6f12290c820ca15aee2a11ce0ed43e5745225507c6baab9c2a7e33bf513ab
@@ -0,0 +1,19 @@
1
+ before_script:
2
+ - apt-get update -qq && apt-get install -y zip
3
+ - ruby -v
4
+ - which ruby
5
+ - gem install bundler --no-document
6
+ - bundle install --jobs=$(nproc) "${FLAGS[@]}"
7
+
8
+ test:2.2:
9
+ image: ruby:2.2
10
+ script:
11
+ - bundle exec rake test
12
+
13
+ test:2.3:
14
+ image: ruby:2.3
15
+ script: bundle exec rake test
16
+
17
+ test:2.4:
18
+ image: ruby:2.4
19
+ script: bundle exec rake test
@@ -1,4 +1,4 @@
1
1
  rvm:
2
- - "2.1.10"
3
2
  - "2.2.5"
4
3
  - "2.3.1"
4
+ - "2.4.0"
@@ -1,6 +1,12 @@
1
1
  CHANGELOG
2
2
  =========
3
3
 
4
+ 0.2.8
5
+ -----
6
+
7
+ * Change Searcher API: #search -> #search_text
8
+ * Add Searcher.search_element
9
+
4
10
  0.2.7
5
11
  -----
6
12
 
data/Gemfile CHANGED
@@ -1,2 +1,6 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
+
4
+ if RUBY_PLATFORM.match /darwin/
5
+ gem 'terminal-notifier'
6
+ end
@@ -1,6 +1,7 @@
1
1
  EPUB Parser
2
2
  ===========
3
3
  [![Build Status](https://secure.travis-ci.org/KitaitiMakoto/epub-parser.png?branch=master)](http://travis-ci.org/KitaitiMakoto/epub-parser)
4
+ [![build status](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
4
5
  [![Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser.png)](https://gemnasium.com/KitaitiMakoto/epub-parser)
5
6
  [![Gem Version](https://badge.fury.io/rb/epub-parser.svg)](http://badge.fury.io/rb/epub-parser)
6
7
 
@@ -31,7 +32,7 @@ USAGE
31
32
 
32
33
  See document's {file:docs/Home.markdown} or [API Documentation][rubydoc] for more info.
33
34
 
34
- [rubydoc]: http://rubydoc.info/gems/epub-parser
35
+ [rubydoc]: http://www.rubydoc.info/gems/epub-parser
35
36
 
36
37
  ### `epubinfo` command-line tool
37
38
 
@@ -114,7 +115,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
114
115
 
115
116
  Alternatively, you can generate the documentation with the yardoc command:
116
117
 
117
- $ git clone https://github.com/KitaitiMakoto/epub-parser.git
118
+ $ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
118
119
  $ cd epub-parser
119
120
  $ bundle install --path=deps
120
121
  $ bundle exec rake doc:yard
@@ -151,6 +152,11 @@ If you find other gems, please tell me or request a pull request.
151
152
  RECENT CHANGES
152
153
  --------------
153
154
 
155
+ ### 0.2.8
156
+
157
+ * Change Searcher API: #search -> #search_text
158
+ * Add Searcher.search_element
159
+
154
160
  ### 0.2.7
155
161
 
156
162
  * Add `EPUB::Metadata#children`
@@ -169,11 +175,6 @@ RECENT CHANGES
169
175
 
170
176
  [multi-rendition]: http://www.idpf.org/epub/renditions/multiple/
171
177
 
172
- ### 0.2.5
173
-
174
- * [BUG FIX]Don't load Zip/Ruby if unneccessary
175
- * Add `EPUB::CFI::PhysicalContainer.find_adapter`
176
-
177
178
  See {file:CHANGELOG.markdown} for older changelogs and details.
178
179
 
179
180
  TODOS
@@ -122,7 +122,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
122
122
 
123
123
  Alternatively, you can generate the documentation with the yardoc command:
124
124
 
125
- $ git clone https://github.com/KitaitiMakoto/epub-parser.git
125
+ $ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
126
126
  $ cd epub-parser
127
127
  $ bundle install --path=deps
128
128
  $ bundle exec rake doc:yard
@@ -8,37 +8,37 @@ Searcher
8
8
  Example
9
9
  -------
10
10
 
11
- epub = EPUB::Parser.parse('childrens-literature-20130206.epub')
11
+ epub = EPUB::Parser.parse('childrens-literature.epub')
12
12
  search_word = 'INTRODUCTORY'
13
- results = EPUB::Searcher.search(epub, search_word)
14
- # => [#<EPUB::Searcher::Result:0x007f938ed517a8
15
- # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f938ed51a50 @index=12, @info={}, @type=:character>],
13
+ results = EPUB::Searcher.search_text(epub, search_word)
14
+ # => [#<EPUB::Searcher::Result:0x007f80ccde9528
15
+ # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9730 @index=12, @info={}, @type=:character>],
16
16
  # @parent_steps=
17
- # [#<EPUB::Searcher::Result::Step:0x007f938f1c1e78 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
18
- # #<EPUB::Searcher::Result::Step:0x007f938f1caa78 @index=1, @info={:id=>nil}, @type=:itemref>,
19
- # #<EPUB::Searcher::Result::Step:0x007f938ed521d0 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
20
- # #<EPUB::Searcher::Result::Step:0x007f938ed52158 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
21
- # #<EPUB::Searcher::Result::Step:0x007f938ed52108 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
22
- # #<EPUB::Searcher::Result::Step:0x007f938ed52090 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
23
- # #<EPUB::Searcher::Result::Step:0x007f938ed52040 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
24
- # #<EPUB::Searcher::Result::Step:0x007f938ed51ff0 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
25
- # #<EPUB::Searcher::Result::Step:0x007f938ed51f78 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
26
- # #<EPUB::Searcher::Result::Step:0x007f938ed51f28 @index=0, @info={}, @type=:text>],
27
- # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f938ed51e88 @index=0, @info={}, @type=:character>]>,
28
- # #<EPUB::Searcher::Result:0x007f938ef8f5d8
29
- # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f938ef8f808 @index=12, @info={}, @type=:character>],
17
+ # [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
18
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccf3d3e8 @index=1, @info={:id=>nil}, @type=:itemref>,
19
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9e88 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
20
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9e38 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
21
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9de8 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
22
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9d98 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
23
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9d48 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
24
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9ca8 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
25
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9c08 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
26
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde9bb8 @index=0, @info={}, @type=:text>],
27
+ # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9af0 @index=0, @info={}, @type=:character>]>,
28
+ # #<EPUB::Searcher::Result:0x007f80ccebcb30
29
+ # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebcdb0 @index=12, @info={}, @type=:character>],
30
30
  # @parent_steps=
31
- # [#<EPUB::Searcher::Result::Step:0x007f938f1c1e78 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
32
- # #<EPUB::Searcher::Result::Step:0x007f938ed51730 @index=2, @info={:id=>nil}, @type=:itemref>,
33
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fce0 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
34
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fc90 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
35
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fc40 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
36
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fbf0 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
37
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fb28 @index=0, @info={}, @type=:text>],
38
- # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f938ef8fa88 @index=0, @info={}, @type=:character>]>]
39
- puts results.collect(&:to_cfi_s)
40
- # /6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12
41
- # /6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12
31
+ # [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
32
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccde94b0 @index=2, @info={:id=>nil}, @type=:itemref>,
33
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccebd328 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
34
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccebd2d8 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
35
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccebd260 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
36
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccebd210 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
37
+ #  #<EPUB::Searcher::Result::Step:0x007f80ccebd198 @index=0, @info={}, @type=:text>],
38
+ # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebd0d0 @index=0, @info={}, @type=:character>]>]
39
+ puts results.collect(&:to_cfi).collect(&:to_fragment)
40
+ # epubcfi(/6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12)
41
+ # epubcfi(/6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12)
42
42
  # => nil
43
43
 
44
44
  Search result
@@ -91,4 +91,19 @@ because the words 'search' and 'word' are not in the same element.
91
91
 
92
92
  To use the restricted searcher, specify the `algorithm` option for the `search_text` method:
93
93
 
94
- results = EPUB::Searcher.search(epub, search_word, algorithm: :restricted)
94
+ results = EPUB::Searcher.search_text(epub, search_word, algorithm: :restricted)
95
+
96
+ Element Searcher
97
+ ----------------
98
+
99
+ You can search XHTML elements by CSS selector or XPath.
100
+
101
+ EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
102
+ # => ["epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313])",
103
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/2[np-315])",
104
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317])",
105
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6)",
106
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319])",
107
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319]/4/2)",
108
+ # :
109
+ # :
@@ -23,10 +23,6 @@ def main(argv)
23
23
 
24
24
  epub = EPUB::Parser.parse(epub_path)
25
25
  epub.package.spine.each_itemref.with_index do |itemref, i|
26
- itemref_step = {
27
- :step => (i + 1) * 2,
28
- :id => itemref.id
29
- }
30
26
  assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
31
27
  itemref_step = EPUB::CFI::Step.new((i + 1) * 2, assertion)
32
28
  path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
@@ -5,6 +5,8 @@ module EPUB
5
5
 
6
6
  def initialize
7
7
  @navigations = []
8
+ @hidden = false
9
+ @parent = nil
8
10
  super
9
11
  end
10
12
 
@@ -7,7 +7,15 @@ module EPUB
7
7
  class OCF
8
8
  # @todo: Make thread safe
9
9
  class PhysicalContainer
10
- class NoEntry < StandardError; end
10
+ class NoEntry < StandardError
11
+ class << self
12
+ def from_error(error)
13
+ no_entry = new(error.message)
14
+ no_entry.set_backtrace error.backtrace
15
+ no_entry
16
+ end
17
+ end
18
+ end
11
19
 
12
20
  @adapter = ArchiveZip
13
21
 
@@ -9,9 +9,7 @@ module EPUB
9
9
  def read(path_name)
10
10
  ::File.read(::File.join(@container_path, path_name))
11
11
  rescue ::Errno::ENOENT => error
12
- no_entry = NoEntry.new(error.message)
13
- no_entry.set_backtrace error.backtrace
14
- raise no_entry
12
+ raise NoEntry.from_error(error)
15
13
  end
16
14
  end
17
15
  end
@@ -20,9 +20,7 @@ module EPUB
20
20
  def read(path_name)
21
21
  (@container_path + path_name).read
22
22
  rescue ::OpenURI::HTTPError => error
23
- no_entry = NoEntry.new(error.message)
24
- no_entry.set_backtrace error.backtrace
25
- raise no_entry
23
+ raise NoEntry.from_error(error)
26
24
  end
27
25
  end
28
26
  end
@@ -11,9 +11,7 @@ module EPUB
11
11
  @archive = archive
12
12
  yield self
13
13
  rescue ::Zip::Error => error
14
- no_entry = NoEntry.new(error.message)
15
- no_entry.set_backtrace error.backtrace
16
- raise no_entry
14
+ raise NoEntry.from_error(error)
17
15
  ensure
18
16
  @archive = nil
19
17
  end
@@ -28,9 +26,7 @@ module EPUB
28
26
  open {|container| container.read(path_name)}
29
27
  end
30
28
  rescue ::Zip::Error => error
31
- no_entry = NoEntry.new(error.message)
32
- no_entry.set_backtrace error.backtrace
33
- raise no_entry
29
+ raise NoEntry.from_error(error)
34
30
  ensure
35
31
  @archive = nil
36
32
  end
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.2.7"
3
+ VERSION = "0.2.8"
4
4
  end
5
5
  end
@@ -101,6 +101,7 @@ module EPUB
101
101
 
102
102
  def initialize
103
103
  @properties = Set.new
104
+ @full_path = nil
104
105
  end
105
106
 
106
107
  def properties=(props)
@@ -5,8 +5,12 @@ require 'epub/searcher/xhtml'
5
5
  module EPUB
6
6
  module Searcher
7
7
  class << self
8
- def search(epub, word, **options)
9
- Publication.search(epub.package, word, options)
8
+ def search_text(epub, word, **options)
9
+ Publication.search_text(epub.package, word, options)
10
+ end
11
+
12
+ def search_element(epub, css: nil, xpath: nil, namespaces: {})
13
+ Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
10
14
  end
11
15
  end
12
16
  end
@@ -4,29 +4,79 @@ module EPUB
4
4
  module Searcher
5
5
  class Publication
6
6
  class << self
7
- def search(package, word, **options)
8
- new(word).search(package, options)
7
+ def search_text(package, word, **options)
8
+ new(package).search_text(word, options)
9
+ end
10
+
11
+ def search_element(package, css: nil, xpath: nil, namespaces: {})
12
+ new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
9
13
  end
10
14
  end
11
15
 
12
- def initialize(word)
13
- @word = word
16
+ def initialize(package)
17
+ @package = package
14
18
  end
15
19
 
16
- def search(package, algorithm: :seamless)
20
+ def search_text(word, algorithm: :seamless)
17
21
  results = []
18
22
 
19
- spine = package.spine
23
+ spine = @package.spine
20
24
  spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
21
25
  spine.each_itemref.with_index do |itemref, index|
22
26
  itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
23
- XHTML::ALGORITHMS[algorithm].search(Nokogiri.XML(itemref.item.read), @word).each do |sub_result|
27
+ XHTML::ALGORITHMS[algorithm].search_text(Nokogiri.XML(itemref.item.read), word).each do |sub_result|
24
28
  results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
25
29
  end
26
30
  end
27
31
 
28
32
  results
29
33
  end
34
+
35
+ # @todo: Refactoring
36
+ def search_element(css: nil, xpath: nil, namespaces: {})
37
+ raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
38
+
39
+ namespaces = EPUB::NAMESPACES.merge(namespaces)
40
+ results = []
41
+
42
+ spine_step = EPUB::CFI::Step.new(EPUB::Publication::Package::CONTENT_MODELS.index(:spine) * 2)
43
+ @package.spine.each_itemref.with_index do |itemref, index|
44
+ assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
45
+ itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
46
+ path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
47
+ content_document = itemref.item.content_document
48
+ next unless content_document
49
+ doc = content_document.nokogiri
50
+ elems = if xpath
51
+ doc.xpath(xpath, namespaces)
52
+ else
53
+ doc.css(css)
54
+ end
55
+ elems.each do |elem|
56
+ path = find_path(elem)
57
+ results << {
58
+ location: EPUB::CFI::Location.new([path_to_itemref, path]),
59
+ package: @package,
60
+ element: elem
61
+ }
62
+ end
63
+ end
64
+
65
+ results
66
+ end
67
+
68
+ private
69
+
70
+ def find_path(elem)
71
+ steps = []
72
+ until elem.parent.document?
73
+ index = elem.parent.element_children.index(elem)
74
+ assertion = elem["id"] ? EPUB::CFI::IDAssertion.new(elem["id"]) : nil
75
+ steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
76
+ elem = elem.parent
77
+ end
78
+ EPUB::CFI::Path.new(steps)
79
+ end
30
80
  end
31
81
  end
32
82
  end
@@ -1,3 +1,6 @@
1
+ require 'epub/cfi'
2
+ require 'epub/parser/cfi'
3
+
1
4
  module EPUB
2
5
  module Searcher
3
6
  class Result
@@ -9,7 +12,7 @@ module EPUB
9
12
  # # Note that c here is not included in the first element of returned value.
10
13
  # @param steps1 [Array<Step>, Array<Array>]
11
14
  # @param steps2 [Array<Step>, Array<Array>]
12
- # @return [Array<Array<Array>>] Thee arrays:
15
+ # @return [Array<Array<Array>>] Three arrays:
13
16
  # 1. "intersection" of +steps1+ and +steps2+. "intersection" here is not the mathematical term
14
17
  # 2. remaining steps of +steps1+
15
18
  # 3. remaining steps of +steps2+
@@ -41,25 +44,11 @@ module EPUB
41
44
  @parent_steps, @start_steps, @end_steps = parent_steps, start_steps, end_steps
42
45
  end
43
46
 
44
- def to_xpath_and_offset(with_xmlns=false)
45
- xpath = (@parent_steps + @start_steps).reduce('.') {|path, step|
46
- case step.type
47
- when :element
48
- path + '/%s*[%d]' % [with_xmlns ? 'xhtml:' : nil, step.index + 1]
49
- when :text
50
- path + '/text()[%s]' % [step.index + 1]
51
- else
52
- path
53
- end
54
- }
55
-
56
- [xpath, @start_steps.last.index]
57
- end
58
-
59
- def to_cfi_s
60
- [@parent_steps, @start_steps, @end_steps].collect {|steps|
61
- steps ? steps.collect(&:to_cfi_s).join : nil
47
+ def to_cfi
48
+ str = [@parent_steps, @start_steps, @end_steps].collect {|steps|
49
+ steps ? steps.collect(&:to_cfi).join : nil
62
50
  }.compact.join(',')
51
+ EPUB::CFI(str)
63
52
  end
64
53
 
65
54
  def ==(other)
@@ -80,7 +69,7 @@ module EPUB
80
69
  self.info == other.info
81
70
  end
82
71
 
83
- def to_cfi_s
72
+ def to_cfi
84
73
  case type
85
74
  when :element
86
75
  '/%d%s' % [(index + 1) * 2, id_assertion]
@@ -10,8 +10,8 @@ module EPUB
10
10
  # @param element [Nokogiri::XML::Element, Nokogiri::XML::Document]
11
11
  # @param word [String]
12
12
  # @return [Array<Result>]
13
- def search(element, word)
14
- new(element.respond_to?(:root) ? element.root : element).search(word)
13
+ def search_text(element, word)
14
+ new(element.respond_to?(:root) ? element.root : element).search_text(word)
15
15
  end
16
16
  end
17
17
 
@@ -23,7 +23,7 @@ module EPUB
23
23
  class Restricted < self
24
24
  # @param element [Nokogiri::XML::Element]
25
25
  # @return [Array<Result>]
26
- def search(word, element=nil)
26
+ def search_text(word, element=nil)
27
27
  results = []
28
28
 
29
29
  elem_index = 0
@@ -35,7 +35,7 @@ module EPUB
35
35
  results << Result.new([child_step], nil, nil)
36
36
  end
37
37
  else
38
- search(word, child).each do |sub_result|
38
+ search_text(word, child).each do |sub_result|
39
39
  results << Result.new([child_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
40
40
  end
41
41
  end
@@ -62,7 +62,7 @@ module EPUB
62
62
  @indices = nil
63
63
  end
64
64
 
65
- def search(word)
65
+ def search_text(word)
66
66
  unless @indices
67
67
  @indices, @content = build_indices(@element)
68
68
  end
@@ -1,3 +1,4 @@
1
+ Encoding.default_external = 'UTF-8'
1
2
  require 'simplecov'
2
3
  SimpleCov.start do
3
4
  add_filter '/test|deps/'
@@ -18,6 +18,7 @@ class TestParserCFI < Test::Unit::TestCase
18
18
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[,y])',
19
19
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[;s=b])',
20
20
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[yyy;s=b])',
21
+ 'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[^(;s=b])',
21
22
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2[;s=b])',
22
23
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:10)',
23
24
  'epubcfi(/6/4[chap01ref]!/4[body01]/16[svgimg])',
@@ -25,7 +26,8 @@ class TestParserCFI < Test::Unit::TestCase
25
26
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:0)',
26
27
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3)',
27
28
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05],/2/1:1,/3:4)',
28
- 'epubcfi(/6,:1,:3)'
29
+ 'epubcfi(/6,:1,:3)',
30
+ 'epubcfi(/6/4[chap01ref]!/4[body01]/10[mov01]~23.5@5.75:97.6)'
29
31
  ].reduce({}) {|data, cfi|
30
32
  data[cfi] = cfi
31
33
  data
@@ -35,4 +37,17 @@ class TestParserCFI < Test::Unit::TestCase
35
37
  @parser.parse(cfi)
36
38
  end
37
39
  end
40
+
41
+ data([
42
+ '/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[(;s=b]',
43
+ '/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[);s=b]'
44
+ ].reduce({}) {|data, cfi|
45
+ data[cfi] = cfi
46
+ data
47
+ })
48
+ def test_raise_error_on_parsing_invalid_cfi(cfi)
49
+ assert_raise Racc::ParseError do
50
+ EPUB::CFI(cfi)
51
+ end
52
+ end
38
53
  end
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require_relative 'helper'
3
3
  require 'epub/searcher'
4
+ require 'epub/parser/cfi'
4
5
 
5
6
  class TestSearcher < Test::Unit::TestCase
6
7
  class TestPublication < self
@@ -17,7 +18,7 @@ class TestSearcher < Test::Unit::TestCase
17
18
  end
18
19
 
19
20
  def test_no_result
20
- assert_empty EPUB::Searcher::Publication.search(@package, 'no result')
21
+ assert_empty EPUB::Searcher::Publication.search_text(@package, 'no result')
21
22
  end
22
23
 
23
24
  def test_simple
@@ -26,13 +27,53 @@ class TestSearcher < Test::Unit::TestCase
26
27
  [[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 0, {:name => 'head', :id => nil}], [:element, 0, {:name => 'title', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]],
27
28
  [[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 1, {:name => 'body', :id => nil}], [:element, 0, {:name => 'div', :id => nil}], [:element, 0, {:name => 'nav', :id => 'idid'}], [:element, 0, {:name => 'hgroup', :id => nil}], [:element, 1, {:name => 'h1', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]]
28
29
  ]),
29
- EPUB::Searcher::Publication.search(@package, 'Content')
30
+ EPUB::Searcher::Publication.search_text(@package, 'Content')
31
+ )
32
+ end
33
+
34
+ def test_search_element_xpath_without_namespaces
35
+ assert_equal(
36
+ [
37
+ "epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
38
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
39
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
40
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
41
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
42
+ ],
43
+ EPUB::Searcher::Publication.search_element(@package, xpath: './/xhtml:a').collect {|result| result[:location]}.map(&:to_fragment)
44
+ )
45
+ end
46
+
47
+ def test_search_element_xpath_with_namespaces
48
+ assert_equal(
49
+ [
50
+ "epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
51
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
52
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
53
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
54
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
55
+ ],
56
+ EPUB::Searcher::Publication.search_element(@package, xpath: './/customnamespace:a', namespaces: {'customnamespace' => 'http://www.w3.org/1999/xhtml'}).collect {|result| result[:location]}.map(&:to_fragment)
57
+ )
58
+ end
59
+
60
+ def test_search_element_css_selector
61
+ assert_equal(
62
+ [
63
+ "epubcfi(/4/2!/4/2/2[idid]/4/2)",
64
+ "epubcfi(/4/2!/4/2/2[idid]/4/4)",
65
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/2)",
66
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/4)",
67
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/6)",
68
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/8)"
69
+ ],
70
+ EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
30
71
  )
31
72
  end
32
73
 
33
74
  class TesetResult < self
34
- def test_to_cfi_s
35
- assert_equal '/6/2!/4/2/2[idid]/2/4/1,:9,:16', EPUB::Searcher::Publication.search(@package, 'Content').last.to_cfi_s
75
+ def test_to_cfi
76
+ assert_equal 'epubcfi(/6/2!/4/2/2[idid]/2/4/1,:9,:16)', EPUB::Searcher::Publication.search_text(@package, 'Content').last.to_cfi.to_fragment
36
77
  end
37
78
  end
38
79
  end
@@ -48,35 +89,35 @@ class TestSearcher < Test::Unit::TestCase
48
89
 
49
90
  module TestSearch
50
91
  def test_no_result
51
- assert_empty @searcher.search(@h1, 'no result')
92
+ assert_empty @searcher.search_text(@h1, 'no result')
52
93
  end
53
94
 
54
95
  def test_simple
55
- assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.search(@h1, 'Content')
96
+ assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.search_text(@h1, 'Content')
56
97
  end
57
98
 
58
99
  def test_multiple_text_result
59
- assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.search(@h1, 'o')
100
+ assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.search_text(@h1, 'o')
60
101
  end
61
102
 
62
103
  def test_text_after_element
63
104
  elem = Nokogiri.XML('<root><elem>inner</elem>after</root>')
64
105
 
65
- assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.search(elem, 'after')
106
+ assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.search_text(elem, 'after')
66
107
  end
67
108
 
68
109
  def test_entity_reference
69
110
  elem = Nokogiri.XML('<root>before&lt;after</root>')
70
111
 
71
- assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.search(elem, '<')
112
+ assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.search_text(elem, '<')
72
113
  end
73
114
 
74
115
  def test_nested_result
75
- assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.search(@nav, '第二節')
116
+ assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.search_text(@nav, '第二節')
76
117
  end
77
118
 
78
119
  def test_img
79
- assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.search(@nav, '第三節')
120
+ assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.search_text(@nav, '第三節')
80
121
  end
81
122
  end
82
123
 
@@ -99,27 +140,22 @@ class TestSearcher < Test::Unit::TestCase
99
140
 
100
141
  def test_seamless
101
142
  elem = Nokogiri.XML('<root>This <em>includes</em> a child element.</root>')
102
- assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.search(elem, 'This includes a child element.')
143
+ assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.search_text(elem, 'This includes a child element.')
103
144
  end
104
145
  end
105
146
 
106
147
  class TestResult < self
107
148
  def setup
108
149
  super
109
- @result = EPUB::Searcher::XHTML::Restricted.search(@doc, '第二節').first
110
- end
111
-
112
- def test_to_xpath_and_offset
113
- assert_equal ['./*[2]/*[1]/*[1]/*[2]/*[2]/*[2]/*[2]/*[1]/text()[1]', 0], @result.to_xpath_and_offset
114
- assert_equal ['./xhtml:*[2]/xhtml:*[1]/xhtml:*[1]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[1]/text()[1]', 0], @result.to_xpath_and_offset(true)
150
+ @result = EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第二節').first
115
151
  end
116
152
 
117
- def test_to_cfi_s
118
- assert_equal '/4/2/2[idid]/4/4/4/4/2/1,:0,:3', @result.to_cfi_s
153
+ def test_to_cfi
154
+ assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/4/2/1,:0,:3)', @result.to_cfi.to_fragment
119
155
  end
120
156
 
121
- def test_to_cfi_s_img
122
- assert_equal '/4/2/2[idid]/4/4/4/6/2/2', EPUB::Searcher::XHTML::Restricted.search(@doc, '第三節').first.to_cfi_s
157
+ def test_to_cfi_img
158
+ assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/6/2/2)', EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第三節').first.to_cfi.to_fragment
123
159
  end
124
160
  end
125
161
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epub-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - KITAITI Makoto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-30 00:00:00.000000000 Z
11
+ date: 2017-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -301,6 +301,7 @@ extra_rdoc_files: []
301
301
  files:
302
302
  - ".gemtest"
303
303
  - ".gitignore"
304
+ - ".gitlab-ci.yml"
304
305
  - ".gitmodules"
305
306
  - ".travis.yml"
306
307
  - ".yardopts"
@@ -419,7 +420,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
419
420
  version: '0'
420
421
  requirements: []
421
422
  rubyforge_project:
422
- rubygems_version: 2.5.1
423
+ rubygems_version: 2.6.8
423
424
  signing_key:
424
425
  specification_version: 4
425
426
  summary: EPUB 3 Parser