epub-parser 0.2.7 → 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e92165c76652996a441e9996bb9bdee8bb0e7b04
4
- data.tar.gz: c7a62b70d282f9b8343c0b850db21d7f078961d9
3
+ metadata.gz: 84aac696d82b7eb55bf80b1448685ac4c210abdd
4
+ data.tar.gz: a32f0e557b928502e0825cbb6b2d808f0fc9dd1d
5
5
  SHA512:
6
- metadata.gz: f56160b8148faf112e6ff8941a380223f1ce4d0ad114b2375f8e9793fe4eb0d7d9e18dda540f36c1ce702c90c7791006f5fa65c270498d5d6e5b341862e6aa37
7
- data.tar.gz: f139ae34069e6bcfe85d23ae5b4db32cecd99ae90cfc1fbe12ba1b2541f31d93329a6215cf099074b0fc0a3b03996ea6fca2508ad9b3165f27522be5fb8c759e
6
+ metadata.gz: 5a28c163db08c10c57f733d6ffc9a68a768b30a5fe84439bff1fbb03941448c822eb916f3338016b1cd8f2a96954f69a775be0f65bb629231767dfb995318934
7
+ data.tar.gz: f5535095748701b940555ca424f3488b4e7b007c39d6e2688cf16d00394a9e03c2c6f12290c820ca15aee2a11ce0ed43e5745225507c6baab9c2a7e33bf513ab
@@ -0,0 +1,19 @@
1
+ before_script:
2
+ - apt-get update -qq && apt-get install -y zip
3
+ - ruby -v
4
+ - which ruby
5
+ - gem install bundler --no-document
6
+ - bundle install --jobs=$(nproc) "${FLAGS[@]}"
7
+
8
+ test:2.2:
9
+ image: ruby:2.2
10
+ script:
11
+ - bundle exec rake test
12
+
13
+ test:2.3:
14
+ image: ruby:2.3
15
+ script: bundle exec rake test
16
+
17
+ test:2.4:
18
+ image: ruby:2.4
19
+ script: bundle exec rake test
@@ -1,4 +1,4 @@
1
1
  rvm:
2
- - "2.1.10"
3
2
  - "2.2.5"
4
3
  - "2.3.1"
4
+ - "2.4.0"
@@ -1,6 +1,12 @@
1
1
  CHANGELOG
2
2
  =========
3
3
 
4
+ 0.2.8
5
+ -----
6
+
7
+ * Change Searcher API: #search -> #search_text
8
+ * Add Searcher.search_element
9
+
4
10
  0.2.7
5
11
  -----
6
12
 
data/Gemfile CHANGED
@@ -1,2 +1,6 @@
1
1
  source 'https://rubygems.org'
2
2
  gemspec
3
+
4
+ if RUBY_PLATFORM.match /darwin/
5
+ gem 'terminal-notifier'
6
+ end
@@ -1,6 +1,7 @@
1
1
  EPUB Parser
2
2
  ===========
3
3
  [![Build Status](https://secure.travis-ci.org/KitaitiMakoto/epub-parser.png?branch=master)](http://travis-ci.org/KitaitiMakoto/epub-parser)
4
+ [![build status](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
4
5
  [![Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser.png)](https://gemnasium.com/KitaitiMakoto/epub-parser)
5
6
  [![Gem Version](https://badge.fury.io/rb/epub-parser.svg)](http://badge.fury.io/rb/epub-parser)
6
7
 
@@ -31,7 +32,7 @@ USAGE
31
32
 
32
33
  See document's {file:docs/Home.markdown} or [API Documentation][rubydoc] for more info.
33
34
 
34
- [rubydoc]: http://rubydoc.info/gems/epub-parser
35
+ [rubydoc]: http://www.rubydoc.info/gems/epub-parser
35
36
 
36
37
  ### `epubinfo` command-line tool
37
38
 
@@ -114,7 +115,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
114
115
 
115
116
  Or, you can generate it with the yardoc command, too:
116
117
 
117
- $ git clone https://github.com/KitaitiMakoto/epub-parser.git
118
+ $ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
118
119
  $ cd epub-parser
119
120
  $ bundle install --path=deps
120
121
  $ bundle exec rake doc:yard
@@ -151,6 +152,11 @@ If you find other gems, please tell me or request a pull request.
151
152
  RECENT CHANGES
152
153
  --------------
153
154
 
155
+ ### 0.2.8
156
+
157
+ * Change Searcher API: #search -> #search_text
158
+ * Add Searcher.search_element
159
+
154
160
  ### 0.2.7
155
161
 
156
162
  * Add `EPUB::Metadata#children`
@@ -169,11 +175,6 @@ RECENT CHANGES
169
175
 
170
176
  [multi-rendition]: http://www.idpf.org/epub/renditions/multiple/
171
177
 
172
- ### 0.2.5
173
-
174
- * [BUG FIX]Don't load Zip/Ruby if unneccessary
175
- * Add `EPUB::CFI::PhysicalContainer.find_adapter`
176
-
177
178
  See {file:CHANGELOG.markdown} for older changelogs and details.
178
179
 
179
180
  TODOS
@@ -122,7 +122,7 @@ It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/do
122
122
 
123
123
  Or, you can generate it with the yardoc command, too:
124
124
 
125
- $ git clone https://github.com/KitaitiMakoto/epub-parser.git
125
+ $ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
126
126
  $ cd epub-parser
127
127
  $ bundle install --path=deps
128
128
  $ bundle exec rake doc:yard
@@ -8,37 +8,37 @@ Searcher
8
8
  Example
9
9
  -------
10
10
 
11
- epub = EPUB::Parser.parse('childrens-literature-20130206.epub')
11
+ epub = EPUB::Parser.parse('childrens-literature.epub')
12
12
  search_word = 'INTRODUCTORY'
13
- results = EPUB::Searcher.search(epub, search_word)
14
- # => [#<EPUB::Searcher::Result:0x007f938ed517a8
15
- # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f938ed51a50 @index=12, @info={}, @type=:character>],
13
+ results = EPUB::Searcher.search_text(epub, search_word)
14
+ # => [#<EPUB::Searcher::Result:0x007f80ccde9528
15
+ # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9730 @index=12, @info={}, @type=:character>],
16
16
  # @parent_steps=
17
- # [#<EPUB::Searcher::Result::Step:0x007f938f1c1e78 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
18
- # #<EPUB::Searcher::Result::Step:0x007f938f1caa78 @index=1, @info={:id=>nil}, @type=:itemref>,
19
- # #<EPUB::Searcher::Result::Step:0x007f938ed521d0 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
20
- # #<EPUB::Searcher::Result::Step:0x007f938ed52158 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
21
- # #<EPUB::Searcher::Result::Step:0x007f938ed52108 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
22
- # #<EPUB::Searcher::Result::Step:0x007f938ed52090 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
23
- # #<EPUB::Searcher::Result::Step:0x007f938ed52040 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
24
- # #<EPUB::Searcher::Result::Step:0x007f938ed51ff0 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
25
- # #<EPUB::Searcher::Result::Step:0x007f938ed51f78 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
26
- # #<EPUB::Searcher::Result::Step:0x007f938ed51f28 @index=0, @info={}, @type=:text>],
27
- # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f938ed51e88 @index=0, @info={}, @type=:character>]>,
28
- # #<EPUB::Searcher::Result:0x007f938ef8f5d8
29
- # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f938ef8f808 @index=12, @info={}, @type=:character>],
17
+ # [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
18
+ # #<EPUB::Searcher::Result::Step:0x007f80ccf3d3e8 @index=1, @info={:id=>nil}, @type=:itemref>,
19
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9e88 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
20
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9e38 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
21
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9de8 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
22
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9d98 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
23
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9d48 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
24
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9ca8 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
25
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9c08 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
26
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde9bb8 @index=0, @info={}, @type=:text>],
27
+ # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9af0 @index=0, @info={}, @type=:character>]>,
28
+ # #<EPUB::Searcher::Result:0x007f80ccebcb30
29
+ # @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebcdb0 @index=12, @info={}, @type=:character>],
30
30
  # @parent_steps=
31
- # [#<EPUB::Searcher::Result::Step:0x007f938f1c1e78 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
32
- # #<EPUB::Searcher::Result::Step:0x007f938ed51730 @index=2, @info={:id=>nil}, @type=:itemref>,
33
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fce0 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
34
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fc90 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
35
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fc40 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
36
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fbf0 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
37
- # #<EPUB::Searcher::Result::Step:0x007f938ef8fb28 @index=0, @info={}, @type=:text>],
38
- # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f938ef8fa88 @index=0, @info={}, @type=:character>]>]
39
- puts results.collect(&:to_cfi_s)
40
- # /6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12
41
- # /6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12
31
+ # [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
32
+ # #<EPUB::Searcher::Result::Step:0x007f80ccde94b0 @index=2, @info={:id=>nil}, @type=:itemref>,
33
+ # #<EPUB::Searcher::Result::Step:0x007f80ccebd328 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
34
+ # #<EPUB::Searcher::Result::Step:0x007f80ccebd2d8 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
35
+ # #<EPUB::Searcher::Result::Step:0x007f80ccebd260 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
36
+ # #<EPUB::Searcher::Result::Step:0x007f80ccebd210 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
37
+ # #<EPUB::Searcher::Result::Step:0x007f80ccebd198 @index=0, @info={}, @type=:text>],
38
+ # @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebd0d0 @index=0, @info={}, @type=:character>]>]
39
+ puts results.collect(&:to_cfi).collect(&:to_fragment)
40
+ # epubcfi(/6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12)
41
+ # epubcfi(/6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12)
42
42
  # => nil
43
43
 
44
44
  Search result
@@ -91,4 +91,19 @@ because the words 'search' and 'word' are not in the same element.
91
91
 
92
92
  To use the restricted searcher, specify the `algorithm` option for the `search_text` method:
93
93
 
94
- results = EPUB::Searcher.search(epub, search_word, algorithm: :restricted)
94
+ results = EPUB::Searcher.search_text(epub, search_word, algorithm: :restricted)
95
+
96
+ Element Searcher
97
+ ----------------
98
+
99
+ You can search XHTML elements by CSS selector or XPath.
100
+
101
+ EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
102
+ # => ["epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313])",
103
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/2[np-315])",
104
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317])",
105
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6)",
106
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319])",
107
+ # "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319]/4/2)",
108
+ # :
109
+ # :
@@ -23,10 +23,6 @@ def main(argv)
23
23
 
24
24
  epub = EPUB::Parser.parse(epub_path)
25
25
  epub.package.spine.each_itemref.with_index do |itemref, i|
26
- itemref_step = {
27
- :step => (i + 1) * 2,
28
- :id => itemref.id
29
- }
30
26
  assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
31
27
  itemref_step = EPUB::CFI::Step.new((i + 1) * 2, assertion)
32
28
  path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
@@ -5,6 +5,8 @@ module EPUB
5
5
 
6
6
  def initialize
7
7
  @navigations = []
8
+ @hidden = false
9
+ @parent = nil
8
10
  super
9
11
  end
10
12
 
@@ -7,7 +7,15 @@ module EPUB
7
7
  class OCF
8
8
  # @todo: Make thread-safe
9
9
  class PhysicalContainer
10
- class NoEntry < StandardError; end
10
+ class NoEntry < StandardError
11
+ class << self
12
+ def from_error(error)
13
+ no_entry = new(error.message)
14
+ no_entry.set_backtrace error.backtrace
15
+ no_entry
16
+ end
17
+ end
18
+ end
11
19
 
12
20
  @adapter = ArchiveZip
13
21
 
@@ -9,9 +9,7 @@ module EPUB
9
9
  def read(path_name)
10
10
  ::File.read(::File.join(@container_path, path_name))
11
11
  rescue ::Errno::ENOENT => error
12
- no_entry = NoEntry.new(error.message)
13
- no_entry.set_backtrace error.backtrace
14
- raise no_entry
12
+ raise NoEntry.from_error(error)
15
13
  end
16
14
  end
17
15
  end
@@ -20,9 +20,7 @@ module EPUB
20
20
  def read(path_name)
21
21
  (@container_path + path_name).read
22
22
  rescue ::OpenURI::HTTPError => error
23
- no_entry = NoEntry.new(error.message)
24
- no_entry.set_backtrace error.backtrace
25
- raise no_entry
23
+ raise NoEntry.from_error(error)
26
24
  end
27
25
  end
28
26
  end
@@ -11,9 +11,7 @@ module EPUB
11
11
  @archive = archive
12
12
  yield self
13
13
  rescue ::Zip::Error => error
14
- no_entry = NoEntry.new(error.message)
15
- no_entry.set_backtrace error.backtrace
16
- raise no_entry
14
+ raise NoEntry.from_error(error)
17
15
  ensure
18
16
  @archive = nil
19
17
  end
@@ -28,9 +26,7 @@ module EPUB
28
26
  open {|container| container.read(path_name)}
29
27
  end
30
28
  rescue ::Zip::Error => error
31
- no_entry = NoEntry.new(error.message)
32
- no_entry.set_backtrace error.backtrace
33
- raise no_entry
29
+ raise NoEntry.from_error(error)
34
30
  ensure
35
31
  @archive = nil
36
32
  end
@@ -1,5 +1,5 @@
1
1
  module EPUB
2
2
  class Parser
3
- VERSION = "0.2.7"
3
+ VERSION = "0.2.8"
4
4
  end
5
5
  end
@@ -101,6 +101,7 @@ module EPUB
101
101
 
102
102
  def initialize
103
103
  @properties = Set.new
104
+ @full_path = nil
104
105
  end
105
106
 
106
107
  def properties=(props)
@@ -5,8 +5,12 @@ require 'epub/searcher/xhtml'
5
5
  module EPUB
6
6
  module Searcher
7
7
  class << self
8
- def search(epub, word, **options)
9
- Publication.search(epub.package, word, options)
8
+ def search_text(epub, word, **options)
9
+ Publication.search_text(epub.package, word, options)
10
+ end
11
+
12
+ def search_element(epub, css: nil, xpath: nil, namespaces: {})
13
+ Publication.search_element(epub.package, css: css, xpath: xpath, namespaces: namespaces)
10
14
  end
11
15
  end
12
16
  end
@@ -4,29 +4,79 @@ module EPUB
4
4
  module Searcher
5
5
  class Publication
6
6
  class << self
7
- def search(package, word, **options)
8
- new(word).search(package, options)
7
+ def search_text(package, word, **options)
8
+ new(package).search_text(word, options)
9
+ end
10
+
11
+ def search_element(package, css: nil, xpath: nil, namespaces: {})
12
+ new(package).search_element(css: css, xpath: xpath, namespaces: namespaces)
9
13
  end
10
14
  end
11
15
 
12
- def initialize(word)
13
- @word = word
16
+ def initialize(package)
17
+ @package = package
14
18
  end
15
19
 
16
- def search(package, algorithm: :seamless)
20
+ def search_text(word, algorithm: :seamless)
17
21
  results = []
18
22
 
19
- spine = package.spine
23
+ spine = @package.spine
20
24
  spine_step = Result::Step.new(:element, 2, {:name => 'spine', :id => spine.id})
21
25
  spine.each_itemref.with_index do |itemref, index|
22
26
  itemref_step = Result::Step.new(:itemref, index, {:id => itemref.id})
23
- XHTML::ALGORITHMS[algorithm].search(Nokogiri.XML(itemref.item.read), @word).each do |sub_result|
27
+ XHTML::ALGORITHMS[algorithm].search_text(Nokogiri.XML(itemref.item.read), word).each do |sub_result|
24
28
  results << Result.new([spine_step, itemref_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
25
29
  end
26
30
  end
27
31
 
28
32
  results
29
33
  end
34
+
35
+ # @todo: Refactoring
36
+ def search_element(css: nil, xpath: nil, namespaces: {})
37
+ raise ArgumentError, 'Both css and xpath are nil' if css.nil? && xpath.nil?
38
+
39
+ namespaces = EPUB::NAMESPACES.merge(namespaces)
40
+ results = []
41
+
42
+ spine_step = EPUB::CFI::Step.new(EPUB::Publication::Package::CONTENT_MODELS.index(:spine) * 2)
43
+ @package.spine.each_itemref.with_index do |itemref, index|
44
+ assertion = itemref.id ? EPUB::CFI::IDAssertion.new(itemref.id) : nil
45
+ itemref_step = EPUB::CFI::Step.new((index + 1) * 2, assertion)
46
+ path_to_itemref = EPUB::CFI::Path.new([spine_step, itemref_step])
47
+ content_document = itemref.item.content_document
48
+ next unless content_document
49
+ doc = content_document.nokogiri
50
+ elems = if xpath
51
+ doc.xpath(xpath, namespaces)
52
+ else
53
+ doc.css(css)
54
+ end
55
+ elems.each do |elem|
56
+ path = find_path(elem)
57
+ results << {
58
+ location: EPUB::CFI::Location.new([path_to_itemref, path]),
59
+ package: @package,
60
+ element: elem
61
+ }
62
+ end
63
+ end
64
+
65
+ results
66
+ end
67
+
68
+ private
69
+
70
+ def find_path(elem)
71
+ steps = []
72
+ until elem.parent.document?
73
+ index = elem.parent.element_children.index(elem)
74
+ assertion = elem["id"] ? EPUB::CFI::IDAssertion.new(elem["id"]) : nil
75
+ steps.unshift EPUB::CFI::Step.new((index + 1) * 2, assertion)
76
+ elem = elem.parent
77
+ end
78
+ EPUB::CFI::Path.new(steps)
79
+ end
30
80
  end
31
81
  end
32
82
  end
@@ -1,3 +1,6 @@
1
+ require 'epub/cfi'
2
+ require 'epub/parser/cfi'
3
+
1
4
  module EPUB
2
5
  module Searcher
3
6
  class Result
@@ -9,7 +12,7 @@ module EPUB
9
12
  # # Note that c here is not included in the first element of returned value.
10
13
  # @param steps1 [Array<Step>, Array<Array>]
11
14
  # @param steps2 [Array<Step>, Array<Array>]
12
- # @return [Array<Array<Array>>] Thee arrays:
15
+ # @return [Array<Array<Array>>] Three arrays:
13
16
  # 1. "intersection" of +steps1+ and +steps2+. "intersection" here is not used in the mathematical sense
14
17
  # 2. remaining steps of +steps1+
15
18
  # 3. remaining steps of +steps2+
@@ -41,25 +44,11 @@ module EPUB
41
44
  @parent_steps, @start_steps, @end_steps = parent_steps, start_steps, end_steps
42
45
  end
43
46
 
44
- def to_xpath_and_offset(with_xmlns=false)
45
- xpath = (@parent_steps + @start_steps).reduce('.') {|path, step|
46
- case step.type
47
- when :element
48
- path + '/%s*[%d]' % [with_xmlns ? 'xhtml:' : nil, step.index + 1]
49
- when :text
50
- path + '/text()[%s]' % [step.index + 1]
51
- else
52
- path
53
- end
54
- }
55
-
56
- [xpath, @start_steps.last.index]
57
- end
58
-
59
- def to_cfi_s
60
- [@parent_steps, @start_steps, @end_steps].collect {|steps|
61
- steps ? steps.collect(&:to_cfi_s).join : nil
47
+ def to_cfi
48
+ str = [@parent_steps, @start_steps, @end_steps].collect {|steps|
49
+ steps ? steps.collect(&:to_cfi).join : nil
62
50
  }.compact.join(',')
51
+ EPUB::CFI(str)
63
52
  end
64
53
 
65
54
  def ==(other)
@@ -80,7 +69,7 @@ module EPUB
80
69
  self.info == other.info
81
70
  end
82
71
 
83
- def to_cfi_s
72
+ def to_cfi
84
73
  case type
85
74
  when :element
86
75
  '/%d%s' % [(index + 1) * 2, id_assertion]
@@ -10,8 +10,8 @@ module EPUB
10
10
  # @param element [Nokogiri::XML::Element, Nokogiri::XML::Document]
11
11
  # @param word [String]
12
12
  # @return [Array<Result>]
13
- def search(element, word)
14
- new(element.respond_to?(:root) ? element.root : element).search(word)
13
+ def search_text(element, word)
14
+ new(element.respond_to?(:root) ? element.root : element).search_text(word)
15
15
  end
16
16
  end
17
17
 
@@ -23,7 +23,7 @@ module EPUB
23
23
  class Restricted < self
24
24
  # @param element [Nokogiri::XML::Element]
25
25
  # @return [Array<Result>]
26
- def search(word, element=nil)
26
+ def search_text(word, element=nil)
27
27
  results = []
28
28
 
29
29
  elem_index = 0
@@ -35,7 +35,7 @@ module EPUB
35
35
  results << Result.new([child_step], nil, nil)
36
36
  end
37
37
  else
38
- search(word, child).each do |sub_result|
38
+ search_text(word, child).each do |sub_result|
39
39
  results << Result.new([child_step] + sub_result.parent_steps, sub_result.start_steps, sub_result.end_steps)
40
40
  end
41
41
  end
@@ -62,7 +62,7 @@ module EPUB
62
62
  @indices = nil
63
63
  end
64
64
 
65
- def search(word)
65
+ def search_text(word)
66
66
  unless @indices
67
67
  @indices, @content = build_indices(@element)
68
68
  end
@@ -1,3 +1,4 @@
1
+ Encoding.default_external = 'UTF-8'
1
2
  require 'simplecov'
2
3
  SimpleCov.start do
3
4
  add_filter '/test|deps/'
@@ -18,6 +18,7 @@ class TestParserCFI < Test::Unit::TestCase
18
18
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[,y])',
19
19
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[;s=b])',
20
20
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[yyy;s=b])',
21
+ 'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[^(;s=b])',
21
22
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2[;s=b])',
22
23
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:10)',
23
24
  'epubcfi(/6/4[chap01ref]!/4[body01]/16[svgimg])',
@@ -25,7 +26,8 @@ class TestParserCFI < Test::Unit::TestCase
25
26
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:0)',
26
27
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3)',
27
28
  'epubcfi(/6/4[chap01ref]!/4[body01]/10[para05],/2/1:1,/3:4)',
28
- 'epubcfi(/6,:1,:3)'
29
+ 'epubcfi(/6,:1,:3)',
30
+ 'epubcfi(/6/4[chap01ref]!/4[body01]/10[mov01]~23.5@5.75:97.6)'
29
31
  ].reduce({}) {|data, cfi|
30
32
  data[cfi] = cfi
31
33
  data
@@ -35,4 +37,17 @@ class TestParserCFI < Test::Unit::TestCase
35
37
  @parser.parse(cfi)
36
38
  end
37
39
  end
40
+
41
+ data([
42
+ '/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[(;s=b]',
43
+ '/6/4[chap01ref]!/4[body01]/10[para05]/2/1:3[);s=b]'
44
+ ].reduce({}) {|data, cfi|
45
+ data[cfi] = cfi
46
+ data
47
+ })
48
+ def test_raise_error_on_parsing_invalid_cfi(cfi)
49
+ assert_raise Racc::ParseError do
50
+ EPUB::CFI(cfi)
51
+ end
52
+ end
38
53
  end
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require_relative 'helper'
3
3
  require 'epub/searcher'
4
+ require 'epub/parser/cfi'
4
5
 
5
6
  class TestSearcher < Test::Unit::TestCase
6
7
  class TestPublication < self
@@ -17,7 +18,7 @@ class TestSearcher < Test::Unit::TestCase
17
18
  end
18
19
 
19
20
  def test_no_result
20
- assert_empty EPUB::Searcher::Publication.search(@package, 'no result')
21
+ assert_empty EPUB::Searcher::Publication.search_text(@package, 'no result')
21
22
  end
22
23
 
23
24
  def test_simple
@@ -26,13 +27,53 @@ class TestSearcher < Test::Unit::TestCase
26
27
  [[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 0, {:name => 'head', :id => nil}], [:element, 0, {:name => 'title', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]],
27
28
  [[[:element, 2, {:name => 'spine', :id => nil}], [:itemref, 0, {:id => nil}], [:element, 1, {:name => 'body', :id => nil}], [:element, 0, {:name => 'div', :id => nil}], [:element, 0, {:name => 'nav', :id => 'idid'}], [:element, 0, {:name => 'hgroup', :id => nil}], [:element, 1, {:name => 'h1', :id => nil}], [:text, 0]], [[:character, 9]], [[:character, 16]]]
28
29
  ]),
29
- EPUB::Searcher::Publication.search(@package, 'Content')
30
+ EPUB::Searcher::Publication.search_text(@package, 'Content')
31
+ )
32
+ end
33
+
34
+ def test_search_element_xpath_without_namespaces
35
+ assert_equal(
36
+ [
37
+ "epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
38
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
39
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
40
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
41
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
42
+ ],
43
+ EPUB::Searcher::Publication.search_element(@package, xpath: './/xhtml:a').collect {|result| result[:location]}.map(&:to_fragment)
44
+ )
45
+ end
46
+
47
+ def test_search_element_xpath_with_namespaces
48
+ assert_equal(
49
+ [
50
+ "epubcfi(/4/2!/4/2/2[idid]/4/2/2)",
51
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/2/2)",
52
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/4/2)",
53
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/6/2)",
54
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/8/2)"
55
+ ],
56
+ EPUB::Searcher::Publication.search_element(@package, xpath: './/customnamespace:a', namespaces: {'customnamespace' => 'http://www.w3.org/1999/xhtml'}).collect {|result| result[:location]}.map(&:to_fragment)
57
+ )
58
+ end
59
+
60
+ def test_search_element_css_selector
61
+ assert_equal(
62
+ [
63
+ "epubcfi(/4/2!/4/2/2[idid]/4/2)",
64
+ "epubcfi(/4/2!/4/2/2[idid]/4/4)",
65
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/2)",
66
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/4)",
67
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/6)",
68
+ "epubcfi(/4/2!/4/2/2[idid]/4/4/4/8)"
69
+ ],
70
+ EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
30
71
  )
31
72
  end
32
73
 
33
74
  class TesetResult < self
34
- def test_to_cfi_s
35
- assert_equal '/6/2!/4/2/2[idid]/2/4/1,:9,:16', EPUB::Searcher::Publication.search(@package, 'Content').last.to_cfi_s
75
+ def test_to_cfi
76
+ assert_equal 'epubcfi(/6/2!/4/2/2[idid]/2/4/1,:9,:16)', EPUB::Searcher::Publication.search_text(@package, 'Content').last.to_cfi.to_fragment
36
77
  end
37
78
  end
38
79
  end
@@ -48,35 +89,35 @@ class TestSearcher < Test::Unit::TestCase
48
89
 
49
90
  module TestSearch
50
91
  def test_no_result
51
- assert_empty @searcher.search(@h1, 'no result')
92
+ assert_empty @searcher.search_text(@h1, 'no result')
52
93
  end
53
94
 
54
95
  def test_simple
55
- assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.search(@h1, 'Content')
96
+ assert_equal results([[[[:text, 0]], [[:character, 9]], [[:character, 16]]]]), @searcher.search_text(@h1, 'Content')
56
97
  end
57
98
 
58
99
  def test_multiple_text_result
59
- assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.search(@h1, 'o')
100
+ assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]], [[[:text, 0]], [[:character, 10]], [[:character, 11]]]]), @searcher.search_text(@h1, 'o')
60
101
  end
61
102
 
62
103
  def test_text_after_element
63
104
  elem = Nokogiri.XML('<root><elem>inner</elem>after</root>')
64
105
 
65
- assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.search(elem, 'after')
106
+ assert_equal results([[[[:text, 1]], [[:character, 0]], [[:character, 5]]]]), @searcher.search_text(elem, 'after')
66
107
  end
67
108
 
68
109
  def test_entity_reference
69
110
  elem = Nokogiri.XML('<root>before&lt;after</root>')
70
111
 
71
- assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.search(elem, '<')
112
+ assert_equal results([[[[:text, 0]], [[:character, 6]], [[:character, 7]]]]), @searcher.search_text(elem, '<')
72
113
  end
73
114
 
74
115
  def test_nested_result
75
- assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.search(@nav, '第二節')
116
+ assert_equal results([[[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:text, 0]], [[:character, 0]], [[:character, 3]]]]), @searcher.search_text(@nav, '第二節')
76
117
  end
77
118
 
78
119
  def test_img
79
- assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.search(@nav, '第三節')
120
+ assert_equal [result([[[:element, 1, {:name => 'ol', :id => nil}], [:element, 1, {:name => 'li', :id => nil}], [:element, 1, {:name => 'ol', :id => nil}], [:element, 2, {:name => 'li', :id => nil}], [:element, 0, {:name => 'a', :id => nil}], [:element, 0, {:name => 'img', :id => nil}]], nil, nil])], @searcher.search_text(@nav, '第三節')
80
121
  end
81
122
  end
82
123
 
@@ -99,27 +140,22 @@ class TestSearcher < Test::Unit::TestCase
99
140
 
100
141
  def test_seamless
101
142
  elem = Nokogiri.XML('<root>This <em>includes</em> a child element.</root>')
102
- assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.search(elem, 'This includes a child element.')
143
+ assert_equal results([[[], [[:text, 0], [:character, 0]], [[:text, 1], [:character, 17]]]]), @searcher.search_text(elem, 'This includes a child element.')
103
144
  end
104
145
  end
105
146
 
106
147
  class TestResult < self
107
148
  def setup
108
149
  super
109
- @result = EPUB::Searcher::XHTML::Restricted.search(@doc, '第二節').first
110
- end
111
-
112
- def test_to_xpath_and_offset
113
- assert_equal ['./*[2]/*[1]/*[1]/*[2]/*[2]/*[2]/*[2]/*[1]/text()[1]', 0], @result.to_xpath_and_offset
114
- assert_equal ['./xhtml:*[2]/xhtml:*[1]/xhtml:*[1]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[2]/xhtml:*[1]/text()[1]', 0], @result.to_xpath_and_offset(true)
150
+ @result = EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第二節').first
115
151
  end
116
152
 
117
- def test_to_cfi_s
118
- assert_equal '/4/2/2[idid]/4/4/4/4/2/1,:0,:3', @result.to_cfi_s
153
+ def test_to_cfi
154
+ assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/4/2/1,:0,:3)', @result.to_cfi.to_fragment
119
155
  end
120
156
 
121
- def test_to_cfi_s_img
122
- assert_equal '/4/2/2[idid]/4/4/4/6/2/2', EPUB::Searcher::XHTML::Restricted.search(@doc, '第三節').first.to_cfi_s
157
+ def test_to_cfi_img
158
+ assert_equal 'epubcfi(/4/2/2[idid]/4/4/4/6/2/2)', EPUB::Searcher::XHTML::Restricted.search_text(@doc, '第三節').first.to_cfi.to_fragment
123
159
  end
124
160
  end
125
161
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: epub-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - KITAITI Makoto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-30 00:00:00.000000000 Z
11
+ date: 2017-02-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -301,6 +301,7 @@ extra_rdoc_files: []
301
301
  files:
302
302
  - ".gemtest"
303
303
  - ".gitignore"
304
+ - ".gitlab-ci.yml"
304
305
  - ".gitmodules"
305
306
  - ".travis.yml"
306
307
  - ".yardopts"
@@ -419,7 +420,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
419
420
  version: '0'
420
421
  requirements: []
421
422
  rubyforge_project:
422
- rubygems_version: 2.5.1
423
+ rubygems_version: 2.6.8
423
424
  signing_key:
424
425
  specification_version: 4
425
426
  summary: EPUB 3 Parser