epub-parser 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +51 -1
- data/.yardopts +5 -3
- data/{CHANGELOG.markdown → CHANGELOG.adoc} +49 -84
- data/README.adoc +228 -0
- data/Rakefile +3 -1
- data/bin/epub-cover +51 -0
- data/docs/EpubCover.adoc +46 -0
- data/docs/Examples.adoc +9 -0
- data/docs/Home.adoc +224 -0
- data/docs/Searcher.adoc +132 -0
- data/epub-parser.gemspec +2 -1
- data/lib/epub/book/features.rb +7 -1
- data/lib/epub/metadata.rb +9 -1
- data/lib/epub/parser/metadata.rb +4 -2
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +1 -1
- data/lib/epub/searcher/xhtml.rb +1 -0
- data/test/helper.rb +1 -1
- metadata +26 -8
- data/README.markdown +0 -219
- data/docs/Home.markdown +0 -196
- data/docs/Searcher.markdown +0 -109
data/Rakefile
CHANGED
@@ -41,8 +41,10 @@ namespace :doc do
|
|
41
41
|
YARD::Rake::YardocTask.new
|
42
42
|
Rake::RDocTask.new do |rdoc|
|
43
43
|
rdoc.rdoc_files = FileList['lib/**/*.rb']
|
44
|
-
rdoc.rdoc_files.include 'README.
|
44
|
+
rdoc.rdoc_files.include 'README.adoc'
|
45
|
+
rdoc.rdoc_files.include 'CHANGELOG.adoc'
|
45
46
|
rdoc.rdoc_files.include 'MIT-LICENSE'
|
47
|
+
rdoc.rdoc_files.include 'docs/**/*.adoc'
|
46
48
|
rdoc.rdoc_files.include 'docs/**/*.md'
|
47
49
|
end
|
48
50
|
end
|
data/bin/epub-cover
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require "optparse"
|
2
|
+
require "uri"
|
3
|
+
require "epub/parser"
|
4
|
+
|
5
|
+
# Entry point of the epub-cover command.
#
# Parses the command line, locates the cover image of the given EPUB and
# writes it to the current directory (or to the directory given with
# `--output`) using the file name the image has inside the EPUB.
# The destination path is printed to stdout; the surrounding human-readable
# text goes to stderr so that stdout carries only the path.
#
# @param argv [Array<String>] command-line arguments; leading options are
#   consumed from it and the next element is taken as EPUBFILE
# @return [void]
def main(argv)
  option_parser = OptionParser.new {|opt|
    opt.banner = <<EOB
Extract cover image.
Image is put to current directory with the same name in EPUB.
It is put to specified directory when `--output' option is given.

Usage: #{opt.program_name} [options] EPUBFILE

EOB
    opt.separator "Options:"
    opt.on "-o", "--output=DIR", "Directory to put image file"
  }
  # getopts removes the parsed options from argv, so the first remaining
  # element is the EPUBFILE argument.
  options = option_parser.getopts(argv)
  output_dir = options["output"]
  path = argv.shift
  error "EPUBFILE not given", option_parser unless path

  if File.file? path
    # A regular file: keep the default container adapter.
  elsif File.directory? path
    EPUB::OCF::PhysicalContainer.adapter = :UnpackedDirectory
  else
    # Neither a file nor a directory: treat the argument as a URI.
    begin
      path = URI.parse(path)
    rescue URI::InvalidURIError
      error "EPUBFILE not a file", option_parser
    end
    EPUB::OCF::PhysicalContainer.adapter = :UnpackedURI
  end

  error "output not a directory", option_parser if output_dir && !File.directory?(output_dir)

  cover_image = EPUB::Parser.parse(path).cover_image
  error "cover image not found", option_parser unless cover_image

  destination = File.basename(cover_image.href.to_s)
  destination = File.join(output_dir, destination) if output_dir
  # Image data is binary; binwrite avoids newline/encoding translation.
  File.binwrite destination, cover_image.read

  $stderr.print "Cover image output to "
  print destination
  $stderr.puts ""
end

# Reports a fatal error on stderr and terminates the process with a failure
# status.
#
# @param message [String] description of the problem
# @param option_parser [OptionParser, nil] parser whose help text is printed
#   after the message; nothing more is printed when it is nil
# @return [void] never returns; `abort` raises SystemExit
def error(message, option_parser = nil)
  $stderr.puts "Error: #{message}"
  if option_parser
    $stderr.puts ""
    $stderr.puts option_parser.help
  end
  abort
end
|
50
|
+
|
51
|
+
main(ARGV)
|
data/docs/EpubCover.adoc
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
{file:docs/Home} > *{file:docs/EpubCover.adoc}*
|
2
|
+
|
3
|
+
= `epub-cover` command-line tool
|
4
|
+
|
5
|
+
`epub-cover` tool extracts the cover image from an EPUB book.
|
6
|
+
|
7
|
+
== Usage
|
8
|
+
|
9
|
+
----
|
10
|
+
% epub-cover --help
|
11
|
+
Extract cover image.
|
12
|
+
Image is put to current directory with the same name in EPUB.
|
13
|
+
It is put to specified directory when `--output' option is given.
|
14
|
+
|
15
|
+
Usage: epub-cover [options] EPUBFILE
|
16
|
+
|
17
|
+
Options:
|
18
|
+
-o, --output=DIR Directory to put image file
|
19
|
+
----
|
20
|
+
|
21
|
+
Example:
|
22
|
+
|
23
|
+
----
|
24
|
+
% epub-cover childrens-literature.epub
|
25
|
+
Cover image output to cover.png
|
26
|
+
----
|
27
|
+
|
28
|
+
As the output indicates, the cover image file is written to the current directory. The file name is the same as the one in the EPUB file.
|
29
|
+
|
30
|
+
=== Output directory
|
31
|
+
|
32
|
+
You can specify a directory to output the cover file with the `--output` option.
|
33
|
+
|
34
|
+
----
|
35
|
+
% epub-cover --output=/tmp childrens-literature.epub
|
36
|
+
Cover image output to /tmp/cover.png
|
37
|
+
----
|
38
|
+
|
39
|
+
=== Extract from the web
|
40
|
+
|
41
|
+
`epub-cover` accepts a URI instead of a file path.
|
42
|
+
|
43
|
+
----
|
44
|
+
% epub-cover https://raw.githubusercontent.com/IDPF/epub3-samples/master/30/page-blanche/
|
45
|
+
Cover image output to cover.jpg
|
46
|
+
----
|
data/docs/Examples.adoc
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
= Examples
|
2
|
+
|
3
|
+
= {doctitle}
|
4
|
+
|
5
|
+
Here are examples that show how to use the EPUB Parser gem.
|
6
|
+
|
7
|
+
* {file:docs/AggregateContentsFromWeb.markdown Aggregate Contents From the Web}
|
8
|
+
* {file:examples/exctract-content-using-cfi.rb Extract contents from EPUB files using EPUB CFI(identifier for EPUB)}
|
9
|
+
* {file:examples/find-elements-and-cfis.rb Find elements and CFIs}
|
data/docs/Home.adoc
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
= EPUB Parser
|
2
|
+
|
3
|
+
= {doctitle}
|
4
|
+
|
5
|
+
EPUB Parser gem parses EPUB 3 books loosely.
|
6
|
+
|
7
|
+
image:https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg[link="https://gitlab.com/KitaitiMakoto/epub-parser/commits/master", title="pipeline status"]
|
8
|
+
image:https://gemnasium.com/KitaitiMakoto/epub-parser.png[link="https://gemnasium.com/KitaitiMakoto/epub-parser",title="Dependency Status"]
|
9
|
+
image:https://badge.fury.io/rb/epub-parser.svg[link="https://badge.fury.io/rb/epub-parser",title="Gem Version"]
|
10
|
+
image:https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/coverage.svg[link="https://kitaitimakoto.gitlab.io/epub-parser/coverage/",title="coverage report"]
|
11
|
+
|
12
|
+
* https://kitaitimakoto.gitlab.io/epub-parser/file.Home.html[Homepage]
|
13
|
+
* https://kitaitimakoto.gitlab.io/epub-parser/[Documentation]
|
14
|
+
* https://gitlab.com/KitaitiMakoto/epub-parser[Source Code]
|
15
|
+
* https://kitaitimakoto.gitlab.io/epub-parser/coverage/[Test Coverage]
|
16
|
+
|
17
|
+
== Installation
|
18
|
+
|
19
|
+
gem install epub-parser
|
20
|
+
|
21
|
+
== Usage
|
22
|
+
|
23
|
+
=== As command-line tools
|
24
|
+
|
25
|
+
==== epubinfo
|
26
|
+
|
27
|
+
`epubinfo` tool extracts and shows the metadata of specified EPUB book.
|
28
|
+
|
29
|
+
See {file:docs/Epubinfo.markdown}.
|
30
|
+
|
31
|
+
==== epub-open
|
32
|
+
|
33
|
+
`epub-open` tool provides an interactive shell (IRB) which helps you explore an EPUB book.
|
34
|
+
|
35
|
+
See {file:docs/EpubOpen.markdown}.
|
36
|
+
|
37
|
+
==== epub-cover
|
38
|
+
|
39
|
+
`epub-cover` tool extracts the cover image from an EPUB book.
|
40
|
+
|
41
|
+
See {file:docs/EpubCover.adoc}.
|
42
|
+
|
43
|
+
=== As a library
|
44
|
+
|
45
|
+
Use `EPUB::Parser.parse` at first:
|
46
|
+
|
47
|
+
----
|
48
|
+
require 'epub/parser'
|
49
|
+
|
50
|
+
book = EPUB::Parser.parse('/path/to/book.epub')
|
51
|
+
----
|
52
|
+
|
53
|
+
This book object can yield pages in the spine's order (the spine defines the reading order determined by the author):
|
54
|
+
|
55
|
+
----
|
56
|
+
book.each_page_on_spine do |page|
|
57
|
+
# do something...
|
58
|
+
end
|
59
|
+
----
|
60
|
+
|
61
|
+
`page` above is an {EPUB::Publication::Package::Manifest::Item} object and you can call {EPUB::Publication::Package::Manifest::Item#href #href} to see where the page file is:
|
62
|
+
|
63
|
+
----
|
64
|
+
book.each_page_on_spine do |page|
|
65
|
+
file = page.href # => path/to/page/in/zip/archive
|
66
|
+
html = Zip::Archive.open('/path/to/book.epub') {|zip|
|
67
|
+
zip.fopen(file.to_s) {|file| file.read}
|
68
|
+
}
|
69
|
+
end
|
70
|
+
----
|
71
|
+
|
72
|
+
And {EPUB::Publication::Package::Manifest::Item Item} provides syntactic sugar {EPUB::Publication::Package::Manifest::Item#read #read} for the above:
|
73
|
+
|
74
|
+
----
|
75
|
+
html = page.read
|
76
|
+
doc = Nokogiri.HTML(html)
|
77
|
+
# do something with Nokogiri as always
|
78
|
+
----
|
79
|
+
|
80
|
+
For several utilities of Item, see {file:docs/Item.markdown} page.
|
81
|
+
|
82
|
+
By the way, although `book` above is a {EPUB::Book} object, all features are provided by {EPUB::Book::Features} module. Therefore YourBook class can include the features of {EPUB::Book::Features}:
|
83
|
+
|
84
|
+
----
|
85
|
+
require 'epub'
|
86
|
+
|
87
|
+
class YourBook < ActiveRecord::Base
|
88
|
+
include EPUB::Book::Features
|
89
|
+
end
|
90
|
+
|
91
|
+
book = EPUB::Parser.parse(
|
92
|
+
'uploaded-book.epub',
|
93
|
+
:class => YourBook # *************** pass YourBook class
|
94
|
+
)
|
95
|
+
book.instance_of? YourBook # => true
|
96
|
+
book.required = 'value for required field'
|
97
|
+
book.save!
|
98
|
+
book.each_page_on_spine do |epage|
|
99
|
+
page = YouBookPage.create(
|
100
|
+
:some_attr => 'some attr',
|
101
|
+
:content => epage.read,
|
102
|
+
:another_attr => 'another attr'
|
103
|
+
)
|
104
|
+
book.pages << page
|
105
|
+
end
|
106
|
+
----
|
107
|
+
|
108
|
+
You can also find a YourBook object first and pass it to the parser:
|
109
|
+
|
110
|
+
----
|
111
|
+
book = YourBook.find params[:id]
|
112
|
+
ret = EPUB::Parser.parse(
|
113
|
+
'uploaded-book.epub',
|
114
|
+
:book => book # ******************* pass your book instance
|
115
|
+
) # => book
|
116
|
+
ret == book # => true; this API is not good I feel... Welcome suggestion!
|
117
|
+
# do something with your book
|
118
|
+
----
|
119
|
+
|
120
|
+
==== Switching ZIP library
|
121
|
+
|
122
|
+
EPUB Parser uses https://github.com/javanthropus/archive-zip[Archive::Zip], a pure Ruby ZIP library, by default. You can use https://bitbucket.org/winebarrel/zip-ruby/wiki/Home[Zip/Ruby], Ruby bindings for https://libzip.org/[libzip], if you have already installed the Zip/Ruby gem with RubyGems or Bundler.
|
123
|
+
|
124
|
+
Globally:
|
125
|
+
|
126
|
+
----
|
127
|
+
EPUB::OCF::PhysicalContainer.adapter = :Zipruby
|
128
|
+
book = EPUB::Parser.parse("path/to/book.epub")
|
129
|
+
----
|
130
|
+
|
131
|
+
For each EPUB book:
|
132
|
+
|
133
|
+
----
|
134
|
+
book = EPUB::Parser.parse("path/to/book.epub", container_adapter: :Zipruby)
|
135
|
+
----
|
136
|
+
|
137
|
+
== Documentation
|
138
|
+
|
139
|
+
=== APIs
|
140
|
+
|
141
|
+
More documentation is available in:
|
142
|
+
|
143
|
+
* {file:docs/Publication.markdown} includes document's meta data, file list and so on.
|
144
|
+
* {file:docs/Item.markdown} represents a file in EPUB package.
|
145
|
+
* {file:docs/FixedLayout.markdown} provides APIs to declare how an EPUB reader should render content, such as in reflowable or fixed layout.
|
146
|
+
* {file:docs/Navigation.markdown} describes how to use Navigation Document.
|
147
|
+
* {file:docs/Searcher.adoc} introduces APIs to search words and elements, and to search by EPUB CFIs (a position pointer for EPUB) in EPUB documents.
|
148
|
+
* {file:docs/UnpackedArchive.markdown} describes how to handle directories which were generated by unzipping EPUB files instead of EPUB files themselves.
|
149
|
+
* {file:docs/MultipleRenditions.markdown} describes EPUB Multiple-Rendition Publications and the APIs for them.
|
150
|
+
|
151
|
+
=== Examples
|
152
|
+
|
153
|
+
Example usages are listed in {file:Examples} page.
|
154
|
+
|
155
|
+
* {file:docs/AggregateContentsFromWeb.markdown Aggregate Contents From the Web}
|
156
|
+
* {file:examples/exctract-content-using-cfi.rb Extract contents from EPUB files using EPUB CFI(identifier for EPUB)}
|
157
|
+
* {file:examples/find-elements-and-cfis.rb Find elements and CFIs}
|
158
|
+
|
159
|
+
=== Building documentation
|
160
|
+
|
161
|
+
If you installed EPUB Parser via the gem command, you can also generate documentation on your own (the https://gitlab.com/KitaitiMakoto/rubygems-yardoc[rubygems-yardoc] gem is needed):
|
162
|
+
|
163
|
+
----
|
164
|
+
$ gem install epub-parser
|
165
|
+
$ gem yardoc epub-parser
|
166
|
+
...
|
167
|
+
Files: 33
|
168
|
+
Modules: 20 ( 20 undocumented)
|
169
|
+
Classes: 45 ( 44 undocumented)
|
170
|
+
Constants: 31 ( 31 undocumented)
|
171
|
+
Methods: 292 ( 88 undocumented)
|
172
|
+
52.84% documented
|
173
|
+
YARD documentation is generated to:
|
174
|
+
/path/to/gempath/ruby/2.2.0/doc/epub-parser-0.2.0/yardoc
|
175
|
+
----
|
176
|
+
|
177
|
+
It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/doc/epub-parser-0.2.0/yardoc` here) at the end.
|
178
|
+
|
179
|
+
Or, you can generate the documentation from the source repository with the Rake task:
|
180
|
+
|
181
|
+
----
|
182
|
+
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
183
|
+
$ cd epub-parser
|
184
|
+
$ bundle install --path=deps
|
185
|
+
$ bundle exec rake doc:yard
|
186
|
+
...
|
187
|
+
Files: 33
|
188
|
+
Modules: 20 ( 20 undocumented)
|
189
|
+
Classes: 45 ( 44 undocumented)
|
190
|
+
Constants: 31 ( 31 undocumented)
|
191
|
+
Methods: 292 ( 88 undocumented)
|
192
|
+
52.84% documented
|
193
|
+
----
|
194
|
+
|
195
|
+
Then documentation will be available in `doc` directory.
|
196
|
+
|
197
|
+
== Requirements
|
198
|
+
|
199
|
+
* Ruby 2.2.0 or later
|
200
|
+
* `patch` command to install Nokogiri
|
201
|
+
* C compiler to compile Zip/Ruby and Nokogiri
|
202
|
+
|
203
|
+
== History
|
204
|
+
|
205
|
+
See {file:CHANGELOG.adoc}.
|
206
|
+
|
207
|
+
== Note
|
208
|
+
|
209
|
+
This library is still a work in progress.
|
210
|
+
Only a few features are implemented and APIs might be changed in the future.
|
211
|
+
Note that.
|
212
|
+
|
213
|
+
Currently implemented:
|
214
|
+
|
215
|
+
* container.xml of http://idpf.org/epub/30/spec/epub30-ocf.html#sec-container-metainf-container.xml[EPUB Open Container Format (OCF) 3.0]
|
216
|
+
* http://idpf.org/epub/30/spec/epub30-publications.html[EPUB Publications 3.0]
|
217
|
+
* EPUB Navigation Documents of http://www.idpf.org/epub/30/spec/epub30-contentdocs.html[EPUB Content Documents 3.0]
|
218
|
+
* http://www.idpf.org/epub/fxl/[EPUB 3 Fixed-Layout Documents]
|
219
|
+
* metadata.xml of http://www.idpf.org/epub/renditions/multiple/[EPUB Multiple-Rendition Publications]
|
220
|
+
|
221
|
+
== License
|
222
|
+
|
223
|
+
This library is distributed under the terms of the MIT License.
|
224
|
+
See {file:MIT-LICENSE} file for more info.
|
data/docs/Searcher.adoc
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
{file:docs/Home.adoc} > **{file:docs/Searcher.adoc}**
|
2
|
+
|
3
|
+
= Searcher
|
4
|
+
|
5
|
+
*Searcher is experimental now. Note that all interfaces are not stable at all.*
|
6
|
+
|
7
|
+
== Example
|
8
|
+
|
9
|
+
----
|
10
|
+
epub = EPUB::Parser.parse('childrens-literature.epub')
|
11
|
+
search_word = 'INTRODUCTORY'
|
12
|
+
results = EPUB::Searcher.search_text(epub, search_word)
|
13
|
+
# => [#<EPUB::Searcher::Result:0x007f80ccde9528
|
14
|
+
# @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9730 @index=12, @info={}, @type=:character>],
|
15
|
+
# @parent_steps=
|
16
|
+
# [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
|
17
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccf3d3e8 @index=1, @info={:id=>nil}, @type=:itemref>,
|
18
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9e88 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
|
19
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9e38 @index=0, @info={:name=>"nav", :id=>"toc"}, @type=:element>,
|
20
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9de8 @index=1, @info={:name=>"ol", :id=>"tocList"}, @type=:element>,
|
21
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9d98 @index=0, @info={:name=>"li", :id=>"np-313"}, @type=:element>,
|
22
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9d48 @index=1, @info={:name=>"ol", :id=>nil}, @type=:element>,
|
23
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9ca8 @index=1, @info={:name=>"li", :id=>"np-317"}, @type=:element>,
|
24
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9c08 @index=0, @info={:name=>"a", :id=>nil}, @type=:element>,
|
25
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde9bb8 @index=0, @info={}, @type=:text>],
|
26
|
+
# @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccde9af0 @index=0, @info={}, @type=:character>]>,
|
27
|
+
# #<EPUB::Searcher::Result:0x007f80ccebcb30
|
28
|
+
# @end_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebcdb0 @index=12, @info={}, @type=:character>],
|
29
|
+
# @parent_steps=
|
30
|
+
# [#<EPUB::Searcher::Result::Step:0x007f80ccf571d0 @index=2, @info={:name=>"spine", :id=>nil}, @type=:element>,
|
31
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccde94b0 @index=2, @info={:id=>nil}, @type=:itemref>,
|
32
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd328 @index=1, @info={:name=>"body", :id=>nil}, @type=:element>,
|
33
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd2d8 @index=0, @info={:name=>"section", :id=>"pgepubid00492"}, @type=:element>,
|
34
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd260 @index=3, @info={:name=>"section", :id=>"pgepubid00498"}, @type=:element>,
|
35
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd210 @index=1, @info={:name=>"h3", :id=>nil}, @type=:element>,
|
36
|
+
# ##<EPUB::Searcher::Result::Step:0x007f80ccebd198 @index=0, @info={}, @type=:text>],
|
37
|
+
# @start_steps=[#<EPUB::Searcher::Result::Step:0x007f80ccebd0d0 @index=0, @info={}, @type=:character>]>]
|
38
|
+
puts results.collect(&:to_cfi).collect(&:to_fragment)
|
39
|
+
# epubcfi(/6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]/2/1,:0,:12)
|
40
|
+
# epubcfi(/6/6!/4/2[pgepubid00492]/8[pgepubid00498]/4/1,:0,:12)
|
41
|
+
# => nil
|
42
|
+
----
|
43
|
+
|
44
|
+
== Search result
|
45
|
+
|
46
|
+
Search result is an array of {EPUB::Searcher::Result} and it may be converted to an EPUBCFI string by {EPUB::Searcher::Result#to_cfi_s}.
|
47
|
+
|
48
|
+
== Seamless XHTML Searcher
|
49
|
+
|
50
|
+
Now default searcher for XHTML is *seamless* searcher, which ignores tags when searching.
|
51
|
+
|
52
|
+
You can search for the words 'search word' in the XHTML document below:
|
53
|
+
|
54
|
+
----
|
55
|
+
<html>
|
56
|
+
<head>
|
57
|
+
<title>Sample document</title>
|
58
|
+
</head>
|
59
|
+
<body>
|
60
|
+
<p><em>search</em> word</p>
|
61
|
+
</body>
|
62
|
+
</html>
|
63
|
+
----
|
64
|
+
|
65
|
+
== Restricted XHTML Searcher
|
66
|
+
|
67
|
+
You can also use *restricted* searcher, which means that it can search from only single elements. For instance, it can find 'search word' from XHTML document below:
|
68
|
+
|
69
|
+
----
|
70
|
+
<html>
|
71
|
+
<head>
|
72
|
+
<title>Sample document</title>
|
73
|
+
</head>
|
74
|
+
<body>
|
75
|
+
<p>search word</p>
|
76
|
+
</body>
|
77
|
+
</html>
|
78
|
+
----
|
79
|
+
|
80
|
+
But it cannot find them in the document below:
|
81
|
+
|
82
|
+
----
|
83
|
+
<html>
|
84
|
+
<head>
|
85
|
+
<title>Sample document</title>
|
86
|
+
</head>
|
87
|
+
<body>
|
88
|
+
<p><em>search</em> word</p>
|
89
|
+
</body>
|
90
|
+
</html>
|
91
|
+
----
|
92
|
+
|
93
|
+
because the words 'search' and 'word' are not in the same element.
|
94
|
+
|
95
|
+
To use the restricted searcher, specify the `algorithm` option for the `search_text` method:
|
96
|
+
|
97
|
+
results = EPUB::Searcher.search_text(epub, search_word, algorithm: :restricted)
|
98
|
+
|
99
|
+
== Element Searcher
|
100
|
+
|
101
|
+
You can search XHTML elements by CSS selector or XPath.
|
102
|
+
|
103
|
+
----
|
104
|
+
EPUB::Searcher::Publication.search_element(@package, css: 'ol > li').collect {|result| result[:location]}.map(&:to_fragment)
|
105
|
+
# => ["epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313])",
|
106
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/2[np-315])",
|
107
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317])",
|
108
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6)",
|
109
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319])",
|
110
|
+
# "epubcfi(/4/4!/4/2[toc]/4[tocList]/2[np-313]/4/6/4/2[np-319]/4/2)",
|
111
|
+
# :
|
112
|
+
# :
|
113
|
+
----
|
114
|
+
|
115
|
+
== Search by EPUB CFI
|
116
|
+
|
117
|
+
You can fetch XML node from EPUB document by EPUB CFI.
|
118
|
+
|
119
|
+
----
|
120
|
+
require "epub/parser"
|
121
|
+
require "epub/searcher"
|
122
|
+
|
123
|
+
epub = EPUB::Parser.parse("childrens-literature.epub")
|
124
|
+
cfi = EPUB::CFI("/6/4!/4/2[toc]/4[tocList]/2[np-313]/4/4[np-317]")
|
125
|
+
itemref, node = EPUB::Searcher.search_by_cfi(epub, cfi)
|
126
|
+
puts itemref.item.full_path
|
127
|
+
puts node
|
128
|
+
# EPUB/nav.xhtml
|
129
|
+
# <li id="np-317" class="front">
|
130
|
+
# <a href="s04.xhtml#pgepubid00498">INTRODUCTORY</a>
|
131
|
+
# </li>
|
132
|
+
----
|