epub-parser 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +51 -1
- data/.yardopts +5 -3
- data/{CHANGELOG.markdown → CHANGELOG.adoc} +49 -84
- data/README.adoc +228 -0
- data/Rakefile +3 -1
- data/bin/epub-cover +51 -0
- data/docs/EpubCover.adoc +46 -0
- data/docs/Examples.adoc +9 -0
- data/docs/Home.adoc +224 -0
- data/docs/Searcher.adoc +132 -0
- data/epub-parser.gemspec +2 -1
- data/lib/epub/book/features.rb +7 -1
- data/lib/epub/metadata.rb +9 -1
- data/lib/epub/parser/metadata.rb +4 -2
- data/lib/epub/parser/version.rb +1 -1
- data/lib/epub/publication/package/manifest.rb +1 -1
- data/lib/epub/searcher/xhtml.rb +1 -0
- data/test/helper.rb +1 -1
- metadata +26 -8
- data/README.markdown +0 -219
- data/docs/Home.markdown +0 -196
- data/docs/Searcher.markdown +0 -109
data/epub-parser.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = EPUB::Parser::VERSION
|
8
8
|
s.authors = ["KITAITI Makoto"]
|
9
9
|
s.email = ["KitaitiMakoto@gmail.com"]
|
10
|
-
s.homepage = "
|
10
|
+
s.homepage = "https://kitaitimakoto.gitlab.io/epub-parser/file.Home.html"
|
11
11
|
s.summary = %q{EPUB 3 Parser}
|
12
12
|
s.description = %q{Parse EPUB 3 book loosely}
|
13
13
|
s.license = 'MIT'
|
@@ -38,6 +38,7 @@ Gem::Specification.new do |s|
|
|
38
38
|
s.add_development_dependency 'ronn'
|
39
39
|
s.add_development_dependency 'pretty_backtrace'
|
40
40
|
s.add_development_dependency 'epub-maker'
|
41
|
+
s.add_development_dependency 'asciidoctor'
|
41
42
|
|
42
43
|
s.add_runtime_dependency 'archive-zip'
|
43
44
|
s.add_runtime_dependency 'nokogiri', '>= 1.6.0', '< 1.9'
|
data/lib/epub/book/features.rb
CHANGED
@@ -36,7 +36,7 @@ module EPUB
|
|
36
36
|
# @!parse def_delegators :package, :metadata, :manifest, :spine, :guide, :bindings
|
37
37
|
def_delegators :package, *Publication::Package::CONTENT_MODELS
|
38
38
|
def_delegators :metadata, :title, :main_title, :subtitle, :short_title, :collection_title, :edition_title, :extended_title, :description, :date, :unique_identifier, :modified, :release_identifier, :package_identifier
|
39
|
-
def_delegators :manifest, :nav
|
39
|
+
def_delegators :manifest, :nav
|
40
40
|
|
41
41
|
def container_adapter
|
42
42
|
@adapter || OCF::PhysicalContainer.adapter
|
@@ -46,6 +46,12 @@ module EPUB
|
|
46
46
|
@adapter = OCF::PhysicalContainer.find_adapter(adapter)
|
47
47
|
end
|
48
48
|
|
49
|
+
# Cover image defined in EPUB 3 or used in EPUB 2
|
50
|
+
# @return [EPUB::Publication::Package::Manifest::Item]
|
51
|
+
def cover_image
|
52
|
+
manifest.cover_image || metadata.cover_image
|
53
|
+
end
|
54
|
+
|
49
55
|
# @overload each_page_on_spine(&blk)
|
50
56
|
# iterate over items in order of spine when block given
|
51
57
|
# @yieldparam item [Publication::Package::Manifest::Item]
|
data/lib/epub/metadata.rb
CHANGED
@@ -65,6 +65,14 @@ module EPUB
|
|
65
65
|
}
|
66
66
|
end
|
67
67
|
|
68
|
+
# Cover image used in EPUB 2
|
69
|
+
# @return [EPUB::Publication::Package::Manifest::Item]
|
70
|
+
def cover_image
|
71
|
+
cover_image_meta = metas.find {|meta| meta.name == "cover"}
|
72
|
+
return unless cover_image_meta
|
73
|
+
package.manifest[cover_image_meta.meta_content]
|
74
|
+
end
|
75
|
+
|
68
76
|
def to_h
|
69
77
|
DC_ELEMS.inject({}) do |hsh, elem|
|
70
78
|
hsh[elem] = __send__(elem)
|
@@ -137,7 +145,7 @@ module EPUB
|
|
137
145
|
class Meta
|
138
146
|
include Refinee
|
139
147
|
|
140
|
-
attr_accessor :property, :id, :scheme, :content
|
148
|
+
attr_accessor :property, :id, :scheme, :content, :name, :meta_content
|
141
149
|
attr_reader :refines
|
142
150
|
|
143
151
|
def refines=(refinee)
|
data/lib/epub/parser/metadata.rb
CHANGED
@@ -38,7 +38,7 @@ module EPUB
|
|
38
38
|
when default_namespace_uri
|
39
39
|
case elem_name
|
40
40
|
when 'meta'
|
41
|
-
meta = build_model(child, :Meta, %w[property id scheme])
|
41
|
+
meta = build_model(child, :Meta, %w[property id scheme content name])
|
42
42
|
metadata.metas << meta
|
43
43
|
meta
|
44
44
|
when 'link'
|
@@ -86,9 +86,11 @@ module EPUB
|
|
86
86
|
def build_model(elem, klass=:DCMES, attributes=%w[id lang dir])
|
87
87
|
model = EPUB::Metadata.const_get(klass).new
|
88
88
|
attributes.each do |attr|
|
89
|
-
|
89
|
+
writer_name = (attr == "content") ? "meta_content=" : "#{attr.gsub('-', '_')}="
|
90
|
+
model.__send__ writer_name, extract_attribute(elem, attr)
|
90
91
|
end
|
91
92
|
model.content = elem.content unless klass == :Link
|
93
|
+
model.content.strip! if klass == :Identifier
|
92
94
|
model
|
93
95
|
end
|
94
96
|
|
data/lib/epub/parser/version.rb
CHANGED
data/lib/epub/publication/package/manifest.rb
CHANGED
@@ -220,6 +220,7 @@ module EPUB
|
|
220
220
|
raise ArgumentError, "Not relative: #{iri.inspect}" unless iri.relative?
|
221
221
|
raise ArgumentError, "Start with slash: #{iri.inspect}" if iri.path.start_with? Addressable::URI::SLASH
|
222
222
|
target_href = href + iri
|
223
|
+
target_href.fragment = nil
|
223
224
|
segments = target_href.to_s.split(Addressable::URI::SLASH)
|
224
225
|
clean_segments = []
|
225
226
|
segments.each do |segment|
|
@@ -227,7 +228,6 @@ module EPUB
|
|
227
228
|
segment == '..' ? clean_segments.pop : clean_segments << segment
|
228
229
|
end
|
229
230
|
target_iri = Addressable::URI.parse(clean_segments.join(Addressable::URI::SLASH))
|
230
|
-
target_iri.fragment = nil
|
231
231
|
manifest.items.find { |item| item.href == target_iri}
|
232
232
|
end
|
233
233
|
|
data/lib/epub/searcher/xhtml.rb
CHANGED
@@ -87,6 +87,7 @@ module EPUB
|
|
87
87
|
# TODO: Consider block level elements
|
88
88
|
content_length = content.length
|
89
89
|
sub_indices, sub_content = build_indices(child)
|
90
|
+
# TODO: Pass content_length and child_step to build_indices and remove this block
|
90
91
|
sub_indices.each_pair do |sub_pos, child_steps|
|
91
92
|
indices[content_length + sub_pos] = [child_step] + child_steps
|
92
93
|
end
|
data/test/helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: epub-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- KITAITI Makoto
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -206,6 +206,20 @@ dependencies:
|
|
206
206
|
- - ">="
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: '0'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: asciidoctor
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - ">="
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
type: :development
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - ">="
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0'
|
209
223
|
- !ruby/object:Gem::Dependency
|
210
224
|
name: archive-zip
|
211
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -286,6 +300,7 @@ description: Parse EPUB 3 book loosely
|
|
286
300
|
email:
|
287
301
|
- KitaitiMakoto@gmail.com
|
288
302
|
executables:
|
303
|
+
- epub-cover
|
289
304
|
- epub-open
|
290
305
|
- epubinfo
|
291
306
|
extensions: []
|
@@ -295,23 +310,26 @@ files:
|
|
295
310
|
- ".gitignore"
|
296
311
|
- ".gitlab-ci.yml"
|
297
312
|
- ".yardopts"
|
298
|
-
- CHANGELOG.markdown
|
313
|
+
- CHANGELOG.adoc
|
299
314
|
- Gemfile
|
300
315
|
- MIT-LICENSE
|
301
|
-
- README.markdown
|
316
|
+
- README.adoc
|
302
317
|
- Rakefile
|
318
|
+
- bin/epub-cover
|
303
319
|
- bin/epub-open
|
304
320
|
- bin/epubinfo
|
305
321
|
- docs/AggregateContentsFromWeb.markdown
|
322
|
+
- docs/EpubCover.adoc
|
306
323
|
- docs/EpubOpen.markdown
|
307
324
|
- docs/Epubinfo.markdown
|
325
|
+
- docs/Examples.adoc
|
308
326
|
- docs/FixedLayout.markdown
|
309
|
-
- docs/Home.markdown
|
327
|
+
- docs/Home.adoc
|
310
328
|
- docs/Item.markdown
|
311
329
|
- docs/MultipleRenditions.markdown
|
312
330
|
- docs/Navigation.markdown
|
313
331
|
- docs/Publication.markdown
|
314
|
-
- docs/Searcher.markdown
|
332
|
+
- docs/Searcher.adoc
|
315
333
|
- docs/UnpackedArchive.markdown
|
316
334
|
- docs/yard-forwardable_def_delegators_handler.rb
|
317
335
|
- epub-parser.gemspec
|
@@ -391,7 +409,7 @@ files:
|
|
391
409
|
- test/test_parser_publication.rb
|
392
410
|
- test/test_publication.rb
|
393
411
|
- test/test_searcher.rb
|
394
|
-
homepage:
|
412
|
+
homepage: https://kitaitimakoto.gitlab.io/epub-parser/file.Home.html
|
395
413
|
licenses:
|
396
414
|
- MIT
|
397
415
|
metadata: {}
|
@@ -411,7 +429,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
411
429
|
version: '0'
|
412
430
|
requirements: []
|
413
431
|
rubyforge_project:
|
414
|
-
rubygems_version: 2.7.
|
432
|
+
rubygems_version: 2.7.6
|
415
433
|
signing_key:
|
416
434
|
specification_version: 4
|
417
435
|
summary: EPUB 3 Parser
|
data/README.markdown
DELETED
@@ -1,219 +0,0 @@
|
|
1
|
-
EPUB Parser
|
2
|
-
===========
|
3
|
-
[![build status](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/build.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
4
|
-
[![Dependency Status](https://gemnasium.com/KitaitiMakoto/epub-parser.png)](https://gemnasium.com/KitaitiMakoto/epub-parser)
|
5
|
-
[![Gem Version](https://badge.fury.io/rb/epub-parser.svg)](http://badge.fury.io/rb/epub-parser)
|
6
|
-
[![coverage report](https://gitlab.com/KitaitiMakoto/epub-parser/badges/master/coverage.svg)](https://gitlab.com/KitaitiMakoto/epub-parser/commits/master)
|
7
|
-
|
8
|
-
* [Homepage][homepage]
|
9
|
-
* [Documentation][rubydoc]
|
10
|
-
* [Source Code][]
|
11
|
-
|
12
|
-
[Source Code]: https://gitlab.com/KitaitiMakoto/epub-parser
|
13
|
-
|
14
|
-
INSTALLATION
|
15
|
-
-------
|
16
|
-
|
17
|
-
gem install epub-parser
|
18
|
-
|
19
|
-
USAGE
|
20
|
-
-----
|
21
|
-
|
22
|
-
### As a library
|
23
|
-
|
24
|
-
require 'epub/parser'
|
25
|
-
|
26
|
-
book = EPUB::Parser.parse('book.epub')
|
27
|
-
book.metadata.titles # => Array of EPUB::Publication::Package::Metadata::Title. Main title, subtitle, etc...
|
28
|
-
book.metadata.title # => Title string including all titles
|
29
|
-
book.metadata.creators # => Creators(authors)
|
30
|
-
book.each_page_on_spine do |page|
|
31
|
-
page.media_type # => "application/xhtml+xml"
|
32
|
-
page.entry_name # => "OPS/nav.xhtml" entry name in EPUB package(zip archive)
|
33
|
-
page.read # => raw content document
|
34
|
-
page.content_document.nokogiri # => Nokogiri::XML::Document. The same to Nokogiri.XML(page.read)
|
35
|
-
# do something more
|
36
|
-
# :
|
37
|
-
end
|
38
|
-
|
39
|
-
See document's {file:docs/Home.markdown} or [API Documentation][rubydoc] for more info.
|
40
|
-
|
41
|
-
[rubydoc]: http://www.rubydoc.info/gems/epub-parser
|
42
|
-
|
43
|
-
### `epubinfo` command-line tool
|
44
|
-
|
45
|
-
`epubinfo` tool extracts and shows the metadata of specified EPUB book.
|
46
|
-
|
47
|
-
$ epubinfo ~/Documents/Books/build_awesome_command_line_applications_in_ruby.epub
|
48
|
-
Title: Build Awesome Command-Line Applications in Ruby (for KITAITI MAKOTO)
|
49
|
-
Identifiers: 978-1-934356-91-3
|
50
|
-
Titles: Build Awesome Command-Line Applications in Ruby (for KITAITI MAKOTO)
|
51
|
-
Languages: en
|
52
|
-
Contributors:
|
53
|
-
Coverages:
|
54
|
-
Creators: David Bryant Copeland
|
55
|
-
Dates:
|
56
|
-
Descriptions:
|
57
|
-
Formats:
|
58
|
-
Publishers: The Pragmatic Bookshelf, LLC (338304)
|
59
|
-
Relations:
|
60
|
-
Rights: Copyright © 2012 Pragmatic Programmers, LLC
|
61
|
-
Sources:
|
62
|
-
Subjects: Pragmatic Bookshelf
|
63
|
-
Types:
|
64
|
-
Unique identifier: 978-1-934356-91-3
|
65
|
-
Epub version: 2.0
|
66
|
-
|
67
|
-
See {file:docs/Epubinfo.markdown} for more info.
|
68
|
-
|
69
|
-
### `epub-open` command-line tool
|
70
|
-
|
71
|
-
`epub-open` tool provides interactive shell(IRB) which helps you research about EPUB book.
|
72
|
-
|
73
|
-
epub-open path/to/book.epub
|
74
|
-
|
75
|
-
IRB starts. `self` becomes the EPUB book and can access to methods of `EPUB`.
|
76
|
-
|
77
|
-
title
|
78
|
-
=> "Title of the book"
|
79
|
-
metadata.creators
|
80
|
-
=> [Author 1, Author2, ...]
|
81
|
-
resources.first.properties
|
82
|
-
=> #<Set: {"nav"}> # You know that first resource of this book is nav document
|
83
|
-
nav = resources.first
|
84
|
-
=> ...
|
85
|
-
nav.href
|
86
|
-
=> #<Addressable::URI:0x15ce350 URI:nav.xhtml>
|
87
|
-
nav.media_type
|
88
|
-
=> "application/xhtml+xml"
|
89
|
-
puts nav.read
|
90
|
-
<?xml version="1.0"?>
|
91
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
92
|
-
:
|
93
|
-
:
|
94
|
-
:
|
95
|
-
</html>
|
96
|
-
=> nil
|
97
|
-
exit # Enter "exit" when exit the session
|
98
|
-
|
99
|
-
See {file:docs/EpubOpen.markdown} for more info.
|
100
|
-
|
101
|
-
DOCUMENTATION
|
102
|
-
-------------
|
103
|
-
|
104
|
-
Documentation is available in [homepage][].
|
105
|
-
|
106
|
-
If you installed EPUB Parser by gem command, you can also generate documentation yourself([rubygems-yardoc][] gem is needed):
|
107
|
-
|
108
|
-
$ gem install epub-parser
|
109
|
-
$ gem yardoc epub-parser
|
110
|
-
...
|
111
|
-
Files: 33
|
112
|
-
Modules: 20 ( 20 undocumented)
|
113
|
-
Classes: 45 ( 44 undocumented)
|
114
|
-
Constants: 31 ( 31 undocumented)
|
115
|
-
Methods: 292 ( 88 undocumented)
|
116
|
-
52.84% documented
|
117
|
-
YARD documentation is generated to:
|
118
|
-
/path/to/gempath/ruby/2.2.0/doc/epub-parser-0.2.0/yardoc
|
119
|
-
|
120
|
-
It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/doc/epub-parser-0.2.0/yardoc` here) at the end.
|
121
|
-
|
122
|
-
Or, generating by yardoc command is possible, too:
|
123
|
-
|
124
|
-
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
125
|
-
$ cd epub-parser
|
126
|
-
$ bundle install --path=deps
|
127
|
-
$ bundle exec rake doc:yard
|
128
|
-
...
|
129
|
-
Files: 33
|
130
|
-
Modules: 20 ( 20 undocumented)
|
131
|
-
Classes: 45 ( 44 undocumented)
|
132
|
-
Constants: 31 ( 31 undocumented)
|
133
|
-
Methods: 292 ( 88 undocumented)
|
134
|
-
52.84% documented
|
135
|
-
|
136
|
-
Then documentation will be available in `doc` directory.
|
137
|
-
|
138
|
-
[homepage]: http://www.rubydoc.info/gems/epub-parser/file/docs/Home.markdown
|
139
|
-
[rubygems-yardoc]: https://rubygems.org/gems/rubygems-yardoc
|
140
|
-
|
141
|
-
REQUIREMENTS
|
142
|
-
------------
|
143
|
-
* Ruby 2.3.0 or later
|
144
|
-
* `patch` command to install Nokogiri
|
145
|
-
* C compiler to compile Nokogiri
|
146
|
-
|
147
|
-
SIMILAR EFFORTS
|
148
|
-
---------------
|
149
|
-
* [gepub](https://github.com/skoji/gepub) - a generic EPUB library for Ruby
|
150
|
-
* [epubinfo](https://github.com/chdorner/epubinfo) - Extracts metadata information from EPUB files. Supports EPUB2 and EPUB3 formats.
|
151
|
-
* [ReVIEW](https://github.com/kmuto/review) - ReVIEW is a easy-to-use digital publishing system for books and ebooks.
|
152
|
-
* [epzip](https://github.com/takahashim/epzip) - epzip is EPUB packing tool. It's just only doing 'zip.' :)
|
153
|
-
* [eeepub](https://github.com/jugyo/eeepub) - EeePub is a Ruby ePub generator
|
154
|
-
* [epub-maker](https://gitlab.com/KitaitiMakoto/epub-maker) - This library supports making and editing EPUB books based on this EPUB Parser library
|
155
|
-
* [epub-cfi](https://gitlab.com/KitaitiMakoto/epub-cfi) - EPUB CFI library extracted this EPUB Parser library.
|
156
|
-
|
157
|
-
If you find other gems, please tell me or request a pull request.
|
158
|
-
|
159
|
-
RECENT CHANGES
|
160
|
-
--------------
|
161
|
-
|
162
|
-
### 0.3.6
|
163
|
-
|
164
|
-
* [BUG FIX]Ignore fragment when find item by relative IRI
|
165
|
-
* Disable [PrettyBacktrace][] by default
|
166
|
-
|
167
|
-
[PrettyBacktrace]: https://github.com/ko1/pretty_backtrace
|
168
|
-
|
169
|
-
|
170
|
-
### 0.3.5
|
171
|
-
|
172
|
-
* [BUG FIX]Fix a bug that {EPUB::ContentDocument::Navigation::Item#item} is `nil` when `href` includes double dots(`..`)(Thanks [aelkiss][]!)
|
173
|
-
|
174
|
-
### 0.3.4
|
175
|
-
|
176
|
-
* Add {EPUB::Publication::Package#full_path} and {EPUB::Publication::Package#rootfile}
|
177
|
-
* [BUG FIX]Fix a bug that {EPUB::ContentDocument::Navigation::Item#item} doesn't return correct {EPUB::Publication::Package::Manifest::Item Item}(Thanks [aelkiss][]!)
|
178
|
-
|
179
|
-
[aelkiss]: https://gitlab.com/aelkiss
|
180
|
-
|
181
|
-
### 0.3.3
|
182
|
-
|
183
|
-
* [BUG FIX]Use UnpackedURI adapter for URI
|
184
|
-
* [BREAKING CHANGE]Remove deprecated second argument from `EPUB::Parser::Publication#initialize`
|
185
|
-
* Add `detect_encoding` keyword argument to `Publication::Package::Manifest::Item#read` and `ContentDocument::XHTML#read`
|
186
|
-
|
187
|
-
See {file:CHANGELOG.markdown} for older changelogs and details.
|
188
|
-
|
189
|
-
TODOS
|
190
|
-
-----
|
191
|
-
* Consider to implement IRI feature instead of to use Addressable
|
192
|
-
* EPUB 3.0.1
|
193
|
-
* EPUB 3.1
|
194
|
-
* Help features for `epub-open` tool
|
195
|
-
* Vocabulary Association Mechanisms
|
196
|
-
* Implementing navigation document and so on
|
197
|
-
* Media Overlays
|
198
|
-
* Content Document
|
199
|
-
* Digital Signature
|
200
|
-
* Using SAX on parsing
|
201
|
-
* Abstraction of XML parser(making it possible to use REXML, standard bundled XML library of Ruby)
|
202
|
-
* Handle with encodings other than UTF-8
|
203
|
-
|
204
|
-
DONE
|
205
|
-
----
|
206
|
-
* Simple inspect for `epub-open` tool
|
207
|
-
* Using zip library instead of `unzip` command, which has security issue
|
208
|
-
* Modify methods around fallback to see `bindings` element in the package
|
209
|
-
* Content Document(only for Navigation Documents)
|
210
|
-
* Fixed Layout
|
211
|
-
* Vocabulary Association Mechanisms(only for itemref)
|
212
|
-
* Archive library abstraction
|
213
|
-
* Extracting and organizing common behavior from some classes to modules
|
214
|
-
* Multiple rootfiles
|
215
|
-
|
216
|
-
LICENSE
|
217
|
-
-------
|
218
|
-
This library is distributed under the terms of the MIT License.
|
219
|
-
See MIT-LICENSE file for more info.
|
data/docs/Home.markdown
DELETED
@@ -1,196 +0,0 @@
|
|
1
|
-
EPUB Parser
|
2
|
-
===========
|
3
|
-
|
4
|
-
EPUB Parser gem parses EPUB 3 book loosely.
|
5
|
-
|
6
|
-
Installation
|
7
|
-
------------
|
8
|
-
|
9
|
-
gem install epub-parser
|
10
|
-
|
11
|
-
Usage
|
12
|
-
-----
|
13
|
-
|
14
|
-
### As command-line tools
|
15
|
-
|
16
|
-
#### epubinfo
|
17
|
-
|
18
|
-
`epubinfo` tool extracts and shows the metadata of specified EPUB book.
|
19
|
-
|
20
|
-
See {file:docs/Epubinfo.markdown}.
|
21
|
-
|
22
|
-
#### epub-open
|
23
|
-
|
24
|
-
`epub-open` tool provides interactive shell(IRB) which helps you research about EPUB book.
|
25
|
-
|
26
|
-
See {file:docs/EpubOpen.markdown}.
|
27
|
-
|
28
|
-
### As a library
|
29
|
-
|
30
|
-
Use `EPUB::Parser.parse` at first:
|
31
|
-
|
32
|
-
require 'epub/parser'
|
33
|
-
|
34
|
-
book = EPUB::Parser.parse('/path/to/book.epub')
|
35
|
-
|
36
|
-
This book object can yield page by spine's order(spine defines the order to read that the author determines):
|
37
|
-
|
38
|
-
book.each_page_on_spine do |page|
|
39
|
-
# do something...
|
40
|
-
end
|
41
|
-
|
42
|
-
`page` above is an {EPUB::Publication::Package::Manifest::Item} object and you can call {EPUB::Publication::Package::Manifest::Item#href #href} to see where is the page file:
|
43
|
-
|
44
|
-
book.each_page_on_spine do |page|
|
45
|
-
file = page.href # => path/to/page/in/zip/archive
|
46
|
-
html = Zip::Archive.open('/path/to/book.epub') {|zip|
|
47
|
-
zip.fopen(file.to_s) {|file| file.read}
|
48
|
-
}
|
49
|
-
end
|
50
|
-
|
51
|
-
And {EPUB::Publication::Package::Manifest::Item Item} provides syntax sugar {EPUB::Publication::Package::Manifest::Item#read #read} for above:
|
52
|
-
|
53
|
-
html = page.read
|
54
|
-
doc = Nokogiri.HTML(html)
|
55
|
-
# do something with Nokogiri as always
|
56
|
-
|
57
|
-
For several utilities of Item, see {file:docs/Item.markdown} page.
|
58
|
-
|
59
|
-
By the way, although `book` above is a {EPUB::Book} object, all features are provided by {EPUB::Book::Features} module. Therefore YourBook class can include the features of {EPUB::Book::Features}:
|
60
|
-
|
61
|
-
require 'epub'
|
62
|
-
|
63
|
-
class YourBook < ActiveRecord::Base
|
64
|
-
include EPUB::Book::Features
|
65
|
-
end
|
66
|
-
|
67
|
-
book = EPUB::Parser.parse(
|
68
|
-
'uploaded-book.epub',
|
69
|
-
:class => YourBook # *************** pass YourBook class
|
70
|
-
)
|
71
|
-
book.instance_of? YourBook # => true
|
72
|
-
book.required = 'value for required field'
|
73
|
-
book.save!
|
74
|
-
book.each_page_on_spine do |epage|
|
75
|
-
page = YouBookPage.create(
|
76
|
-
:some_attr => 'some attr',
|
77
|
-
:content => epage.read,
|
78
|
-
:another_attr => 'another attr'
|
79
|
-
)
|
80
|
-
book.pages << page
|
81
|
-
end
|
82
|
-
|
83
|
-
You are also able to find YourBook object for the first:
|
84
|
-
|
85
|
-
book = YourBook.find params[:id]
|
86
|
-
ret = EPUB::Parser.parse(
|
87
|
-
'uploaded-book.epub',
|
88
|
-
:book => book # ******************* pass your book instance
|
89
|
-
) # => book
|
90
|
-
ret == book # => true; this API is not good I feel... Welcome suggestion!
|
91
|
-
# do something with your book
|
92
|
-
|
93
|
-
#### Switching ZIP library
|
94
|
-
|
95
|
-
EPUB Parser uses [Archive::Zip][], a pure Ruby ZIP library, by default. You can use [Zip/Ruby][], a Ruby bindings for [libzip][] if you have already installed Zip/Ruby gem by RubyGems or Bundler.
|
96
|
-
|
97
|
-
Globally:
|
98
|
-
|
99
|
-
EPUB::OCF::PhysicalContainer.adapter = :Zipruby
|
100
|
-
book = EPUB::Parser.parse("path/to/book.epub")
|
101
|
-
|
102
|
-
For each EPUB book:
|
103
|
-
|
104
|
-
book = EPUB::Parser.parse("path/to/book.epub", container_adapter: :Zipruby)
|
105
|
-
|
106
|
-
[Archive::Zip]: https://github.com/javanthropus/archive-zip
|
107
|
-
[Zip/Ruby]: https://bitbucket.org/winebarrel/zip-ruby/wiki/Home
|
108
|
-
[libzip]: https://nih.at/libzip/
|
109
|
-
|
110
|
-
Documentation
|
111
|
-
-------------
|
112
|
-
|
113
|
-
More documentation is available in:
|
114
|
-
|
115
|
-
* {file:docs/Publication.markdown}
|
116
|
-
* {file:docs/Item.markdown}
|
117
|
-
* {file:docs/FixedLayout.markdown}
|
118
|
-
* {file:docs/Navigation.markdown}
|
119
|
-
* {file:docs/Searcher.markdown}
|
120
|
-
* {file:docs/UnpackedArchive.markdown}
|
121
|
-
* {file:docs/AggregateContentsFromWeb.markdown}
|
122
|
-
* {file:docs/MultipleRenditions.markdown}
|
123
|
-
|
124
|
-
If you installed EPUB Parser via gem command, you can also generate documentation on your own([rubygems-yardoc][] gem is needed):
|
125
|
-
|
126
|
-
$ gem install epub-parser
|
127
|
-
$ gem yardoc epub-parser
|
128
|
-
...
|
129
|
-
Files: 33
|
130
|
-
Modules: 20 ( 20 undocumented)
|
131
|
-
Classes: 45 ( 44 undocumented)
|
132
|
-
Constants: 31 ( 31 undocumented)
|
133
|
-
Methods: 292 ( 88 undocumented)
|
134
|
-
52.84% documented
|
135
|
-
YARD documentation is generated to:
|
136
|
-
/path/to/gempath/ruby/2.2.0/doc/epub-parser-0.2.0/yardoc
|
137
|
-
|
138
|
-
It will show you path to generated documentation(`/path/to/gempath/ruby/2.2.0/doc/epub-parser-0.2.0/yardoc` here) at the end.
|
139
|
-
|
140
|
-
Or, generating yardoc command is possible, too:
|
141
|
-
|
142
|
-
$ git clone https://gitlab.com/KitaitiMakoto/epub-parser.git
|
143
|
-
$ cd epub-parser
|
144
|
-
$ bundle install --path=deps
|
145
|
-
$ bundle exec rake doc:yard
|
146
|
-
...
|
147
|
-
Files: 33
|
148
|
-
Modules: 20 ( 20 undocumented)
|
149
|
-
Classes: 45 ( 44 undocumented)
|
150
|
-
Constants: 31 ( 31 undocumented)
|
151
|
-
Methods: 292 ( 88 undocumented)
|
152
|
-
52.84% documented
|
153
|
-
|
154
|
-
Then documentation will be available in `doc` directory.
|
155
|
-
|
156
|
-
[homepage]: http://www.rubydoc.info/gems/epub-parser/file/docs/Home.markdown
|
157
|
-
[rubygems-yardoc]: https://rubygems.org/gems/rubygems-yardoc
|
158
|
-
|
159
|
-
Requirements
|
160
|
-
------------
|
161
|
-
|
162
|
-
* Ruby 2.2.0 or later
|
163
|
-
* `patch` command to install Nokogiri
|
164
|
-
* C compiler to compile Zip/Ruby and Nokogiri
|
165
|
-
|
166
|
-
History
|
167
|
-
-------
|
168
|
-
|
169
|
-
See {file:CHANGELOG.markdown}.
|
170
|
-
|
171
|
-
Note
|
172
|
-
----
|
173
|
-
|
174
|
-
This library is still in work.
|
175
|
-
Only a few features are implemented and APIs might be changed in the future.
|
176
|
-
Note that.
|
177
|
-
|
178
|
-
Currently implemented:
|
179
|
-
|
180
|
-
* container.xml of [EPUB Open Container Format (OCF) 3.0][]
|
181
|
-
* [EPUB Publications 3.0][]
|
182
|
-
* EPUB Navigation Documents of [EPUB Content Documents 3.0][]
|
183
|
-
* [EPUB 3 Fixed-Layout Documents][]
|
184
|
-
* metadata.xml of [EPUB Multiple-Rendition Publications][]
|
185
|
-
|
186
|
-
[EPUB Open Container Format (OCF) 3.0]:http://idpf.org/epub/30/spec/epub30-ocf.html#sec-container-metainf-container.xml
|
187
|
-
[EPUB Publications 3.0]:http://idpf.org/epub/30/spec/epub30-publications.html
|
188
|
-
[EPUB Content Documents 3.0]:http://www.idpf.org/epub/30/spec/epub30-contentdocs.html
|
189
|
-
[EPUB 3 Fixed-Layout Documents]:http://www.idpf.org/epub/fxl/
|
190
|
-
[EPUB Multiple-Rendition Publications]: http://www.idpf.org/epub/renditions/multiple/
|
191
|
-
|
192
|
-
License
|
193
|
-
-------
|
194
|
-
|
195
|
-
This library is distributed under the term of the MIT Licence.
|
196
|
-
See {file:MIT-LICENSE} file for more info.
|