epub-reader 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rbenv-gemsets +1 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +7 -0
- data/README.md +65 -0
- data/Rakefile +1 -0
- data/epub-reader.gemspec +23 -0
- data/lib/epub-reader.rb +14 -0
- data/lib/epub-reader/container.rb +35 -0
- data/lib/epub-reader/epubfile.rb +7 -0
- data/lib/epub-reader/package.rb +255 -0
- data/lib/epub-reader/page.rb +26 -0
- data/lib/epub-reader/reader.rb +118 -0
- data/lib/epub-reader/toc.rb +130 -0
- data/lib/epub-reader/version.rb +5 -0
- data/spec/container_spec.rb +22 -0
- data/spec/data/invalid.epub +0 -0
- data/spec/data/valid.epub +0 -0
- data/spec/package_spec.rb +101 -0
- data/spec/page_spec.rb +24 -0
- data/spec/reader_spec.rb +61 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/toc_spec.rb +28 -0
- data/teste.rb +9 -0
- metadata +97 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 310b2d309ca3ea509ad9c56afdcbaf53b1a7bc9a
|
4
|
+
data.tar.gz: a4ae809a633232d45737abd0363e954709bffd04
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a87eb9670787d8411fb22d0971b7e8e1bcaf4e5694fc814f2991faac2cd94521fffe87b81ca3ebe3cce526bd85728cab34985a469fc7e6d24efdd01e5c3ba9eb
|
7
|
+
data.tar.gz: d7e1a1cc73c277912c119522535fdf34e77e0b0365da5cbf39af553995d9c06b8ad5f1e6edc8e5bc81d752695e32b7a67ce16667e3711a9dd613909def788b99
|
data/.gitignore
ADDED
data/.rbenv-gemsets
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
epub-reader
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.0.0-p247
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Overview
|
2
|
+
|
3
|
+
EPUB Reader is a Ruby library which helps you to parse EPUB files conforming
|
4
|
+
as much as possible to the specification from IDPF.
|
5
|
+
|
6
|
+
# Installation
|
7
|
+
|
8
|
+
The recommended installation method is via Rubygems.
|
9
|
+
|
10
|
+
gem install epub-reader
|
11
|
+
|
12
|
+
# Usage
|
13
|
+
|
14
|
+
Begin by creating an Epub::Reader instance that points to an EPUB file. Document
|
15
|
+
level information (metadata, toc, page count, etc) is available via this object.
|
16
|
+
|
17
|
+
reader = Epub::Reader.open("somefile.epub")
|
18
|
+
puts reader.epub_version
|
19
|
+
puts reader.title
|
20
|
+
puts reader.author
|
21
|
+
puts reader.publication_date
|
22
|
+
puts reader.language
|
23
|
+
reader.pages.each do |page|
|
24
|
+
puts page.title
|
25
|
+
puts page.content
|
26
|
+
end
|
27
|
+
|
28
|
+
# Exceptions
|
29
|
+
|
30
|
+
There are two key exceptions that you will need to watch out for when processing an
|
31
|
+
EPUB file:
|
32
|
+
|
33
|
+
FileNotFoundError - The argument passed to Epub::Reader.open('file.epub') is a file
|
34
|
+
path. If the file does not exist the FileNotFoundError is thrown.
|
35
|
+
|
36
|
+
MalformedFileError - The EPUB appears to be corrupt in some way. If you believe the
|
37
|
+
file should be valid, or that a corrupt file didn't raise an exception, please
|
38
|
+
forward a copy of the file to the maintainers using the Bitbucket issue tracker
|
39
|
+
and we will attempt to improve the code.
|
40
|
+
|
41
|
+
MalformedFileError has some subclasses if you want to detect finer grained issues. If you
|
42
|
+
don't, 'rescue MalformedFileError' will catch all the subclassed errors as well.
|
43
|
+
|
44
|
+
Any other exceptions should be considered bugs in Epub::Reader (please
|
45
|
+
report them!).
|
46
|
+
|
47
|
+
# Maintainers
|
48
|
+
|
49
|
+
- Fernando Almeida <fernando@fernandoalmeida.net>
|
50
|
+
|
51
|
+
# Licensing
|
52
|
+
|
53
|
+
This is a proprietary library and all rights are reserved to eBookPlus.com.
|
54
|
+
|
55
|
+
# References
|
56
|
+
|
57
|
+
[What is EPUB 3?](http://shop.oreilly.com/product/0636920022442.do)
|
58
|
+
|
59
|
+
[EPUB Publications Specifications](http://idpf.org/epub/30/spec/epub30-publications.html)
|
60
|
+
|
61
|
+
[EPUB Content Documents Specifications](http://idpf.org/epub/30/spec/epub30-contentdocs.html)
|
62
|
+
|
63
|
+
[EPUB Open Container Formats Specifications](http://idpf.org/epub/30/spec/epub30-ocf.html)
|
64
|
+
|
65
|
+
[Shared Workspace for Emerging Specifications and Schemas for EPUB 3](http://code.google.com/p/epub-revision/downloads/list)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/epub-reader.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "epub-reader/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "epub-reader"
|
7
|
+
s.version = Epub::Reader::VERSION
|
8
|
+
s.authors = ["Fernando Almeida"]
|
9
|
+
s.email = ["fernando@fernandoalmeida.net"]
|
10
|
+
s.homepage = "http://bitbucket.com/fernandoalmeida/epub-reader"
|
11
|
+
s.summary = "A library for accessing the content of EPUB files"
|
12
|
+
s.description = "The epub-reader library implements a EPUB parser conforming as much as possible to the EPUB 3 specification from IDPF"
|
13
|
+
|
14
|
+
s.rubyforge_project = "epub-reader"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_dependency('rubyzip')
|
22
|
+
s.add_dependency('nokogiri')
|
23
|
+
end
|
data/lib/epub-reader.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'zip/zipfilesystem'
|
2
|
+
require 'nokogiri'
|
3
|
+
require "epub-reader/version"
|
4
|
+
require "epub-reader/reader"
|
5
|
+
require "epub-reader/epubfile"
|
6
|
+
require "epub-reader/container"
|
7
|
+
require "epub-reader/package"
|
8
|
+
require "epub-reader/toc"
|
9
|
+
require "epub-reader/page"
|
10
|
+
|
11
|
+
module Epub
|
12
|
+
class FileNotFoundError < StandardError; end
|
13
|
+
class MalformedFileError < StandardError; end
|
14
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Epub
|
2
|
+
class Container
|
3
|
+
|
4
|
+
attr_reader :packages
|
5
|
+
|
6
|
+
def initialize(reader)
|
7
|
+
@reader = reader
|
8
|
+
@container = get_container_content
|
9
|
+
@xml = Nokogiri::XML(@container)
|
10
|
+
@packages = []
|
11
|
+
@xml.css('container rootfiles rootfile').each do |rootfile|
|
12
|
+
@packages << Package.new(rootfile, @reader.file)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def raw
|
17
|
+
@container.to_s
|
18
|
+
end
|
19
|
+
|
20
|
+
def package(index = 0)
|
21
|
+
@packages[index]
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def get_container_content
|
27
|
+
begin
|
28
|
+
@reader.file.get_input_stream('META-INF/container.xml').read
|
29
|
+
rescue
|
30
|
+
nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,255 @@
|
|
1
|
+
module Epub
|
2
|
+
class Package
|
3
|
+
|
4
|
+
def initialize(rootfile, file)
|
5
|
+
@rootfile = rootfile
|
6
|
+
@file = file
|
7
|
+
@package = get_package_content(file)
|
8
|
+
@xml = Nokogiri::XML(@package).remove_namespaces!
|
9
|
+
end
|
10
|
+
|
11
|
+
def raw
|
12
|
+
@package.to_s
|
13
|
+
end
|
14
|
+
|
15
|
+
def path
|
16
|
+
@rootfile.attr('full-path').to_s
|
17
|
+
end
|
18
|
+
|
19
|
+
# Directory part of the package document path, including the trailing
# slash (e.g. "OPS/" for "OPS/package.opf").
#
# Returns "" when the path has no slash, or when its only slash is the
# first character.
def relative_content_path
  last_slash = path.rindex('/').to_i
  return "" unless last_slash > 0

  path[0, last_slash + 1]
end
|
23
|
+
|
24
|
+
def mediatype
|
25
|
+
@rootfile.attr('media-type')
|
26
|
+
end
|
27
|
+
|
28
|
+
def version
|
29
|
+
root.attr('version').to_s.to_i
|
30
|
+
end
|
31
|
+
|
32
|
+
# Text of the metadata <identifier> element whose id attribute equals the
# package's unique-identifier.
#
# The id is passed as a quoted attribute value: an unquoted value is only a
# valid CSS selector when it is a plain identifier, so ids containing dots,
# colons or a leading digit would otherwise fail to parse.
#
# Returns "" when no identifier element matches.
def identifier
  identifiers.css("[id=\"#{unique_identifier}\"]").text
end
|
35
|
+
|
36
|
+
# TODO: identify language
|
37
|
+
# TODO: identify subtitles
|
38
|
+
# Text of the first <title> metadata element.
#
# Returns "" when the package declares no title, matching how creator and
# contributor behave, instead of raising NoMethodError on nil.
def title
  titles.size > 0 ? titles.first.text : ""
end
|
41
|
+
|
42
|
+
# Text of the first <language> metadata element.
#
# Returns "" when the package declares no language, matching how creator and
# contributor behave, instead of raising NoMethodError on nil.
def language
  languages.size > 0 ? languages.first.text : ""
end
|
45
|
+
|
46
|
+
# TODO: identify role
|
47
|
+
# TODO: identify file-as
|
48
|
+
# TODO: identify alternate-script
|
49
|
+
# TODO: identify display-seq
|
50
|
+
def creator
|
51
|
+
creators.size > 0 ? creators.first.text : ""
|
52
|
+
end
|
53
|
+
|
54
|
+
# TODO: equal to creator
|
55
|
+
def contributor
|
56
|
+
contributors.size > 0 ? contributors.first.text : ""
|
57
|
+
end
|
58
|
+
|
59
|
+
# Publication date taken from the <date> metadata element.
#
# The previous selector was 'data' (a typo), so the date was never found.
# Only the first <date> element is used, so a package with several dates
# does not return their texts concatenated.
#
# Returns "" when the package declares no date.
def date
  dates = metadata.css('date')
  dates.size > 0 ? dates.first.text : ""
end
|
63
|
+
|
64
|
+
def source
|
65
|
+
s = metadata.css('source')
|
66
|
+
s.size > 0 ? s.text : ""
|
67
|
+
end
|
68
|
+
|
69
|
+
def type
|
70
|
+
t = metadata.css('type')
|
71
|
+
t.size > 0 ? t.text : ""
|
72
|
+
end
|
73
|
+
|
74
|
+
def resources
|
75
|
+
manifest.css('item')
|
76
|
+
end
|
77
|
+
|
78
|
+
# Manifest items whose media-type is an image type: GIF, JPEG, PNG or SVG.
#
# PNG was missing from the list even though #cover treats .png files as
# images, so PNG resources were never returned.
def images
  resources.select do |resource|
    resource.attr('media-type').to_s.match(/^image\/(gif|jpeg|png|svg\+xml)/)
  end
end
|
81
|
+
|
82
|
+
def html
|
83
|
+
resources.css('[media-type="application/xhtml+xml"]')
|
84
|
+
end
|
85
|
+
|
86
|
+
def stylesheets
|
87
|
+
resources.css('[media-type="text/css"]')
|
88
|
+
end
|
89
|
+
|
90
|
+
def javascripts
|
91
|
+
resources.css('[media-type="text/javascript"]')
|
92
|
+
end
|
93
|
+
|
94
|
+
def fonts
|
95
|
+
resources.select{|resource| resource.attr('media-type').to_s.match(/application\/(vnd\.ms-opentype|font-woff)/)}
|
96
|
+
end
|
97
|
+
|
98
|
+
def audios
|
99
|
+
resources.select{|resource| resource.attr('media-type').to_s.match(/^audio\/(mpeg|mp4)/)}
|
100
|
+
end
|
101
|
+
|
102
|
+
# Path of the table-of-contents document inside the archive.
#
# The manifest item is looked up by the id named in the spine's "toc"
# attribute. When the spine has no such attribute, the lookup falls back to
# the manifest item whose media-type is the NCX type.
#
# The fallback selector used to be single-quoted, so the media type was
# never interpolated and the fallback could not match anything.
#
# Returns relative_content_path plus the item's href; the href part is ""
# when no item matches.
def toc
  toc_item_id = spine.attr("toc")
  toc_item_mimetype = "application/x-dtbncx+xml"
  toc_item_selector =
    if toc_item_id
      "##{toc_item_id}"
    else
      "[media-type=\"#{toc_item_mimetype}\"]"
    end
  relative_content_path + resources.css(toc_item_selector).attr('href').to_s
end
|
108
|
+
|
109
|
+
def cover
|
110
|
+
begin
|
111
|
+
cover_meta = metadata.css('[name="cover"]')
|
112
|
+
meta_content = cover_meta.size == 1 ? cover_meta.attr('content') : nil
|
113
|
+
cover_content = meta_content || manifest.css('[properties="cover-image"]').attr('id').to_s
|
114
|
+
cover_path = (cover_content.to_s.match(/\.(gif|jpe?g|png)/) ? cover_content : resources.css("##{cover_content}").attr('href').to_s)
|
115
|
+
if cover_exist?(relative_content_path + cover_path)
|
116
|
+
relative_content_path + cover_path
|
117
|
+
elsif cover_exist?(relative_content_path + "Images/" + cover_path)
|
118
|
+
relative_content_path + "Images/" + cover_path
|
119
|
+
else
|
120
|
+
""
|
121
|
+
end
|
122
|
+
rescue
|
123
|
+
""
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
# TODO: to parse
|
128
|
+
# guide [optional/deprecated]
|
129
|
+
# bindings [optional]
|
130
|
+
|
131
|
+
# Manifest lookups for every spine <itemref>, in spine order.
#
# Returns an array with one manifest.css("#<idref>") result per itemref.
# Itemrefs without an idref are skipped. The old `if item_id` guard was
# always true because of the preceding .to_s, so a missing idref produced
# the invalid selector "#".
def reading_order
  item_ids = spine_items.map { |item| item.attr('idref').to_s }
  item_ids.reject(&:empty?).map { |item_id| manifest.css("##{item_id}") }
end
|
137
|
+
|
138
|
+
protected
|
139
|
+
|
140
|
+
def spine_items
|
141
|
+
spine.css('itemref')
|
142
|
+
end
|
143
|
+
|
144
|
+
# Reads the package document, located at #path, out of the archive.
#
# file - archive object responding to get_input_stream(name).
#
# Returns the document's contents as a String, or nil when the entry cannot
# be read. The stream itself used to be returned, so #raw produced the
# stream's to_s rather than the XML; Container and Toc already call .read.
def get_package_content(file)
  file.get_input_stream(path).read
rescue
  nil
end
|
151
|
+
|
152
|
+
# Whether the archive contains an entry at the given path.
#
# Returns true when @file.find_entry finds an entry, false otherwise,
# including when the lookup itself raises.
def cover_exist?(path)
  @file.find_entry(path) ? true : false
rescue
  false
end
|
159
|
+
|
160
|
+
def root
|
161
|
+
@xml.css('package')
|
162
|
+
end
|
163
|
+
|
164
|
+
def unique_identifier
|
165
|
+
root.attr('unique-identifier').to_s
|
166
|
+
end
|
167
|
+
|
168
|
+
def prefix
|
169
|
+
root.attr('prefix').to_s
|
170
|
+
end
|
171
|
+
|
172
|
+
def lang
|
173
|
+
root.attr('xml:lang').to_s
|
174
|
+
end
|
175
|
+
|
176
|
+
def dir
|
177
|
+
(spine.attr('page-progression-direction') || root.attr('dir')).to_s
|
178
|
+
end
|
179
|
+
|
180
|
+
def id
|
181
|
+
root.attr('id').to_s
|
182
|
+
end
|
183
|
+
|
184
|
+
# TODO: to do parse of
|
185
|
+
# DCMES Optional Elements [0 or more]
|
186
|
+
# contributor
|
187
|
+
# coverage
|
188
|
+
# creator
|
189
|
+
# date
|
190
|
+
# description
|
191
|
+
# format
|
192
|
+
# publisher
|
193
|
+
# relation
|
194
|
+
# rights
|
195
|
+
# source
|
196
|
+
# subject
|
197
|
+
# type
|
198
|
+
# meta [1 or more]
|
199
|
+
# OPF2 meta [0 or more]
|
200
|
+
# link [0 or more]
|
201
|
+
|
202
|
+
############
|
203
|
+
# Metadata #
|
204
|
+
############
|
205
|
+
def metadata
|
206
|
+
root.css('metadata')
|
207
|
+
end
|
208
|
+
|
209
|
+
def identifiers
|
210
|
+
metadata.css('identifier')
|
211
|
+
end
|
212
|
+
|
213
|
+
def titles
|
214
|
+
metadata.css('title')
|
215
|
+
end
|
216
|
+
|
217
|
+
def languages
|
218
|
+
metadata.css('language')
|
219
|
+
end
|
220
|
+
|
221
|
+
def creators
|
222
|
+
metadata.css('creator')
|
223
|
+
end
|
224
|
+
|
225
|
+
def contributors
|
226
|
+
metadata.css('contributor')
|
227
|
+
end
|
228
|
+
|
229
|
+
def meta
|
230
|
+
metadata.css('meta')
|
231
|
+
end
|
232
|
+
|
233
|
+
def link
|
234
|
+
metadata.css('link')
|
235
|
+
end
|
236
|
+
|
237
|
+
############
|
238
|
+
# Manifest #
|
239
|
+
############
|
240
|
+
def manifest
|
241
|
+
root.css('manifest')
|
242
|
+
end
|
243
|
+
|
244
|
+
############
|
245
|
+
# Spine #
|
246
|
+
############
|
247
|
+
def spine
|
248
|
+
root.css('spine')
|
249
|
+
end
|
250
|
+
|
251
|
+
# CSS id selectors ("#<idref>") for every spine <itemref>, in spine order.
#
# The idref attribute lives on the spine itemrefs. The previous version
# read it from the manifest items returned by #reading_order, which only
# carry an id, so every selector came out as "#".
def reading_order_selectors
  spine_items.map { |item| "##{item.attr('idref')}" }
end
|
254
|
+
end
|
255
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Epub
|
2
|
+
class Page
|
3
|
+
def initialize(title, path, file)
|
4
|
+
@title = title
|
5
|
+
@path = path
|
6
|
+
@file = file
|
7
|
+
end
|
8
|
+
|
9
|
+
attr_reader :title, :path
|
10
|
+
|
11
|
+
def content
|
12
|
+
@content ||= get_page_content
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def get_page_content
|
18
|
+
begin
|
19
|
+
@file.get_input_stream(@path).read.force_encoding(Encoding::UTF_8)
|
20
|
+
rescue
|
21
|
+
""
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Epub
|
2
|
+
class Reader
|
3
|
+
|
4
|
+
EPUB_MIMETYPE = "application/epub+zip"
|
5
|
+
PACKAGE_MEDIATYPE = "application/oebps-package+xml"
|
6
|
+
|
7
|
+
attr_reader :filepath, :file
|
8
|
+
|
9
|
+
# Opens the EPUB at the given path and validates its basic structure.
#
# f - path to the EPUB file.
#
# Raises FileNotFoundError when no file exists at f.
# Raises MalformedFileError when the archive fails the valid? checks.
def initialize(f)
  # File.exists? is deprecated and was removed in Ruby 3.2; File.exist? is
  # available on every Ruby version.
  raise(FileNotFoundError, "File not found") unless File.exist?(f)
  @filepath = f.to_s
  @file = EpubFile.new(f)
  raise(MalformedFileError, "Invalid EPUB file format") unless valid?
end
|
15
|
+
|
16
|
+
def Reader.open(f)
|
17
|
+
reader = Reader.new(f)
|
18
|
+
if block_given?
|
19
|
+
yield reader
|
20
|
+
else
|
21
|
+
reader
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def mimetype
|
26
|
+
@mimetype ||= begin
|
27
|
+
file.get_input_stream('mimetype').read
|
28
|
+
rescue
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def epub_version
|
34
|
+
@version ||= package.version
|
35
|
+
end
|
36
|
+
|
37
|
+
def uid
|
38
|
+
@uid ||= package.identifier
|
39
|
+
end
|
40
|
+
|
41
|
+
def title
|
42
|
+
@title ||= package.title
|
43
|
+
end
|
44
|
+
|
45
|
+
def author
|
46
|
+
@author ||= package.creator
|
47
|
+
end
|
48
|
+
|
49
|
+
def publication_date
|
50
|
+
@publication_date ||= package.date
|
51
|
+
end
|
52
|
+
|
53
|
+
def language
|
54
|
+
@language ||= package.language
|
55
|
+
end
|
56
|
+
|
57
|
+
def toc
|
58
|
+
@toc ||= Toc.new(package.toc, self)
|
59
|
+
end
|
60
|
+
|
61
|
+
def pages
|
62
|
+
@pages ||= toc.pages
|
63
|
+
end
|
64
|
+
|
65
|
+
def container
|
66
|
+
@container ||= Container.new(self)
|
67
|
+
end
|
68
|
+
|
69
|
+
def cover
|
70
|
+
@cover ||= package.cover
|
71
|
+
end
|
72
|
+
|
73
|
+
# TODO: To parse other META-INF files
|
74
|
+
# signatures.xml [optional]
|
75
|
+
# Contains digital signatures for various assets.
|
76
|
+
|
77
|
+
# encryption.xml [optional]
|
78
|
+
# Contains information about the encryption of Publication resources. (This file is required if font obfuscation is used.)
|
79
|
+
|
80
|
+
# metadata.xml [optional]
|
81
|
+
# Used to store metadata about the container.
|
82
|
+
|
83
|
+
# rights.xml [optional]
|
84
|
+
# Used to store information about digital rights.
|
85
|
+
|
86
|
+
# manifest.xml [allowed]
|
87
|
+
# A manifest of container contents as allowed by Open Document Format [ODF].
|
88
|
+
|
89
|
+
# Convenient method
|
90
|
+
def package(index = 0)
|
91
|
+
container.package(index)
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def valid?
|
97
|
+
valid_mimetype? && valid_container? && valid_package? && valid_toc?
|
98
|
+
end
|
99
|
+
|
100
|
+
def valid_mimetype?
|
101
|
+
/application\/epub\+zip/.match(mimetype)
|
102
|
+
end
|
103
|
+
|
104
|
+
def valid_container?
|
105
|
+
!container.nil?
|
106
|
+
end
|
107
|
+
|
108
|
+
def valid_package?
|
109
|
+
package.path.match(/\.opf$/) && package.mediatype == PACKAGE_MEDIATYPE
|
110
|
+
end
|
111
|
+
|
112
|
+
# TODO: validates TOC
|
113
|
+
def valid_toc?
|
114
|
+
true
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
118
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Epub
|
2
|
+
class Toc
|
3
|
+
|
4
|
+
def initialize(tocfile, reader)
|
5
|
+
@tocfile = tocfile
|
6
|
+
@reader = reader
|
7
|
+
@file = @reader.file
|
8
|
+
@content = get_toc_content
|
9
|
+
@xml = Nokogiri::XML(@content).remove_namespaces!
|
10
|
+
end
|
11
|
+
|
12
|
+
def content
|
13
|
+
if ncx?
|
14
|
+
if has_toc?
|
15
|
+
ncx_to_html
|
16
|
+
else
|
17
|
+
spine_to_html
|
18
|
+
end
|
19
|
+
else
|
20
|
+
@content
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def pages
|
25
|
+
points = @xml.css("ncx navMap navPoint")
|
26
|
+
items = @reader.package.reading_order
|
27
|
+
if ncx? && has_toc? && points.size > 1
|
28
|
+
points.map do |point|
|
29
|
+
title = point.css('navLabel > text').first.text
|
30
|
+
file_path = @reader.package.relative_content_path + point.css('content').attr('src').to_s
|
31
|
+
Page.new(title, file_path, @reader.file)
|
32
|
+
end
|
33
|
+
else
|
34
|
+
items.map do |item|
|
35
|
+
title = ""
|
36
|
+
file_path = @reader.package.relative_content_path + item.attr('href').to_s
|
37
|
+
Page.new(title, file_path, @reader.file)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def ncx?
|
45
|
+
@tocfile.match(/(\.ncx)$/)
|
46
|
+
end
|
47
|
+
|
48
|
+
def has_toc?
|
49
|
+
@xml.css('navMap > navPoint').size > 0
|
50
|
+
end
|
51
|
+
|
52
|
+
def get_toc_content
|
53
|
+
begin
|
54
|
+
@reader.file.get_input_stream(@tocfile).read
|
55
|
+
rescue
|
56
|
+
""
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# TODO: Add Stylesheets
|
61
|
+
# TODO: Convert nested navigation
|
62
|
+
# TODO: Refactoring to DRY with spine_to_html
|
63
|
+
def ncx_to_html
|
64
|
+
html = <<EOF
|
65
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
66
|
+
<html xmlns="http://www.w3.org/1999/xhtml" profile="http://www.idpf.org/epub/30/profile/content/">
|
67
|
+
<head>
|
68
|
+
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
|
69
|
+
<title>#{title}</title>
|
70
|
+
</head>
|
71
|
+
<body>
|
72
|
+
<section>
|
73
|
+
<nav id="toc" epub:type="toc">
|
74
|
+
<ol>
|
75
|
+
EOF
|
76
|
+
selector = "ncx > navMap > navPoint"
|
77
|
+
@xml.css(selector).each do |point|
|
78
|
+
html += "<li id=\"#{point.attr('id').to_s}\"><a href=\"#{point.css('content').attr('src').to_s}\">#{point.css('navLabel text').text}</a></li>"
|
79
|
+
end
|
80
|
+
html += <<EOF
|
81
|
+
</ol>
|
82
|
+
</nav>
|
83
|
+
</section>
|
84
|
+
</body>
|
85
|
+
</html>
|
86
|
+
EOF
|
87
|
+
html
|
88
|
+
end
|
89
|
+
|
90
|
+
def spine_to_html
|
91
|
+
html = <<EOF
|
92
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
93
|
+
<html xmlns="http://www.w3.org/1999/xhtml" profile="http://www.idpf.org/epub/30/profile/content/">
|
94
|
+
<head>
|
95
|
+
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
|
96
|
+
<title>#{title}</title>
|
97
|
+
</head>
|
98
|
+
<body>
|
99
|
+
<section>
|
100
|
+
<nav id="toc" epub:type="toc">
|
101
|
+
<ol>
|
102
|
+
EOF
|
103
|
+
@reader.package.reading_order.each do |item|
|
104
|
+
link = item.attr('href').to_s
|
105
|
+
html += "<li id=\"#{item.attr('id').to_s}\"><a href=\"#{link}\">#{link[0,link.rindex('.')]}</a></li>"
|
106
|
+
end
|
107
|
+
html += <<EOF
|
108
|
+
</ol>
|
109
|
+
</nav>
|
110
|
+
</section>
|
111
|
+
</body>
|
112
|
+
</html>
|
113
|
+
EOF
|
114
|
+
html
|
115
|
+
end
|
116
|
+
|
117
|
+
def title
|
118
|
+
root.css('docTitle > text').text
|
119
|
+
end
|
120
|
+
|
121
|
+
def root
|
122
|
+
@xml.css('ncx')
|
123
|
+
end
|
124
|
+
|
125
|
+
def navmap
|
126
|
+
root.css('navMap')
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
130
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Epub::Container do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
@file = 'spec/data/valid.epub'
|
7
|
+
@epub = Epub::Reader.open(@file)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'get raw content' do
|
11
|
+
@epub.container.raw.should_not be_empty
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'get package documents' do
|
15
|
+
@epub.container.packages.should_not be_empty
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'get default package document' do
|
19
|
+
@epub.container.package.should be_a(Epub::Package)
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
Binary file
|
Binary file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Epub::Package do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
@file = 'spec/data/valid.epub'
|
7
|
+
@reader = Epub::Reader.open(@file)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'get raw content' do
|
11
|
+
@reader.package.raw.should_not be_empty
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'get file path' do
|
15
|
+
@reader.package.path.should_not be_empty
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'get media type' do
|
19
|
+
@reader.package.mediatype.should eq("application/oebps-package+xml")
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'get the epub version' do
|
23
|
+
@reader.package.version.should eq(3)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'get the unique identifier' do
|
27
|
+
@reader.package.identifier.should eq("urn:isbn:9780316000000")
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'get the content language' do
|
31
|
+
@reader.package.language.should eq("en-US")
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'get the content title' do
|
35
|
+
@reader.package.title.should eq("Moby-Dick")
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'get the content creator' do
|
39
|
+
@reader.package.creator.should eq("Herman Melville")
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'get the content contributor' do
|
43
|
+
@reader.package.contributor.should be_empty
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'get the publication date' do
|
47
|
+
@reader.package.date.should be_empty
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'get the publication source' do
|
51
|
+
@reader.package.source.should be_empty
|
52
|
+
end
|
53
|
+
|
54
|
+
# Exercises Package#type. The example previously asserted on #source again
# (a copy of the example above), so #type had no coverage.
it 'get the content type' do
  @reader.package.type.should be_empty
end
|
57
|
+
|
58
|
+
it 'get the full resource list' do
|
59
|
+
@reader.package.resources.should_not be_empty
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'get the image list' do
|
63
|
+
@reader.package.images.size.should eq(2)
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'get the html list' do
|
67
|
+
@reader.package.html.size.should eq(143)
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'get the stylesheet list' do
|
71
|
+
@reader.package.stylesheets.size.should eq(1)
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'get the javascript list' do
|
75
|
+
@reader.package.javascripts.should be_empty
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'get the font list' do
|
79
|
+
@reader.package.fonts.should be_empty
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'get the audio list' do
|
83
|
+
@reader.package.audios.should be_empty
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'get the table of content (toc)' do
|
87
|
+
@reader.package.toc.should eq("OPS/toc.ncx")
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'get the reading order' do
|
91
|
+
list = @reader.package.reading_order
|
92
|
+
list.size.should eq(142)
|
93
|
+
list[0].attr('href').to_s.should eq('cover.xhtml')
|
94
|
+
list[1].attr('href').to_s.should eq('titlepage.xhtml')
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'get the book cover' do
|
98
|
+
@reader.package.cover.should eq("OPS/images/9780316000000.jpg")
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
data/spec/page_spec.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Epub::Page do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
file = 'spec/data/valid.epub'
|
7
|
+
reader = Epub::Reader.open(file)
|
8
|
+
@toc = Epub::Toc.new(reader.package.toc, reader)
|
9
|
+
@page = @toc.pages.last
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'get page title' do
|
13
|
+
@page.title.should eq('Copyright Page')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'get page path' do
|
17
|
+
@page.path.should eq('OPS/copyright.xhtml')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'get page content' do
|
21
|
+
@page.content.should match('<html.*>')
|
22
|
+
@page.content.should match('Produced by Daniel Lazarus and Jonesey')
|
23
|
+
end
|
24
|
+
end
|
data/spec/reader_spec.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Epub::Reader do
|
4
|
+
|
5
|
+
before(:all) do
|
6
|
+
@file = 'spec/data/valid.epub'
|
7
|
+
@reader = Epub::Reader.open(@file)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'open a epub file' do
|
11
|
+
@reader.should_not be_nil
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'raises an exception if file not found' do
|
15
|
+
lambda {Epub::Reader.open('not_found.epub')}.should raise_error
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'raises an exception if malformed file' do
|
19
|
+
lambda {Epub::Reader.open('spec/data/invalid.epub')}.should raise_error
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'get epub file path' do
|
23
|
+
@reader.filepath.should eq(@file)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'get epub mime type' do
|
27
|
+
@reader.mimetype.should eq("application/epub+zip")
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'get the epub version' do
|
31
|
+
@reader.epub_version.should eq(3)
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'get the epub unique identifier' do
|
35
|
+
@reader.uid.should eq("urn:isbn:9780316000000")
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'get the title' do
|
39
|
+
@reader.title.should eq("Moby-Dick")
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'get the author' do
|
43
|
+
@reader.author.should eq("Herman Melville")
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'get the publication date' do
|
47
|
+
@reader.publication_date.should be_empty
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'get the language' do
|
51
|
+
@reader.language.should eq("en-US")
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'get the TOC' do
|
55
|
+
@reader.toc.should be_a(Epub::Toc)
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'get the pages list' do
|
59
|
+
@reader.pages.size.should eq(142)
|
60
|
+
end
|
61
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/spec/toc_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Epub::Toc do
|
5
|
+
|
6
|
+
before(:all) do
|
7
|
+
file = 'spec/data/valid.epub'
|
8
|
+
@reader = Epub::Reader.open(file)
|
9
|
+
@toc = Epub::Toc.new(@reader.package.toc, @reader)
|
10
|
+
@html = Nokogiri::XML(@toc.content)
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'convert <ncx> to <html>' do
|
14
|
+
@html.css('html').size.should eq(1)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'convert <docTitle> to <title>' do
|
18
|
+
@html.css('head > title').text.should eq("Moby-Dick")
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'convert <navMap> to <nav>' do
|
22
|
+
@html.css('nav').size.should eq(1)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'convert <navPoint> to <a>' do
|
26
|
+
@html.css('li > a').size.should eq(142)
|
27
|
+
end
|
28
|
+
end
|
data/teste.rb
ADDED
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: epub-reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.9
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Fernando Almeida
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-09-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rubyzip
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: The epub-reader library implements a EPUB parser conforming as much as
|
42
|
+
possible to the EPUB 3 specification from IDPF
|
43
|
+
email:
|
44
|
+
- fernando@fernandoalmeida.net
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- .gitignore
|
50
|
+
- .rbenv-gemsets
|
51
|
+
- .rspec
|
52
|
+
- .ruby-version
|
53
|
+
- Gemfile
|
54
|
+
- README.md
|
55
|
+
- Rakefile
|
56
|
+
- epub-reader.gemspec
|
57
|
+
- lib/epub-reader.rb
|
58
|
+
- lib/epub-reader/container.rb
|
59
|
+
- lib/epub-reader/epubfile.rb
|
60
|
+
- lib/epub-reader/package.rb
|
61
|
+
- lib/epub-reader/page.rb
|
62
|
+
- lib/epub-reader/reader.rb
|
63
|
+
- lib/epub-reader/toc.rb
|
64
|
+
- lib/epub-reader/version.rb
|
65
|
+
- spec/container_spec.rb
|
66
|
+
- spec/data/invalid.epub
|
67
|
+
- spec/data/valid.epub
|
68
|
+
- spec/package_spec.rb
|
69
|
+
- spec/page_spec.rb
|
70
|
+
- spec/reader_spec.rb
|
71
|
+
- spec/spec_helper.rb
|
72
|
+
- spec/toc_spec.rb
|
73
|
+
- teste.rb
|
74
|
+
homepage: http://bitbucket.com/fernandoalmeida/epub-reader
|
75
|
+
licenses: []
|
76
|
+
metadata: {}
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
requirements: []
|
92
|
+
rubyforge_project: epub-reader
|
93
|
+
rubygems_version: 2.0.3
|
94
|
+
signing_key:
|
95
|
+
specification_version: 4
|
96
|
+
summary: A library for accessing the content of EPUB files
|
97
|
+
test_files: []
|