epub-reader 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rbenv-gemsets +1 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +7 -0
- data/README.md +65 -0
- data/Rakefile +1 -0
- data/epub-reader.gemspec +23 -0
- data/lib/epub-reader.rb +14 -0
- data/lib/epub-reader/container.rb +35 -0
- data/lib/epub-reader/epubfile.rb +7 -0
- data/lib/epub-reader/package.rb +255 -0
- data/lib/epub-reader/page.rb +26 -0
- data/lib/epub-reader/reader.rb +118 -0
- data/lib/epub-reader/toc.rb +130 -0
- data/lib/epub-reader/version.rb +5 -0
- data/spec/container_spec.rb +22 -0
- data/spec/data/invalid.epub +0 -0
- data/spec/data/valid.epub +0 -0
- data/spec/package_spec.rb +101 -0
- data/spec/page_spec.rb +24 -0
- data/spec/reader_spec.rb +61 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/toc_spec.rb +28 -0
- data/teste.rb +9 -0
- metadata +97 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 310b2d309ca3ea509ad9c56afdcbaf53b1a7bc9a
|
4
|
+
data.tar.gz: a4ae809a633232d45737abd0363e954709bffd04
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a87eb9670787d8411fb22d0971b7e8e1bcaf4e5694fc814f2991faac2cd94521fffe87b81ca3ebe3cce526bd85728cab34985a469fc7e6d24efdd01e5c3ba9eb
|
7
|
+
data.tar.gz: d7e1a1cc73c277912c119522535fdf34e77e0b0365da5cbf39af553995d9c06b8ad5f1e6edc8e5bc81d752695e32b7a67ce16667e3711a9dd613909def788b99
|
data/.gitignore
ADDED
data/.rbenv-gemsets
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
epub-reader
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.0.0-p247
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Overview
|
2
|
+
|
3
|
+
EPUB Reader is a Ruby library which helps you to parse EPUB files conforming
|
4
|
+
as closely as possible to the specification from the IDPF.
|
5
|
+
|
6
|
+
# Installation
|
7
|
+
|
8
|
+
The recommended installation method is via Rubygems.
|
9
|
+
|
10
|
+
gem install epub-reader
|
11
|
+
|
12
|
+
# Usage
|
13
|
+
|
14
|
+
Begin by creating an Epub::Reader instance that points to an EPUB file. Document
|
15
|
+
level information (metadata, toc, page count, etc) is available via this object.
|
16
|
+
|
17
|
+
reader = Epub::Reader.open("somefile.epub")
|
18
|
+
puts reader.epub_version
|
19
|
+
puts reader.title
|
20
|
+
puts reader.author
|
21
|
+
puts reader.publication_date
|
22
|
+
puts reader.language
|
23
|
+
reader.pages.each do |page|
|
24
|
+
puts page.title
|
25
|
+
puts page.content
|
26
|
+
end
|
27
|
+
|
28
|
+
# Exceptions
|
29
|
+
|
30
|
+
There are two key exceptions that you will need to watch out for when processing an
|
31
|
+
EPUB file:
|
32
|
+
|
33
|
+
FileNotFoundError - The argument passed to Epub::Reader.open('file.epub') is a file
|
34
|
+
path. If the file does not exist, a FileNotFoundError is raised.
|
35
|
+
|
36
|
+
MalformedFileError - The EPUB appears to be corrupt in some way. If you believe the
|
37
|
+
file should be valid, or that a corrupt file didn't raise an exception, please
|
38
|
+
forward a copy of the file to the maintainers using the Bitbucket issue tracker
|
39
|
+
and we will attempt to improve the code.
|
40
|
+
|
41
|
+
MalformedFileError has some subclasses if you want to detect finer grained issues. If you
|
42
|
+
don't, 'rescue MalformedFileError' will catch all the subclassed errors as well.
|
43
|
+
|
44
|
+
Any other exceptions should be considered bugs in Epub::Reader (please
|
45
|
+
report them!).
|
46
|
+
|
47
|
+
# Maintainers
|
48
|
+
|
49
|
+
- Fernando Almeida <fernando@fernandoalmeida.net>
|
50
|
+
|
51
|
+
# Licensing
|
52
|
+
|
53
|
+
This is a proprietary library and all rights are reserved to eBookPlus.com.
|
54
|
+
|
55
|
+
# References
|
56
|
+
|
57
|
+
[What is EPUB 3?](http://shop.oreilly.com/product/0636920022442.do)
|
58
|
+
|
59
|
+
[EPUB Publications Specifications](http://idpf.org/epub/30/spec/epub30-publications.html)
|
60
|
+
|
61
|
+
[EPUB Content Documents Specifications](http://idpf.org/epub/30/spec/epub30-contentdocs.html)
|
62
|
+
|
63
|
+
[EPUB Open Container Formats Specifications](http://idpf.org/epub/30/spec/epub30-ocf.html)
|
64
|
+
|
65
|
+
[Shared Workspace for Emerging Specifications and Schemas for EPUB 3](http://code.google.com/p/epub-revision/downloads/list)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/epub-reader.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "epub-reader/version"
|
4
|
+
|
5
|
+
# Gem specification for epub-reader: identity metadata, the file lists taken
# from git, and the runtime dependencies.
Gem::Specification.new do |s|
  s.name        = "epub-reader"
  s.version     = Epub::Reader::VERSION
  s.authors     = ["Fernando Almeida"]
  s.email       = ["fernando@fernandoalmeida.net"]
  s.homepage    = "http://bitbucket.com/fernandoalmeida/epub-reader"
  s.summary     = "A library for accessing the content of EPUB files"
  s.description = "The epub-reader library implements a EPUB parser conforming as much as possible to the EPUB 3 specification from IDPF"

  s.rubyforge_project = "epub-reader"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # lib/epub-reader.rb requires 'zip/zipfilesystem'. That require path belongs
  # to the pre-1.0 rubyzip API, so the dependency is capped below 1.0 to keep
  # a fresh install loadable.
  s.add_dependency('rubyzip', '< 1.0.0')
  s.add_dependency('nokogiri')
end
|
data/lib/epub-reader.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'zip/zipfilesystem'
|
2
|
+
require 'nokogiri'
|
3
|
+
require "epub-reader/version"
|
4
|
+
require "epub-reader/reader"
|
5
|
+
require "epub-reader/epubfile"
|
6
|
+
require "epub-reader/container"
|
7
|
+
require "epub-reader/package"
|
8
|
+
require "epub-reader/toc"
|
9
|
+
require "epub-reader/page"
|
10
|
+
|
11
|
+
module Epub
  # Raised by Epub::Reader#initialize when the given path does not exist.
  class FileNotFoundError < StandardError
  end

  # Raised by Epub::Reader#initialize when the opened file fails the reader's
  # EPUB validity checks.
  class MalformedFileError < StandardError
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Epub
  # Wraps META-INF/container.xml of an opened EPUB and builds one Package per
  # <rootfile> element found in it.
  class Container

    # Array of Epub::Package, in the order the rootfiles appear.
    attr_reader :packages

    # reader - object exposing the opened archive through #file.
    def initialize(reader)
      @reader = reader
      @container = get_container_content
      @xml = Nokogiri::XML(@container)
      @packages = @xml.css('container rootfiles rootfile').map do |rootfile|
        Package.new(rootfile, @reader.file)
      end
    end

    # The container.xml text as read from the archive ("" when it could not
    # be read).
    def raw
      @container.to_s
    end

    # The package at +index+ (the first one by default), or nil when there is
    # no package at that position.
    def package(index = 0)
      packages[index]
    end

    private

    # Reads META-INF/container.xml; any failure while reading yields nil.
    def get_container_content
      @reader.file.get_input_stream('META-INF/container.xml').read
    rescue
      nil
    end

  end
end
|
@@ -0,0 +1,255 @@
|
|
1
|
+
module Epub
  # Read-only view over one package document (the .opf file named by a
  # <rootfile> element of container.xml). The document is parsed once, with
  # namespaces removed, and queried through CSS selectors.
  class Package

    # rootfile - the <rootfile> node taken from container.xml.
    # file     - the opened archive; it is asked for input streams and entries.
    def initialize(rootfile, file)
      @rootfile = rootfile
      @file = file
      @package = get_package_content(file)
      @xml = Nokogiri::XML(@package).remove_namespaces!
    end

    # The package document text ("" when it could not be read).
    def raw
      @package.to_s
    end

    # Path of the package document inside the archive.
    def path
      @rootfile.attr('full-path').to_s
    end

    # Directory part of #path including the trailing slash, or "" when the
    # package document sits at the archive root.
    def relative_content_path
      i = path.rindex('/').to_i
      i > 0 ? path[0, i + 1] : ""
    end

    def mediatype
      @rootfile.attr('media-type')
    end

    # Integer part of the package "version" attribute (0 when absent).
    def version
      root.attr('version').to_s.to_i
    end

    # Text of the identifier element referenced by the package's
    # unique-identifier attribute. The id is quoted in the selector so that an
    # empty or unusual id yields "" instead of a selector syntax error.
    def identifier
      identifiers.css("[id=\"#{unique_identifier}\"]").text
    end

    # TODO: identify language
    # TODO: identify subtitles
    # First title, or "" when the metadata has none.
    def title
      first_text(titles)
    end

    # First language, or "" when the metadata has none.
    def language
      first_text(languages)
    end

    # TODO: identify role
    # TODO: identify file-as
    # TODO: identify alternate-script
    # TODO: identify display-seq
    # First creator, or "" when the metadata has none.
    def creator
      first_text(creators)
    end

    # TODO: equal to creator
    # First contributor, or "" when the metadata has none.
    def contributor
      first_text(contributors)
    end

    # Text of the <date> metadata element(s), or "" when there is none.
    def date
      d = metadata.css('date')
      d.size > 0 ? d.text : ""
    end

    def source
      s = metadata.css('source')
      s.size > 0 ? s.text : ""
    end

    def type
      t = metadata.css('type')
      t.size > 0 ? t.text : ""
    end

    # Every <item> of the manifest.
    def resources
      manifest.css('item')
    end

    # Manifest items whose media type is image/gif, image/jpeg, image/png or
    # image/svg+xml.
    def images
      resources.select { |resource| resource.attr('media-type').to_s.match(/^image\/(gif|jpeg|png|svg\+xml)/) }
    end

    def html
      resources.css('[media-type="application/xhtml+xml"]')
    end

    def stylesheets
      resources.css('[media-type="text/css"]')
    end

    def javascripts
      resources.css('[media-type="text/javascript"]')
    end

    def fonts
      resources.select { |resource| resource.attr('media-type').to_s.match(/application\/(vnd\.ms-opentype|font-woff)/) }
    end

    def audios
      resources.select { |resource| resource.attr('media-type').to_s.match(/^audio\/(mpeg|mp4)/) }
    end

    # Archive path of the table of contents. The manifest item is looked up by
    # the spine's "toc" attribute when present, otherwise by the NCX media
    # type. When no item matches, only #relative_content_path is returned.
    def toc
      toc_item_id = spine.attr("toc")
      toc_item_mimetype = "application/x-dtbncx+xml"
      # Double quotes are required here so the media type is interpolated.
      toc_item_selector = toc_item_id ? "##{toc_item_id}" : "[media-type=\"#{toc_item_mimetype}\"]"
      relative_content_path + resources.css(toc_item_selector).attr('href').to_s
    end

    # Archive path of the cover image, or "" when none can be resolved or any
    # error occurs while resolving it (best effort).
    def cover
      cover_meta = metadata.css('[name="cover"]')
      meta_content = cover_meta.size == 1 ? cover_meta.attr('content') : nil
      # Normalised to a String: the attribute object itself cannot be
      # concatenated with the content path below.
      cover_content = (meta_content || manifest.css('[properties="cover-image"]').attr('id')).to_s
      cover_path =
        if cover_content.match(/\.(gif|jpe?g|png)/)
          cover_content # the meta already holds a file name
        else
          resources.css("##{cover_content}").attr('href').to_s
        end
      candidates = [
        relative_content_path + cover_path,
        relative_content_path + "Images/" + cover_path
      ]
      candidates.find { |candidate| cover_exist?(candidate) } || ""
    rescue
      ""
    end

    # TODO: to parse
    # guide [optional/deprecated]
    # bindings [optional]

    # For each spine <itemref>, the manifest node set matching its idref, in
    # spine order.
    def reading_order
      spine_items.map { |item| manifest.css("##{item.attr('idref')}") }
    end

    protected

    def spine_items
      spine.css('itemref')
    end

    # Reads the package document text from the archive; nil on any failure.
    def get_package_content(file)
      file.get_input_stream(path).read
    rescue
      nil
    end

    def cover_exist?(path)
      !!@file.find_entry(path)
    rescue
      false
    end

    def root
      @xml.css('package')
    end

    def unique_identifier
      root.attr('unique-identifier').to_s
    end

    def prefix
      root.attr('prefix').to_s
    end

    def lang
      root.attr('xml:lang').to_s
    end

    def dir
      (spine.attr('page-progression-direction') || root.attr('dir')).to_s
    end

    def id
      root.attr('id').to_s
    end

    # TODO: to do parse of
    # DCMES Optional Elements [0 or more]
    # contributor
    # coverage
    # creator
    # date
    # description
    # format
    # publisher
    # relation
    # rights
    # source
    # subject
    # type
    # meta [1 or more]
    # OPF2 meta [0 or more]
    # link [0 or more]

    ############
    # Metadata #
    ############
    def metadata
      root.css('metadata')
    end

    def identifiers
      metadata.css('identifier')
    end

    def titles
      metadata.css('title')
    end

    def languages
      metadata.css('language')
    end

    def creators
      metadata.css('creator')
    end

    def contributors
      metadata.css('contributor')
    end

    def meta
      metadata.css('meta')
    end

    def link
      metadata.css('link')
    end

    ############
    # Manifest #
    ############
    def manifest
      root.css('manifest')
    end

    ############
    # Spine #
    ############
    def spine
      root.css('spine')
    end

    # One "#idref" selector per spine item. The idref attribute lives on the
    # spine's <itemref> elements, so those are what is mapped here.
    def reading_order_selectors
      spine_items.map { |item| "##{item.attr('idref')}" }
    end

    # Text of the first node of +nodes+, or "" when the set is empty.
    def first_text(nodes)
      nodes.size > 0 ? nodes.first.text : ""
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Epub
  # One content document of the publication. The content is read from the
  # archive on first access and then cached.
  class Page

    attr_reader :title, :path

    # title - label of the page.
    # path  - path of the document inside the archive.
    # file  - the opened archive, asked for an input stream on demand.
    def initialize(title, path, file)
      @file = file
      @path = path
      @title = title
    end

    # The document text tagged as UTF-8, or "" when it cannot be read.
    def content
      @content ||= get_page_content
    end

    private

    def get_page_content
      bytes = @file.get_input_stream(@path).read
      bytes.force_encoding(Encoding::UTF_8)
    rescue
      ""
    end

  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Epub
  # Entry point of the library: opens an EPUB archive, validates it and
  # exposes document-level information taken from its default package.
  class Reader

    EPUB_MIMETYPE = "application/epub+zip"
    PACKAGE_MEDIATYPE = "application/oebps-package+xml"

    attr_reader :filepath, :file

    # f - path of the EPUB file.
    #
    # Raises FileNotFoundError when the path does not exist and
    # MalformedFileError when the file fails the validity checks below.
    def initialize(f)
      # File.exist? is used because File.exists? no longer exists on current Rubies.
      raise(FileNotFoundError, "File not found") unless File.exist?(f)
      @filepath = f.to_s
      @file = EpubFile.new(f)
      raise(MalformedFileError, "Invalid EPUB file format") unless valid?
    end

    # Builds a reader for +f+. With a block, yields the reader and returns the
    # block's value; without one, returns the reader.
    def self.open(f)
      reader = new(f)
      block_given? ? yield(reader) : reader
    end

    # Content of the archive's "mimetype" entry, or nil when it cannot be read.
    def mimetype
      @mimetype ||= begin
        file.get_input_stream('mimetype').read
      rescue
        nil
      end
    end

    def epub_version
      @version ||= package.version
    end

    def uid
      @uid ||= package.identifier
    end

    def title
      @title ||= package.title
    end

    def author
      @author ||= package.creator
    end

    def publication_date
      @publication_date ||= package.date
    end

    def language
      @language ||= package.language
    end

    def toc
      @toc ||= Toc.new(package.toc, self)
    end

    def pages
      @pages ||= toc.pages
    end

    def container
      @container ||= Container.new(self)
    end

    def cover
      @cover ||= package.cover
    end

    # TODO: To parse other META-INF files
    # signatures.xml [optional]
    # Contains digital signatures for various assets.

    # encryption.xml [optional]
    # Contains information about the encryption of Publication resources. (This file is required if font obfuscation is used.)

    # metadata.xml [optional]
    # Used to store metadata about the container.

    # rights.xml [optional]
    # Used to store information about digital rights.

    # manifest.xml [allowed]
    # A manifest of container contents as allowed by Open Document Format [ODF].

    # Convenient method
    def package(index = 0)
      container.package(index)
    end

    private

    def valid?
      valid_mimetype? && valid_container? && valid_package? && valid_toc?
    end

    # True when the mimetype entry contains the EPUB media type.
    def valid_mimetype?
      mimetype.to_s.include?(EPUB_MIMETYPE)
    end

    # True when the container lists at least one package. A container without
    # any rootfile must be reported as malformed instead of failing later with
    # a NoMethodError on a nil package.
    def valid_container?
      !container.packages.empty?
    end

    def valid_package?
      default_package = package
      return false if default_package.nil?
      default_package.path.match(/\.opf$/) && default_package.mediatype == PACKAGE_MEDIATYPE
    end

    # TODO: validates TOC
    def valid_toc?
      true
    end

  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Epub
  # Table of contents of a publication. It is read from the file named by the
  # package; an NCX file is converted to an XHTML navigation page, anything
  # else is returned as stored.
  class Toc

    # Characters that must be escaped before text is placed in the generated
    # XHTML (element content and double-quoted attribute values).
    HTML_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }.freeze

    # tocfile - archive path of the table of contents file.
    # reader  - the Epub::Reader that owns the archive and the package.
    def initialize(tocfile, reader)
      @tocfile = tocfile
      @reader = reader
      @file = @reader.file
      @content = get_toc_content
      @xml = Nokogiri::XML(@content).remove_namespaces!
    end

    # XHTML navigation page. For an NCX file it is built from the navMap, or
    # from the spine when the navMap has no entries; for any other file the
    # stored content is returned unchanged.
    def content
      return @content unless ncx?
      has_toc? ? ncx_to_html : spine_to_html
    end

    # Array of Epub::Page. Pages come from the NCX navPoints when there is
    # more than one of them, otherwise from the package reading order (in
    # which case the pages have empty titles).
    def pages
      points = @xml.css("ncx navMap navPoint")
      package = @reader.package
      base_path = package.relative_content_path
      if ncx? && has_toc? && points.size > 1
        points.map do |point|
          label = point.css('navLabel > text').first
          # A navPoint without a label gets an empty title instead of raising.
          title = label ? label.text : ""
          Page.new(title, base_path + point.css('content').attr('src').to_s, @reader.file)
        end
      else
        package.reading_order.map do |item|
          Page.new("", base_path + item.attr('href').to_s, @reader.file)
        end
      end
    end

    private

    def ncx?
      @tocfile.match(/(\.ncx)$/)
    end

    def has_toc?
      @xml.css('navMap > navPoint').size > 0
    end

    # Reads the table of contents file; "" on any failure.
    def get_toc_content
      @reader.file.get_input_stream(@tocfile).read
    rescue
      ""
    end

    # TODO: Add Stylesheets
    # TODO: Convert nested navigation
    # One list entry per top-level navPoint of the NCX navMap.
    def ncx_to_html
      entries = @xml.css("ncx > navMap > navPoint").map do |point|
        list_item(point.attr('id'), point.css('content').attr('src'), point.css('navLabel text').text)
      end
      html_page(entries)
    end

    # One list entry per reading-order item, labelled with its href minus the
    # extension (the whole href when it has no extension).
    def spine_to_html
      entries = @reader.package.reading_order.map do |item|
        link = item.attr('href').to_s
        dot = link.rindex('.')
        list_item(item.attr('id'), link, dot ? link[0, dot] : link)
      end
      html_page(entries)
    end

    # Wraps the given <li> strings in the XHTML navigation page template.
    def html_page(entries)
      header = <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" profile="http://www.idpf.org/epub/30/profile/content/">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<title>#{escape_html(title)}</title>
</head>
<body>
<section>
<nav id="toc" epub:type="toc">
<ol>
EOF
      footer = <<EOF
</ol>
</nav>
</section>
</body>
</html>
EOF
      header + entries.join + footer
    end

    # A single navigation entry; every interpolated value is escaped.
    def list_item(id, href, label)
      "<li id=\"#{escape_html(id)}\"><a href=\"#{escape_html(href)}\">#{escape_html(label)}</a></li>"
    end

    def escape_html(value)
      value.to_s.gsub(/[&<>"]/, HTML_ESCAPES)
    end

    def title
      root.css('docTitle > text').text
    end

    def root
      @xml.css('ncx')
    end

    def navmap
      root.css('navMap')
    end

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks Epub::Container against the valid fixture EPUB.
describe Epub::Container do

  before(:all) do
    @container = Epub::Reader.open('spec/data/valid.epub').container
  end

  it 'get raw content' do
    @container.raw.should_not be_empty
  end

  it 'get package documents' do
    @container.packages.should_not be_empty
  end

  it 'get default package document' do
    @container.package.should be_a(Epub::Package)
  end

end
|
Binary file
|
Binary file
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks Epub::Package against the valid fixture EPUB.
describe Epub::Package do

  before(:all) do
    @file = 'spec/data/valid.epub'
    @reader = Epub::Reader.open(@file)
  end

  it 'get raw content' do
    @reader.package.raw.should_not be_empty
  end

  it 'get file path' do
    @reader.package.path.should_not be_empty
  end

  it 'get media type' do
    @reader.package.mediatype.should eq("application/oebps-package+xml")
  end

  it 'get the epub version' do
    @reader.package.version.should eq(3)
  end

  it 'get the unique identifier' do
    @reader.package.identifier.should eq("urn:isbn:9780316000000")
  end

  it 'get the content language' do
    @reader.package.language.should eq("en-US")
  end

  it 'get the content title' do
    @reader.package.title.should eq("Moby-Dick")
  end

  it 'get the content creator' do
    @reader.package.creator.should eq("Herman Melville")
  end

  it 'get the content contributor' do
    @reader.package.contributor.should be_empty
  end

  it 'get the publication date' do
    @reader.package.date.should be_empty
  end

  it 'get the publication source' do
    @reader.package.source.should be_empty
  end

  # This example must exercise #type; asserting on #source again would leave
  # #type without any coverage.
  it 'get the content type' do
    @reader.package.type.should be_empty
  end

  it 'get the full resource list' do
    @reader.package.resources.should_not be_empty
  end

  it 'get the image list' do
    @reader.package.images.size.should eq(2)
  end

  it 'get the html list' do
    @reader.package.html.size.should eq(143)
  end

  it 'get the stylesheet list' do
    @reader.package.stylesheets.size.should eq(1)
  end

  it 'get the javascript list' do
    @reader.package.javascripts.should be_empty
  end

  it 'get the font list' do
    @reader.package.fonts.should be_empty
  end

  it 'get the audio list' do
    @reader.package.audios.should be_empty
  end

  it 'get the table of content (toc)' do
    @reader.package.toc.should eq("OPS/toc.ncx")
  end

  it 'get the reading order' do
    list = @reader.package.reading_order
    list.size.should eq(142)
    list[0].attr('href').to_s.should eq('cover.xhtml')
    list[1].attr('href').to_s.should eq('titlepage.xhtml')
  end

  it 'get the book cover' do
    @reader.package.cover.should eq("OPS/images/9780316000000.jpg")
  end

end
|
data/spec/page_spec.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks Epub::Page using the last page listed by the fixture's table of
# contents.
describe Epub::Page do

  before(:all) do
    reader = Epub::Reader.open('spec/data/valid.epub')
    toc = Epub::Toc.new(reader.package.toc, reader)
    @toc = toc
    @page = toc.pages.last
  end

  it 'get page title' do
    @page.title.should eq('Copyright Page')
  end

  it 'get page path' do
    @page.path.should eq('OPS/copyright.xhtml')
  end

  it 'get page content' do
    body = @page.content
    body.should match('<html.*>')
    body.should match('Produced by Daniel Lazarus and Jonesey')
  end
end
|
data/spec/reader_spec.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks Epub::Reader against the valid and invalid fixture EPUBs.
describe Epub::Reader do

  before(:all) do
    @file = 'spec/data/valid.epub'
    @reader = Epub::Reader.open(@file)
  end

  it 'open a epub file' do
    @reader.should_not be_nil
  end

  # The error class is pinned: a bare raise_error would also pass on an
  # unrelated failure such as a NoMethodError raised inside the reader.
  it 'raises an exception if file not found' do
    lambda {Epub::Reader.open('not_found.epub')}.should raise_error(Epub::FileNotFoundError)
  end

  it 'raises an exception if malformed file' do
    lambda {Epub::Reader.open('spec/data/invalid.epub')}.should raise_error
  end

  it 'get epub file path' do
    @reader.filepath.should eq(@file)
  end

  it 'get epub mime type' do
    @reader.mimetype.should eq("application/epub+zip")
  end

  it 'get the epub version' do
    @reader.epub_version.should eq(3)
  end

  it 'get the epub unique identifier' do
    @reader.uid.should eq("urn:isbn:9780316000000")
  end

  it 'get the title' do
    @reader.title.should eq("Moby-Dick")
  end

  it 'get the author' do
    @reader.author.should eq("Herman Melville")
  end

  it 'get the publication date' do
    @reader.publication_date.should be_empty
  end

  it 'get the language' do
    @reader.language.should eq("en-US")
  end

  it 'get the TOC' do
    @reader.toc.should be_a(Epub::Toc)
  end

  it 'get the pages list' do
    @reader.pages.size.should eq(142)
  end
end
|
data/spec/spec_helper.rb
ADDED
data/spec/toc_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Checks the XHTML navigation page that Epub::Toc generates from the
# fixture's NCX file.
describe Epub::Toc do

  before(:all) do
    @reader = Epub::Reader.open('spec/data/valid.epub')
    @toc = Epub::Toc.new(@reader.package.toc, @reader)
    @html = Nokogiri::XML(@toc.content)
  end

  # Nodes of the generated page matching +selector+.
  def nodes(selector)
    @html.css(selector)
  end

  it 'convert <ncx> to <html>' do
    nodes('html').size.should eq(1)
  end

  it 'convert <docTitle> to <title>' do
    nodes('head > title').text.should eq("Moby-Dick")
  end

  it 'convert <navMap> to <nav>' do
    nodes('nav').size.should eq(1)
  end

  it 'convert <navPoint> to <a>' do
    nodes('li > a').size.should eq(142)
  end
end
|
data/teste.rb
ADDED
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: epub-reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.9
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Fernando Almeida
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-09-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rubyzip
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: The epub-reader library implements a EPUB parser conforming as much as
|
42
|
+
possible to the EPUB 3 specification from IDPF
|
43
|
+
email:
|
44
|
+
- fernando@fernandoalmeida.net
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- .gitignore
|
50
|
+
- .rbenv-gemsets
|
51
|
+
- .rspec
|
52
|
+
- .ruby-version
|
53
|
+
- Gemfile
|
54
|
+
- README.md
|
55
|
+
- Rakefile
|
56
|
+
- epub-reader.gemspec
|
57
|
+
- lib/epub-reader.rb
|
58
|
+
- lib/epub-reader/container.rb
|
59
|
+
- lib/epub-reader/epubfile.rb
|
60
|
+
- lib/epub-reader/package.rb
|
61
|
+
- lib/epub-reader/page.rb
|
62
|
+
- lib/epub-reader/reader.rb
|
63
|
+
- lib/epub-reader/toc.rb
|
64
|
+
- lib/epub-reader/version.rb
|
65
|
+
- spec/container_spec.rb
|
66
|
+
- spec/data/invalid.epub
|
67
|
+
- spec/data/valid.epub
|
68
|
+
- spec/package_spec.rb
|
69
|
+
- spec/page_spec.rb
|
70
|
+
- spec/reader_spec.rb
|
71
|
+
- spec/spec_helper.rb
|
72
|
+
- spec/toc_spec.rb
|
73
|
+
- teste.rb
|
74
|
+
homepage: http://bitbucket.com/fernandoalmeida/epub-reader
|
75
|
+
licenses: []
|
76
|
+
metadata: {}
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options: []
|
79
|
+
require_paths:
|
80
|
+
- lib
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
requirements: []
|
92
|
+
rubyforge_project: epub-reader
|
93
|
+
rubygems_version: 2.0.3
|
94
|
+
signing_key:
|
95
|
+
specification_version: 4
|
96
|
+
summary: A library for accessing the content of EPUB files
|
97
|
+
test_files: []
|