epub-reader 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 310b2d309ca3ea509ad9c56afdcbaf53b1a7bc9a
4
+ data.tar.gz: a4ae809a633232d45737abd0363e954709bffd04
5
+ SHA512:
6
+ metadata.gz: a87eb9670787d8411fb22d0971b7e8e1bcaf4e5694fc814f2991faac2cd94521fffe87b81ca3ebe3cce526bd85728cab34985a469fc7e6d24efdd01e5c3ba9eb
7
+ data.tar.gz: d7e1a1cc73c277912c119522535fdf34e77e0b0365da5cbf39af553995d9c06b8ad5f1e6edc8e5bc81d752695e32b7a67ce16667e3711a9dd613909def788b99
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
@@ -0,0 +1 @@
1
+ epub-reader
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format nested
@@ -0,0 +1 @@
1
+ 2.0.0-p247
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org" # use TLS so gem downloads cannot be tampered with in transit
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem "rspec"
7
+ end
@@ -0,0 +1,65 @@
1
+ # Overview
2
+
3
+ EPUB Reader is a Ruby library that helps you parse EPUB files, conforming
4
+ as much as possible to the specification from the IDPF.
5
+
6
+ # Installation
7
+
8
+ The recommended installation method is via Rubygems.
9
+
10
+ gem install epub-reader
11
+
12
+ # Usage
13
+
14
+ Begin by creating an Epub::Reader instance that points to an EPUB file. Document-
15
+ level information (metadata, toc, page count, etc.) is available via this object.
16
+
17
+ reader = Epub::Reader.open("somefile.epub")
18
+ puts reader.epub_version
19
+ puts reader.title
20
+ puts reader.author
21
+ puts reader.publication_date
22
+ puts reader.language
23
+ reader.pages.each do |page|
24
+ puts page.title
25
+ puts page.content
26
+ end
27
+
28
+ # Exceptions
29
+
30
+ There are two key exceptions that you will need to watch out for when processing an
31
+ EPUB file:
32
+
33
+ Epub::FileNotFoundError - The argument passed to Epub::Reader.open('file.epub') is a file
34
+ path. If the file does not exist, Epub::FileNotFoundError is raised.
35
+
36
+ Epub::MalformedFileError - The EPUB appears to be corrupt in some way. If you believe the
37
+ file should be valid, or that a corrupt file didn't raise an exception, please
38
+ forward a copy of the file to the maintainers using the Bitbucket issue tracker
39
+ and we will attempt to improve the code.
40
+
41
+ Epub::MalformedFileError currently has no subclasses, so
42
+ 'rescue Epub::MalformedFileError' catches every malformed-file error the library raises.
43
+
44
+ Any other exception should be considered a bug in Epub::Reader (please
45
+ report it!).
46
+
47
+ # Maintainers
48
+
49
+ - Fernando Almeida <fernando@fernandoalmeida.net>
50
+
51
+ # Licensing
52
+
53
+ This is a proprietary library and all rights are reserved to eBookPlus.com.
54
+
55
+ # References
56
+
57
+ [What is EPUB 3?](http://shop.oreilly.com/product/0636920022442.do)
58
+
59
+ [EPUB Publications Specifications](http://idpf.org/epub/30/spec/epub30-publications.html)
60
+
61
+ [EPUB Content Documents Specifications](http://idpf.org/epub/30/spec/epub30-contentdocs.html)
62
+
63
+ [EPUB Open Container Formats Specifications](http://idpf.org/epub/30/spec/epub30-ocf.html)
64
+
65
+ [Shared Workspace for Emerging Specifications and Schemas for EPUB 3](http://code.google.com/p/epub-revision/downloads/list)
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "epub-reader/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "epub-reader"
7
+ s.version = Epub::Reader::VERSION
8
+ s.authors = ["Fernando Almeida"]
9
+ s.email = ["fernando@fernandoalmeida.net"]
10
+ s.homepage = "http://bitbucket.com/fernandoalmeida/epub-reader"
11
+ s.summary = "A library for accessing the content of EPUB files"
12
+ s.description = "The epub-reader library implements a EPUB parser conforming as much as possible to the EPUB 3 specification from IDPF"
13
+
14
+ s.rubyforge_project = "epub-reader"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ s.add_dependency('rubyzip')
22
+ s.add_dependency('nokogiri')
23
+ end
@@ -0,0 +1,14 @@
1
+ require 'zip/zipfilesystem'
2
+ require 'nokogiri'
3
+ require "epub-reader/version"
4
+ require "epub-reader/reader"
5
+ require "epub-reader/epubfile"
6
+ require "epub-reader/container"
7
+ require "epub-reader/package"
8
+ require "epub-reader/toc"
9
+ require "epub-reader/page"
10
+
11
+ module Epub
12
+ class FileNotFoundError < StandardError; end
13
+ class MalformedFileError < StandardError; end
14
+ end
@@ -0,0 +1,35 @@
1
+ module Epub
2
+ class Container
3
+
4
+ attr_reader :packages
5
+
6
+ def initialize(reader) # Parses META-INF/container.xml from reader.file and builds one Package per <rootfile> element.
7
+ @reader = reader
8
+ @container = get_container_content # XML text of the container document, or nil if it could not be read
9
+ @xml = Nokogiri::XML(@container)
10
+ @packages = []
11
+ @xml.css('container rootfiles rootfile').each do |rootfile|
12
+ @packages << Package.new(rootfile, @reader.file)
13
+ end
14
+ end
15
+
16
+ def raw
17
+ @container.to_s
18
+ end
19
+
20
+ def package(index = 0)
21
+ @packages[index]
22
+ end
23
+
24
+ private
25
+
26
+ def get_container_content # Returns the contents of the archive entry META-INF/container.xml, or nil if reading it raises.
27
+ begin
28
+ @reader.file.get_input_stream('META-INF/container.xml').read
29
+ rescue
30
+ nil # bare rescue: any StandardError while reading is turned into nil
31
+ end
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,7 @@
1
+ module Epub
2
+ class EpubFile < Zip::ZipFile # Archive handle used by Reader; adds no behavior of its own to Zip::ZipFile.
3
+ def initialize(f) # the bare super below forwards f unchanged to Zip::ZipFile#initialize
4
+ super
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,255 @@
1
+ module Epub
2
+ class Package
3
+
4
+ def initialize(rootfile, file)
5
+ @rootfile = rootfile
6
+ @file = file
7
+ @package = get_package_content(file)
8
+ @xml = Nokogiri::XML(@package).remove_namespaces!
9
+ end
10
+
11
+ def raw
12
+ @package.to_s
13
+ end
14
+
15
+ def path
16
+ @rootfile.attr('full-path').to_s
17
+ end
18
+
19
+ def relative_content_path # Directory prefix of path including the trailing '/', or "" when path has no '/' after index 0.
20
+ i = path.rindex('/').to_i # rindex returns nil when there is no slash; to_i turns that into 0
21
+ i > 0 ? path[0,i+1] : ""
22
+ end
23
+
24
+ def mediatype
25
+ @rootfile.attr('media-type')
26
+ end
27
+
28
+ def version
29
+ root.attr('version').to_s.to_i
30
+ end
31
+
32
+ def identifier
33
+ identifiers.css("[id=#{unique_identifier}]").text
34
+ end
35
+
36
+ # TODO: identify language
37
+ # TODO: identify subtitles
38
+ def title
39
+ titles.first.text
40
+ end
41
+
42
+ def language
43
+ languages.first.text
44
+ end
45
+
46
+ # TODO: identify role
47
+ # TODO: identify file-as
48
+ # TODO: identify alternate-script
49
+ # TODO: identify display-seq
50
+ def creator
51
+ creators.size > 0 ? creators.first.text : ""
52
+ end
53
+
54
+ # TODO: equal to creator
55
+ def contributor
56
+ contributors.size > 0 ? contributors.first.text : ""
57
+ end
58
+
59
+ def date
60
+ d = metadata.css('data')
61
+ d.size > 0 ? d.text : ""
62
+ end
63
+
64
+ def source
65
+ s = metadata.css('source')
66
+ s.size > 0 ? s.text : ""
67
+ end
68
+
69
+ def type
70
+ t = metadata.css('type')
71
+ t.size > 0 ? t.text : ""
72
+ end
73
+
74
+ def resources
75
+ manifest.css('item')
76
+ end
77
+
78
+ def images
79
+ resources.select{|resource| resource.attr('media-type').to_s.match(/^image\/(gif|jpeg|svg\+xml)/)}
80
+ end
81
+
82
+ def html
83
+ resources.css('[media-type="application/xhtml+xml"]')
84
+ end
85
+
86
+ def stylesheets
87
+ resources.css('[media-type="text/css"]')
88
+ end
89
+
90
+ def javascripts
91
+ resources.css('[media-type="text/javascript"]')
92
+ end
93
+
94
+ def fonts
95
+ resources.select{|resource| resource.attr('media-type').to_s.match(/application\/(vnd\.ms-opentype|font-woff)/)}
96
+ end
97
+
98
+ def audios
99
+ resources.select{|resource| resource.attr('media-type').to_s.match(/^audio\/(mpeg|mp4)/)}
100
+ end
101
+
102
+ def toc
103
+ toc_item_id = spine.attr("toc")
104
+ toc_item_mimetype = "application/x-dtbncx+xml"
105
+ toc_item_selector = toc_item_id ? "##{toc_item_id.to_s}" : '[media-type="#{toc_item_mimetype}"]'
106
+ relative_content_path + resources.css(toc_item_selector).attr('href').to_s
107
+ end
108
+
109
+ def cover
110
+ begin
111
+ cover_meta = metadata.css('[name="cover"]')
112
+ meta_content = cover_meta.size == 1 ? cover_meta.attr('content') : nil
113
+ cover_content = meta_content || manifest.css('[properties="cover-image"]').attr('id').to_s
114
+ cover_path = (cover_content.to_s.match(/\.(gif|jpe?g|png)/) ? cover_content : resources.css("##{cover_content}").attr('href').to_s)
115
+ if cover_exist?(relative_content_path + cover_path)
116
+ relative_content_path + cover_path
117
+ elsif cover_exist?(relative_content_path + "Images/" + cover_path)
118
+ relative_content_path + "Images/" + cover_path
119
+ else
120
+ ""
121
+ end
122
+ rescue
123
+ ""
124
+ end
125
+ end
126
+
127
+ # TODO: to parse
128
+ # guide [optional/deprecated]
129
+ # bindings [optional]
130
+
131
+ def reading_order
132
+ spine_items.map do |item|
133
+ item_id = item.attr('idref').to_s
134
+ manifest.css("##{item_id}") if item_id
135
+ end
136
+ end
137
+
138
+ protected
139
+
140
+ def spine_items
141
+ spine.css('itemref')
142
+ end
143
+
144
+ def get_package_content(file)
145
+ begin
146
+ file.get_input_stream(path)
147
+ rescue
148
+ nil
149
+ end
150
+ end
151
+
152
+ def cover_exist?(path)
153
+ begin
154
+ !!@file.find_entry(path)
155
+ rescue
156
+ false
157
+ end
158
+ end
159
+
160
+ def root
161
+ @xml.css('package')
162
+ end
163
+
164
+ def unique_identifier
165
+ root.attr('unique-identifier').to_s
166
+ end
167
+
168
+ def prefix
169
+ root.attr('prefix').to_s
170
+ end
171
+
172
+ def lang
173
+ root.attr('xml:lang').to_s
174
+ end
175
+
176
+ def dir
177
+ (spine.attr('page-progression-direction') || root.attr('dir')).to_s
178
+ end
179
+
180
+ def id
181
+ root.attr('id').to_s
182
+ end
183
+
184
+ # TODO: to do parse of
185
+ # DCMES Optional Elements [0 or more]
186
+ # contributor
187
+ # coverage
188
+ # creator
189
+ # date
190
+ # description
191
+ # format
192
+ # publisher
193
+ # relation
194
+ # rights
195
+ # source
196
+ # subject
197
+ # type
198
+ # meta [1 or more]
199
+ # OPF2 meta [0 or more]
200
+ # link [0 or more]
201
+
202
+ ############
203
+ # Metadata #
204
+ ############
205
+ def metadata
206
+ root.css('metadata')
207
+ end
208
+
209
+ def identifiers
210
+ metadata.css('identifier')
211
+ end
212
+
213
+ def titles
214
+ metadata.css('title')
215
+ end
216
+
217
+ def languages
218
+ metadata.css('language')
219
+ end
220
+
221
+ def creators
222
+ metadata.css('creator')
223
+ end
224
+
225
+ def contributors
226
+ metadata.css('contributor')
227
+ end
228
+
229
+ def meta
230
+ metadata.css('meta')
231
+ end
232
+
233
+ def link
234
+ metadata.css('link')
235
+ end
236
+
237
+ ############
238
+ # Manifest #
239
+ ############
240
+ def manifest
241
+ root.css('manifest')
242
+ end
243
+
244
+ ############
245
+ # Spine #
246
+ ############
247
+ def spine
248
+ root.css('spine')
249
+ end
250
+
251
+ def reading_order_selectors
252
+ reading_order.map{|item| "##{item.attr('idref')}"}
253
+ end
254
+ end
255
+ end
@@ -0,0 +1,26 @@
1
+ module Epub
2
+ class Page
3
+ def initialize(title, path, file)
4
+ @title = title
5
+ @path = path
6
+ @file = file
7
+ end
8
+
9
+ attr_reader :title, :path
10
+
11
+ def content
12
+ @content ||= get_page_content
13
+ end
14
+
15
+ private
16
+
17
+ def get_page_content # Reads the archive entry at @path and labels the result as UTF-8; returns "" if reading raises.
18
+ begin
19
+ @file.get_input_stream(@path).read.force_encoding(Encoding::UTF_8) # force_encoding relabels the bytes, it does not transcode them
20
+ rescue
21
+ ""
22
+ end
23
+ end
24
+
25
+ end
26
+ end
@@ -0,0 +1,118 @@
1
+ module Epub
2
+ class Reader
3
+
4
+ EPUB_MIMETYPE = "application/epub+zip"
5
+ PACKAGE_MEDIATYPE = "application/oebps-package+xml"
6
+
7
+ attr_reader :filepath, :file
8
+
9
+ def initialize(f)
10
+ raise(FileNotFoundError, "File not found") unless File.exists?(f)
11
+ @filepath = f.to_s
12
+ @file = EpubFile.new(f)
13
+ raise(MalformedFileError, "Invalid EPUB file format") unless valid?
14
+ end
15
+
16
+ def Reader.open(f) # Builds a Reader for f; with a block, yields the reader and returns the block's value, otherwise returns the reader.
17
+ reader = Reader.new(f)
18
+ if block_given?
19
+ yield reader
20
+ else
21
+ reader
22
+ end
23
+ end
24
+
25
+ def mimetype # Contents of the archive's 'mimetype' entry, cached in @mimetype once read; nil if reading raises.
26
+ @mimetype ||= begin
27
+ file.get_input_stream('mimetype').read
28
+ rescue
29
+ nil # a nil result is not cached by ||=, so the next call tries to read again
30
+ end
31
+ end
32
+
33
+ def epub_version
34
+ @version ||= package.version
35
+ end
36
+
37
+ def uid
38
+ @uid ||= package.identifier
39
+ end
40
+
41
+ def title
42
+ @title ||= package.title
43
+ end
44
+
45
+ def author
46
+ @author ||= package.creator
47
+ end
48
+
49
+ def publication_date
50
+ @publication_date ||= package.date
51
+ end
52
+
53
+ def language
54
+ @language ||= package.language
55
+ end
56
+
57
+ def toc
58
+ @toc ||= Toc.new(package.toc, self)
59
+ end
60
+
61
+ def pages
62
+ @pages ||= toc.pages
63
+ end
64
+
65
+ def container
66
+ @container ||= Container.new(self)
67
+ end
68
+
69
+ def cover
70
+ @cover ||= package.cover
71
+ end
72
+
73
+ # TODO: To parse other META-INF files
74
+ # signatures.xml [optional]
75
+ # Contains digital signatures for various assets.
76
+
77
+ # encryption.xml [optional]
78
+ # Contains information about the encryption of Publication resources. (This file is required if font obfuscation is used.)
79
+
80
+ # metadata.xml [optional]
81
+ # Used to store metadata about the container.
82
+
83
+ # rights.xml [optional]
84
+ # Used to store information about digital rights.
85
+
86
+ # manifest.xml [allowed]
87
+ # A manifest of container contents as allowed by Open Document Format [ODF].
88
+
89
+ # Convenient method
90
+ def package(index = 0)
91
+ container.package(index)
92
+ end
93
+
94
+ private
95
+
96
+ def valid?
97
+ valid_mimetype? && valid_container? && valid_package? && valid_toc?
98
+ end
99
+
100
+ def valid_mimetype?
101
+ /application\/epub\+zip/.match(mimetype)
102
+ end
103
+
104
+ def valid_container?
105
+ !container.nil?
106
+ end
107
+
108
+ def valid_package?
109
+ package.path.match(/\.opf$/) && package.mediatype == PACKAGE_MEDIATYPE
110
+ end
111
+
112
+ # TODO: validates TOC
113
+ def valid_toc?
114
+ true
115
+ end
116
+
117
+ end
118
+ end
@@ -0,0 +1,130 @@
1
+ module Epub
2
+ class Toc
3
+
4
+ def initialize(tocfile, reader)
5
+ @tocfile = tocfile
6
+ @reader = reader
7
+ @file = @reader.file
8
+ @content = get_toc_content
9
+ @xml = Nokogiri::XML(@content).remove_namespaces!
10
+ end
11
+
12
+ def content
13
+ if ncx?
14
+ if has_toc?
15
+ ncx_to_html
16
+ else
17
+ spine_to_html
18
+ end
19
+ else
20
+ @content
21
+ end
22
+ end
23
+
24
+ def pages # Builds the list of Page objects: from the NCX navPoints when available, otherwise from the package's reading order.
25
+ points = @xml.css("ncx navMap navPoint") # descendant selector, so nested navPoints are included
26
+ items = @reader.package.reading_order
27
+ if ncx? && has_toc? && points.size > 1 # an NCX with a single navPoint falls through to the reading order below
28
+ points.map do |point|
29
+ title = point.css('navLabel > text').first.text
30
+ file_path = @reader.package.relative_content_path + point.css('content').attr('src').to_s
31
+ Page.new(title, file_path, @reader.file)
32
+ end
33
+ else
34
+ items.map do |item|
35
+ title = "" # reading-order entries carry no label, so these pages are untitled
36
+ file_path = @reader.package.relative_content_path + item.attr('href').to_s
37
+ Page.new(title, file_path, @reader.file)
38
+ end
39
+ end
40
+ end
41
+
42
+ private
43
+
44
+ def ncx?
45
+ @tocfile.match(/(\.ncx)$/)
46
+ end
47
+
48
+ def has_toc?
49
+ @xml.css('navMap > navPoint').size > 0
50
+ end
51
+
52
+ def get_toc_content
53
+ begin
54
+ @reader.file.get_input_stream(@tocfile).read
55
+ rescue
56
+ ""
57
+ end
58
+ end
59
+
60
+ # TODO: Add Stylesheets
61
+ # TODO: Convert nested navigation
62
+ # TODO: Refactoring to DRY with spine_to_html
63
+ def ncx_to_html
64
+ html = <<EOF
65
+ <?xml version="1.0" encoding="UTF-8"?>
66
+ <html xmlns="http://www.w3.org/1999/xhtml" profile="http://www.idpf.org/epub/30/profile/content/">
67
+ <head>
68
+ <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
69
+ <title>#{title}</title>
70
+ </head>
71
+ <body>
72
+ <section>
73
+ <nav id="toc" epub:type="toc">
74
+ <ol>
75
+ EOF
76
+ selector = "ncx > navMap > navPoint"
77
+ @xml.css(selector).each do |point|
78
+ html += "<li id=\"#{point.attr('id').to_s}\"><a href=\"#{point.css('content').attr('src').to_s}\">#{point.css('navLabel text').text}</a></li>"
79
+ end
80
+ html += <<EOF
81
+ </ol>
82
+ </nav>
83
+ </section>
84
+ </body>
85
+ </html>
86
+ EOF
87
+ html
88
+ end
89
+
90
+ def spine_to_html
91
+ html = <<EOF
92
+ <?xml version="1.0" encoding="UTF-8"?>
93
+ <html xmlns="http://www.w3.org/1999/xhtml" profile="http://www.idpf.org/epub/30/profile/content/">
94
+ <head>
95
+ <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
96
+ <title>#{title}</title>
97
+ </head>
98
+ <body>
99
+ <section>
100
+ <nav id="toc" epub:type="toc">
101
+ <ol>
102
+ EOF
103
+ @reader.package.reading_order.each do |item|
104
+ link = item.attr('href').to_s
105
+ html += "<li id=\"#{item.attr('id').to_s}\"><a href=\"#{link}\">#{link[0,link.rindex('.')]}</a></li>"
106
+ end
107
+ html += <<EOF
108
+ </ol>
109
+ </nav>
110
+ </section>
111
+ </body>
112
+ </html>
113
+ EOF
114
+ html
115
+ end
116
+
117
+ def title
118
+ root.css('docTitle > text').text
119
+ end
120
+
121
+ def root
122
+ @xml.css('ncx')
123
+ end
124
+
125
+ def navmap
126
+ root.css('navMap')
127
+ end
128
+
129
+ end
130
+ end
@@ -0,0 +1,5 @@
1
+ module Epub
2
+ class Reader
3
+ VERSION = "0.0.9"
4
+ end
5
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ describe Epub::Container do
4
+
5
+ before(:all) do
6
+ @file = 'spec/data/valid.epub'
7
+ @epub = Epub::Reader.open(@file)
8
+ end
9
+
10
+ it 'get raw content' do
11
+ @epub.container.raw.should_not be_empty
12
+ end
13
+
14
+ it 'get package documents' do
15
+ @epub.container.packages.should_not be_empty
16
+ end
17
+
18
+ it 'get default package document' do
19
+ @epub.container.package.should be_a(Epub::Package)
20
+ end
21
+
22
+ end
Binary file
Binary file
@@ -0,0 +1,101 @@
1
+ require 'spec_helper'
2
+
3
+ describe Epub::Package do
4
+
5
+ before(:all) do
6
+ @file = 'spec/data/valid.epub'
7
+ @reader = Epub::Reader.open(@file)
8
+ end
9
+
10
+ it 'get raw content' do
11
+ @reader.package.raw.should_not be_empty
12
+ end
13
+
14
+ it 'get file path' do
15
+ @reader.package.path.should_not be_empty
16
+ end
17
+
18
+ it 'get media type' do
19
+ @reader.package.mediatype.should eq("application/oebps-package+xml")
20
+ end
21
+
22
+ it 'get the epub version' do
23
+ @reader.package.version.should eq(3)
24
+ end
25
+
26
+ it 'get the unique identifier' do
27
+ @reader.package.identifier.should eq("urn:isbn:9780316000000")
28
+ end
29
+
30
+ it 'get the content language' do
31
+ @reader.package.language.should eq("en-US")
32
+ end
33
+
34
+ it 'get the content title' do
35
+ @reader.package.title.should eq("Moby-Dick")
36
+ end
37
+
38
+ it 'get the content creator' do
39
+ @reader.package.creator.should eq("Herman Melville")
40
+ end
41
+
42
+ it 'get the content contributor' do
43
+ @reader.package.contributor.should be_empty
44
+ end
45
+
46
+ it 'get the publication date' do
47
+ @reader.package.date.should be_empty
48
+ end
49
+
50
+ it 'get the publication source' do
51
+ @reader.package.source.should be_empty
52
+ end
53
+
54
+ it 'get the content type' do
55
+ @reader.package.source.should be_empty
56
+ end
57
+
58
+ it 'get the full resource list' do
59
+ @reader.package.resources.should_not be_empty
60
+ end
61
+
62
+ it 'get the image list' do
63
+ @reader.package.images.size.should eq(2)
64
+ end
65
+
66
+ it 'get the html list' do
67
+ @reader.package.html.size.should eq(143)
68
+ end
69
+
70
+ it 'get the stylesheet list' do
71
+ @reader.package.stylesheets.size.should eq(1)
72
+ end
73
+
74
+ it 'get the javascript list' do
75
+ @reader.package.javascripts.should be_empty
76
+ end
77
+
78
+ it 'get the font list' do
79
+ @reader.package.fonts.should be_empty
80
+ end
81
+
82
+ it 'get the audio list' do
83
+ @reader.package.audios.should be_empty
84
+ end
85
+
86
+ it 'get the table of content (toc)' do
87
+ @reader.package.toc.should eq("OPS/toc.ncx")
88
+ end
89
+
90
+ it 'get the reading order' do
91
+ list = @reader.package.reading_order
92
+ list.size.should eq(142)
93
+ list[0].attr('href').to_s.should eq('cover.xhtml')
94
+ list[1].attr('href').to_s.should eq('titlepage.xhtml')
95
+ end
96
+
97
+ it 'get the book cover' do
98
+ @reader.package.cover.should eq("OPS/images/9780316000000.jpg")
99
+ end
100
+
101
+ end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
+ describe Epub::Page do
4
+
5
+ before(:all) do
6
+ file = 'spec/data/valid.epub'
7
+ reader = Epub::Reader.open(file)
8
+ @toc = Epub::Toc.new(reader.package.toc, reader)
9
+ @page = @toc.pages.last
10
+ end
11
+
12
+ it 'get page title' do
13
+ @page.title.should eq('Copyright Page')
14
+ end
15
+
16
+ it 'get page path' do
17
+ @page.path.should eq('OPS/copyright.xhtml')
18
+ end
19
+
20
+ it 'get page content' do
21
+ @page.content.should match('<html.*>')
22
+ @page.content.should match('Produced by Daniel Lazarus and Jonesey')
23
+ end
24
+ end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ describe Epub::Reader do
4
+
5
+ before(:all) do
6
+ @file = 'spec/data/valid.epub'
7
+ @reader = Epub::Reader.open(@file)
8
+ end
9
+
10
+ it 'open a epub file' do
11
+ @reader.should_not be_nil
12
+ end
13
+
14
+ it 'raises an exception if file not found' do
15
+ lambda {Epub::Reader.open('not_found.epub')}.should raise_error
16
+ end
17
+
18
+ it 'raises an exception if malformed file' do
19
+ lambda {Epub::Reader.open('spec/data/invalid.epub')}.should raise_error
20
+ end
21
+
22
+ it 'get epub file path' do
23
+ @reader.filepath.should eq(@file)
24
+ end
25
+
26
+ it 'get epub mime type' do
27
+ @reader.mimetype.should eq("application/epub+zip")
28
+ end
29
+
30
+ it 'get the epub version' do
31
+ @reader.epub_version.should eq(3)
32
+ end
33
+
34
+ it 'get the epub unique identifier' do
35
+ @reader.uid.should eq("urn:isbn:9780316000000")
36
+ end
37
+
38
+ it 'get the title' do
39
+ @reader.title.should eq("Moby-Dick")
40
+ end
41
+
42
+ it 'get the author' do
43
+ @reader.author.should eq("Herman Melville")
44
+ end
45
+
46
+ it 'get the publication date' do
47
+ @reader.publication_date.should be_empty
48
+ end
49
+
50
+ it 'get the language' do
51
+ @reader.language.should eq("en-US")
52
+ end
53
+
54
+ it 'get the TOC' do
55
+ @reader.toc.should be_a(Epub::Toc)
56
+ end
57
+
58
+ it 'get the pages list' do
59
+ @reader.pages.size.should eq(142)
60
+ end
61
+ end
@@ -0,0 +1,8 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+
3
+ require "rspec"
4
+ require "epub-reader"
5
+
6
+ RSpec.configure do |c|
7
+ c.mock_with :rspec
8
+ end
@@ -0,0 +1,28 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe Epub::Toc do
5
+
6
+ before(:all) do
7
+ file = 'spec/data/valid.epub'
8
+ @reader = Epub::Reader.open(file)
9
+ @toc = Epub::Toc.new(@reader.package.toc, @reader)
10
+ @html = Nokogiri::XML(@toc.content)
11
+ end
12
+
13
+ it 'convert <ncx> to <html>' do
14
+ @html.css('html').size.should eq(1)
15
+ end
16
+
17
+ it 'convert <docTitle> to <title>' do
18
+ @html.css('head > title').text.should eq("Moby-Dick")
19
+ end
20
+
21
+ it 'convert <navMap> to <nav>' do
22
+ @html.css('nav').size.should eq(1)
23
+ end
24
+
25
+ it 'convert <navPoint> to <a>' do
26
+ @html.css('li > a').size.should eq(142)
27
+ end
28
+ end
@@ -0,0 +1,9 @@
1
+ # `rake install`
2
+ require 'rubygems'
3
+ require 'epub-reader'
4
+
5
+ hd = '/home/fernando/Dropbox/trabalho/livros_e_apostilas/EPUB/e_book_Marketing.epub'
6
+
7
+ f1 = Epub::Reader.open(hd)
8
+
9
+ puts f1.cover
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: epub-reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.9
5
+ platform: ruby
6
+ authors:
7
+ - Fernando Almeida
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rubyzip
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: The epub-reader library implements a EPUB parser conforming as much as
42
+ possible to the EPUB 3 specification from IDPF
43
+ email:
44
+ - fernando@fernandoalmeida.net
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - .rbenv-gemsets
51
+ - .rspec
52
+ - .ruby-version
53
+ - Gemfile
54
+ - README.md
55
+ - Rakefile
56
+ - epub-reader.gemspec
57
+ - lib/epub-reader.rb
58
+ - lib/epub-reader/container.rb
59
+ - lib/epub-reader/epubfile.rb
60
+ - lib/epub-reader/package.rb
61
+ - lib/epub-reader/page.rb
62
+ - lib/epub-reader/reader.rb
63
+ - lib/epub-reader/toc.rb
64
+ - lib/epub-reader/version.rb
65
+ - spec/container_spec.rb
66
+ - spec/data/invalid.epub
67
+ - spec/data/valid.epub
68
+ - spec/package_spec.rb
69
+ - spec/page_spec.rb
70
+ - spec/reader_spec.rb
71
+ - spec/spec_helper.rb
72
+ - spec/toc_spec.rb
73
+ - teste.rb
74
+ homepage: http://bitbucket.com/fernandoalmeida/epub-reader
75
+ licenses: []
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project: epub-reader
93
+ rubygems_version: 2.0.3
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: A library for accessing the content of EPUB files
97
+ test_files: []