epub-reader 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 310b2d309ca3ea509ad9c56afdcbaf53b1a7bc9a
4
+ data.tar.gz: a4ae809a633232d45737abd0363e954709bffd04
5
+ SHA512:
6
+ metadata.gz: a87eb9670787d8411fb22d0971b7e8e1bcaf4e5694fc814f2991faac2cd94521fffe87b81ca3ebe3cce526bd85728cab34985a469fc7e6d24efdd01e5c3ba9eb
7
+ data.tar.gz: d7e1a1cc73c277912c119522535fdf34e77e0b0365da5cbf39af553995d9c06b8ad5f1e6edc8e5bc81d752695e32b7a67ce16667e3711a9dd613909def788b99
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
@@ -0,0 +1 @@
1
+ epub-reader
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format nested
@@ -0,0 +1 @@
1
+ 2.0.0-p247
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies are declared in epub-reader.gemspec.
gemspec

group :development, :test do
  # .rspec selects the `nested` formatter, which exists only in the RSpec 2
  # series, and the specs are written with the RSpec 2 `should` syntax.
  gem "rspec", "~> 2.14"
end
@@ -0,0 +1,65 @@
1
+ # Overview
2
+
3
+ EPUB Reader is a Ruby library which helps you to parse EPUB files conforming
4
+ as much as possible to the specification from IDPF.
5
+
6
+ # Installation
7
+
8
+ The recommended installation method is via Rubygems.
9
+
10
+ gem install epub-reader
11
+
12
+ # Usage
13
+
14
+ Begin by creating an Epub::Reader instance that points to an EPUB file. Document
15
+ level information (metadata, toc, page count, etc) is available via this object.
16
+
17
+ reader = Epub::Reader.open("somefile.epub")
18
+ puts reader.epub_version
19
+ puts reader.title
20
+ puts reader.author
21
+ puts reader.publication_date
22
+ puts reader.language
23
+ reader.pages.each do |page|
24
+ puts page.title
25
+ puts page.content
26
+ end
27
+
28
+ # Exceptions
29
+
30
+ There are two key exceptions that you will need to watch out for when processing an
31
+ EPUB file:
32
+
33
+ FileNotFoundError - The argument passed to Epub::Reader.open('file.epub') is a file
34
+ path. If the file does not exist, FileNotFoundError is raised.
35
+
36
+ MalformedFileError - The EPUB appears to be corrupt in some way. If you believe the
37
+ file should be valid, or that a corrupt file didn't raise an exception, please
38
+ forward a copy of the file to the maintainers using the Bitbucket issue tracker
39
+ and we will attempt to improve the code.
40
+
41
+ MalformedFileError is the base class for these problems; if finer grained subclasses are
42
+ added, 'rescue Epub::MalformedFileError' will catch all the subclassed errors as well.
43
+
44
+ Any other exceptions should be considered bugs in Epub::Reader (please
45
+ report it!).
46
+
47
+ # Maintainers
48
+
49
+ - Fernando Almeida <fernando@fernandoalmeida.net>
50
+
51
+ # Licensing
52
+
53
+ This is a proprietary library and all rights are reserved to eBookPlus.com.
54
+
55
+ # References
56
+
57
+ [What is EPUB 3?](http://shop.oreilly.com/product/0636920022442.do)
58
+
59
+ [EPUB Publications Specifications](http://idpf.org/epub/30/spec/epub30-publications.html)
60
+
61
+ [EPUB Content Documents Specifications](http://idpf.org/epub/30/spec/epub30-contentdocs.html)
62
+
63
+ [EPUB Open Container Formats Specifications](http://idpf.org/epub/30/spec/epub30-ocf.html)
64
+
65
+ [Shared Workspace for Emerging Specifications and Schemas for EPUB 3](http://code.google.com/p/epub-revision/downloads/list)
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "epub-reader/version"
4
+
5
# Gem packaging metadata for epub-reader.
Gem::Specification.new do |s|
  s.name        = "epub-reader"
  s.version     = Epub::Reader::VERSION
  s.authors     = ["Fernando Almeida"]
  s.email       = ["fernando@fernandoalmeida.net"]
  s.homepage    = "http://bitbucket.com/fernandoalmeida/epub-reader"
  s.summary     = "A library for accessing the content of EPUB files"
  s.description = "The epub-reader library implements a EPUB parser conforming as much as possible to the EPUB 3 specification from IDPF"

  s.rubyforge_project = "epub-reader"

  # Package exactly what is tracked by git.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # lib/epub-reader.rb requires 'zip/zipfilesystem' and EpubFile subclasses
  # Zip::ZipFile. rubyzip 1.0 renamed both (to 'zip/filesystem' and
  # Zip::File), so only the pre-1.0 series can satisfy this gem.
  s.add_dependency('rubyzip', '< 1.0.0')
  s.add_dependency('nokogiri')
end
@@ -0,0 +1,14 @@
1
+ require 'zip/zipfilesystem'
2
+ require 'nokogiri'
3
+ require "epub-reader/version"
4
+ require "epub-reader/reader"
5
+ require "epub-reader/epubfile"
6
+ require "epub-reader/container"
7
+ require "epub-reader/package"
8
+ require "epub-reader/toc"
9
+ require "epub-reader/page"
10
+
11
# Namespace of the epub-reader library; this file defines its error classes.
module Epub
  # Raised by Epub::Reader#initialize when the given path does not exist.
  class FileNotFoundError < StandardError
  end

  # Raised by Epub::Reader#initialize when the archive fails validation.
  class MalformedFileError < StandardError
  end
end
@@ -0,0 +1,35 @@
1
module Epub
  # Reads META-INF/container.xml from the reader's archive and builds one
  # Package per <rootfile> entry it lists.
  class Container

    # Packages in the order their <rootfile> elements appear.
    attr_reader :packages

    # @param reader object exposing the opened archive through #file
    def initialize(reader)
      @reader = reader
      @container = get_container_content
      @xml = Nokogiri::XML(@container)
      rootfiles = @xml.css('container rootfiles rootfile')
      @packages = rootfiles.map { |rootfile| Package.new(rootfile, @reader.file) }
    end

    # Text of container.xml as read from the archive ("" when it was unreadable).
    def raw
      @container.to_s
    end

    # Package at +index+; defaults to the first one. nil when out of range.
    def package(index = 0)
      @packages[index]
    end

    private

    # Best-effort read: any failure (for example a missing entry) yields nil.
    def get_container_content
      @reader.file.get_input_stream('META-INF/container.xml').read
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,7 @@
1
module Epub
  # Archive handle for an EPUB file. Adds nothing to Zip::ZipFile beyond
  # restricting construction to a single argument.
  class EpubFile < Zip::ZipFile
    # @param f path of the EPUB archive, forwarded unchanged to Zip::ZipFile
    def initialize(f)
      super(f)
    end
  end
end
@@ -0,0 +1,255 @@
1
module Epub
  # Wraps one package document referenced by a <rootfile> element of
  # META-INF/container.xml and exposes its metadata, manifest and spine.
  #
  # The document is parsed with namespaces removed, so Dublin Core elements
  # are addressed by their local names (title, creator, date, ...).
  class Package

    # Media type used to locate an NCX table of contents in the manifest when
    # the spine does not name one through its "toc" attribute.
    TOC_MEDIA_TYPE = "application/x-dtbncx+xml"

    # @param rootfile the <rootfile> node describing this package
    # @param file     the opened archive the package document is read from
    def initialize(rootfile, file)
      @rootfile = rootfile
      @file = file
      @package = get_package_content(file)
      @xml = Nokogiri::XML(@package).remove_namespaces!
    end

    # Text of the package document ("" when it could not be read).
    def raw
      @package.to_s
    end

    # Path of the package document inside the archive (rootfile "full-path").
    def path
      @rootfile.attr('full-path').to_s
    end

    # Directory part of #path including the trailing slash, or "" when the
    # package document sits at the archive root.
    def relative_content_path
      i = path.rindex('/').to_i
      i > 0 ? path[0, i + 1] : ""
    end

    # Value of the rootfile "media-type" attribute.
    def mediatype
      @rootfile.attr('media-type')
    end

    # Integer part of the package "version" attribute (0 when absent).
    def version
      root.attr('version').to_s.to_i
    end

    # Text of the identifier element referenced by the package
    # "unique-identifier" attribute; "" when there is no such element.
    def identifier
      identifiers.css(id_selector(unique_identifier)).text
    end

    # TODO: identify language
    # TODO: identify subtitles
    # Text of the first title element, or "" when there is none.
    def title
      first_text(titles)
    end

    # Text of the first language element, or "" when there is none.
    def language
      first_text(languages)
    end

    # TODO: identify role
    # TODO: identify file-as
    # TODO: identify alternate-script
    # TODO: identify display-seq
    # Text of the first creator element, or "" when there is none.
    def creator
      first_text(creators)
    end

    # TODO: equal to creator
    # Text of the first contributor element, or "" when there is none.
    def contributor
      first_text(contributors)
    end

    # Concatenated text of the date elements ("" when there is none).
    # The element name is "date"; the previous selector, "data", never matched.
    def date
      metadata.css('date').text
    end

    # Concatenated text of the source elements ("" when there is none).
    def source
      metadata.css('source').text
    end

    # Concatenated text of the type elements ("" when there is none).
    def type
      metadata.css('type').text
    end

    # All manifest <item> elements.
    def resources
      manifest.css('item')
    end

    # Manifest items whose media type is GIF, JPEG or SVG.
    def images
      resources.select { |resource| resource.attr('media-type').to_s.match(/^image\/(gif|jpeg|svg\+xml)/) }
    end

    # Manifest items with the XHTML media type.
    def html
      resources.css('[media-type="application/xhtml+xml"]')
    end

    # Manifest items with the CSS media type.
    def stylesheets
      resources.css('[media-type="text/css"]')
    end

    # Manifest items with the JavaScript media type.
    def javascripts
      resources.css('[media-type="text/javascript"]')
    end

    # Manifest items whose media type is OpenType or WOFF.
    def fonts
      resources.select { |resource| resource.attr('media-type').to_s.match(/application\/(vnd\.ms-opentype|font-woff)/) }
    end

    # Manifest items whose media type is MPEG or MP4 audio.
    def audios
      resources.select { |resource| resource.attr('media-type').to_s.match(/^audio\/(mpeg|mp4)/) }
    end

    # Archive path of the table of contents: the manifest item named by the
    # spine "toc" attribute, or else the first item with the NCX media type.
    # Returns just #relative_content_path when no item matches.
    def toc
      toc_item_id = spine.attr("toc")
      # The fallback selector must be interpolated; it used to be a
      # single-quoted literal and therefore never matched anything.
      selector = toc_item_id ? id_selector(toc_item_id) : %([media-type="#{TOC_MEDIA_TYPE}"])
      relative_content_path + resources.css(selector).attr('href').to_s
    end

    # Archive path of the cover image, or "" when none can be determined.
    #
    # The cover is taken from <meta name="cover"> (whose content is either a
    # file name or a manifest id) or from the manifest item carrying
    # properties="cover-image". The file is looked up next to the package
    # document and then in its "Images/" sub-directory.
    def cover
      cover_meta = metadata.css('[name="cover"]')
      meta_content = cover_meta.size == 1 ? cover_meta.attr('content') : nil
      # Normalize to a String up front: attribute lookups on a node set yield
      # attribute nodes, which cannot be appended to a String.
      cover_ref = (meta_content || manifest.css('[properties="cover-image"]').attr('id')).to_s
      return "" if cover_ref.empty?

      cover_path =
        if cover_ref.match(/\.(gif|jpe?g|png)/)
          cover_ref
        else
          resources.css(id_selector(cover_ref)).attr('href').to_s
        end
      # An empty path would make the lookups below test the directory itself.
      return "" if cover_path.empty?

      [cover_path, "Images/" + cover_path].each do |candidate|
        full_path = relative_content_path + candidate
        return full_path if cover_exist?(full_path)
      end
      ""
    rescue StandardError
      # Best effort: a package that cannot be interpreted has no cover.
      ""
    end

    # TODO: to parse
    # guide [optional/deprecated]
    # bindings [optional]

    # Manifest entries in spine order: one node set per <itemref>, holding the
    # manifest item whose id equals the itemref "idref" (empty when no item
    # matches).
    def reading_order
      spine_items.map do |item|
        manifest.css(id_selector(item.attr('idref')))
      end
    end

    protected

    # The <itemref> children of the spine.
    def spine_items
      spine.css('itemref')
    end

    # Reads the package document as a String so that #raw returns its text.
    # Best effort: any failure (for example a missing entry) yields nil.
    def get_package_content(file)
      file.get_input_stream(path).read
    rescue StandardError
      nil
    end

    # True when the archive has an entry at +path+; false on any error.
    def cover_exist?(path)
      !!@file.find_entry(path)
    rescue StandardError
      false
    end

    def root
      @xml.css('package')
    end

    def unique_identifier
      root.attr('unique-identifier').to_s
    end

    def prefix
      root.attr('prefix').to_s
    end

    def lang
      root.attr('xml:lang').to_s
    end

    # Spine "page-progression-direction", falling back to the package "dir".
    def dir
      (spine.attr('page-progression-direction') || root.attr('dir')).to_s
    end

    def id
      root.attr('id').to_s
    end

    # TODO: to do parse of
    # DCMES Optional Elements [0 or more]
    # contributor
    # coverage
    # creator
    # date
    # description
    # format
    # publisher
    # relation
    # rights
    # source
    # subject
    # type
    # meta [1 or more]
    # OPF2 meta [0 or more]
    # link [0 or more]

    ############
    # Metadata #
    ############
    def metadata
      root.css('metadata')
    end

    def identifiers
      metadata.css('identifier')
    end

    def titles
      metadata.css('title')
    end

    def languages
      metadata.css('language')
    end

    def creators
      metadata.css('creator')
    end

    def contributors
      metadata.css('contributor')
    end

    def meta
      metadata.css('meta')
    end

    def link
      metadata.css('link')
    end

    ############
    # Manifest #
    ############
    def manifest
      root.css('manifest')
    end

    ############
    # Spine    #
    ############
    def spine
      root.css('spine')
    end

    # "#idref" selectors in spine order. The idref attribute lives on the
    # spine <itemref> elements, not on the manifest items returned by
    # #reading_order, so the spine is read directly.
    def reading_order_selectors
      spine_items.map { |item| "##{item.attr('idref')}" }
    end

    private

    # CSS selector matching elements whose id attribute equals +id+. The
    # attribute form is used instead of "#id" because ids may contain
    # characters such as "." or ":" that have their own meaning after "#".
    def id_selector(id)
      %([id="#{id}"])
    end

    # Text of the first node in +nodes+, or "" when the set is empty.
    def first_text(nodes)
      node = nodes.first
      node ? node.text : ""
    end
  end
end
@@ -0,0 +1,26 @@
1
module Epub
  # One readable unit of the publication: a title plus the archive entry that
  # holds its markup.
  class Page
    attr_reader :title, :path

    # @param title display title of the page
    # @param path  entry path of the page inside the archive
    # @param file  opened archive the content is read from
    def initialize(title, path, file)
      @title = title
      @path = path
      @file = file
    end

    # Markup of the page, read on first access and cached afterwards.
    def content
      @content ||= get_page_content
    end

    private

    # Reads the entry and tags the bytes as UTF-8; any failure yields "".
    def get_page_content
      bytes = @file.get_input_stream(@path).read
      bytes.force_encoding(Encoding::UTF_8)
    rescue StandardError
      ""
    end

  end
end
@@ -0,0 +1,118 @@
1
module Epub
  # Entry point of the library: opens an EPUB archive, validates it and
  # exposes its metadata, table of contents and pages.
  #
  # The metadata readers delegate to the first package of the container and
  # memoize the value they obtain.
  class Reader

    EPUB_MIMETYPE = "application/epub+zip"
    PACKAGE_MEDIATYPE = "application/oebps-package+xml"

    attr_reader :filepath, :file

    # @param f path of the EPUB file
    # @raise [FileNotFoundError]  when no file exists at +f+
    # @raise [MalformedFileError] when the archive fails validation
    def initialize(f)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      raise(FileNotFoundError, "File not found") unless File.exist?(f)
      @filepath = f.to_s
      @file = EpubFile.new(f)
      raise(MalformedFileError, "Invalid EPUB file format") unless valid?
    end

    # Builds a reader for +f+. With a block, yields the reader and returns the
    # block's value; without one, returns the reader itself.
    def self.open(f)
      reader = new(f)
      if block_given?
        yield reader
      else
        reader
      end
    end

    # Content of the archive's "mimetype" entry, or nil when unreadable.
    def mimetype
      @mimetype ||= begin
        file.get_input_stream('mimetype').read
      rescue StandardError
        nil
      end
    end

    def epub_version
      @version ||= package.version
    end

    def uid
      @uid ||= package.identifier
    end

    def title
      @title ||= package.title
    end

    def author
      @author ||= package.creator
    end

    def publication_date
      @publication_date ||= package.date
    end

    def language
      @language ||= package.language
    end

    def toc
      @toc ||= Toc.new(package.toc, self)
    end

    def pages
      @pages ||= toc.pages
    end

    def container
      @container ||= Container.new(self)
    end

    def cover
      @cover ||= package.cover
    end

    # TODO: To parse other META-INF files
    # signatures.xml [optional]
    # Contains digital signatures for various assets.

    # encryption.xml [optional]
    # Contains information about the encryption of Publication resources. (This file is required if font obfuscation is used.)

    # metadata.xml [optional]
    # Used to store metadata about the container.

    # rights.xml [optional]
    # Used to store information about digital rights.

    # manifest.xml [allowed]
    # A manifest of container contents as allowed by Open Document Format [ODF].

    # Convenient method
    # Package at +index+ of the container (the first one by default).
    def package(index = 0)
      container.package(index)
    end

    private

    def valid?
      valid_mimetype? && valid_container? && valid_package? && valid_toc?
    end

    # The mimetype entry must contain the EPUB media type.
    def valid_mimetype?
      mimetype.to_s.include?(EPUB_MIMETYPE)
    end

    # The container must list at least one package. Checking only for a
    # non-nil container was always true and let archives without a rootfile
    # fail later with NoMethodError instead of MalformedFileError.
    def valid_container?
      !container.packages.empty?
    end

    # The default package must be an .opf file with the package media type.
    def valid_package?
      pkg = package
      return false if pkg.nil?

      pkg.path.match(/\.opf$/) && pkg.mediatype == PACKAGE_MEDIATYPE
    end

    # TODO: validates TOC
    def valid_toc?
      true
    end

  end
end
@@ -0,0 +1,130 @@
1
require 'cgi'

module Epub
  # Table of contents of a publication, read from the file named by the
  # package (an NCX document when its name ends in ".ncx").
  class Toc

    # @param tocfile archive path of the table-of-contents file
    # @param reader  the Epub::Reader that owns the archive and the package
    def initialize(tocfile, reader)
      @tocfile = tocfile
      @reader = reader
      @file = @reader.file
      @content = get_toc_content
      @xml = Nokogiri::XML(@content).remove_namespaces!
    end

    # The table of contents as markup. An NCX file is converted to an XHTML
    # navigation document, built from its navPoints or, when it has none,
    # from the spine. Any other file is returned as read.
    def content
      return @content unless ncx?

      has_toc? ? ncx_to_html : spine_to_html
    end

    # Pages of the publication: one per navPoint when the NCX lists more than
    # one, otherwise one untitled page per spine entry.
    def pages
      points = @xml.css("ncx navMap navPoint")
      items = @reader.package.reading_order
      if ncx? && has_toc? && points.size > 1
        points.map do |point|
          label = point.css('navLabel > text').first
          # A navPoint without a label gets an empty title instead of crashing.
          title = label ? label.text : ""
          file_path = @reader.package.relative_content_path + point.css('content').attr('src').to_s
          Page.new(title, file_path, @reader.file)
        end
      else
        items.map do |item|
          file_path = @reader.package.relative_content_path + item.attr('href').to_s
          Page.new("", file_path, @reader.file)
        end
      end
    end

    private

    def ncx?
      @tocfile.match(/(\.ncx)$/)
    end

    def has_toc?
      @xml.css('navMap > navPoint').size > 0
    end

    # Best-effort read: any failure (for example a missing entry) yields "".
    def get_toc_content
      @reader.file.get_input_stream(@tocfile).read
    rescue StandardError
      ""
    end

    # TODO: Add Stylesheets
    # TODO: Convert nested navigation
    # Navigation document built from the top-level navPoints of the NCX.
    def ncx_to_html
      entries = @xml.css("ncx > navMap > navPoint").map do |point|
        [point.attr('id'), point.css('content').attr('src'), point.css('navLabel text').text]
      end
      html_document(entries)
    end

    # Navigation document built from the spine; each entry is labelled with
    # its href minus the extension (the whole href when it has no dot).
    def spine_to_html
      entries = @reader.package.reading_order.map do |item|
        link = item.attr('href').to_s
        dot = link.rindex('.')
        [item.attr('id'), link, dot ? link[0, dot] : link]
      end
      html_document(entries)
    end

    # Wraps +entries+ (triples of id, href and label) in the XHTML navigation
    # skeleton. Every interpolated value is escaped so that characters such as
    # "&" or "<" in titles and paths cannot break the markup, and the epub
    # prefix used by epub:type is declared on the root element.
    def html_document(entries)
      html = <<-EOF
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" profile="http://www.idpf.org/epub/30/profile/content/">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<title>#{escape(title)}</title>
</head>
<body>
<section>
<nav id="toc" epub:type="toc">
<ol>
      EOF
      entries.each do |id, href, label|
        html << %(<li id="#{escape(id)}"><a href="#{escape(href)}">#{escape(label)}</a></li>)
      end
      html << <<-EOF
</ol>
</nav>
</section>
</body>
</html>
      EOF
      html
    end

    # HTML-escapes the string form of +value+.
    def escape(value)
      CGI.escapeHTML(value.to_s)
    end

    # Text of the NCX <docTitle>.
    def title
      root.css('docTitle > text').text
    end

    def root
      @xml.css('ncx')
    end

    def navmap
      root.css('navMap')
    end

  end
end
@@ -0,0 +1,5 @@
1
module Epub
  class Reader
    # Release number of the epub-reader gem; read by the gemspec.
    VERSION = '0.0.9'
  end
end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
# Exercises Epub::Container through a reader opened on the sample EPUB.
describe Epub::Container do

  before(:all) do
    @file = 'spec/data/valid.epub'
    @epub = Epub::Reader.open(@file)
  end

  it 'get raw content' do
    raw = @epub.container.raw
    raw.should_not be_empty
  end

  it 'get package documents' do
    packages = @epub.container.packages
    packages.should_not be_empty
  end

  it 'get default package document' do
    default_package = @epub.container.package
    default_package.should be_a(Epub::Package)
  end

end
Binary file
Binary file
@@ -0,0 +1,101 @@
1
+ require 'spec_helper'
2
+
3
# Exercises Epub::Package through a reader opened on the sample EPUB.
describe Epub::Package do

  before(:all) do
    @file = 'spec/data/valid.epub'
    @reader = Epub::Reader.open(@file)
  end

  it 'get raw content' do
    @reader.package.raw.should_not be_empty
  end

  it 'get file path' do
    @reader.package.path.should_not be_empty
  end

  it 'get media type' do
    @reader.package.mediatype.should eq("application/oebps-package+xml")
  end

  it 'get the epub version' do
    @reader.package.version.should eq(3)
  end

  it 'get the unique identifier' do
    @reader.package.identifier.should eq("urn:isbn:9780316000000")
  end

  it 'get the content language' do
    @reader.package.language.should eq("en-US")
  end

  it 'get the content title' do
    @reader.package.title.should eq("Moby-Dick")
  end

  it 'get the content creator' do
    @reader.package.creator.should eq("Herman Melville")
  end

  it 'get the content contributor' do
    @reader.package.contributor.should be_empty
  end

  it 'get the publication date' do
    @reader.package.date.should be_empty
  end

  it 'get the publication source' do
    @reader.package.source.should be_empty
  end

  # This example used to call #source again, leaving #type untested.
  it 'get the content type' do
    @reader.package.type.should be_empty
  end

  it 'get the full resource list' do
    @reader.package.resources.should_not be_empty
  end

  it 'get the image list' do
    @reader.package.images.size.should eq(2)
  end

  it 'get the html list' do
    @reader.package.html.size.should eq(143)
  end

  it 'get the stylesheet list' do
    @reader.package.stylesheets.size.should eq(1)
  end

  it 'get the javascript list' do
    @reader.package.javascripts.should be_empty
  end

  it 'get the font list' do
    @reader.package.fonts.should be_empty
  end

  it 'get the audio list' do
    @reader.package.audios.should be_empty
  end

  it 'get the table of content (toc)' do
    @reader.package.toc.should eq("OPS/toc.ncx")
  end

  it 'get the reading order' do
    list = @reader.package.reading_order
    list.size.should eq(142)
    list[0].attr('href').to_s.should eq('cover.xhtml')
    list[1].attr('href').to_s.should eq('titlepage.xhtml')
  end

  it 'get the book cover' do
    @reader.package.cover.should eq("OPS/images/9780316000000.jpg")
  end

end
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
# Exercises Epub::Page using the last page listed by the sample EPUB's TOC.
describe Epub::Page do

  before(:all) do
    epub = Epub::Reader.open('spec/data/valid.epub')
    @toc = Epub::Toc.new(epub.package.toc, epub)
    @page = @toc.pages.last
  end

  it 'get page title' do
    @page.title.should eq('Copyright Page')
  end

  it 'get page path' do
    @page.path.should eq('OPS/copyright.xhtml')
  end

  it 'get page content' do
    markup = @page.content
    markup.should match('<html.*>')
    markup.should match('Produced by Daniel Lazarus and Jonesey')
  end
end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
# Exercises Epub::Reader against the sample EPUB files under spec/data.
describe Epub::Reader do

  before(:all) do
    @file = 'spec/data/valid.epub'
    @reader = Epub::Reader.open(@file)
  end

  it 'open a epub file' do
    @reader.should_not be_nil
  end

  # Name the expected class: a bare raise_error also passes on unrelated
  # failures such as NoMethodError.
  it 'raises an exception if file not found' do
    lambda {Epub::Reader.open('not_found.epub')}.should raise_error(Epub::FileNotFoundError)
  end

  # NOTE(review): left unspecific because the error class depends on how
  # spec/data/invalid.epub is broken - confirm and tighten.
  it 'raises an exception if malformed file' do
    lambda {Epub::Reader.open('spec/data/invalid.epub')}.should raise_error
  end

  it 'get epub file path' do
    @reader.filepath.should eq(@file)
  end

  it 'get epub mime type' do
    @reader.mimetype.should eq("application/epub+zip")
  end

  it 'get the epub version' do
    @reader.epub_version.should eq(3)
  end

  it 'get the epub unique identifier' do
    @reader.uid.should eq("urn:isbn:9780316000000")
  end

  it 'get the title' do
    @reader.title.should eq("Moby-Dick")
  end

  it 'get the author' do
    @reader.author.should eq("Herman Melville")
  end

  it 'get the publication date' do
    @reader.publication_date.should be_empty
  end

  it 'get the language' do
    @reader.language.should eq("en-US")
  end

  it 'get the TOC' do
    @reader.toc.should be_a(Epub::Toc)
  end

  it 'get the pages list' do
    @reader.pages.size.should eq(142)
  end
end
@@ -0,0 +1,8 @@
1
+ $: << File.join(File.dirname(__FILE__), "/../lib")
2
+
3
+ require "rspec"
4
+ require "epub-reader"
5
+
6
# Use RSpec's own mocking framework.
RSpec.configure { |config| config.mock_with :rspec }
@@ -0,0 +1,28 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Exercises the NCX-to-XHTML conversion of Epub::Toc on the sample EPUB.
describe Epub::Toc do

  before(:all) do
    @reader = Epub::Reader.open('spec/data/valid.epub')
    @toc = Epub::Toc.new(@reader.package.toc, @reader)
    # Re-parse the generated markup so the examples can query it.
    @html = Nokogiri::XML(@toc.content)
  end

  it 'convert <ncx> to <html>' do
    @html.css('html').size.should eq(1)
  end

  it 'convert <docTitle> to <title>' do
    page_title = @html.css('head > title').text
    page_title.should eq("Moby-Dick")
  end

  it 'convert <navMap> to <nav>' do
    @html.css('nav').size.should eq(1)
  end

  it 'convert <navPoint> to <a>' do
    @html.css('li > a').size.should eq(142)
  end
end
@@ -0,0 +1,9 @@
1
+ # `rake install`
2
+ require 'rubygems'
3
+ require 'epub-reader'
4
+
5
# Manual smoke test: prints the cover path of an EPUB file.
# Pass the EPUB path as the first command-line argument; without one the
# original sample location is used.
hd = ARGV.fetch(0, '/home/fernando/Dropbox/trabalho/livros_e_apostilas/EPUB/e_book_Marketing.epub')

f1 = Epub::Reader.open(hd)

puts f1.cover
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: epub-reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.9
5
+ platform: ruby
6
+ authors:
7
+ - Fernando Almeida
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rubyzip
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: The epub-reader library implements a EPUB parser conforming as much as
42
+ possible to the EPUB 3 specification from IDPF
43
+ email:
44
+ - fernando@fernandoalmeida.net
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - .rbenv-gemsets
51
+ - .rspec
52
+ - .ruby-version
53
+ - Gemfile
54
+ - README.md
55
+ - Rakefile
56
+ - epub-reader.gemspec
57
+ - lib/epub-reader.rb
58
+ - lib/epub-reader/container.rb
59
+ - lib/epub-reader/epubfile.rb
60
+ - lib/epub-reader/package.rb
61
+ - lib/epub-reader/page.rb
62
+ - lib/epub-reader/reader.rb
63
+ - lib/epub-reader/toc.rb
64
+ - lib/epub-reader/version.rb
65
+ - spec/container_spec.rb
66
+ - spec/data/invalid.epub
67
+ - spec/data/valid.epub
68
+ - spec/package_spec.rb
69
+ - spec/page_spec.rb
70
+ - spec/reader_spec.rb
71
+ - spec/spec_helper.rb
72
+ - spec/toc_spec.rb
73
+ - teste.rb
74
+ homepage: http://bitbucket.com/fernandoalmeida/epub-reader
75
+ licenses: []
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project: epub-reader
93
+ rubygems_version: 2.0.3
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: A library for accessing the content of EPUB files
97
+ test_files: []