peregrin 1.1.1 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -0
- data/lib/formats/epub.rb +74 -13
- data/lib/formats/zhook.rb +3 -5
- data/lib/peregrin/book.rb +4 -0
- data/lib/peregrin/version.rb +1 -1
- data/lib/peregrin/zip_patch.rb +2 -1
- data/test/formats/epub_test.rb +85 -0
- metadata +18 -7
data/README.md
CHANGED
data/lib/formats/epub.rb
CHANGED
@@ -7,7 +7,8 @@ class Peregrin::Epub
|
|
7
7
|
:opf => { 'opf' => 'http://www.idpf.org/2007/opf' },
|
8
8
|
:dc => { 'dc' => 'http://purl.org/dc/elements/1.1/' },
|
9
9
|
:ncx => { 'ncx' => 'http://www.daisy.org/z3986/2005/ncx/' },
|
10
|
-
:svg => { 'svg' => 'http://www.w3.org/2000/svg' }
|
10
|
+
:svg => { 'svg' => 'http://www.w3.org/2000/svg' },
|
11
|
+
:nav => { 'nav' => 'http://www.w3.org/1999/xhtml'}
|
11
12
|
}
|
12
13
|
OCF_PATH = "META-INF/container.xml"
|
13
14
|
HTML5_TAGNAMES = %w[section nav article aside hgroup header footer figure figcaption] # FIXME: Which to divify? Which to leave as-is?
|
@@ -85,13 +86,13 @@ class Peregrin::Epub
|
|
85
86
|
docs = load_config_documents(zipfile)
|
86
87
|
extract_properties(docs[:opf])
|
87
88
|
extract_components(zipfile, docs[:opf], docs[:opf_root])
|
88
|
-
extract_chapters(zipfile, docs[:ncx])
|
89
|
+
extract_chapters(zipfile, {:ncx => docs[:ncx], :nav => docs[:nav]})
|
89
90
|
extract_cover(zipfile, docs)
|
90
91
|
}
|
91
92
|
@book.read_resource_proc = lambda { |resource|
|
92
93
|
media_path = from_opf_root(docs[:opf_root], resource.src)
|
93
94
|
media_path = URI.unescape(media_path)
|
94
|
-
Zip::Archive.open(epub_path) { |zipfile| zipfile.
|
95
|
+
Zip::Archive.open(epub_path) { |zipfile| zipfile.content(media_path) }
|
95
96
|
}
|
96
97
|
end
|
97
98
|
|
@@ -99,7 +100,8 @@ class Peregrin::Epub
|
|
99
100
|
def load_config_documents(zipfile)
|
100
101
|
# The OCF file.
|
101
102
|
begin
|
102
|
-
|
103
|
+
ocf_content = zipfile.content(OCF_PATH)
|
104
|
+
docs = { :ocf => Nokogiri::XML::Document.parse(ocf_content) }
|
103
105
|
rescue
|
104
106
|
raise FailureLoadingOCF
|
105
107
|
end
|
@@ -111,12 +113,17 @@ class Peregrin::Epub
|
|
111
113
|
NAMESPACES[:ocf]
|
112
114
|
)['full-path']
|
113
115
|
docs[:opf_root] = File.dirname(docs[:opf_path])
|
114
|
-
|
116
|
+
opf_content = zipfile.content(docs[:opf_path])
|
117
|
+
docs[:opf] = Nokogiri::XML::Document.parse(opf_content)
|
115
118
|
rescue
|
116
119
|
raise FailureLoadingOPF
|
117
120
|
end
|
118
121
|
|
122
|
+
# Extract Epub version
|
123
|
+
@book.version = docs[:opf].at_xpath('//opf:package', NAMESPACES[:opf])['version'].to_f
|
124
|
+
|
119
125
|
# The NCX file.
|
126
|
+
# Must be present only with Ebook < 3.0 but can be used for forward compatibility
|
120
127
|
begin
|
121
128
|
spine = docs[:opf].at_xpath('//opf:spine', NAMESPACES[:opf])
|
122
129
|
ncx_id = spine['toc'] ? spine['toc'] : 'ncx'
|
@@ -126,10 +133,25 @@ class Peregrin::Epub
|
|
126
133
|
)
|
127
134
|
|
128
135
|
docs[:ncx_path] = from_opf_root(docs[:opf_root], item['href'])
|
129
|
-
ncx_content = zipfile.
|
136
|
+
ncx_content = zipfile.content(docs[:ncx_path])
|
130
137
|
docs[:ncx] = Nokogiri::XML::Document.parse(ncx_content)
|
131
138
|
rescue => e
|
132
|
-
raise
|
139
|
+
# Only raise an exception for Ebook with version lower than 3.0
|
140
|
+
raise FailureLoadingNCX if @book.version < 3
|
141
|
+
end
|
142
|
+
|
143
|
+
# The NAV file. (Epub3 only)
|
144
|
+
if @book.version >= 3
|
145
|
+
begin
|
146
|
+
docs[:nav_path] = from_opf_root(
|
147
|
+
docs[:opf_root],
|
148
|
+
docs[:opf].at_xpath("//opf:manifest/opf:item[contains(concat(' ', normalize-space(@properties), ' '), ' nav ')]", NAMESPACES[:opf])['href']
|
149
|
+
)
|
150
|
+
nav_content = zipfile.content(docs[:nav_path])
|
151
|
+
docs[:nav] = Nokogiri::XML::Document.parse(nav_content)
|
152
|
+
rescue => e
|
153
|
+
raise FailureLoadingNAV
|
154
|
+
end
|
133
155
|
end
|
134
156
|
|
135
157
|
docs
|
@@ -153,10 +175,14 @@ class Peregrin::Epub
|
|
153
175
|
end
|
154
176
|
atts = elem.attributes.inject({}) { |acc, pair|
|
155
177
|
key, attr = pair
|
156
|
-
|
178
|
+
if !["name", "content", "property"].include?(key)
|
179
|
+
acc[key] = attr.value
|
180
|
+
elsif key == "property"
|
181
|
+
@book.add_property(attr.value, elem.text)
|
182
|
+
end
|
157
183
|
acc
|
158
184
|
}
|
159
|
-
@book.add_property(name, content, atts)
|
185
|
+
@book.add_property(name, content, atts) unless name.nil?
|
160
186
|
}
|
161
187
|
end
|
162
188
|
|
@@ -175,10 +201,10 @@ class Peregrin::Epub
|
|
175
201
|
href = item['href']
|
176
202
|
linear = iref['linear'] != 'no'
|
177
203
|
begin
|
178
|
-
content = zipfile.
|
204
|
+
content = zipfile.content(from_opf_root(opf_root, href))
|
179
205
|
rescue
|
180
206
|
href = URI.unescape(href)
|
181
|
-
content = zipfile.
|
207
|
+
content = zipfile.content(from_opf_root(opf_root, href))
|
182
208
|
end
|
183
209
|
@book.add_component(
|
184
210
|
href,
|
@@ -205,8 +231,16 @@ class Peregrin::Epub
|
|
205
231
|
}
|
206
232
|
end
|
207
233
|
|
234
|
+
def extract_chapters(zipfile, docs)
|
235
|
+
if @book.version >= 3 && !docs[:nav].nil?
|
236
|
+
extract_nav_chapters(zipfile, docs[:nav])
|
237
|
+
else
|
238
|
+
extract_ncx_chapters(zipfile, docs[:ncx])
|
239
|
+
end
|
240
|
+
end
|
208
241
|
|
209
|
-
|
242
|
+
# Epub < 3.0 only
|
243
|
+
def extract_ncx_chapters(zipfile, ncx_doc)
|
210
244
|
curse = lambda { |point|
|
211
245
|
chp = Peregrin::Chapter.new(
|
212
246
|
point.at_xpath('.//ncx:text', NAMESPACES[:ncx]).content,
|
@@ -225,6 +259,32 @@ class Peregrin::Epub
|
|
225
259
|
}
|
226
260
|
end
|
227
261
|
|
262
|
+
# Epub >= 3.0 only
|
263
|
+
def extract_nav_chapters(zipfile, nav_doc)
|
264
|
+
curse = lambda { |point, position|
|
265
|
+
chp = Peregrin::Chapter.new(
|
266
|
+
point.at_xpath('.//nav:a', NAMESPACES[:nav]).content,
|
267
|
+
position,
|
268
|
+
point.at_xpath('.//nav:a', NAMESPACES[:nav])['href']
|
269
|
+
)
|
270
|
+
ol = point.at_xpath('.//nav:ol', NAMESPACES[:nav])
|
271
|
+
ol.children.each { |pt|
|
272
|
+
next unless pt.element? && pt.name == "li"
|
273
|
+
position += 1
|
274
|
+
position, chapter = curse.call(pt, position)
|
275
|
+
chp.children.push chapter
|
276
|
+
} if ol
|
277
|
+
[position, chp]
|
278
|
+
}
|
279
|
+
position = 0
|
280
|
+
nav_doc.at_xpath("//nav:nav/nav:ol", NAMESPACES[:nav]).children.each { |pt|
|
281
|
+
next unless pt.element? && pt.name == "li"
|
282
|
+
position += 1
|
283
|
+
position, chapter = curse.call(pt, position)
|
284
|
+
@book.chapters.push chapter
|
285
|
+
}
|
286
|
+
end
|
287
|
+
|
228
288
|
|
229
289
|
def extract_cover(zipfile, docs)
|
230
290
|
@book.cover = nil
|
@@ -252,7 +312,7 @@ class Peregrin::Epub
|
|
252
312
|
else
|
253
313
|
path = from_opf_root(docs[:opf_root], res.src)
|
254
314
|
begin
|
255
|
-
doc = Nokogiri::XML::Document.parse(zipfile.
|
315
|
+
doc = Nokogiri::XML::Document.parse(zipfile.content(path))
|
256
316
|
src = nil
|
257
317
|
if img = doc.at_css('img')
|
258
318
|
src = img['src']
|
@@ -549,5 +609,6 @@ class Peregrin::Epub
|
|
549
609
|
class FailureLoadingOCF < ValidationError; end
|
550
610
|
class FailureLoadingOPF < ValidationError; end
|
551
611
|
class FailureLoadingNCX < ValidationError; end
|
612
|
+
class FailureLoadingNAV < ValidationError; end
|
552
613
|
|
553
614
|
end
|
data/lib/formats/zhook.rb
CHANGED
@@ -28,7 +28,7 @@ class Peregrin::Zhook
|
|
28
28
|
raise MissingCoverPNG.new(path)
|
29
29
|
end
|
30
30
|
|
31
|
-
doc = Nokogiri::HTML::Document.parse(zf.
|
31
|
+
doc = Nokogiri::HTML::Document.parse(zf.content(INDEX_PATH), nil, 'UTF-8')
|
32
32
|
raise IndexHTMLRootHasId.new(path) if doc.root['id']
|
33
33
|
|
34
34
|
ensure
|
@@ -42,16 +42,14 @@ class Peregrin::Zhook
|
|
42
42
|
validate(path)
|
43
43
|
book = Peregrin::Book.new
|
44
44
|
Zip::Archive.open(path) { |zf|
|
45
|
-
book.add_component(INDEX_PATH, zf.
|
45
|
+
book.add_component(INDEX_PATH, zf.content(INDEX_PATH))
|
46
46
|
zf.each { |entry|
|
47
47
|
ze = entry.name
|
48
48
|
book.add_resource(ze) unless ze == INDEX_PATH || entry.directory?
|
49
49
|
}
|
50
50
|
}
|
51
51
|
book.read_resource_proc = lambda { |resource|
|
52
|
-
Zip::Archive.open(path) { |zipfile|
|
53
|
-
zipfile.read(resource.src)
|
54
|
-
}
|
52
|
+
Zip::Archive.open(path) { |zipfile| zipfile.content(resource.src) }
|
55
53
|
}
|
56
54
|
|
57
55
|
extract_properties_from_index(book)
|
data/lib/peregrin/book.rb
CHANGED
@@ -19,6 +19,10 @@ class Peregrin::Book
|
|
19
19
|
# A Resource that is used for the book cover.
|
20
20
|
attr_accessor :cover
|
21
21
|
|
22
|
+
# The current version of document specifications
|
23
|
+
# Only used for Epub for now
|
24
|
+
attr_accessor :version
|
25
|
+
|
22
26
|
# A proc that copies a resource to the given destination.
|
23
27
|
attr_writer :read_resource_proc
|
24
28
|
|
data/lib/peregrin/version.rb
CHANGED
data/lib/peregrin/zip_patch.rb
CHANGED
data/test/formats/epub_test.rb
CHANGED
@@ -89,6 +89,91 @@ class Peregrin::Tests::EpubTest < Test::Unit::TestCase
|
|
89
89
|
assert_equal("cover.png", epub.to_book.cover.src)
|
90
90
|
end
|
91
91
|
|
92
|
+
def test_extracting_epub3_fixed_layout_properties
|
93
|
+
epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_fixed_layout.epub")
|
94
|
+
book = epub.to_book
|
95
|
+
assert_equal("2012-05-09T08:58:00Z", book.property_for('dcterms:modified'))
|
96
|
+
assert_equal("pre-paginated", book.property_for('rendition:layout'))
|
97
|
+
assert_equal("auto", book.property_for('rendition:orientation'))
|
98
|
+
assert_equal("both", book.property_for('rendition:spread'))
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_extracting_version
|
102
|
+
epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_fixed_layout.epub")
|
103
|
+
assert_equal(3.0, epub.to_book.version)
|
104
|
+
|
105
|
+
epub = Peregrin::Epub.read("test/fixtures/epubs/strunk.epub")
|
106
|
+
assert_equal(2.0, epub.to_book.version)
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_extracting_chapters_from_ocx
|
110
|
+
epub = Peregrin::Epub.read("test/fixtures/epubs/strunk.epub")
|
111
|
+
assert_equal(9, epub.to_book.chapters.count)
|
112
|
+
assert_equal("Title", epub.to_book.chapters.first.title)
|
113
|
+
assert_equal("title.xml", epub.to_book.chapters.first.src)
|
114
|
+
assert_equal(1, epub.to_book.chapters.first.position)
|
115
|
+
assert_equal("Recommendations", epub.to_book.chapters.last.title)
|
116
|
+
assert_equal("similar.xml", epub.to_book.chapters.last.src)
|
117
|
+
assert_equal(27, epub.to_book.chapters.last.position)
|
118
|
+
end
|
119
|
+
|
120
|
+
def test_extracting_chapters_from_nav
|
121
|
+
epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_fixed_layout.epub")
|
122
|
+
assert_equal(3, epub.to_book.chapters.count)
|
123
|
+
assert_equal("Images and Text", epub.to_book.chapters.first.title)
|
124
|
+
assert_equal("page01.xhtml", epub.to_book.chapters.first.src)
|
125
|
+
assert_equal(1, epub.to_book.chapters.first.position)
|
126
|
+
assert_equal("Dragons", epub.to_book.chapters.last.title)
|
127
|
+
assert_equal("page04.xhtml", epub.to_book.chapters.last.src)
|
128
|
+
assert_equal(3, epub.to_book.chapters.last.position)
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_extracting_nested_chapters_from_nav
|
132
|
+
epub = Peregrin::Epub.read("test/fixtures/epubs/epub3_nested_nav.epub")
|
133
|
+
assert_equal(11, epub.to_book.chapters.count)
|
134
|
+
assert_equal(
|
135
|
+
["EPUB 3.0 Specification",
|
136
|
+
"EPUB 3 Specifications - Table of Contents",
|
137
|
+
"Terminology",
|
138
|
+
"EPUB 3 Overview",
|
139
|
+
"EPUB Publications 3.0",
|
140
|
+
"EPUB Content Documents 3.0",
|
141
|
+
"EPUB Media Overlays 3.0",
|
142
|
+
"Acknowledgements and Contributors",
|
143
|
+
"References",
|
144
|
+
"EPUB Open Container Format (OCF) 3.0",
|
145
|
+
"EPUB 3 Changes from EPUB 2.0.1"],
|
146
|
+
epub.to_book.chapters.map(&:title)
|
147
|
+
)
|
148
|
+
assert_equal(
|
149
|
+
[1, 2, 3, 4, 30, 85, 184, 230, 231, 232, 265],
|
150
|
+
epub.to_book.chapters.map(&:position)
|
151
|
+
)
|
152
|
+
assert_equal(
|
153
|
+
["1. Introduction",
|
154
|
+
"2. Features",
|
155
|
+
"3. Global Language Support",
|
156
|
+
"4. Accessibility"],
|
157
|
+
epub.to_book.chapters[3].children.map(&:title)
|
158
|
+
)
|
159
|
+
assert_equal(
|
160
|
+
[5, 8, 22, 29],
|
161
|
+
epub.to_book.chapters[3].children.map(&:position)
|
162
|
+
)
|
163
|
+
assert_equal(
|
164
|
+
["3.1. Metadata",
|
165
|
+
"3.2. Content Documents",
|
166
|
+
"3.3. CSS",
|
167
|
+
"3.4. Fonts",
|
168
|
+
"3.5. Text-to-speech",
|
169
|
+
"3.6. Container"],
|
170
|
+
epub.to_book.chapters[3].children[2].children.map(&:title)
|
171
|
+
)
|
172
|
+
assert_equal(
|
173
|
+
[23, 24, 25, 26, 27, 28],
|
174
|
+
epub.to_book.chapters[3].children[2].children.map(&:position)
|
175
|
+
)
|
176
|
+
end
|
92
177
|
|
93
178
|
def test_read_epub_to_write_epub
|
94
179
|
epub = Peregrin::Epub.read("test/fixtures/epubs/strunk.epub")
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: peregrin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
5
6
|
segments:
|
6
7
|
- 1
|
7
8
|
- 1
|
8
|
-
-
|
9
|
-
version: 1.1.
|
9
|
+
- 4
|
10
|
+
version: 1.1.4
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Joseph Pearson
|
@@ -14,16 +15,17 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
-
default_executable:
|
18
|
+
date: 2012-05-14 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: nokogiri
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
27
29
|
segments:
|
28
30
|
- 0
|
29
31
|
version: "0"
|
@@ -33,9 +35,11 @@ dependencies:
|
|
33
35
|
name: zipruby
|
34
36
|
prerelease: false
|
35
37
|
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
36
39
|
requirements:
|
37
40
|
- - ">="
|
38
41
|
- !ruby/object:Gem::Version
|
42
|
+
hash: 3
|
39
43
|
segments:
|
40
44
|
- 0
|
41
45
|
version: "0"
|
@@ -45,9 +49,11 @@ dependencies:
|
|
45
49
|
name: mime-types
|
46
50
|
prerelease: false
|
47
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
48
53
|
requirements:
|
49
54
|
- - ">="
|
50
55
|
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
51
57
|
segments:
|
52
58
|
- 0
|
53
59
|
version: "0"
|
@@ -57,9 +63,11 @@ dependencies:
|
|
57
63
|
name: rake
|
58
64
|
prerelease: false
|
59
65
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
60
67
|
requirements:
|
61
68
|
- - ">="
|
62
69
|
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
63
71
|
segments:
|
64
72
|
- 0
|
65
73
|
version: "0"
|
@@ -98,7 +106,6 @@ files:
|
|
98
106
|
- test/utils/outliner_test.rb
|
99
107
|
- README.md
|
100
108
|
- MIT-LICENSE
|
101
|
-
has_rdoc: true
|
102
109
|
homepage: http://ochook.org/peregrin
|
103
110
|
licenses: []
|
104
111
|
|
@@ -111,23 +118,27 @@ rdoc_options:
|
|
111
118
|
require_paths:
|
112
119
|
- lib
|
113
120
|
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
114
122
|
requirements:
|
115
123
|
- - ">="
|
116
124
|
- !ruby/object:Gem::Version
|
125
|
+
hash: 3
|
117
126
|
segments:
|
118
127
|
- 0
|
119
128
|
version: "0"
|
120
129
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
none: false
|
121
131
|
requirements:
|
122
132
|
- - ">="
|
123
133
|
- !ruby/object:Gem::Version
|
134
|
+
hash: 3
|
124
135
|
segments:
|
125
136
|
- 0
|
126
137
|
version: "0"
|
127
138
|
requirements: []
|
128
139
|
|
129
140
|
rubyforge_project: nowarning
|
130
|
-
rubygems_version: 1.
|
141
|
+
rubygems_version: 1.8.24
|
131
142
|
signing_key:
|
132
143
|
specification_version: 3
|
133
144
|
summary: Peregrin - ebook conversion
|