peregrin 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.md +148 -0
- data/bin/peregrin +6 -0
- data/lib/formats/epub.rb +553 -0
- data/lib/formats/ochook.rb +113 -0
- data/lib/formats/zhook.rb +394 -0
- data/lib/peregrin/book.rb +87 -0
- data/lib/peregrin/chapter.rb +31 -0
- data/lib/peregrin/component.rb +12 -0
- data/lib/peregrin/componentizer.rb +118 -0
- data/lib/peregrin/outliner.rb +204 -0
- data/lib/peregrin/property.rb +16 -0
- data/lib/peregrin/resource.rb +24 -0
- data/lib/peregrin/version.rb +5 -0
- data/lib/peregrin/zip_patch.rb +11 -0
- data/lib/peregrin.rb +139 -0
- data/test/conversion_test.rb +80 -0
- data/test/formats/epub_test.rb +159 -0
- data/test/formats/ochook_test.rb +104 -0
- data/test/formats/zhook_test.rb +219 -0
- data/test/test_helper.rb +16 -0
- data/test/utils/componentizer_test.rb +78 -0
- data/test/utils/outliner_test.rb +49 -0
- metadata +135 -0
data/lib/formats/epub.rb
ADDED
@@ -0,0 +1,553 @@
|
|
1
|
+
class Peregrin::Epub
|
2
|
+
|
3
|
+
# Name of the format handled by this class.
FORMAT = "EPUB"

# XPath namespace bindings, keyed by the kind of document they are used with.
NAMESPACES = {
  :ocf => { 'ocf' => 'urn:oasis:names:tc:opendocument:xmlns:container' },
  :opf => { 'opf' => 'http://www.idpf.org/2007/opf' },
  :dc => { 'dc' => 'http://purl.org/dc/elements/1.1/' },
  :ncx => { 'ncx' => 'http://www.daisy.org/z3986/2005/ncx/' },
  :svg => { 'svg' => 'http://www.w3.org/2000/svg' }
}

# Location of the container document inside the archive.
OCF_PATH = "META-INF/container.xml"

HTML5_TAGNAMES = %w[section nav article aside hgroup header footer figure figcaption] # FIXME: Which to divify? Which to leave as-is?

# File extension => media type. (The '.odt' entry used to be listed twice;
# the duplicate key was dead and triggered a Ruby warning.)
MIMETYPE_MAP = {
  '.xhtml' => 'application/xhtml+xml',
  '.odt' => 'application/x-dtbook+xml',
  '.ncx' => 'application/x-dtbncx+xml',
  '.epub' => 'application/epub+zip'
}

# Directory (inside the archive) that holds the generated content.
OEBPS = "OEBPS"

# Basenames (without extension) of the generated NCX and OPF files.
NCX = 'content'
OPF = 'content'
|
24
|
+
|
25
|
+
|
26
|
+
# Checks that the file at path is a zip archive whose OCF, OPF and NCX
# documents can be loaded.
#
# Raises FileNotFound if path is not a regular file, NotAZipArchive if the
# archive cannot be opened, and otherwise re-raises whatever class of error
# loading the config documents produced, constructed with the path.
def self.validate(path)
  raise FileNotFound.new(path) unless File.file?(path)

  archive =
    begin
      Zip::Archive.open(path)
    rescue
      raise NotAZipArchive.new(path)
    end

  begin
    # load_config_documents is protected, hence the send.
    new(Peregrin::Book.new).send(:load_config_documents, archive)
  rescue => err
    raise err.class.new(path)
  end
ensure
  # archive is nil when we failed before (or while) opening it.
  archive.close if archive
end
|
44
|
+
|
45
|
+
|
46
|
+
# Creates an instance backed by a fresh Peregrin::Book that is populated
# from the EPUB at path.
def self.read(path)
  new(Peregrin::Book.new, path)
end
|
50
|
+
|
51
|
+
|
52
|
+
# book      - the book this instance wraps.
# epub_path - optional path to an EPUB file; when given, the book is
#             populated from it immediately.
def initialize(book, epub_path = nil)
  @book = book
  load_from_path(epub_path) if epub_path
end
|
58
|
+
|
59
|
+
|
60
|
+
# Writes the book out as an EPUB file at path. The individual documents are
# generated inside a working directory derived from path and then zipped.
# Returns whatever zip_it_up returns.
def write(path)
  with_working_dir(path) do
    build_ocf
    build_ncx
    write_components
    build_opf
    zip_it_up(File.basename(path))
  end
end
|
69
|
+
|
70
|
+
|
71
|
+
# Returns a deep clone of the wrapped book. The options argument is accepted
# but not used here.
def to_book(options = {})
  @book.deep_clone
end
|
74
|
+
|
75
|
+
|
76
|
+
protected
|
77
|
+
|
78
|
+
#---------------------------------------------------------------------------
|
79
|
+
# READING
|
80
|
+
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
81
|
+
|
82
|
+
# Populates @book from the EPUB archive at epub_path: properties, components,
# chapters and cover are extracted while the archive is open, and a proc is
# installed on the book that reads a resource's bytes from the archive on
# demand.
def load_from_path(epub_path)
  docs = nil
  Zip::Archive.open(epub_path) { |zipfile|
    docs = load_config_documents(zipfile)
    extract_properties(docs[:opf])
    extract_components(zipfile, docs[:opf], docs[:opf_root])
    extract_chapters(zipfile, docs[:ncx])
    extract_cover(zipfile, docs)
  }
  # The archive is re-opened on every call, so the proc stays usable after
  # this method has returned.
  @book.read_resource_proc = lambda { |resource|
    media_path = from_opf_root(docs[:opf_root], resource.src)
    # URI.unescape was removed in Ruby 3.0; the parser object provides the
    # same percent-decoding.
    media_path = URI::DEFAULT_PARSER.unescape(media_path)
    Zip::Archive.open(epub_path) { |zipfile| zipfile.read(media_path) }
  }
end
|
97
|
+
|
98
|
+
|
99
|
+
# Reads and parses the three configuration documents of an EPUB from zipfile.
#
# Returns a hash with the keys :ocf, :opf_path, :opf_root, :opf, :ncx_path
# and :ncx. Raises FailureLoadingOCF, FailureLoadingOPF or FailureLoadingNCX
# when the corresponding step fails for any reason.
def load_config_documents(zipfile)
  docs = {}

  # The OCF file.
  begin
    docs[:ocf] = Nokogiri::XML::Document.parse(zipfile.read(OCF_PATH))
  rescue
    raise FailureLoadingOCF
  end

  # The OPF file, located through the OCF's rootfile entry.
  begin
    rootfile = docs[:ocf].at_xpath(
      '//ocf:rootfile[@media-type="application/oebps-package+xml"]',
      NAMESPACES[:ocf]
    )
    opf_path = rootfile['full-path']
    docs[:opf_path] = opf_path
    docs[:opf_root] = File.dirname(opf_path)
    docs[:opf] = Nokogiri::XML::Document.parse(zipfile.read(opf_path))
  rescue
    raise FailureLoadingOPF
  end

  # The NCX file: the manifest item named by the spine's 'toc' attribute,
  # or the item with id 'ncx' when the attribute is absent.
  begin
    spine = docs[:opf].at_xpath('//opf:spine', NAMESPACES[:opf])
    ncx_id = spine['toc'] || 'ncx'
    ncx_item = docs[:opf].at_xpath(
      "//opf:manifest/opf:item[@id=#{escape_for_xpath(ncx_id)}]",
      NAMESPACES[:opf]
    )

    docs[:ncx_path] = from_opf_root(docs[:opf_root], ncx_item['href'])
    docs[:ncx] = Nokogiri::XML::Document.parse(zipfile.read(docs[:ncx_path]))
  rescue
    raise FailureLoadingNCX
  end

  docs
end
|
137
|
+
|
138
|
+
|
139
|
+
# Registers one book property per element child of the OPF <metadata>.
#
# <meta> elements contribute their 'name' and 'content' attributes; every
# other element contributes its tag name and text content. All remaining
# attributes are passed along as the property's attribute hash.
def extract_properties(opf_doc)
  metadata = opf_doc.at_xpath('//opf:metadata', NAMESPACES[:opf])
  entries = metadata.children.select { |node| node.element? }

  entries.each do |node|
    is_meta = node.name == "meta"
    name = is_meta ? node['name'] : node.name
    content = is_meta ? node['content'] : node.content

    extras = {}
    node.attributes.each do |key, attr|
      next if key == "name" || key == "content"
      extras[key] = attr.value
    end

    @book.add_property(name, content, extras)
  end
end
|
162
|
+
|
163
|
+
|
164
|
+
# Fills @book with components and resources described by the OPF document.
#
# zipfile  - the open archive that component contents are read from.
# opf_doc  - the parsed OPF document.
# opf_root - directory of the OPF file inside the archive; hrefs are
#            resolved against it.
def extract_components(zipfile, opf_doc, opf_root)
  ids = {}
  manifest = opf_doc.at_xpath('//opf:manifest', NAMESPACES[:opf])
  spine = opf_doc.at_xpath('//opf:spine', NAMESPACES[:opf])

  # Spine entries become components, in spine order.
  spine.search('//opf:itemref', NAMESPACES[:opf]).each { |iref|
    id = iref['idref']
    if item = manifest.at_xpath(
      "//opf:item[@id=#{escape_for_xpath(id)}]",
      NAMESPACES[:opf]
    )
      href = item['href']
      linear = iref['linear'] != 'no'
      begin
        content = zipfile.read(from_opf_root(opf_root, href))
      rescue
        # Retry with the percent-decoded href. URI.unescape was removed in
        # Ruby 3.0, where calling it here would replace the read error with
        # a NoMethodError; the parser object offers the same decoding.
        href = URI::DEFAULT_PARSER.unescape(href)
        content = zipfile.read(from_opf_root(opf_root, href))
      end
      @book.add_component(
        href,
        content,
        item['media-type'],
        :id => id,
        :linear => linear ? "yes" : "no"
      )
    end
  }

  # Every other manifest item (except the NCX) becomes a resource.
  manifest.search('//opf:item', NAMESPACES[:opf]).each { |item|
    id = item['id']
    next if item['media-type'] == MIMETYPE_MAP['.ncx']
    next if @book.components.detect { |cmpt| cmpt.attributes[:id] == id }
    @book.add_resource(item['href'], item['media-type'], :id => id)
  }

  # Guide references annotate the file whose src equals the reference href.
  opf_doc.search("//opf:guide/opf:reference", NAMESPACES[:opf]).each { |ref|
    if it = @book.all_files.detect { |cmpt| cmpt.src == ref['href'] }
      it.attributes[:guide_type] = ref['type']
      it.attributes[:guide] = ref['title']
    end
  }
end
|
207
|
+
|
208
|
+
|
209
|
+
# Builds the book's chapter tree from the <navPoint> elements of the NCX
# <navMap>. Nested navPoints become child chapters. The zipfile argument is
# not used.
def extract_chapters(zipfile, ncx_doc)
  ns = NAMESPACES[:ncx]

  # Converts one navPoint (and, recursively, its navPoint children) into a
  # Peregrin::Chapter.
  build_chapter = lambda do |nav_point|
    title = nav_point.at_xpath('.//ncx:text', ns).content
    position = nav_point['playOrder']
    src = nav_point.at_xpath('.//ncx:content', ns)['src']
    chapter = Peregrin::Chapter.new(title, position, src)

    nav_point.children.each do |child|
      if child.element? && child.name == "navPoint"
        chapter.children.push(build_chapter.call(child))
      end
    end
    chapter
  end

  ncx_doc.at_xpath("//ncx:navMap", ns).children.each do |child|
    if child.element? && child.name == "navPoint"
      @book.chapters.push(build_chapter.call(child))
    end
  end
end
|
227
|
+
|
228
|
+
|
229
|
+
# Works out which file is the book's cover and stores it in @book.cover.
#
# A candidate file is chosen by the first rule that matches:
#   1. the file whose id equals the 'cover' property,
#   2. a file whose guide type is 'cover',
#   3. a file with the id 'cover-image', 'cover' or 'coverpage',
#   4. the first of all files.
# If the candidate's media type starts with "image/" it is the cover.
# Otherwise it is parsed as XML and the resource whose src equals the first
# <img> src (or, failing that, the first SVG <image> href) becomes the
# cover. Returns the cover, or nil if there were no files at all.
def extract_cover(zipfile, docs)
  @book.cover = nil

  file_with_id = lambda { |wanted|
    @book.all_files.detect { |f| f.attributes[:id] == wanted }
  }

  # 1. Cover image referenced from metadata
  declared_id = @book.property_for('cover')
  candidate = declared_id ? file_with_id.call(declared_id) : nil

  # 2. A file listed in the guide as 'cover'
  candidate ||= @book.all_files.detect { |f| f.attributes[:guide_type] == 'cover' }

  # 3. A file with the id of 'cover-image', or 'cover', or 'coverpage'.
  ['cover-image', 'cover', 'coverpage'].each { |known_id|
    candidate ||= file_with_id.call(known_id)
  }

  # 4. The first file.
  candidate ||= @book.all_files.first

  return unless candidate

  if candidate.media_type.match(/^image\//)
    @book.cover = candidate
    return @book.cover
  end

  path = from_opf_root(docs[:opf_root], candidate.src)
  begin
    doc = Nokogiri::XML::Document.parse(zipfile.read(path))
    image_src =
      if (img = doc.at_css('img'))
        img['src']
      elsif (img = doc.at_xpath('//svg:image', NAMESPACES[:svg]))
        img['href']
      end
    if image_src
      @book.cover = @book.resources.detect { |r| r.src == image_src }
    end
  rescue
    #puts "Cover component is not an image or an XML document."
  end

  @book.cover
end
|
272
|
+
|
273
|
+
|
274
|
+
#---------------------------------------------------------------------------
|
275
|
+
# WRITING
|
276
|
+
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
277
|
+
|
278
|
+
# Runs the given block with @working_dir set to a freshly emptied directory
# that sits next to path and is named after path minus its extension.
# @working_dir is reset to nil afterwards; the directory itself is left on
# disk. Raises ArgumentError when called without a block.
def with_working_dir(path)
  raise ArgumentError unless block_given?
  parent = File.dirname(path)
  stem = File.basename(path, File.extname(path))
  @working_dir = File.join(parent, stem)
  # Start from a clean slate.
  FileUtils.rm_rf(@working_dir)
  FileUtils.mkdir_p(@working_dir)
  yield
ensure
  #FileUtils.rm_rf(@working_dir)
  @working_dir = nil
end
|
291
|
+
|
292
|
+
|
293
|
+
# Joins the current working directory with the given path segments.
# Nested arrays are flattened and nil segments (including an unset
# @working_dir) are skipped.
def working_dir(*args)
  segments = [@working_dir, *args].flatten.compact
  File.join(*segments)
end
|
296
|
+
|
297
|
+
|
298
|
+
# Writes the OCF container document into the working directory. It declares
# a single rootfile: the generated OPF package.
def build_ocf
  rootfile_attrs = {
    "full-path" => "OEBPS/#{OPF}.opf",
    "media-type" => "application/oebps-package+xml"
  }
  build_xml_file(working_dir(OCF_PATH)) do |xml|
    xml.container(:xmlns => NAMESPACES[:ocf]["ocf"], :version => "1.0") do
      xml.rootfiles do
        xml.rootfile(rootfile_attrs)
      end
    end
  end
end
|
310
|
+
|
311
|
+
|
312
|
+
# Writes the NCX (table of contents) document into the OEBPS directory and
# remembers the value returned by build_xml_file in @ncx_path.
#
# One <navPoint> is emitted per chapter, nested like the chapter tree;
# chapters reporting empty_leaf? are skipped.
def build_ncx
  @ncx_path = build_xml_file(working_dir(OEBPS, "#{NCX}.ncx")) do |xml|
    xml.ncx('xmlns' => NAMESPACES[:ncx]["ncx"], :version => "2005-1") do
      xml.head do
        xml.meta(:name => "dtb:uid", :content => unique_identifier)
        xml.meta(:name => "dtb:depth", :content => heading_depth)
        xml.meta(:name => "dtb:totalPageCount", :content => "0")
        xml.meta(:name => "dtb:maxPageNumber", :content => "0")
      end
      xml.docTitle do
        xml.text_(@book.property_for('title'))
      end
      xml.navMap do
        # Running counter shared by all nesting levels, so ids stay unique.
        counter = 0
        emit = lambda do |chapters|
          chapters.each do |chapter|
            next if chapter.empty_leaf?
            counter += 1
            xml.navPoint(
              :id => "navPoint#{counter}",
              :playOrder => chapter.position
            ) do
              xml.navLabel { xml.text_(chapter.title) }
              xml.content(:src => chapter.src)
              emit.call(chapter.children) if chapter.children.any?
            end
          end
        end
        emit.call(@book.chapters)
      end
    end
  end
end
|
344
|
+
|
345
|
+
|
346
|
+
# Writes every component and resource of the book below the OEBPS directory
# of the working dir, assigning ids to those that have none yet.
def write_components
  # Linear components.
  @book.components.each { |cmpt|
    # Default id: the file name without its extension.
    cmpt.attributes[:id] ||= File.basename(cmpt.src, File.extname(cmpt.src))

    doc = Nokogiri::HTML::Document.parse(cmpt.contents)
    html = root_to_xhtml(doc.root)
    dest_path = working_dir(OEBPS, cmpt.src)
    # A component src may contain sub-directories; create them first, as is
    # done for resources below, so the open does not fail.
    FileUtils.mkdir_p(File.dirname(dest_path))
    File.open(dest_path, 'w') { |f| f.write(html) }
  }

  # Other components (@book.resources)
  @book.resources.each { |res|
    # Default id: "<dirname>-<basename>" with runs of non-word characters
    # collapsed to '-', leading dashes removed and a leading digit prefixed
    # with 'a-'.
    res.attributes[:id] ||= (
      "#{File.dirname(res.src)}-#{File.basename(res.src)}"
    ).gsub(/[^\w]+/, '-').gsub(/^-+/, '').gsub(/^(\d)/, 'a-\1')

    dest_path = working_dir(OEBPS, res.src)
    FileUtils.mkdir_p(File.dirname(dest_path))
    @book.copy_resource_to(res, dest_path)
  }
end
|
367
|
+
|
368
|
+
|
369
|
+
# Writes the OPF package document into the OEBPS directory: Dublin Core
# metadata, the manifest of all components, resources and the NCX, the
# spine, and a guide built from components carrying a :guide attribute.
def build_opf
  package_attrs = {
    'xmlns' => "http://www.idpf.org/2007/opf",
    'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
    'version' => "2.0",
    'unique-identifier' => 'bookid'
  }
  # Optional Dublin Core terms, emitted only when the book has the property.
  optional_terms = %w[
    creator subject description publisher contributor
    date source relation coverage rights
  ]

  build_xml_file(working_dir(OEBPS, "#{OPF}.opf")) do |xml|
    xml.package(package_attrs) do
      xml.metadata do
        xml['dc'].title(@book.property_for('title') || 'Untitled')
        xml['dc'].identifier(unique_identifier, :id => 'bookid')
        xml['dc'].language(@book.property_for('language') || 'en')
        optional_terms.each do |term|
          value = @book.property_for(term)
          next unless value
          # One element per line of the property value.
          value.split(/\n/).each { |line| xml['dc'].send(term, line) }
        end
        if @book.cover
          cover_id = @book.cover.attributes[:id] || "cover"
          xml.meta(:name => "cover", :content => cover_id)
        end
      end

      xml.manifest do
        @book.components.each do |component|
          xml.item(
            'id' => component.attributes[:id],
            'href' => component.src,
            'media-type' => MIMETYPE_MAP['.xhtml']
          )
        end
        @book.resources.each do |resource|
          xml.item(
            'id' => resource.attributes[:id],
            'href' => resource.src,
            'media-type' => resource.media_type
          )
        end
        xml.item(
          'id' => NCX,
          'href' => @ncx_path,
          'media-type' => MIMETYPE_MAP['.ncx']
        )
      end

      xml.spine(:toc => NCX) do
        @book.components.each do |component|
          xml.itemref(
            :idref => component.attributes[:id],
            :linear => component.attributes[:linear] || 'yes'
          )
        end
      end

      xml.guide do
        @book.components.each do |component|
          next unless component.attributes[:guide]
          guide_type = component.attributes[:guide_type] ||
            component.attributes[:id]
          xml.reference(
            :type => guide_type,
            :title => component.attributes[:guide],
            :href => component.src
          )
        end
      end
    end
  end
end
|
449
|
+
|
450
|
+
|
451
|
+
# Packs the working directory into an archive called filename, placed next
# to the working directory, using the external `zip` program. The
# "mimetype" file is added first, uncompressed; everything else follows at
# maximum compression. Returns the path of the archive.
def zip_it_up(filename)
  path = working_dir("..", filename)
  File.open(working_dir("mimetype"), 'w') { |f|
    f.write(MIMETYPE_MAP['.epub'])
  }
  # File.exists? was removed in Ruby 3.2.
  File.unlink(path) if File.exist?(path)

  # Arguments are handed to zip as an array, so no shell is involved and
  # paths containing spaces or shell metacharacters are passed through
  # verbatim. The entry list stands in for the shell's `*` glob (top-level
  # entries, dotfiles excluded).
  target = "../#{filename}"
  entries = Dir.entries(working_dir).reject { |e| e.start_with?('.') }.sort
  system('zip', '-0Xq', target, 'mimetype', :chdir => working_dir) &&
    system('zip', '-Xr9Dq', target, *entries, :chdir => working_dir)
  path
end
|
465
|
+
|
466
|
+
|
467
|
+
# The identifier used for this book: its 'bookid' property if present,
# otherwise a random 12-character string. The value is memoized in @uid.
def unique_identifier
  return @uid if @uid
  @uid = @book.property_for('bookid') || random_string(12)
end
|
470
|
+
|
471
|
+
|
472
|
+
# Returns a random string of len lowercase hexadecimal characters.
#
# Uses the standard library's SecureRandom instead of hashing the clock and
# process id, which also lifts the former 40-character ceiling.
def random_string(len)
  require 'securerandom'
  # hex(n) yields 2n characters; round up and trim for odd lengths.
  SecureRandom.hex((len + 1) / 2)[0, len]
end
|
482
|
+
|
483
|
+
|
484
|
+
# Returns the number of levels in the deepest branch of the book's chapter
# tree; 0 when the book has no chapters.
def heading_depth
  depth_of = lambda do |chapters|
    levels = chapters.map do |chp|
      below = chp.children.any? ? depth_of.call(chp.children) : 0
      below + 1
    end
    levels.max || 0
  end
  depth_of.call(@book.chapters)
end
|
498
|
+
|
499
|
+
|
500
|
+
# Builds an XML document with a Nokogiri builder (yielded to the block) and
# writes it, UTF-8 encoded and indented, to path, creating parent
# directories as needed.
#
# Returns path with a leading "<working dir>/OEBPS/" removed; paths outside
# that directory are returned unchanged. Raises ArgumentError when called
# without a block.
def build_xml_file(path)
  raise ArgumentError unless block_given?
  builder = Nokogiri::XML::Builder.new(:encoding => 'UTF-8') { |xml|
    yield(xml)
  }
  FileUtils.mkdir_p(File.dirname(path))
  File.open(path, 'w') { |f|
    builder.doc.write_xml_to(f, :encoding => 'UTF-8', :indent => 2)
  }
  # The directory name is escaped so that regex metacharacters in it
  # (e.g. '.', '+', '(') are matched literally.
  path.sub(/\A#{Regexp.escape(working_dir(OEBPS))}\//, '')
end
|
511
|
+
|
512
|
+
|
513
|
+
# Serializes the document rooted at root as XHTML.
#
# Before serializing, the root's 'manifest' and 'xmlns' attributes are
# removed and every element named in HTML5_TAGNAMES is renamed to <div>,
# with its former tag name appended to its class attribute.
def root_to_xhtml(root)
  root.remove_attribute('manifest')

  selector = HTML5_TAGNAMES.join(', ')
  root.css(selector).each do |elem|
    existing = elem['class']
    elem['class'] =
      if existing.nil? || existing.empty?
        "#{elem.name}"
      else
        "#{existing} #{elem.name}"
      end
    elem.name = "div"
  end

  root.remove_attribute('xmlns')
  root.to_xhtml(:indent => 2, :encoding => root.document.encoding)
end
|
523
|
+
|
524
|
+
|
525
|
+
# Joins the given path segments, prefixed with opf_root unless opf_root is
# nil, empty or '.' (i.e. the OPF sits at the top of the archive).
def from_opf_root(opf_root, *args)
  at_top_level = !opf_root || opf_root.empty? || opf_root == '.'
  return File.join(*args) if at_top_level
  File.join(opf_root, *args)
end
|
532
|
+
|
533
|
+
|
534
|
+
# Returns str as an XPath 1.0 string expression.
#
# XPath string literals have no escape mechanism, so:
#   * a string without apostrophes is wrapped in apostrophes,
#   * a string with apostrophes but no double quotes is wrapped in double
#     quotes,
#   * a string containing both is assembled with concat() from
#     apostrophe-free pieces joined by a double-quoted apostrophe.
def escape_for_xpath(str)
  return "'#{str}'" unless str.include?("'")
  return "\"#{str}\"" unless str.include?('"')

  # -1 keeps trailing empty pieces, so a trailing apostrophe survives.
  pieces = str.split("'", -1).map { |piece| "'#{piece}'" }
  "concat(#{pieces.join(%q{, "'", })})"
end
|
537
|
+
|
538
|
+
|
539
|
+
# Base class of the errors raised while validating or loading a book.
class ValidationError < ::RuntimeError

  # The path given when the error was constructed, or nil.
  attr_reader :path

  # path - optional path of the file or directory the error refers to.
  def initialize(path = nil)
    @path = path
  end

end
|
546
|
+
|
547
|
+
# The given path is not a regular file.
class FileNotFound < ValidationError
end

# The file could not be opened as a zip archive.
class NotAZipArchive < ValidationError
end

# The OCF container document could not be read or parsed.
class FailureLoadingOCF < ValidationError
end

# The OPF package document could not be located, read or parsed.
class FailureLoadingOPF < ValidationError
end

# The NCX document could not be located, read or parsed.
class FailureLoadingNCX < ValidationError
end
|
552
|
+
|
553
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
class Peregrin::Ochook < Peregrin::Zhook
|
2
|
+
|
3
|
+
# Name of the format handled by this class.
FORMAT = 'Ochook'

# File name of the cache manifest, relative to the book directory.
MANIFEST_PATH = 'ochook.manifest'
|
5
|
+
|
6
|
+
# Checks that the directory at path is a well-formed Ochook.
#
# Raises DirectoryNotFound, MissingIndexHTML, MissingCoverPNG or
# MissingManifest when the directory or one of the required files is absent,
# IndexHTMLRootHasId when the index's root element has an id attribute, and
# IndexHTMLRootHasNoManifest when the root's manifest attribute is not
# MANIFEST_PATH.
def self.validate(path)
  path = path.gsub(/\/$/, '')
  unless File.directory?(path)
    raise DirectoryNotFound.new(path)
  end
  # File.exist? is used throughout: File.exists? was removed in Ruby 3.2.
  unless File.exist?(File.join(path, INDEX_PATH))
    raise MissingIndexHTML.new(path)
  end
  unless File.exist?(File.join(path, COVER_PATH))
    raise MissingCoverPNG.new(path)
  end
  unless File.exist?(File.join(path, MANIFEST_PATH))
    raise MissingManifest.new(path)
  end

  doc = Nokogiri::HTML::Document.parse(IO.read(File.join(path, INDEX_PATH)))
  raise IndexHTMLRootHasId.new(path) if doc.root['id']
  # Compare, don't assign: with `=` the condition was always truthy and the
  # check could never fail.
  unless doc.root['manifest'] == MANIFEST_PATH
    raise IndexHTMLRootHasNoManifest.new(path)
  end
end
|
27
|
+
|
28
|
+
|
29
|
+
# Loads the Ochook directory at path into a new book and returns an
# instance wrapping it.
#
# The index becomes the book's component; every other file below the
# directory, except the manifest, is added as a resource under its path
# relative to the directory. Validation errors propagate to the caller.
def self.read(path)
  path = path.gsub(/\/$/, '')
  validate(path)
  book = Peregrin::Book.new
  book.add_component(INDEX_PATH, IO.read(File.join(path, INDEX_PATH)))
  excluded = [INDEX_PATH, MANIFEST_PATH]
  # Strip the directory prefix literally. Interpolating the path into a
  # regex misbehaved when it contained metacharacters such as '+' or '('.
  root_prefix = "#{path}/"
  Dir.glob(File.join(path, '**', '*')).each { |fpath|
    mpath = fpath.start_with?(root_prefix) ? fpath[root_prefix.length..-1] : fpath
    unless File.directory?(fpath) || excluded.include?(mpath)
      book.add_resource(mpath)
    end
  }
  book.read_resource_proc = lambda { |resource|
    IO.read(File.join(path, resource.src))
  }
  extract_properties_from_index(book)
  new(book)
end
|
47
|
+
|
48
|
+
|
49
|
+
# Sets up the instance through the superclass initializer, then stamps the
# index's root element with the manifest attribute.
def initialize(book)
  super(book)
  insert_manifest_attribute
end
|
53
|
+
|
54
|
+
|
55
|
+
# Writes the book as an Ochook directory at dir, replacing any directory
# already there: the index, all resources, the cover and the cache manifest.
def write(dir)
  FileUtils.rm_rf(dir) if File.directory?(dir)
  FileUtils.mkdir_p(dir)

  # Index
  index_path = File.join(dir, INDEX_PATH)
  File.open(index_path, 'w') { |f| f << htmlize(index) }

  # Resources. Written in binary mode, like the cover below, so that
  # non-text resources are not altered by newline or encoding conversion.
  @book.resources.each { |resource|
    full_path = File.join(dir, resource.src)
    FileUtils.mkdir_p(File.dirname(full_path))
    File.open(full_path, 'wb') { |f| f << @book.read_resource(resource) }
  }

  # Cover
  unless @book.cover == COVER_PATH
    cover_path = File.join(dir, COVER_PATH)
    File.open(cover_path, 'wb') { |f| f << to_png_data(@book.cover) }
    # Make sure the cover is listed in the manifest written below.
    unless @book.resources.detect { |r| r.src == COVER_PATH }
      @book.add_resource(COVER_PATH)
    end
  end

  # Manifest
  manifest_path = File.join(dir, MANIFEST_PATH)
  File.open(manifest_path, 'w') { |f| f << manifest.join("\n") }
end
|
83
|
+
|
84
|
+
|
85
|
+
# Removes the manifest attribute from the index root, then defers to the
# superclass conversion with the same options.
def to_book(options = {})
  remove_manifest_attribute
  super
end
|
89
|
+
|
90
|
+
|
91
|
+
protected
|
92
|
+
|
93
|
+
# Returns the lines of the cache manifest: a fixed header, the index path,
# then the src of every resource of the book.
def manifest
  header = ["CACHE MANIFEST", "", "NETWORK:", "*", "", "CACHE:", INDEX_PATH]
  header + @book.resources.map { |resource| resource.src }
end
|
97
|
+
|
98
|
+
|
99
|
+
# Sets the 'manifest' attribute of the index's <html> element to
# MANIFEST_PATH.
def insert_manifest_attribute
  html = index.at_xpath('/html')
  html.set_attribute('manifest', MANIFEST_PATH)
end
|
102
|
+
|
103
|
+
|
104
|
+
# Removes the 'manifest' attribute from the index's <html> element.
def remove_manifest_attribute
  html = index.at_xpath('/html')
  html.remove_attribute('manifest')
end
|
107
|
+
|
108
|
+
|
109
|
+
# The given path is not a directory.
class DirectoryNotFound < ValidationError
end

# The directory has no cache manifest file.
class MissingManifest < ValidationError
end

# The index's root element does not carry the expected manifest attribute.
class IndexHTMLRootHasNoManifest < ValidationError
end
|
112
|
+
|
113
|
+
end
|