peregrin 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.md +148 -0
- data/bin/peregrin +6 -0
- data/lib/formats/epub.rb +553 -0
- data/lib/formats/ochook.rb +113 -0
- data/lib/formats/zhook.rb +394 -0
- data/lib/peregrin/book.rb +87 -0
- data/lib/peregrin/chapter.rb +31 -0
- data/lib/peregrin/component.rb +12 -0
- data/lib/peregrin/componentizer.rb +118 -0
- data/lib/peregrin/outliner.rb +204 -0
- data/lib/peregrin/property.rb +16 -0
- data/lib/peregrin/resource.rb +24 -0
- data/lib/peregrin/version.rb +5 -0
- data/lib/peregrin/zip_patch.rb +11 -0
- data/lib/peregrin.rb +139 -0
- data/test/conversion_test.rb +80 -0
- data/test/formats/epub_test.rb +159 -0
- data/test/formats/ochook_test.rb +104 -0
- data/test/formats/zhook_test.rb +219 -0
- data/test/test_helper.rb +16 -0
- data/test/utils/componentizer_test.rb +78 -0
- data/test/utils/outliner_test.rb +49 -0
- metadata +135 -0
@@ -0,0 +1,394 @@
|
|
1
|
+
class Peregrin::Zhook
|
2
|
+
|
3
|
+
# Name of the format handled by this class.
FORMAT = 'Zhook'

# Extension a file must carry to pass validation.
FILE_EXT = '.zhook'
# Archive entry that holds the book's HTML document.
INDEX_PATH = 'index.html'
# Archive entry that holds the cover image.
COVER_PATH = 'cover.png'
# Absolute XPaths of the body and head elements of an HTML document.
BODY_XPATH = '/html/body'
HEAD_XPATH = '/html/head'
|
10
|
+
|
11
|
+
# Raises an exception if file at path is not a valid Zhook. Otherwise
|
12
|
+
# returns true.
|
13
|
+
#
|
14
|
+
# path - filesystem path of the candidate Zhook file.
#
# Checks, in order: the path is a regular file, it has the FILE_EXT
# extension, it opens as a zip archive, the archive contains INDEX_PATH and
# COVER_PATH, and the root element of the index document has no id attribute.
#
# Returns true when every check passes.
# Raises FileNotFound, WrongExtension, NotAZipArchive, MissingIndexHTML,
# MissingCoverPNG or IndexHTMLRootHasId for the first check that fails.
def self.validate(path)
  raise FileNotFound.new(path) unless File.file?(path)
  raise WrongExtension.new(path) unless File.extname(path) == FILE_EXT
  begin
    zf = Zip::Archive.open(path)
  rescue
    raise NotAZipArchive.new(path)
  end

  raise MissingIndexHTML.new(path) unless zf.find(INDEX_PATH)
  raise MissingCoverPNG.new(path) unless zf.find(COVER_PATH)

  doc = Nokogiri::HTML::Document.parse(zf.read(INDEX_PATH), nil, 'UTF-8')
  raise IndexHTMLRootHasId.new(path) if doc.root['id']

  # Explicit result: without it the method evaluates to nil (the value of the
  # modifier-if above) even though a valid file is supposed to yield true.
  true
ensure
  # The archive is closed on every exit path, including the raises above.
  zf.close if zf
end
|
37
|
+
|
38
|
+
|
39
|
+
# Unzips the file at path, generates a simple book object, passes to new.
|
40
|
+
#
|
41
|
+
# path - filesystem path of a .zhook archive; it is validated first, so any
#        exception raised by validate propagates to the caller.
#
# The index document becomes the book's only component; every other
# non-directory archive entry is registered as a resource.
#
# Returns a new instance wrapping the freshly built Peregrin::Book.
def self.read(path)
  validate(path)

  book = Peregrin::Book.new
  Zip::Archive.open(path) do |archive|
    book.add_component(INDEX_PATH, archive.read(INDEX_PATH))
    archive.each do |entry|
      entry_name = entry.name
      next if entry_name == INDEX_PATH || entry.directory?
      book.add_resource(entry_name)
    end
  end

  # Resource contents are fetched lazily: the archive is reopened each time
  # the book asks for a resource.
  book.read_resource_proc = lambda do |resource|
    Zip::Archive.open(path) { |archive| archive.read(resource.src) }
  end

  extract_properties_from_index(book)
  new(book)
end
|
61
|
+
|
62
|
+
|
63
|
+
# Stitches together components of the internal book.
|
64
|
+
#
|
65
|
+
# book - the Peregrin::Book to wrap. It is modified in place: multiple
#        components are stitched into one, its properties are mirrored into
#        the index document as meta tags, its chapters are rebuilt from the
#        index outline, and a cover resource is assigned if none is set.
def initialize(book)
  @book = book

  stitch_components(@book) if @book.components.length > 1
  consolidate_properties(@book)
  @book.chapters = outline_book(index)

  # Keep an existing cover; otherwise reuse the cover.png resource, or
  # register one if the book has no such resource yet.
  return if @book.cover
  existing_cover = @book.resources.detect { |res| res.src == COVER_PATH }
  @book.cover = existing_cover || @book.add_resource(COVER_PATH)
end
|
81
|
+
|
82
|
+
|
83
|
+
# Writes the internal book object to a .zhook file at the given path.
|
84
|
+
#
|
85
|
+
# path - destination of the .zhook archive; an existing file is deleted first.
#
# The archive receives the index document, every resource of the book, and -
# when the book's cover is not already stored as cover.png - a PNG rendition
# of the cover under COVER_PATH.
#
# Returns path.
def write(path)
  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling on every Ruby version.
  File.unlink(path) if File.exist?(path)
  Zip::Archive.open(path, Zip::CREATE) { |zipfile|
    zipfile.add_buffer(INDEX_PATH, htmlize(index))
    @book.resources.each { |resource|
      zipfile.add_buffer(resource.src, @book.read_resource(resource))
    }
    unless @book.cover.src == COVER_PATH
      zipfile.add_buffer(COVER_PATH, to_png_data(@book.cover))
    end
  }
  path
end
|
98
|
+
|
99
|
+
|
100
|
+
# Returns the internal book object.
|
101
|
+
#
|
102
|
+
# Builds a book from a deep clone of the wrapped book.
#
# options - Hash; only options[:componentize] is consulted.
#           Truthy: the index document is split into one component per xpath
#                   reported by Peregrin::Componentizer, rel links are added
#                   to every head, and toc.html, loi.html (only when figures
#                   with ids exist) and cover.html components are generated.
#           Falsy:  the clone gets a single component holding the whole
#                   index document.
#
# Returns the cloned book.
def to_book(options = {})
  bk = @book.deep_clone
  componentize = options[:componentize]

  # Ordered component xpaths; the position of an xpath in this list decides
  # the URI that uri_for_xpath assigns to it.
  cmpt_xpaths = []

  boilerplate_rel_links =
    '<link rel="start" href="cover.html" />' +
    '<link rel="contents" href="toc.html" />'

  # Componentizing.
  if componentize
    componentizer = Peregrin::Componentizer.new(index)
    componentizer.process(index.root.at_css('body'))
    bk.components = componentizer.component_xpaths.map do |xpath|
      cmpt_xpaths.push(xpath)
      component_doc = componentizer.generate_component(xpath)
      Peregrin::Component.new(uri_for_xpath(xpath, cmpt_xpaths), component_doc)
    end

    # Add rel links and convert each component document to an html string.
    first_path = bk.components.first.src
    last_path = bk.components.last.src
    boilerplate_rel_links <<
      '<link rel="first" href="' + first_path + '" />' +
      '<link rel="last" href="' + last_path + '" />'
    bk.components.each_with_index do |cmpt, i|
      head = cmpt.contents.at_xpath(HEAD_XPATH)
      prev_path = i > 0 ? bk.components[i - 1].src : nil
      next_path = (i + 1) < bk.components.size ? bk.components[i + 1].src : nil
      head.add_child(boilerplate_rel_links)
      head.add_child('<link rel="prev" href="' + prev_path + '" />') if prev_path
      head.add_child('<link rel="next" href="' + next_path + '" />') if next_path
      cmpt.contents = htmlize(cmpt.contents)
    end
  else
    cmpt_xpaths.push(BODY_XPATH)
    bk.components.clear
    bk.add_component(uri_for_xpath(BODY_XPATH), htmlize(index))
  end

  # Outlining.
  bk.chapters = outline_book(index, cmpt_xpaths)

  if componentize
    # Table of Contents: nested ordered lists that mirror the chapter tree,
    # leaving out chapters for which empty_leaf? is true.
    toc_builder = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') do |html|
      curse = lambda do |chapters|
        linkable = chapters.reject { |chp| chp.empty_leaf? }
        if linkable.any?
          html.ol do
            linkable.each do |chp|
              html.li do
                html.a(chp.title, :href => chp.src)
                curse.call(chp.children) if chp.children.any?
              end
            end
          end
        end
      end
      curse.call(bk.chapters)
    end
    doc = toc_builder.doc
    if doc.root
      toc_doc = componentizer.generate_document(doc.root)
      toc_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
      bk.add_component(
        "toc.html",
        htmlize(toc_doc),
        nil,
        :linear => "no",
        :guide => "Table of Contents",
        :guide_type => "toc"
      )
    end

    # List of Illustrations: one entry per figure that has an id, a caption
    # and an ancestor-or-self node that maps to a component.
    figures = index.css('figure[id], div.figure[id]')
    if figures.any?
      loi_builder = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') do |html|
        html.ol do
          figures.each do |fig|
            caption = fig.at_css('figcaption, .figcaption')
            next unless caption

            # Climb from the figure until a node that is a component is found.
            cmpt_uri = nil
            n = fig
            while n && n.respond_to?(:parent)
              cmpt_uri = uri_for_xpath(n.path, cmpt_xpaths)
              break if cmpt_uri
              n = n.parent
            end
            next unless cmpt_uri

            html.li do
              html.a(caption.content, :href => "#{cmpt_uri}##{fig['id']}")
            end
          end
        end
      end
      doc = loi_builder.doc
      loi_doc = componentizer.generate_document(doc.root)
      loi_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
      bk.add_component(
        "loi.html",
        htmlize(loi_doc),
        nil,
        :linear => "no",
        :guide => "List of Illustrations",
        :guide_type => "loi"
      )
    end

    # Cover: a page showing the cover image, placed before all other
    # components.
    cover_builder = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') do |html|
      html.div(:id => "cover") do
        html.img(:src => bk.cover.src, :alt => bk.property_for("title"))
      end
    end
    doc = cover_builder.doc
    cover_doc = componentizer.generate_document(doc.root)
    cover_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
    bk.components.unshift(
      Peregrin::Component.new(
        "cover.html",
        htmlize(cover_doc),
        nil,
        :linear => "no",
        :guide => "Cover",
        :guide_type => "cover"
      )
    )
  end

  bk
end
|
231
|
+
|
232
|
+
|
233
|
+
protected
|
234
|
+
|
235
|
+
# Returns the parsed HTML document of the book's first component. The
# document is parsed on first use and the same object is returned afterwards.
def index
  return @index_document if @index_document

  index_html = @book.components.first.contents
  @index_document = Nokogiri::HTML::Document.parse(index_html)
end
|
240
|
+
|
241
|
+
|
242
|
+
# Takes a book with multiple components and joins them together,
|
243
|
+
# by creating article elements from every body element and appending them
|
244
|
+
# to the body of the first component.
|
245
|
+
#
|
246
|
+
# book - the book whose components are merged; its component list is replaced
#        by a single component holding the stitched index document.
#
# The existing body content of the index is wrapped in an article element.
# The body of every further component is renamed to article and appended to
# the index body, and head children of those components (except title) that
# the index head does not already contain verbatim are copied over.
def stitch_components(book)
  wrapper = Nokogiri::XML::Node.new('article', index)
  bdy = index.at_xpath(BODY_XPATH)
  head = index.at_xpath(HEAD_XPATH)
  bdy.children.each { |child| wrapper.add_child(child) }
  bdy.add_child(wrapper)

  # The first component is the index itself, which was handled above.
  book.components.shift
  while (cmpt = book.components.shift)
    doc = Nokogiri::HTML::Document.parse(cmpt.contents)
    article = doc.at_xpath(BODY_XPATH)
    article.name = 'article'
    bdy.add_child(article)

    # Import all other unique elements from the head, like link & meta tags.
    foreign_head = doc.at_xpath(HEAD_XPATH)
    next unless foreign_head

    foreign_head.children.each do |foreign_child|
      next if foreign_child.name.downcase == "title"
      duplicate = head.children.any? { |own| own.to_s == foreign_child.to_s }
      head.add_child(foreign_child.dup) unless duplicate
    end
  end

  book.components.clear
  book.add_component(uri_for_xpath(BODY_XPATH), htmlize(index))
end
|
277
|
+
|
278
|
+
|
279
|
+
# Takes the properties out of the book and ensures that there are matching
|
280
|
+
# meta tags in the index document.
|
281
|
+
#
|
282
|
+
# book - the book whose properties are written into the index head.
#
# All meta elements that have a name attribute are removed from the head
# first, then one meta element is appended per property.
def consolidate_properties(book)
  head = index.at_xpath('/html/head')
  head.css('meta[name]').each { |stale| stale.remove }

  book.properties.each do |property|
    # FIXME: handle properties with attributes?
    tag = Nokogiri::XML::Node.new('meta', index)
    tag['name'] = property.key
    tag['content'] = property.value
    head.add_child(tag)
  end
end
|
293
|
+
|
294
|
+
|
295
|
+
# doc         - the HTML document to outline. It is only used the first time;
#               later calls reuse the outliner stored in @outliner.
# cmpt_xpaths - ordered xpaths of the nodes that are components; used to
#               work out the URI of each chapter.
#
# Returns the chapter tree as an array of Peregrin::Chapter, after descending
# through any levels that consist of one single untitled chapter.
def outline_book(doc, cmpt_xpaths = [BODY_XPATH])
  if !defined?(@outliner) || !@outliner
    @outliner = Peregrin::Outliner.new(doc)
    @outliner.process(doc.at_css('body'))
  end

  position = 0
  build = lambda do |section|
    title = section.heading_text
    position += 1
    chapter = Peregrin::Chapter.new(title, position)

    # Build chapters for the non-empty child sections.
    subchapters = []
    section.sections.each do |sub|
      next if sub.empty?
      built = build.call(sub)
      subchapters << built if built
    end
    chapter.children = subchapters unless subchapters.empty?

    # Climb from the section node until a node that is a component is found.
    cmpt_uri = nil
    n = section.node || section.heading
    while n && n.respond_to?(:parent)
      cmpt_uri = uri_for_xpath(n.path, cmpt_xpaths)
      break if cmpt_uri
      n = n.parent
    end

    if cmpt_uri
      # Point at the heading itself when it carries a non-empty id.
      sid = section.heading ? section.heading['id'] : nil
      cmpt_uri += "#" + sid if sid && !sid.empty?
      chapter.src = cmpt_uri
    end

    chapter
  end

  result = build.call(@outliner.result_root).children
  result = result.first.children while result && result.length == 1 && result.first.title.nil?
  result
end
|
334
|
+
|
335
|
+
|
336
|
+
# xpath       - xpath to look up.
# cmpt_xpaths - ordered list of component xpaths.
#
# Returns nil when xpath is not in the list, "index.html" for the first
# entry, and "partNNN.html" (zero-padded list position) for any other entry.
def uri_for_xpath(xpath, cmpt_xpaths = [BODY_XPATH])
  position = cmpt_xpaths.index(xpath)
  return nil if position.nil?
  return "index.html" if position == 0

  "part#{"%03d" % position}.html"
end
|
341
|
+
|
342
|
+
|
343
|
+
# doc - a document whose root responds to to_html.
#
# Returns the root's HTML preceded by an HTML5 doctype line.
def htmlize(doc)
  markup = doc.root.to_html
  "<!DOCTYPE html>\n" + markup
end
|
346
|
+
|
347
|
+
|
348
|
+
# resource - the resource to render as PNG (may be nil).
#
# Returns nil for a nil resource. A resource whose src ends in ".png" is
# returned as read from the book. Anything else is piped through the external
# `convert` command and its output is returned.
#
# Raises ConvertUtilityMissing when `which convert` does not succeed.
def to_png_data(resource)
  return if resource.nil?
  return @book.read_resource(resource) if File.extname(resource.src) == ".png"

  # A backtick call evaluates to a String, and every String (even "") is
  # truthy, so testing its value can never detect a missing utility. Test
  # whether the lookup command succeeded instead; system returns false or nil
  # on failure.
  unless system("which", "convert", :out => File::NULL, :err => File::NULL)
    raise ConvertUtilityMissing
  end

  out = nil
  IO.popen("convert - png:-", "r+") { |io|
    io.write(@book.read_resource(resource))
    # Close our writing end so convert sees end-of-input and emits the PNG.
    io.close_write
    out = io.read
  }
  out
end
|
363
|
+
|
364
|
+
|
365
|
+
# book - a book whose first component holds the index HTML.
#
# Adds one property to the book for every meta element with a name attribute
# found under html > head, using the element's name and content attributes.
def self.extract_properties_from_index(book)
  index_html = book.components.first.contents
  metas = Nokogiri::HTML::Document.parse(index_html).css('html head meta[name]')
  metas.each do |meta|
    book.add_property(meta['name'], meta['content'])
  end
end
|
375
|
+
|
376
|
+
|
377
|
+
# Base class of the errors raised while validating a file.
class ValidationError < ::RuntimeError

  # Path of the offending file, or nil when none was given.
  attr_reader :path

  # path - path of the file that failed validation (optional). When given it
  #        is included in the exception message; otherwise the message is the
  #        default one (the class name).
  def initialize(path = nil)
    @path = path
    super(path ? "#{self.class.name}: #{path}" : nil)
  end

end

# One subclass per failed validation check.
class FileNotFound < ValidationError; end
class WrongExtension < ValidationError; end
class NotAZipArchive < ValidationError; end
class MissingIndexHTML < ValidationError; end
class MissingCoverPNG < ValidationError; end
class IndexHTMLRootHasId < ValidationError; end

# Raised when the external `convert` utility is needed but unavailable.
class ConvertUtilityMissing < RuntimeError; end
|
393
|
+
|
394
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# In-memory representation of a book: components, chapters, properties,
# resources and a cover, plus a pluggable way to read resource contents.
class Peregrin::Book

  # Unique identifier for this book
  attr_accessor :identifier

  # An array of Components
  attr_accessor :components

  # A tree of Chapters. Top-level chapters in this array, each with
  # children arrays.
  attr_accessor :chapters

  # An array of Properties.
  attr_accessor :properties

  # An array of Resources.
  attr_accessor :resources

  # A Resource that is used for the book cover.
  attr_accessor :cover

  # A proc that is called with a resource and returns that resource's
  # contents; read_resource delegates to it.
  attr_writer :read_resource_proc


  def initialize
    @components = []
    @chapters = []
    @properties = []
    @resources = []
  end


  # Returns a new array of the components followed by the resources.
  def all_files
    @components + @resources
  end


  # Appends a Peregrin::Component built from args and returns it.
  def add_component(*args)
    @components.push(Peregrin::Component.new(*args)).last
  end


  # Appends a Peregrin::Resource built from args and returns it.
  def add_resource(*args)
    @resources.push(Peregrin::Resource.new(*args)).last
  end


  # Appends a top-level Peregrin::Chapter built from args and returns it.
  def add_chapter(*args)
    @chapters.push(Peregrin::Chapter.new(*args)).last
  end


  # Appends a Peregrin::Property built from args and returns it.
  def add_property(*args)
    @properties.push(Peregrin::Property.new(*args)).last
  end


  # Returns the value of the first property whose key equals key.to_s, or
  # nil if there is no such property.
  def property_for(key)
    key = key.to_s
    prop = @properties.detect { |p| p.key == key }
    prop ? prop.value : nil
  end


  # Returns whatever the read_resource_proc yields for the given argument,
  # or nil when no proc has been assigned.
  def read_resource(resource_path)
    @read_resource_proc.call(resource_path) if @read_resource_proc
  end


  # Writes the contents of the resource to the file at dest_path.
  def copy_resource_to(resource_path, dest_path)
    # Binary mode: resources include images, which must be written without
    # newline or encoding translation.
    File.open(dest_path, 'wb') { |f|
      f << read_resource(resource_path)
    }
  end


  # Returns a deep copy of this book made with a Marshal round trip. The
  # read_resource_proc is shared with the copy rather than duplicated.
  def deep_clone
    reader = defined?(@read_resource_proc) ? @read_resource_proc : nil
    # Procs cannot be marshalled, so detach the proc while dumping ...
    @read_resource_proc = nil
    begin
      copy = Marshal.load(Marshal.dump(self))
    ensure
      # ... and always put it back, even when dumping raises.
      @read_resource_proc = reader
    end
    copy.read_resource_proc = reader
    copy
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Books have nested sections with headings - each of these is a chapter.
|
2
|
+
#
|
3
|
+
# TODO: flag whether a chapter is linkable?
|
4
|
+
#
|
5
|
+
class Peregrin::Chapter

  attr_accessor :title, :src, :children, :position

  # title - chapter title or nil; carriage returns and newlines in it are
  #         each replaced by a space.
  # pos   - position of the chapter, stored as an integer (pos.to_i).
  # src   - URI the chapter links to, or nil.
  def initialize(title, pos, src = nil)
    @title = title.gsub(/[\r\n]/, ' ') if title
    @src = src
    @position = pos.to_i
    @children = []
  end


  # Creates a chapter from the arguments, appends it to children and
  # returns it.
  def add_child(child_title, child_pos, child_src = nil)
    Peregrin::Chapter.new(child_title, child_pos, child_src).tap do |chapter|
      children.push(chapter)
    end
  end


  # True when neither this chapter nor any chapter below it has a src, i.e.
  # nothing in this subtree can be linked to.
  def empty_leaf?
    return false unless src.nil?

    children.all? { |child| child.empty_leaf? }
  end

end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# A component is a section of the book's linear text.
|
2
|
+
#
|
3
|
+
class Peregrin::Component < Peregrin::Resource

  # The content of this component, as handed to the constructor or assigned
  # later.
  attr_accessor :contents

  # src        - passed on to Peregrin::Resource.
  # contents   - content of the component (optional).
  # media_type - passed on to Peregrin::Resource (optional).
  # attributes - passed on to Peregrin::Resource (defaults to an empty Hash).
  def initialize(src, contents = nil, media_type = nil, attributes = {})
    @contents = contents
    super(src, media_type, attributes)
  end

end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
class Peregrin::Componentizer
|
2
|
+
|
3
|
+
attr_reader :component_xpaths
|
4
|
+
|
5
|
+
|
6
|
+
# doc - the document whose nodes are examined and copied by this instance.
def initialize(doc)
  @document, @component_xpaths = doc, []
end
|
10
|
+
|
11
|
+
|
12
|
+
# Build a list of xpaths for nodes that can be turned into standalone
|
13
|
+
# components.
|
14
|
+
#
|
15
|
+
# from - the node at which the walk starts.
#
# Rebuilds the list of component xpaths from scratch: every componentizable
# node reachable from `from` is collected, then xpaths whose node is
# "emptied" (see emptied?) are dropped.
#
# Returns the resulting array of xpaths (also available as component_xpaths).
def process(from)
  @component_xpaths = []
  walk(from)
  # reject! evaluates to nil when nothing was removed, so it must not be the
  # last expression if callers are to get the list back consistently.
  @component_xpaths.reject! { |xpath| emptied?(xpath) }
  @component_xpaths
end
|
20
|
+
|
21
|
+
|
22
|
+
# xpath - xpath of a node previously collected by process.
#
# Returns the standalone document generated for that node.
# Raises RuntimeError when xpath is not one of the component xpaths.
def generate_component(xpath)
  unless @component_xpaths.include?(xpath)
    raise "Not a component: #{xpath}"
  end

  generate_document(@document.at_xpath(xpath))
end
|
27
|
+
|
28
|
+
|
29
|
+
# Creates a new document with the same root and head nodes, but with
|
30
|
+
# a body that just contains the nodes at the given xpath.
|
31
|
+
#
|
32
|
+
# node - the node to place in the body of the generated document.
#
# A copy of the source document (made once and reused) serves as a shell. Its
# body is emptied, then a copy of node - minus any direct children that are
# components themselves - is put in: replacing the body if node is a body
# element, appended to the body otherwise.
#
# Returns a copy of the shell document.
def generate_document(node)
  # Clean up the "shell" document.
  @shell_document ||= @document.dup
  shell_body = @shell_document.at_xpath('/html/body')
  shell_body.children.remove

  # Locate, inside the copy, the counterparts of the children that are
  # components in their own right. All of them are located before any is
  # unlinked.
  copy = node.dup
  nested = []
  node.children.each do |child|
    next unless component_xpaths.include?(child.path)
    twin_path = child.path.sub(/^#{Regexp.escape(node.path)}/, copy.path)
    twin = copy.children.detect { |candidate| candidate.path == twin_path }
    nested << twin if twin
  end
  nested.each { |twin| twin.unlink }

  # Append the copy to the body of the shell (or replace the body, if the
  # node is a body itself).
  if node.name.downcase == "body"
    shell_body.replace(copy)
  else
    shell_body.add_child(copy)
  end

  @shell_document.dup
end
|
60
|
+
|
61
|
+
|
62
|
+
# Writes the componentizable node at the given xpath to the given
|
63
|
+
# filesystem path.
|
64
|
+
#
|
65
|
+
# If you provide a block, you get the new document object,
|
66
|
+
# and you are expected to return the string containing its HTML form --
|
67
|
+
# in this way you can tweak the HTML output. Default is simply: doc.to_html
|
68
|
+
#
|
69
|
+
# xpath - xpath of the component to write.
# path  - filesystem path of the output file.
# blk   - optional block; receives the generated document and must return
#         the string to write. Without a block the document's to_html is used.
#
# Returns the string that was written.
def write_component(xpath, path, &blk)
  new_doc = generate_component(xpath)
  out =
    if block_given?
      blk.call(new_doc)
    else
      new_doc.to_html
    end
  File.open(path, 'w') do |file|
    file.write(out)
  end
  out
end
|
75
|
+
|
76
|
+
|
77
|
+
protected
|
78
|
+
|
79
|
+
# The recursive method for walking the tree - checks if the current node
|
80
|
+
# is a component, then checks each child of the current node.
|
81
|
+
#
|
82
|
+
# node - the node to examine.
#
# Records the node's path when the node is componentizable and then descends
# into its children; a node that is not componentizable ends the descent.
def walk(node)
  if componentizable?(node)
    @component_xpaths << node.path
    node.children.each { |child| walk(child) }
  end
end
|
87
|
+
|
88
|
+
|
89
|
+
# True if the node meets the criteria for being componentizable:
|
90
|
+
# 1) Is a body or article element (or a div.article)?
|
91
|
+
# 2) Are all subsequent siblings also componentizable?
|
92
|
+
#
|
93
|
+
# node - the node to test.
#
# Returns true only if the node and every following sibling (reached via
# next) is a body or article element, or a div whose class attribute
# contains the word "article"; false as soon as one of them is not.
def componentizable?(node)
  current = node
  while true
    tag = current.name.downcase
    sectioning = %w[body article].include?(tag)
    article_div = tag == "div" &&
                  current['class'] &&
                  current['class'].match(/\barticle\b/)
    return false unless sectioning || article_div

    current = current.next
    break unless current
  end
  true
end
|
106
|
+
|
107
|
+
|
108
|
+
# True if all children are either componentizable or blank text nodes.
|
109
|
+
#
|
110
|
+
# xpath - xpath of the node to inspect.
#
# Returns true when every child of the node is either a component itself or
# a text node containing only whitespace (so a node with no children counts
# as emptied).
def emptied?(xpath)
  @document.at_xpath(xpath).children.all? do |child|
    next true if @component_xpaths.include?(child.path)

    child.text? && child.content.strip.empty?
  end
end
|
117
|
+
|
118
|
+
end
|