peregrin 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,394 @@
1
+ class Peregrin::Zhook
2
+
3
# Human-readable name of this book format.
FORMAT = "Zhook"

# File extension a Zhook archive must carry.
FILE_EXT = ".zhook"
# Archive entry that holds the book's HTML document.
INDEX_PATH = "index.html"
# Archive entry that holds the cover image.
COVER_PATH = "cover.png"
# Absolute XPath of the body element in an HTML document.
BODY_XPATH = '/html/body'
# Absolute XPath of the head element in an HTML document.
HEAD_XPATH = '/html/head'
10
+
11
# Checks that the file at path is a valid Zhook archive.
#
# path - filesystem path of the candidate .zhook file.
#
# Returns true when every check passes.
#
# Raises FileNotFound if path is not a regular file, WrongExtension if
# its extension is not FILE_EXT, NotAZipArchive if the archive cannot be
# opened, MissingIndexHTML / MissingCoverPNG if the respective entry is
# absent, and IndexHTMLRootHasId if the root element of the index
# document carries an id attribute.
#
def self.validate(path)
  raise FileNotFound.new(path) unless File.file?(path)
  raise WrongExtension.new(path) unless File.extname(path) == FILE_EXT
  begin
    zf = Zip::Archive.open(path)
  rescue
    # Any failure to open is reported as "not a zip archive".
    raise NotAZipArchive.new(path)
  end

  raise MissingIndexHTML.new(path) unless zf.find(INDEX_PATH)
  raise MissingCoverPNG.new(path) unless zf.find(COVER_PATH)

  doc = Nokogiri::HTML::Document.parse(zf.read(INDEX_PATH), nil, 'UTF-8')
  raise IndexHTMLRootHasId.new(path) if doc.root['id']

  # Explicit success value, as promised by the documentation (the method
  # previously fell off the end and returned nil).
  true
ensure
  # zf is nil when we failed before (or while) opening the archive.
  zf.close if zf
end
37
+
38
+
39
# Loads the Zhook archive at path into a new Zhook instance.
#
# The archive is validated first. The index document becomes the sole
# component of a fresh Peregrin::Book, every other non-directory entry is
# registered as a resource, and resource contents are fetched lazily by
# re-opening the archive on demand.
#
def self.read(path)
  validate(path)

  book = Peregrin::Book.new
  Zip::Archive.open(path) do |archive|
    book.add_component(INDEX_PATH, archive.read(INDEX_PATH))
    archive.each do |entry|
      entry_name = entry.name
      next if entry_name == INDEX_PATH || entry.directory?
      book.add_resource(entry_name)
    end
  end

  # Resources are read straight out of the archive whenever requested.
  book.read_resource_proc = lambda do |resource|
    Zip::Archive.open(path) { |archive| archive.read(resource.src) }
  end

  extract_properties_from_index(book)

  new(book)
end
61
+
62
+
63
# Wraps the given book, normalising it into Zhook shape:
# multiple components are stitched into one document, the book's
# properties are mirrored into meta tags, the chapter outline is rebuilt
# from the index document, and a cover resource is guaranteed to be set.
#
def initialize(book)
  @book = book

  stitch_components(@book) if @book.components.length > 1
  consolidate_properties(@book)
  @book.chapters = outline_book(index)

  # Keep an existing cover; otherwise fall back to the COVER_PATH
  # resource, registering it if the book does not list it yet.
  return if @book.cover
  listed_cover = @book.resources.detect { |r| r.src == COVER_PATH }
  @book.cover = listed_cover || @book.add_resource(COVER_PATH)
end
81
+
82
+
83
# Writes the internal book object to a .zhook file at the given path.
#
# Any existing file at path is removed first. The archive receives the
# index document, every resource of the book, and -- when the book's
# cover is not already stored at COVER_PATH -- a PNG rendition of the
# cover under COVER_PATH.
#
# Returns the path that was written.
#
def write(path)
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  File.unlink(path) if File.exist?(path)
  Zip::Archive.open(path, Zip::CREATE) { |zipfile|
    zipfile.add_buffer(INDEX_PATH, htmlize(index))
    @book.resources.each { |resource|
      zipfile.add_buffer(resource.src, @book.read_resource(resource))
    }
    unless @book.cover.src == COVER_PATH
      zipfile.add_buffer(COVER_PATH, to_png_data(@book.cover))
    end
  }
  path
end
98
+
99
+
100
# Builds a Peregrin::Book from this Zhook.
#
# options - Hash of conversion options:
#           :componentize - when truthy, the index document is split
#                           into one component per node reported by
#                           Peregrin::Componentizer, rel links are added
#                           to each component's head, and generated
#                           "cover.html", "toc.html" and (if the book has
#                           captioned figures with ids) "loi.html"
#                           components are added. When falsy the book
#                           gets a single "index.html" component.
#
# Returns a deep clone of the internal book with its components and
# chapters rebuilt as described above.
#
def to_book(options = {})
  bk = @book.deep_clone

  # XPath => URI mapping tools
  cmpt_xpaths = []

  boilerplate_rel_links =
    '<link rel="start" href="cover.html" />' +
    '<link rel="contents" href="toc.html" />'

  # Componentizing.
  if options[:componentize]
    componentizer = Peregrin::Componentizer.new(index)
    componentizer.process(index.root.at_css('body'))
    bk.components = componentizer.component_xpaths.collect { |xpath|
      # The xpath must be registered before uri_for_xpath can resolve it.
      cmpt_xpaths.push(xpath)
      doc = componentizer.generate_component(xpath)
      Peregrin::Component.new(uri_for_xpath(xpath, cmpt_xpaths), doc)
    }

    # Add rel links and convert to html string. The first/last links can
    # only be produced when componentizing yielded at least one component.
    if bk.components.any?
      first_path = bk.components.first.src
      last_path = bk.components.last.src
      boilerplate_rel_links +=
        '<link rel="first" href="'+first_path+'" />' +
        '<link rel="last" href="'+last_path+'" />'
    end
    bk.components.each_with_index { |cmpt, i|
      head = cmpt.contents.at_xpath(HEAD_XPATH)
      prev_path = bk.components[i-1].src if i > 0
      next_path = bk.components[i+1].src if (i+1) < bk.components.size
      head.add_child(boilerplate_rel_links)
      head.add_child('<link rel="prev" href="'+prev_path+'" />') if prev_path
      head.add_child('<link rel="next" href="'+next_path+'" />') if next_path
      cmpt.contents = htmlize(cmpt.contents)
    }
  else
    cmpt_xpaths.push(BODY_XPATH)
    bk.components.clear
    bk.add_component(uri_for_xpath(BODY_XPATH), htmlize(index))
  end

  # Outlining.
  bk.chapters = outline_book(index, cmpt_xpaths)

  if options[:componentize]
    # Table of Contents
    doc = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') { |html|
      # Recursively emits nested <ol> lists, skipping chapters that have
      # nothing linkable in them.
      curse = lambda { |children|
        parts = children.collect { |chp|
          chp.empty_leaf? ? nil : [chp.title, chp.src, chp.children]
        }.compact

        html.ol {
          parts.each { |part|
            html.li {
              html.a(part[0], :href => part[1])
              curse.call(part[2]) if part[2].any?
            }
          }
        } if parts.any?
      }
      curse.call(bk.chapters)
    }.doc
    # The builder document has no root when no chapter was linkable.
    if doc.root
      toc_doc = componentizer.generate_document(doc.root)
      toc_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
      bk.add_component(
        "toc.html",
        htmlize(toc_doc),
        nil,
        :linear => "no",
        :guide => "Table of Contents",
        :guide_type => "toc"
      )
    end

    # List of Illustrations
    figures = index.css('figure[id], div.figure[id]')
    if figures.any?
      doc = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') { |html|
        html.ol {
          figures.each { |fig|
            next unless caption = fig.at_css('figcaption, .figcaption')
            # Walk up the ancestors until one of them is a component.
            n = fig
            while n && n.respond_to?(:parent)
              break if cmpt_uri = uri_for_xpath(n.path, cmpt_xpaths)
              n = n.parent
            end
            next unless cmpt_uri
            html.li {
              html.a(caption.content, :href => "#{cmpt_uri}##{fig['id']}")
            }
          }
        }
      }.doc
      loi_doc = componentizer.generate_document(doc.root)
      loi_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
      bk.add_component(
        "loi.html",
        htmlize(loi_doc),
        nil,
        :linear => "no",
        :guide => "List of Illustrations",
        :guide_type => "loi"
      )
    end

    # Cover
    doc = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') { |html|
      html.div(:id => "cover") {
        html.img(:src => bk.cover.src, :alt => bk.property_for("title"))
      }
    }.doc
    cover_doc = componentizer.generate_document(doc.root)
    cover_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
    # The cover goes in front of every other component.
    bk.components.unshift(
      Peregrin::Component.new(
        "cover.html",
        htmlize(cover_doc),
        nil,
        :linear => "no",
        :guide => "Cover",
        :guide_type => "cover"
      )
    )
  end

  bk
end
231
+
232
+
233
+ protected
234
+
235
# Returns the parsed HTML document of the book's first component.
# The document is parsed on first use and cached afterwards.
#
def index
  return @index_document if @index_document
  first_component = @book.components.first
  @index_document = Nokogiri::HTML::Document.parse(first_component.contents)
end
240
+
241
+
242
# Merges a multi-component book into a single index document.
#
# The existing body content of the index is wrapped in an article
# element; the body of every further component is renamed to article and
# appended to the index body. Head children of the other components
# (except title) are copied into the index head unless an identically
# serialised node is already there. Afterwards the book holds exactly one
# component containing the stitched document.
#
def stitch_components(book)
  wrapper = Nokogiri::XML::Node.new('article', index)
  body = index.at_xpath(BODY_XPATH)
  head = index.at_xpath(HEAD_XPATH)
  body.children.each { |child| wrapper.add_child(child) }
  body.add_child(wrapper)

  # The first component is the index itself and is already accounted for.
  book.components.shift
  while (component = book.components.shift)
    doc = Nokogiri::HTML::Document.parse(component.contents)
    article = doc.at_xpath(BODY_XPATH)
    article.name = 'article'
    body.add_child(article)

    # Import all other unique elements from the head, like link & meta tags.
    foreign_head = doc.at_xpath(HEAD_XPATH)
    if foreign_head
      foreign_head.children.each do |foreign_child|
        next if foreign_child.name.downcase == "title"
        serialized = foreign_child.to_s
        next if head.children.any? { |own_child| own_child.to_s == serialized }
        head.add_child(foreign_child.dup)
      end
    end
  end

  book.components.clear
  book.add_component(uri_for_xpath(BODY_XPATH), htmlize(index))
end
277
+
278
+
279
# Rewrites the named meta tags of the index document from the book's
# properties: every existing meta[name] element in the head is removed,
# then one meta element (name/content) is appended per property.
#
def consolidate_properties(book)
  head = index.at_xpath('/html/head')
  head.css('meta[name]').each(&:remove)

  book.properties.each do |property|
    # FIXME: handle properties with attributes?
    meta = Nokogiri::XML::Node.new('meta', index)
    meta['name'] = property.key
    meta['content'] = property.value
    head.add_child(meta)
  end
end
293
+
294
+
295
# Builds the chapter tree for doc.
#
# doc         - the HTML document to outline. The outliner is created
#               once and reused on later calls.
# cmpt_xpaths - Array of component xpaths used to resolve the URI each
#               chapter links to.
#
# Returns an Array of top-level Peregrin::Chapter objects. Wrapper levels
# that consist of a single untitled chapter are unwrapped.
#
def outline_book(doc, cmpt_xpaths = [BODY_XPATH])
  unless defined?(@outliner) && @outliner
    @outliner = Peregrin::Outliner.new(doc)
    @outliner.process(doc.at_css('body'))
  end

  position = 0
  build_chapter = lambda do |section|
    title = section.heading_text
    position += 1
    chapter = Peregrin::Chapter.new(title, position)

    # Recurse into every non-empty child section.
    subchapters = []
    section.sections.each do |subsection|
      subchapters << build_chapter.call(subsection) unless subsection.empty?
    end
    chapter.children = subchapters if subchapters.any?

    # Climb from the section towards the root until a component is found.
    component_uri = nil
    node = section.node || section.heading
    while node && node.respond_to?(:parent)
      component_uri = uri_for_xpath(node.path, cmpt_xpaths)
      break if component_uri
      node = node.parent
    end

    if component_uri
      # Point at the heading's id when it has one.
      anchor = section.heading['id'] if section.heading
      component_uri += "#"+anchor if anchor && !anchor.empty?
      chapter.src = component_uri
    end

    chapter
  end

  chapters = build_chapter.call(@outliner.result_root).children
  while chapters && chapters.length == 1 && chapters.first.title.nil?
    chapters = chapters.first.children
  end
  chapters
end
334
+
335
+
336
# Maps a component xpath to the filename of that component.
#
# xpath       - the xpath to look up.
# cmpt_xpaths - ordered Array of known component xpaths.
#
# Returns "index.html" for the first xpath in the list, "partNNN.html"
# (zero-padded position) for later ones, and nil if xpath is not listed.
#
def uri_for_xpath(xpath, cmpt_xpaths = [BODY_XPATH])
  position = cmpt_xpaths.index(xpath)
  return nil if position.nil?
  return "index.html" if position.zero?
  format("part%03d.html", position)
end
341
+
342
+
343
# Serialises doc as an HTML string: an HTML5 doctype line followed by the
# markup of the document's root element.
#
def htmlize(doc)
  markup = doc.root.to_html
  "<!DOCTYPE html>\n" + markup
end
346
+
347
+
348
# Returns the contents of resource as PNG data.
#
# resource - the resource to convert; may be nil.
#
# A resource whose src ends in ".png" is returned unchanged. Anything
# else is piped through the external `convert` command.
#
# Returns a String of image data, or nil when resource is nil.
# Raises ConvertUtilityMissing when conversion is needed but no
# `convert` executable can be found on the PATH.
#
def to_png_data(resource)
  return if resource.nil?
  return @book.read_resource(resource) if File.extname(resource.src) == ".png"

  # The previous check used the *output* of `which convert`, which is a
  # String and therefore always truthy -- the error could never be raised.
  raise ConvertUtilityMissing unless convert_available?

  out = nil
  IO.popen("convert - png:-", "r+") { |io|
    io.write(@book.read_resource(resource))
    # Signal end of input so convert starts producing output.
    io.close_write
    out = io.read
  }
  out
end

# True if an executable named "convert" exists in a PATH directory.
#
def convert_available?
  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? { |dir|
    next false if dir.empty?
    candidate = File.join(dir, 'convert')
    File.file?(candidate) && File.executable?(candidate)
  }
end
363
+
364
+
365
# Reads every named meta tag from the head of the book's first component
# and adds it to the book as a property (name => content).
#
def self.extract_properties_from_index(book)
  markup = book.components.first.contents
  index_doc = Nokogiri::HTML::Document.parse(markup)
  index_doc.css('html head meta[name]').each do |meta|
    book.add_property(meta['name'], meta['content'])
  end
end
375
+
376
+
377
# Base class for every problem found while validating a Zhook file.
#
class ValidationError < ::RuntimeError

  # Path of the file that failed validation (nil if none was given).
  attr_reader :path

  # path - filesystem path of the offending file. When given it is
  #        included in the exception message so the failure can be traced
  #        back to a file; without it the default message is used.
  #
  def initialize(path = nil)
    @path = path
    super(path ? "#{self.class.name}: #{path}" : nil)
  end

end

# The path does not point at a regular file.
class FileNotFound < ValidationError; end
# The file does not carry the Zhook extension.
class WrongExtension < ValidationError; end
# The file could not be opened as a zip archive.
class NotAZipArchive < ValidationError; end
# The archive has no index document.
class MissingIndexHTML < ValidationError; end
# The archive has no cover image.
class MissingCoverPNG < ValidationError; end
# The root element of the index document has an id attribute.
class IndexHTMLRootHasId < ValidationError; end

# The external `convert` command is needed but unavailable.
class ConvertUtilityMissing < RuntimeError; end
393
+
394
+ end
@@ -0,0 +1,87 @@
1
# Format-neutral model of a book: components (linear text), resources,
# chapters, properties and a cover.
#
class Peregrin::Book

  # Unique identifier for this book
  attr_accessor :identifier

  # An array of Components
  attr_accessor :components

  # A tree of Chapters. Top-level chapters in this array, each with
  # children arrays.
  attr_accessor :chapters

  # An array of Properties.
  attr_accessor :properties

  # An array of Resources.
  attr_accessor :resources

  # A Resource that is used for the book cover.
  attr_accessor :cover

  # A proc that is called with a resource and returns that resource's
  # contents (see read_resource).
  attr_writer :read_resource_proc


  def initialize
    @components = []
    @chapters = []
    @properties = []
    @resources = []
  end


  # Returns a new array holding all components followed by all resources.
  #
  def all_files
    @components + @resources
  end


  # Creates a Peregrin::Component from args, appends it to the book and
  # returns it.
  #
  def add_component(*args)
    @components.push(Peregrin::Component.new(*args)).last
  end


  # Creates a Peregrin::Resource from args, appends it to the book and
  # returns it.
  #
  def add_resource(*args)
    @resources.push(Peregrin::Resource.new(*args)).last
  end


  # Creates a Peregrin::Chapter from args, appends it to the top level of
  # the chapter tree and returns it.
  #
  def add_chapter(*args)
    @chapters.push(Peregrin::Chapter.new(*args)).last
  end


  # Creates a Peregrin::Property from args, appends it to the book and
  # returns it.
  #
  def add_property(*args)
    @properties.push(Peregrin::Property.new(*args)).last
  end


  # Returns the value of the first property whose key equals key (compared
  # as a string), or nil if there is no such property.
  #
  def property_for(key)
    key = key.to_s
    prop = @properties.detect { |p| p.key == key }
    prop ? prop.value : nil
  end


  # Returns whatever the read_resource_proc yields for resource_path, or
  # nil when no proc has been assigned.
  #
  def read_resource(resource_path)
    @read_resource_proc.call(resource_path) if @read_resource_proc
  end


  # Writes the contents of the resource to the file at dest_path.
  #
  # The file is opened in binary mode: resources are frequently images or
  # fonts, and text mode would mangle them on platforms that translate
  # line endings.
  #
  def copy_resource_to(resource_path, dest_path)
    File.open(dest_path, 'wb') { |f|
      f << read_resource(resource_path)
    }
  end


  # Returns a deep copy of this book, made by a Marshal round trip.
  #
  # Procs cannot be marshalled, so the read_resource_proc is detached for
  # the duration of the dump and then shared with the clone. The ensure
  # guarantees this book gets its proc back even if marshalling fails.
  #
  def deep_clone
    @read_resource_proc ||= nil
    saved_proc = @read_resource_proc
    @read_resource_proc = nil
    begin
      clone = Marshal.load(Marshal.dump(self))
    ensure
      @read_resource_proc = saved_proc
    end
    clone.read_resource_proc = saved_proc
    clone
  end

end
@@ -0,0 +1,31 @@
1
# A chapter is one node of the book's outline: books have nested sections
# with headings, and each of those sections is a chapter.
#
# TODO: flag whether a chapter is linkable?
#
class Peregrin::Chapter

  attr_accessor :title, :src, :children, :position

  # title - heading text, or nil; line breaks are replaced by spaces.
  # pos   - position of the chapter, converted with to_i.
  # src   - URI the chapter links to, if any.
  #
  def initialize(title, pos, src = nil)
    @title = title.gsub(/[\r\n]/,' ') if title
    @position = pos.to_i
    @src = src
    @children = []
  end


  # Builds a new chapter, appends it to this chapter's children and
  # returns it.
  #
  def add_child(child_title, child_pos, child_src = nil)
    Peregrin::Chapter.new(child_title, child_pos, child_src).tap { |chapter|
      children.push(chapter)
    }
  end


  # True when neither this chapter nor any of its descendants can be
  # linked to. Such chapters are normally left out of a Table of Contents.
  #
  def empty_leaf?
    return false unless src.nil?
    children.all? { |child| child.empty_leaf? }
  end

end
@@ -0,0 +1,12 @@
1
# A component is one piece of the book's linear text: a resource that
# additionally carries its own contents.
#
class Peregrin::Component < Peregrin::Resource

  # The contents of the component.
  attr_accessor :contents

  # src, media_type and attributes are handed on to Peregrin::Resource;
  # contents is kept on the component itself.
  #
  def initialize(src, contents = nil, media_type = nil, attributes = {})
    @contents = contents
    super(src, media_type, attributes)
  end

end
@@ -0,0 +1,118 @@
1
# Splits a single HTML document into standalone component documents, one
# per componentizable node (body, article or div.article).
#
class Peregrin::Componentizer

  # XPaths of the nodes found by the last call to process.
  attr_reader :component_xpaths


  def initialize(doc)
    @document = doc
    @component_xpaths = []
  end


  # Collects the xpaths of all nodes at or below `from` that can become
  # standalone components, then drops those that would end up empty.
  #
  def process(from)
    @component_xpaths = []
    walk(from)
    @component_xpaths.reject! { |xpath| emptied?(xpath) }
  end


  # Returns a new document for the component at xpath.
  # Raises a RuntimeError if xpath was not reported by process.
  #
  def generate_component(xpath)
    unless @component_xpaths.include?(xpath)
      raise "Not a component: #{xpath}"
    end
    generate_document(@document.at_xpath(xpath))
  end


  # Returns a new document that shares root and head with the source
  # document but whose body holds only a copy of the given node.
  #
  def generate_document(node)
    # Start from an emptied copy of the source document.
    @shell_document ||= @document.dup
    shell_body = @shell_document.at_xpath('/html/body')
    shell_body.children.remove

    # Deep-copy the node, then strip from the copy every child that is a
    # component in its own right. Children are matched between original
    # and copy by translating their paths.
    copy = node.dup
    nested_components = node.children.collect { |child|
      next unless component_xpaths.include?(child.path)
      copy_path = child.path.sub(/^#{Regexp.escape(node.path)}/, copy.path)
      copy.children.detect { |copied| copied.path == copy_path }
    }.compact
    nested_components.each { |copied| copied.unlink }

    # A body node replaces the shell's body; anything else goes inside it.
    if node.name.downcase == "body"
      shell_body.replace(copy)
    else
      shell_body.add_child(copy)
    end

    @shell_document.dup
  end


  # Generates the component at xpath and writes it to the filesystem path.
  #
  # With a block, the block receives the new document and must return the
  # HTML string to write; without one, doc.to_html is used.
  #
  # Returns the string that was written.
  #
  def write_component(xpath, path, &blk)
    component_doc = generate_component(xpath)
    html =
      if blk
        blk.call(component_doc)
      else
        component_doc.to_html
      end
    File.open(path, 'w') { |file| file.write(html) }
    html
  end


  protected

    # Depth-first walk: records the node if it is componentizable and only
    # then descends into its children.
    #
    def walk(node)
      if componentizable?(node)
        @component_xpaths.push(node.path)
        node.children.each { |child| walk(child) }
      end
    end


    # True if the node and every sibling after it is a body or article
    # element, or a div whose class contains the word "article".
    #
    def componentizable?(node)
      current = node
      loop do
        return false unless article_like?(current)
        current = current.next
        break unless current
      end
      true
    end


    # True if this single node is a body/article element or a div.article.
    #
    def article_like?(node)
      tag = node.name.downcase
      return true if %w[body article].include?(tag)
      tag == "div" && node['class'] && node['class'].match(/\barticle\b/)
    end


    # True if every child of the node at xpath is either a component
    # itself or a blank text node.
    #
    def emptied?(xpath)
      node = @document.at_xpath(xpath)
      node.children.all? { |child|
        @component_xpaths.include?(child.path) ||
          (child.text? && child.content.strip.empty?)
      }
    end

end