peregrin 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ class Peregrin::Zhook
2
+
3
+ FORMAT = "Zhook"
4
+
5
+ FILE_EXT = ".zhook"
6
+ INDEX_PATH = "index.html"
7
+ COVER_PATH = "cover.png"
8
+ BODY_XPATH = '/html/body'
9
+ HEAD_XPATH = '/html/head'
10
+
11
+ # Raises an exception if file at path is not a valid Zhook. Otherwise
12
+ # returns true.
13
+ #
14
+ def self.validate(path)
15
+ raise FileNotFound.new(path) unless File.file?(path)
16
+ raise WrongExtension.new(path) unless File.extname(path) == FILE_EXT
17
+ begin
18
+ zf = Zip::Archive.open(path)
19
+ rescue
20
+ raise NotAZipArchive.new(path)
21
+ end
22
+
23
+ unless zf.find(INDEX_PATH)
24
+ raise MissingIndexHTML.new(path)
25
+ end
26
+
27
+ unless zf.find(COVER_PATH)
28
+ raise MissingCoverPNG.new(path)
29
+ end
30
+
31
+ doc = Nokogiri::HTML::Document.parse(zf.read(INDEX_PATH), nil, 'UTF-8')
32
+ raise IndexHTMLRootHasId.new(path) if doc.root['id']
33
+
34
+ ensure
35
+ zf.close if zf
36
+ end
37
+
38
+
39
+ # Unzips the file at path, generates a simple book object, passes to new.
40
+ #
41
+ def self.read(path)
42
+ validate(path)
43
+ book = Peregrin::Book.new
44
+ Zip::Archive.open(path) { |zf|
45
+ book.add_component(INDEX_PATH, zf.read(INDEX_PATH))
46
+ zf.each { |entry|
47
+ ze = entry.name
48
+ book.add_resource(ze) unless ze == INDEX_PATH || entry.directory?
49
+ }
50
+ }
51
+ book.read_resource_proc = lambda { |resource|
52
+ Zip::Archive.open(path) { |zipfile|
53
+ zipfile.read(resource.src)
54
+ }
55
+ }
56
+
57
+ extract_properties_from_index(book)
58
+
59
+ new(book)
60
+ end
61
+
62
+
63
+ # Stitches together components of the internal book.
64
+ #
65
+ def initialize(book)
66
+ @book = book
67
+
68
+ if @book.components.length > 1
69
+ stitch_components(@book)
70
+ end
71
+
72
+ consolidate_properties(@book)
73
+
74
+ @book.chapters = outline_book(index)
75
+
76
+ @book.cover ||= (
77
+ @book.resources.detect { |r| r.src == COVER_PATH } ||
78
+ @book.add_resource(COVER_PATH)
79
+ )
80
+ end
81
+
82
+
83
+ # Writes the internal book object to a .zhook file at the given path.
84
+ #
85
+ def write(path)
86
+ File.unlink(path) if File.exists?(path)
87
+ Zip::Archive.open(path, Zip::CREATE) { |zipfile|
88
+ zipfile.add_buffer(INDEX_PATH, htmlize(index))
89
+ @book.resources.each { |resource|
90
+ zipfile.add_buffer(resource.src, @book.read_resource(resource))
91
+ }
92
+ unless @book.cover.src == COVER_PATH
93
+ zipfile.add_buffer(COVER_PATH, to_png_data(@book.cover))
94
+ end
95
+ }
96
+ path
97
+ end
98
+
99
+
100
+ # Returns the internal book object.
101
+ #
102
+ def to_book(options = {})
103
+ bk = @book.deep_clone
104
+
105
+ # XPath => URI mapping tools
106
+ cmpt_xpaths = []
107
+
108
+ boilerplate_rel_links =
109
+ '<link rel="start" href="cover.html" />' +
110
+ '<link rel="contents" href="toc.html" />'
111
+
112
+ # Componentizing.
113
+ if options[:componentize]
114
+ componentizer = Peregrin::Componentizer.new(index)
115
+ componentizer.process(index.root.at_css('body'))
116
+ bk.components = componentizer.component_xpaths.collect { |xpath|
117
+ cmpt_xpaths.push(xpath)
118
+ doc = componentizer.generate_component(xpath)
119
+ Peregrin::Component.new(uri_for_xpath(xpath, cmpt_xpaths), doc)
120
+ }
121
+
122
+ # Add rel links and convert to html string
123
+ first_path = bk.components.first.src
124
+ last_path = bk.components.last.src
125
+ boilerplate_rel_links <<
126
+ '<link rel="first" href="'+bk.components.first.src+'" />' +
127
+ '<link rel="last" href="'+bk.components.last.src+'" />'
128
+ bk.components.each_with_index { |cmpt, i|
129
+ head = cmpt.contents.at_xpath(HEAD_XPATH)
130
+ prev_path = bk.components[i-1].src if (i-1) >= 0
131
+ next_path = bk.components[i+1].src if (i+1) < bk.components.size
132
+ head.add_child(boilerplate_rel_links)
133
+ head.add_child('<link rel="prev" href="'+prev_path+'" />') if prev_path
134
+ head.add_child('<link rel="next" href="'+next_path+'" />') if next_path
135
+ cmpt.contents = htmlize(cmpt.contents)
136
+ }
137
+ else
138
+ cmpt_xpaths.push(BODY_XPATH)
139
+ bk.components.clear
140
+ bk.add_component(uri_for_xpath(BODY_XPATH), htmlize(index))
141
+ end
142
+
143
+ # Outlining.
144
+ bk.chapters = outline_book(index, cmpt_xpaths)
145
+
146
+ if options[:componentize]
147
+ # Table of Contents
148
+ doc = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') { |html|
149
+ curse = lambda { |children|
150
+ parts = children.collect { |chp|
151
+ chp.empty_leaf? ? nil : [chp.title, chp.src, chp.children]
152
+ }.compact
153
+
154
+ html.ol {
155
+ parts.each { |part|
156
+ html.li {
157
+ html.a(part[0], :href => part[1])
158
+ curse.call(part[2]) if part[2].any?
159
+ }
160
+ }
161
+ } if parts.any?
162
+ }
163
+ curse.call(bk.chapters)
164
+ }.doc
165
+ if doc.root
166
+ toc_doc = componentizer.generate_document(doc.root)
167
+ toc_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
168
+ bk.add_component(
169
+ "toc.html",
170
+ htmlize(toc_doc),
171
+ nil,
172
+ :linear => "no",
173
+ :guide => "Table of Contents",
174
+ :guide_type => "toc"
175
+ )
176
+ end
177
+
178
+ # List of Illustrations
179
+ figures = index.css('figure[id], div.figure[id]')
180
+ if figures.any?
181
+ doc = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') { |html|
182
+ html.ol {
183
+ figures.each { |fig|
184
+ next unless caption = fig.at_css('figcaption, .figcaption')
185
+ n = fig
186
+ while n && n.respond_to?(:parent)
187
+ break if cmpt_uri = uri_for_xpath(n.path, cmpt_xpaths)
188
+ n = n.parent
189
+ end
190
+ next unless cmpt_uri
191
+ html.li {
192
+ html.a(caption.content, :href => "#{cmpt_uri}##{fig['id']}")
193
+ }
194
+ }
195
+ }
196
+ }.doc
197
+ loi_doc = componentizer.generate_document(doc.root)
198
+ loi_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
199
+ bk.add_component(
200
+ "loi.html",
201
+ htmlize(loi_doc),
202
+ nil,
203
+ :linear => "no",
204
+ :guide => "List of Illustrations",
205
+ :guide_type => "loi"
206
+ )
207
+ end
208
+
209
+ # Cover
210
+ doc = Nokogiri::HTML::Builder.new(:encoding => 'UTF-8') { |html|
211
+ html.div(:id => "cover") {
212
+ html.img(:src => bk.cover.src, :alt => bk.property_for("title"))
213
+ }
214
+ }.doc
215
+ cover_doc = componentizer.generate_document(doc.root)
216
+ cover_doc.at_xpath(HEAD_XPATH).add_child(boilerplate_rel_links)
217
+ bk.components.unshift(
218
+ Peregrin::Component.new(
219
+ "cover.html",
220
+ htmlize(cover_doc),
221
+ nil,
222
+ :linear => "no",
223
+ :guide => "Cover",
224
+ :guide_type => "cover"
225
+ )
226
+ )
227
+ end
228
+
229
+ bk
230
+ end
231
+
232
+
233
+ protected
234
+
235
+ def index
236
+ @index_document ||= Nokogiri::HTML::Document.parse(
237
+ @book.components.first.contents
238
+ )
239
+ end
240
+
241
+
242
+ # Takes a book with multiple components and joins them together,
243
+ # by creating article elements from every body element and appending them
244
+ # to the body of the first component.
245
+ #
246
+ def stitch_components(book)
247
+ node = Nokogiri::XML::Node.new('article', index)
248
+ bdy = index.at_xpath(BODY_XPATH)
249
+ head = index.at_xpath(HEAD_XPATH)
250
+ bdy.children.each { |ch|
251
+ node.add_child(ch)
252
+ }
253
+ bdy.add_child(node)
254
+
255
+ book.components.shift
256
+ while cmpt = book.components.shift
257
+ str = cmpt.contents
258
+ doc = Nokogiri::HTML::Document.parse(str)
259
+ art = doc.at_xpath(BODY_XPATH)
260
+ art.name = 'article'
261
+ bdy.add_child(art)
262
+
263
+ # Import all other unique elements from the head, like link & meta tags.
264
+ if dhead = doc.at_xpath(HEAD_XPATH)
265
+ dhead.children.each { |foreign_child|
266
+ next if foreign_child.name.downcase == "title"
267
+ next if head.children.any? { |index_child|
268
+ index_child.to_s == foreign_child.to_s
269
+ }
270
+ head.add_child(foreign_child.dup)
271
+ }
272
+ end
273
+ end
274
+ book.components.clear
275
+ book.add_component(uri_for_xpath(BODY_XPATH), htmlize(index))
276
+ end
277
+
278
+
279
+ # Takes the properties out of the book and ensures that there are matching
280
+ # meta tags in the index document.
281
+ #
282
+ def consolidate_properties(book)
283
+ head = index.at_xpath('/html/head')
284
+ head.css('meta[name]').each { |meta| meta.remove }
285
+ book.properties.each { |property|
286
+ # FIXME: handle properties with attributes?
287
+ meta = Nokogiri::XML::Node.new('meta', index)
288
+ meta['name'] = property.key
289
+ meta['content'] = property.value
290
+ head.add_child(meta)
291
+ }
292
+ end
293
+
294
+
295
+ def outline_book(doc, cmpt_xpaths = [BODY_XPATH])
296
+ unless defined?(@outliner) && @outliner
297
+ @outliner = Peregrin::Outliner.new(doc)
298
+ @outliner.process(doc.at_css('body'))
299
+ end
300
+
301
+ i = 0
302
+ curse = lambda { |sxn|
303
+ chapter = Peregrin::Chapter.new(sxn.heading_text, i+=1)
304
+
305
+ # identify any relevant child sections
306
+ children = sxn.sections.collect { |ch|
307
+ curse.call(ch) unless ch.empty?
308
+ }.compact
309
+ chapter.children = children if children.any?
310
+
311
+ # Find the component parent
312
+ n = sxn.node || sxn.heading
313
+ while n && n.respond_to?(:parent)
314
+ break if cmpt_uri = uri_for_xpath(n.path, cmpt_xpaths)
315
+ n = n.parent
316
+ end
317
+
318
+ if cmpt_uri
319
+ # get URI for section
320
+ sid = sxn.heading['id'] if sxn.heading
321
+ cmpt_uri += "#"+sid if sid && !sid.empty?
322
+ chapter.src = cmpt_uri
323
+ end
324
+
325
+ chapter
326
+ }
327
+
328
+ result = curse.call(@outliner.result_root).children
329
+ while result && result.length == 1 && result.first.title.nil?
330
+ result = result.first.children
331
+ end
332
+ result
333
+ end
334
+
335
+
336
+ def uri_for_xpath(xpath, cmpt_xpaths = [BODY_XPATH])
337
+ return nil unless cmpt_xpaths.include?(xpath)
338
+ i = cmpt_xpaths.index(xpath)
339
+ (i == 0) ? "index.html" : "part#{"%03d" % i}.html"
340
+ end
341
+
342
+
343
+ def htmlize(doc)
344
+ "<!DOCTYPE html>\n"+doc.root.to_html
345
+ end
346
+
347
+
348
+ def to_png_data(resource)
349
+ return if resource.nil?
350
+ if File.extname(resource.src) == ".png"
351
+ return @book.read_resource(resource)
352
+ else
353
+ raise ConvertUtilityMissing unless `which convert`
354
+ out = nil
355
+ IO.popen("convert - png:-", "r+") { |io|
356
+ io.write(@book.read_resource(resource))
357
+ io.close_write
358
+ out = io.read
359
+ }
360
+ out
361
+ end
362
+ end
363
+
364
+
365
+ def self.extract_properties_from_index(book)
366
+ doc = Nokogiri::HTML::Document.parse(
367
+ book.components.first.contents
368
+ )
369
+ doc.css('html head meta[name]').each { |meta|
370
+ name = meta['name']
371
+ content = meta['content']
372
+ book.add_property(name, content)
373
+ }
374
+ end
375
+
376
+
377
+ class ValidationError < ::RuntimeError
378
+
379
+ def initialize(path = nil)
380
+ @path = path
381
+ end
382
+
383
+ end
384
+
385
+ class FileNotFound < ValidationError; end
386
+ class WrongExtension < ValidationError; end
387
+ class NotAZipArchive < ValidationError; end
388
+ class MissingIndexHTML < ValidationError; end
389
+ class MissingCoverPNG < ValidationError; end
390
+ class IndexHTMLRootHasId < ValidationError; end
391
+
392
+ class ConvertUtilityMissing < RuntimeError; end
393
+
394
+ end
@@ -0,0 +1,87 @@
1
+ class Peregrin::Book
2
+
3
+ # Unique identifier for this book
4
+ attr_accessor :identifier
5
+
6
+ # An array of Components
7
+ attr_accessor :components
8
+
9
+ # A tree of Chapters. Top-level chapters in this array, each with
10
+ # children arrays.
11
+ attr_accessor :chapters
12
+
13
+ # An array of Properties.
14
+ attr_accessor :properties
15
+
16
+ # An array of Resources.
17
+ attr_accessor :resources
18
+
19
+ # A Resource that is used for the book cover.
20
+ attr_accessor :cover
21
+
22
+   # A proc that is called with a resource and returns that resource's contents.
23
+ attr_writer :read_resource_proc
24
+
25
+
26
+ def initialize
27
+ @components = []
28
+ @chapters = []
29
+ @properties = []
30
+ @resources = []
31
+ end
32
+
33
+
34
+ def all_files
35
+ @components + @resources
36
+ end
37
+
38
+
39
+ def add_component(*args)
40
+ @components.push(Peregrin::Component.new(*args)).last
41
+ end
42
+
43
+
44
+ def add_resource(*args)
45
+ @resources.push(Peregrin::Resource.new(*args)).last
46
+ end
47
+
48
+
49
+ def add_chapter(*args)
50
+ @chapters.push(Peregrin::Chapter.new(*args)).last
51
+ end
52
+
53
+
54
+ def add_property(*args)
55
+ @properties.push(Peregrin::Property.new(*args)).last
56
+ end
57
+
58
+
59
+ def property_for(key)
60
+ key = key.to_s
61
+ prop = @properties.detect { |p| p.key == key }
62
+ prop ? prop.value : nil
63
+ end
64
+
65
+
66
+ def read_resource(resource_path)
67
+ @read_resource_proc.call(resource_path) if @read_resource_proc
68
+ end
69
+
70
+
71
+ def copy_resource_to(resource_path, dest_path)
72
+ File.open(dest_path, 'w') { |f|
73
+ f << read_resource(resource_path)
74
+ }
75
+ end
76
+
77
+
78
+ def deep_clone
79
+ @read_resource_proc ||= nil
80
+ tmp = @read_resource_proc
81
+ @read_resource_proc = nil
82
+ clone = Marshal.load(Marshal.dump(self))
83
+ clone.read_resource_proc = @read_resource_proc = tmp
84
+ clone
85
+ end
86
+
87
+ end
@@ -0,0 +1,31 @@
1
# A chapter is one heading-bearing section of a book. Chapters nest: each
# one keeps an array of child chapters.
#
# TODO: flag whether a chapter is linkable?
#
class Peregrin::Chapter

  attr_accessor :title, :src, :children, :position

  # title - heading text, or nil; carriage returns and newlines are each
  #         replaced by a space.
  # pos   - position of the chapter; converted with to_i.
  # src   - optional URI the chapter links to.
  #
  def initialize(title, pos, src = nil)
    @children = []
    @position = pos.to_i
    @src = src
    @title = title.gsub(/[\r\n]/,' ') if title
  end


  # Builds a new chapter from the given arguments, appends it to this
  # chapter's children and returns it.
  #
  def add_child(child_title, child_pos, child_src = nil)
    child = Peregrin::Chapter.new(child_title, child_pos, child_src)
    children.push(child)
    child
  end


  # True when neither this chapter nor any chapter below it has a src,
  # i.e. there is nothing here that a Table of Contents could link to.
  #
  def empty_leaf?
    return false unless src.nil?

    children.all?(&:empty_leaf?)
  end

end
@@ -0,0 +1,12 @@
1
# A component is one piece of the book's linear text: a Resource that also
# carries its contents.
#
class Peregrin::Component < Peregrin::Resource

  # The contents of the component.
  attr_accessor :contents

  # src        - passed on to Peregrin::Resource.
  # contents   - the component's contents (optional).
  # media_type - passed on to Peregrin::Resource.
  # attributes - passed on to Peregrin::Resource.
  #
  def initialize(src, contents = nil, media_type = nil, attributes = {})
    @contents = contents
    super(src, media_type, attributes)
  end

end
@@ -0,0 +1,118 @@
1
+ class Peregrin::Componentizer
2
+
3
+ attr_reader :component_xpaths
4
+
5
+
6
+ def initialize(doc)
7
+ @document = doc
8
+ @component_xpaths = []
9
+ end
10
+
11
+
12
+ # Build a list of xpaths for nodes that can be turned into standalone
13
+ # components.
14
+ #
15
+ def process(from)
16
+ @component_xpaths = []
17
+ walk(from)
18
+ @component_xpaths.reject! { |xpath| emptied?(xpath) }
19
+ end
20
+
21
+
22
+ def generate_component(xpath)
23
+ raise "Not a component: #{xpath}" unless @component_xpaths.include?(xpath)
24
+ node = @document.at_xpath(xpath)
25
+ generate_document(node)
26
+ end
27
+
28
+
29
+ # Creates a new document with the same root and head nodes, but with
30
+ # a body that just contains the nodes at the given xpath.
31
+ #
32
+ def generate_document(node)
33
+ # Clean up the "shell" document.
34
+ @shell_document ||= @document.dup
35
+ bdy = @shell_document.at_xpath('/html/body')
36
+ bdy.children.remove
37
+
38
+ # Find the node we're going to copy into the shell document.
39
+ # Create a deep clone of it. Remove any children of it that are
40
+ # componentizable in their own right.
41
+ ndup = node.dup
42
+ node.children.collect { |ch|
43
+ next unless component_xpaths.include?(ch.path)
44
+ dpath = ch.path.sub(/^#{Regexp.escape(node.path)}/, ndup.path)
45
+ ndup.children.detect { |dch| dch.path == dpath }
46
+ }.compact.each { |ch|
47
+ ch.unlink
48
+ }
49
+
50
+ # Append the node to the body of the shell (or replace the body, if
51
+ # the node is a body itself).
52
+ if node.name.downcase == "body"
53
+ bdy.replace(ndup)
54
+ else
55
+ bdy.add_child(ndup)
56
+ end
57
+
58
+ @shell_document.dup
59
+ end
60
+
61
+
62
+ # Writes the componentizable node at the given xpath to the given
63
+ # filesystem path.
64
+ #
65
+ # If you provide a block, you get the new document object,
66
+ # and you are expected to return the string containing its HTML form --
67
+ # in this way you can tweak the HTML output. Default is simply: doc.to_html
68
+ #
69
+ def write_component(xpath, path, &blk)
70
+ new_doc = generate_component(xpath)
71
+ out = block_given? ? blk.call(new_doc) : new_doc.to_html
72
+ File.open(path, 'w') { |f| f.write(out) }
73
+ out
74
+ end
75
+
76
+
77
+ protected
78
+
79
+ # The recursive method for walking the tree - checks if the current node
80
+ # is a component, then checks each child of the current node.
81
+ #
82
+ def walk(node)
83
+ return unless componentizable?(node)
84
+ @component_xpaths.push(node.path)
85
+ node.children.each { |c| walk(c) }
86
+ end
87
+
88
+
89
+ # True if the node meets the criteria for being componentizable:
90
+ # 1) Is a body or article element (or a div.article)?
91
+ # 2) Are all subsequent siblings also componentizable?
92
+ #
93
+ def componentizable?(node)
94
+ begin
95
+ return false unless (
96
+ %w[body article].include?(node.name.downcase) ||
97
+ (
98
+ node.name.downcase == "div" &&
99
+ node['class'] &&
100
+ node['class'].match(/\barticle\b/)
101
+ )
102
+ )
103
+ end while node = node.next
104
+ true
105
+ end
106
+
107
+
108
+ # True if all children are either componentizable or blank text nodes.
109
+ #
110
+ def emptied?(xpath)
111
+ node = @document.at_xpath(xpath)
112
+ node.children.all? { |ch|
113
+ @component_xpaths.include?(ch.path) ||
114
+ (ch.text? && ch.content.strip.empty?)
115
+ }
116
+ end
117
+
118
+ end