tartancloth 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tartancloth.rb CHANGED
@@ -1,335 +1,417 @@
1
- require 'bluecloth'
2
- require 'nokogiri'
3
-
4
-
5
-
6
- class TartanCloth
7
-
8
- VERSION = "0.0.1"
9
-
10
- attr_accessor :title
11
-
12
- def initialize( markdown_file, title = nil )
13
- @markdown_file = markdown_file
14
- @title = title
15
- end
16
-
17
- def to_html_file( html_file )
18
-
19
- File.open( html_file, 'w') do |f|
20
- f << to_html()
21
- end
22
- end
23
-
24
- ###
25
- # Convert a markdown source file to HTML. If a header element with text TOC
26
- # exists within the markdown document, a Table of Contents will be generated
27
- # and inserted at that location.
28
- #
29
- # The TOC will only contain header (h1-h6) elements from the location of the
30
- # TOC header to the end of the document
31
- def to_html
32
- bc = BlueCloth::new( File::read( @markdown_file ), header_labels: true )
33
- content = bc.to_html
34
-
35
- content = build_toc( content )
36
-
37
- html = ""
38
-
39
- # Add a well formed HTML5 header.
40
- html << html_header(title)
41
-
42
- # Add the body content.
43
- html << content
44
-
45
- # Add the document closing tags.
46
- html << html_footer()
47
- end
48
-
49
- ###
50
- # Build a TOC based on headers located within HTML content.
51
- # If a header element with text TOC exists within the markdown document, a
52
- # Table of Contents will be generated and inserted at that location.
53
- #
54
- # The TOC will only contain header (h1-h6) elements from the location of the
55
- # TOC header to the end of the document
56
- def build_toc( html_content )
57
- # Generate Nokogiri elements from HTML
58
- doc = Nokogiri::HTML::DocumentFragment.parse( html_content )
59
-
60
- # Find the TOC header
61
- toc = find_toc_header(doc)
62
-
63
- # Just return what was passed to us if there's no TOC.
64
- return html_content if toc.nil?
65
-
66
- # Get all headers in the document, starting from the TOC header.
67
- headers = get_headers(doc, toc)
68
-
69
- # Build the link info for the TOC.
70
- toc_links = []
71
- headers.each do |element|
72
- toc_links << link_hash(element)
73
- end
74
-
75
- # Convert link info to markdown.
76
- toc_md = toc_to_markdown(toc_links)
77
-
78
- # Convert the TOC markdown to HTML
79
- bc = BlueCloth::new( toc_md, pseudoprotocols: true )
80
- toc_content = bc.to_html
81
-
82
- # Convert the TOC HTML to Nokogiri elements.
83
- toc_html = Nokogiri::HTML::DocumentFragment.parse(toc_content)
84
-
85
- # Add toc class to the <ul> element
86
- toc_html.css('ul').add_class('toc')
87
-
88
- # Insert the TOC content before the toc element
89
- toc.before(toc_html.children)
90
-
91
- # Remove the TOC header placeholder element.
92
- toc.remove
93
-
94
- # Return the HTML
95
- doc.to_html
96
- end
97
-
98
- ###
99
- # Convert an array of link hashes to markdown
100
- #
101
- # toc_links - hash of links
102
- # returns - markdown content
103
- def toc_to_markdown(toc_links)
104
- markdown = "## Table of Contents\n\n"
105
- toc_links.each do |link_data|
106
- text = link_data[:text]
107
- link = link_data[:link]
108
- klass = link_data[:klass]
109
- markdown << "+ [[#{text}](##{link})](class:#{klass})\n"
110
- end
111
- markdown << "\n"
112
- end
113
-
114
- ###
115
- # return the TOC header element or nil
116
- #
117
- # doc - Nokogiri DocumentFragment
118
- def find_toc_header(doc)
119
- return nil unless doc
120
-
121
- doc.children.each do |element|
122
- return element if is_toc_header(element)
123
- end
124
-
125
- return nil
126
- end
127
-
128
- ###
129
- # returns true if the element is a header (h1-h6) element
130
- def is_header_element(element)
131
- %w(h1 h2 h3 h4 h5 h6).include? element.name
132
- end
133
-
134
- ###
135
- # returns true when a header (h1-h6) element contains the text: TOC
136
- def is_toc_header(element)
137
- return (is_header_element(element) && element.text == 'TOC')
138
- end
139
-
140
- ###
141
- # Create an array of all header (h1-h6) elements in an HTML document
142
- # starting from a specific element
143
- #
144
- # starting_element - element to start parsing from
145
- # returns - array of Nokogiri elements
146
- def get_headers(doc, starting_element)
147
- headers = []
148
- capture = false
149
-
150
- doc.children.each do |element|
151
- unless capture
152
- capture = true if element == starting_element
153
- next
154
- end # unless
155
-
156
- headers << element if is_header_element(element)
157
- end
158
-
159
- headers
160
- end
161
-
162
- ###
163
- # Build a link hash for an element containing the text, link,
164
- # and a children array.
165
- #
166
- # element - Nokogiri element
167
- def link_hash(element)
168
- # The previous element should be a simple anchor.
169
- # Get the actual link value from the anchor.
170
- a = element.previous_element
171
- anchor_link = a.attributes['name'].value if a.name == 'a'
172
-
173
- # Store the header text (link text) and the anchor link and a class for styling.
174
- { text: element.text, link: anchor_link, klass: "#{element.name}toc" }
175
- end
176
-
177
- # Create an HTML5 header
178
- #
179
- # returns - HTML header and body open tags
180
- def html_header(title)
181
- styles = css()
182
- header = <<HTML_HEADER
183
- <!DOCTYPE html>
184
- <html>
185
- <head><title>#{title}</title></head>
186
-
187
- #{styles}
188
-
189
- <body>
190
- <div class="content">
191
- <div class="rendered-content">
192
-
193
- HTML_HEADER
194
- end
195
-
196
- ###
197
- # Create some stylish CSS
198
- #
199
- # returns - html style element
200
- def css()
201
- styles = <<CSS
202
- <style media="screen" type="text/css">
203
- <!--
204
- body {
205
- font-family: Arial, sans-serif;
206
- }
207
-
208
- .content {
209
- margin: 0 auto;
210
- min-height: 100%;
211
- padding: 0 0 100px;
212
- width: 980px;
213
- border: 1px solid #ccc;
214
- border-radius: 5px;
215
- }
216
-
217
- .rendered-content {
218
- padding: 10px;
219
- }
220
-
221
- /* Fancy HR styles based on http://css-tricks.com/examples/hrs/ */
222
- hr {
223
- border: 0;
224
- height: 1px;
225
- background-image: -webkit-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
226
- background-image: -moz-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
227
- background-image: -ms-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
228
- background-image: -o-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
229
- }
230
-
231
- h1 {
232
- font-size: 24px;
233
- font-weight: normal;
234
- line-height: 1.25;
235
- }
236
-
237
- h2 {
238
- font-size: 20px;
239
- font-weight: normal;
240
- line-height: 1.5;
241
- }
242
-
243
- h3 {
244
- font-size: 16px;
245
- font-weight: bold;
246
- line-height: 1.5625;
247
- }
248
-
249
- h4 {
250
- font-size: 14px;
251
- font-weight: bold;
252
- line-height: 1.5;
253
- }
254
-
255
- h5 {
256
- font-size: 12px;
257
- font-weight: bold;
258
- line-height: 1.66;
259
- text-transform: uppercase;
260
- }
261
-
262
- h6 {
263
- font-size: 12px;
264
- font-style: italic;
265
- font-weight: bold;
266
- line-height: 1.66;
267
- text-transform: uppercase;
268
- }
269
-
270
- pre {
271
- margin-left: 2em;
272
- display: block;
273
- background: #f5f5f5;
274
- font-family: monospace;
275
- border: 1px solid #ccc;
276
- border-radius: 2px;
277
- padding: 1px 3px;
278
- }
279
-
280
- code {
281
- background: #f5f5f5;
282
- font-family: monospace;
283
- border: 1px solid #ccc;
284
- border-radius: 2px;
285
- padding: 1px 3px;
286
- }
287
-
288
- pre, code {
289
- font-size: 12px;
290
- line-height: 1.4;
291
- }
292
-
293
- pre code {
294
- border: 0;
295
- padding: 0;
296
- }
297
-
298
- ul.toc li {
299
- list-style: none;
300
- font-size: 14px;
301
- }
302
-
303
- ul.toc li span.h3toc {
304
- margin-left: 20px;
305
- }
306
-
307
- ul.toc li span.h4toc {
308
- margin-left: 40px;
309
- }
310
-
311
- ul.toc li span.h5toc {
312
- margin-left: 60px;
313
- }
314
-
315
- ul.toc li span.h6toc {
316
- margin-left: 80px;
317
- }
318
- -->
319
- </style>
320
- CSS
321
- end
322
-
323
- ###
324
- # returns - HTML closing tags
325
- def html_footer()
326
- footer = <<HTML_FOOTER
327
-
328
- </div> <!-- .content -->
329
- </div> <!-- .rendered-content -->
330
- </body>
331
- </html>
332
- HTML_FOOTER
333
- end
334
-
335
- end
1
+ require 'bluecloth'
2
+ require 'nokogiri'
3
+
4
+
5
+
6
+ class TartanCloth
7
+
8
+ VERSION = "0.0.2"
9
+
10
+ attr_accessor :title
11
+
12
###
# Create a converter for a single markdown document.
#
# markdown_file - path to the markdown source file
# title         - optional text used for the HTML <title> (defaults to nil)
def initialize(markdown_file, title = nil)
  @markdown_file, @title = markdown_file, title
end
16
+
17
###
# Render the markdown source as a complete HTML document.
#
# The result is the document header (doctype, head, styles and opening body
# tags), followed by the rendered body (including any generated Table of
# Contents), followed by the closing tags.
#
# returns - String containing the whole HTML document
def to_html
  # The three parts are produced in document order and glued together.
  [html_header(title), body_html, html_footer].join
end
36
+
37
###
# The same as to_html() but writes the HTML to a file.
#
# The document is rendered BEFORE the output file is opened. Opening with
# mode 'w' truncates the file, so rendering first means a failure while
# reading or converting the markdown leaves an existing output file intact.
#
# html_file - path to the file to (over)write
def to_html_file( html_file )
  html = to_html

  File.open( html_file, 'w' ) do |f|
    f << html
  end
end
47
+
48
###
# Render the markdown file and insert the Table of Contents.
# The returned HTML does NOT include the document header, footer, or
# stylesheet.
#
# returns - HTML that forms the body of the document
def body_html
  markdown = File.read( @markdown_file )

  # header_labels is passed to BlueCloth; link_hash later looks for an anchor
  # element immediately before each header.
  rendered = BlueCloth.new( markdown, header_labels: true ).to_html

  build_toc( rendered )
end
59
+
60
+ private
61
+
62
###
# Build a TOC based on headers located within HTML content.
# If a header element with text TOC exists within the document, a Table of
# Contents is generated and inserted at that location (the placeholder header
# itself is removed).
#
# The TOC only contains header (h1-h6) elements from the location of the
# TOC header to the end of the document.
#
# Header anchors are made unique before anything else. The parsed document is
# serialized in both cases (with or without a TOC) so the de-duplicated
# anchors are never thrown away.
#
# html_content - HTML string rendered from the markdown source
# returns      - HTML string
def build_toc( html_content )
  # Generate Nokogiri elements from HTML
  doc = Nokogiri::HTML::DocumentFragment.parse( html_content )

  # Make sure all header anchors are unique.
  make_header_anchors_unique(doc)

  # Find the TOC header
  toc = find_toc_header(doc)

  # No TOC requested: still return the document with its unique anchors.
  return doc.to_html if toc.nil?

  # Build the link info for every header that follows the TOC header.
  toc_links = get_headers(doc, toc).map { |header| link_hash(header) }

  # Convert link info to markdown, then the markdown to HTML.
  toc_md = toc_to_markdown(toc_links)
  toc_content = BlueCloth.new( toc_md, pseudoprotocols: true ).to_html

  # Convert the TOC HTML to Nokogiri elements.
  toc_html = Nokogiri::HTML::DocumentFragment.parse(toc_content)

  # Add toc class to the <ul> element
  toc_html.css('ul').add_class('toc')

  # Insert the TOC content before the placeholder, then drop the placeholder.
  toc.before(toc_html.children)
  toc.remove

  # Return the HTML
  doc.to_html
end
113
+
114
###
# Convert an array of link hashes to markdown.
#
# Each hash supplies :text (link text), :link (anchor name) and :klass
# (class name), and becomes one list item of the form
#   + [[text](#link)](class:klass)
# underneath a "Table of Contents" heading.
#
# toc_links - array of link hashes
# returns   - markdown content
def toc_to_markdown(toc_links)
  items = toc_links.map do |entry|
    "+ [[#{entry[:text]}](##{entry[:link]})](class:#{entry[:klass]})\n"
  end

  "## Table of Contents\n\n" + items.join + "\n"
end
129
+
130
###
# Return the first top-level TOC header element, or nil when there is none
# (or when no document was given).
#
# doc - Nokogiri DocumentFragment
def find_toc_header(doc)
  return nil unless doc

  doc.children.find { |node| is_toc_header(node) }
end
143
+
144
###
# Returns true if the element's name is one of h1-h6, false otherwise.
#
# element - any object responding to #name
def is_header_element(element)
  case element.name
  when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' then true
  else false
  end
end
149
+
150
###
# Returns true when a header (h1-h6) element's text is exactly: TOC
#
# element - any object responding to #name and #text
def is_toc_header(element)
  return false unless is_header_element(element)

  element.text == 'TOC'
end
155
+
156
###
# Collect the top-level header (h1-h6) elements of a document.
#
# doc              - document whose #children are scanned in order
# starting_element - when given, only headers AFTER this element are
#                    collected (the element itself is excluded); when nil,
#                    all headers are collected. If the element is not among
#                    the children, nothing is collected.
# returns          - array of header elements
def get_headers(doc, starting_element = nil)
  candidates = doc.children.to_a

  unless starting_element.nil?
    # Skip everything up to and including the starting element.
    candidates = candidates.drop_while { |node| !(node == starting_element) }.drop(1)
  end

  candidates.select { |node| is_header_element(node) }
end
178
+
179
###
# Build a link hash for a header element.
#
# element - Nokogiri element (a header)
# returns - hash with:
#             :text  - the header's text (used as the link text)
#             :link  - the name of the anchor preceding the header, or ""
#                      when the header has no such anchor
#             :klass - class name for styling, e.g. "h2toc"
def link_hash(element)
  anchor = get_anchor_for_header(element)

  # A header without a preceding anchor yields an empty link instead of
  # handing nil to get_link.
  anchor_link = anchor.nil? ? "" : get_link(anchor)

  # Store the header text (link text) and the anchor link and a class for styling.
  { text: element.text, link: anchor_link, klass: "#{element.name}toc" }
end
191
+
192
###
# Return the anchor that precedes a header.
#
# The element immediately before the header is expected to be a simple
# anchor (<a>). Returns nil when there is no previous element or when the
# previous element is not an anchor.
#
# element - Nokogiri element (a header)
def get_anchor_for_header(element)
  candidate = element.previous_element

  # No previous element at all, e.g. the header is the first element.
  return nil if candidate.nil?

  candidate.name == 'a' ? candidate : nil
end
201
+
202
###
# Return the link (the value of the name attribute) from an anchor.
# Returns "" when the argument is nil, is not an anchor, or is an anchor
# without a name attribute.
#
# anchor - Nokogiri::Node (or nil)
def get_link(anchor)
  return "" if anchor.nil? || anchor.name != 'a'

  name_attr = anchor.attributes['name']
  name_attr.nil? ? "" : name_attr.value
end
212
+
213
###
# Sets an anchor's link (the value of its name attribute).
# Does nothing if the argument is nil, isn't an anchor, or is an anchor
# without a name attribute.
#
# anchor  - Nokogiri::Node (or nil)
# link    - link text to set on anchor
# returns - the link when it was set, nil otherwise
def set_link(anchor, link)
  return nil if anchor.nil? || anchor.name != 'a'

  name_attr = anchor.attributes['name']
  name_attr.value = link unless name_attr.nil?
end
222
+
223
###
# Identical headers will have identical anchors. Modify them so each anchor
# is unique.
#
# The first occurrence of a link is left untouched. Each later occurrence is
# renamed to "<link>.<n>", with n starting at 0 and increasing; a candidate
# name that is already taken by another anchor is skipped, so the rename
# itself can never introduce a new duplicate.
#
# Headers without a preceding anchor are ignored.
#
# doc - Nokogiri DocumentFragment; its anchors are modified in place
def make_header_anchors_unique(doc)
  taken = {}        # every anchor name already claimed in the document
  next_suffix = {}  # original link => next numeric suffix to try

  get_headers(doc).each do |header|
    anchor = get_anchor_for_header(header)
    next if anchor.nil?

    link = get_link(anchor)

    if taken.key?(link)
      # Link already exists, modify it (add .#), skipping names in use.
      suffix = next_suffix.fetch(link, 0)
      candidate = "#{link}.#{suffix}"
      while taken.key?(candidate)
        suffix += 1
        candidate = "#{link}.#{suffix}"
      end

      set_link(anchor, candidate)

      # Remember where to continue the next time we find this link.
      next_suffix[link] = suffix + 1
      link = candidate
    end

    taken[link] = true
  end
end
258
+
259
###
# Create an HTML5 header.
#
# The stylesheet is emitted inside <head> (after the title) so that no
# element sits between </head> and <body>.
#
# title   - text placed in the <title> element; it is inserted as given,
#           without HTML escaping
# returns - HTML header and body open tags
def html_header(title)
  styles = css
  <<HTML_HEADER
<!DOCTYPE html>
<html>
<head>
<title>#{title}</title>
#{styles}
</head>

<body>
<div class="content">
<div class="rendered-content">

HTML_HEADER
end
277
+
278
###
# Create some stylish CSS.
#
# The stylesheet is static text: page layout, header sizes, code blocks, and
# the per-level indentation of the Table of Contents entries.
#
# returns - html style element (as a String)
def css
  <<'STYLESHEET'
<style media="screen" type="text/css">
<!--
body {
font-family: Arial, sans-serif;
}

.content {
margin: 0 auto;
min-height: 100%;
padding: 0 0 100px;
width: 980px;
border: 1px solid #ccc;
border-radius: 5px;
}

.rendered-content {
padding: 10px;
}

/* Fancy HR styles based on http://css-tricks.com/examples/hrs/ */
hr {
border: 0;
height: 1px;
background-image: -webkit-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
background-image: -moz-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
background-image: -ms-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
background-image: -o-linear-gradient(left, rgba(200,200,200,1), rgba(200,200,200,0.5), rgba(200,200,200,0));
}

h1 {
font-size: 24px;
font-weight: normal;
line-height: 1.25;
}

h2 {
font-size: 20px;
font-weight: normal;
line-height: 1.5;
}

h3 {
font-size: 16px;
font-weight: bold;
line-height: 1.5625;
}

h4 {
font-size: 14px;
font-weight: bold;
line-height: 1.5;
}

h5 {
font-size: 12px;
font-weight: bold;
line-height: 1.66;
text-transform: uppercase;
}

h6 {
font-size: 12px;
font-style: italic;
font-weight: bold;
line-height: 1.66;
text-transform: uppercase;
}

pre {
margin-left: 2em;
display: block;
background: #f5f5f5;
font-family: monospace;
border: 1px solid #ccc;
border-radius: 2px;
padding: 1px 3px;
}

code {
background: #f5f5f5;
font-family: monospace;
border: 1px solid #ccc;
border-radius: 2px;
padding: 1px 3px;
}

pre, code {
font-size: 12px;
line-height: 1.4;
}

pre code {
border: 0;
padding: 0;
}

ul.toc li {
list-style: none;
font-size: 14px;
}

ul.toc li span.h3toc {
margin-left: 20px;
}

ul.toc li span.h4toc {
margin-left: 40px;
}

ul.toc li span.h5toc {
margin-left: 60px;
}

ul.toc li span.h6toc {
margin-left: 80px;
}
-->
</style>
STYLESHEET
end
404
+
405
###
# Close the elements opened by the document header.
#
# The header opens .content and then .rendered-content, so the first </div>
# closes .rendered-content and the second closes .content; the trailing
# comments are labelled in that order.
#
# returns - HTML closing tags
def html_footer
  <<HTML_FOOTER

</div> <!-- .rendered-content -->
</div> <!-- .content -->
</body>
</html>
HTML_FOOTER
end
416
+
417
+ end