polytexnic 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/.pull_requests/1371777257 +0 -0
  4. data/.pull_requests/1371927975 +0 -0
  5. data/.pull_requests/1372804345 +0 -0
  6. data/.pull_requests/1374784075 +0 -0
  7. data/.pull_requests/1375304853 +0 -0
  8. data/.pull_requests/1375408308 +0 -0
  9. data/.pull_requests/1375409462 +0 -0
  10. data/.pull_requests/1375410668 +0 -0
  11. data/.pull_requests/1375472132 +0 -0
  12. data/.pull_requests/1375485496 +0 -0
  13. data/.pull_requests/1375487548 +0 -0
  14. data/.pull_requests/1375492835 +0 -0
  15. data/.pull_requests/1375497765 +0 -0
  16. data/.pull_requests/1375559547 +0 -0
  17. data/.pull_requests/1375589063 +0 -0
  18. data/.pull_requests/1375841786 +0 -0
  19. data/.pull_requests/1376352634 +0 -0
  20. data/.pull_requests/1376353299 +0 -0
  21. data/.pull_requests/1376449284 +0 -0
  22. data/.pull_requests/1376452696 +0 -0
  23. data/.pull_requests/1376454166 +0 -0
  24. data/.pull_requests/1376532291 +0 -0
  25. data/.pull_requests/1376625487 +0 -0
  26. data/.pull_requests/1376690108 +0 -0
  27. data/.pull_requests/1376699046 +0 -0
  28. data/.pull_requests/1376707642 +0 -0
  29. data/.pull_requests/1377230284 +0 -0
  30. data/.pull_requests/1379118478 +0 -0
  31. data/.pull_requests/1379123150 +0 -0
  32. data/.pull_requests/1380221847 +0 -0
  33. data/.pull_requests/1380589654 +0 -0
  34. data/.pull_requests/1380673142 +0 -0
  35. data/.pull_requests/1380850800 +0 -0
  36. data/.pull_requests/1381001264 +0 -0
  37. data/.pull_requests/1381005204 +0 -0
  38. data/.pull_requests/1381103022 +0 -0
  39. data/.pull_requests/1381252832 +0 -0
  40. data/.pull_requests/1381276624 +0 -0
  41. data/.pull_requests/1381344234 +0 -0
  42. data/.pull_requests/1381385297 +0 -0
  43. data/.pull_requests/1381427498 +0 -0
  44. data/.pull_requests/1381429761 +0 -0
  45. data/.pull_requests/1381873684 +0 -0
  46. data/.pull_requests/1382045490 +0 -0
  47. data/.pull_requests/1382056384 +0 -0
  48. data/.pull_requests/1382405223 +0 -0
  49. data/.pull_requests/1382478400 +0 -0
  50. data/.pull_requests/1382479780 +0 -0
  51. data/.pull_requests/1382485483 +0 -0
  52. data/.pull_requests/1382569911 +0 -0
  53. data/.pull_requests/1382646199 +0 -0
  54. data/.pull_requests/1382649778 +0 -0
  55. data/.pull_requests/1382660987 +0 -0
  56. data/.pull_requests/1382743927 +0 -0
  57. data/.pull_requests/1382840347 +0 -0
  58. data/.pull_requests/1383077676 +0 -0
  59. data/.pull_requests/1383086948 +0 -0
  60. data/.pull_requests/1383161978 +0 -0
  61. data/.pull_requests/1383263695 +0 -0
  62. data/.pull_requests/1383274008 +0 -0
  63. data/.pull_requests/1383327328 +0 -0
  64. data/.rspec +2 -0
  65. data/.ruby-gemset +1 -0
  66. data/.ruby-version +1 -0
  67. data/Gemfile +15 -0
  68. data/Guardfile +15 -0
  69. data/LICENSE.txt +22 -0
  70. data/README.md +21 -0
  71. data/Rakefile +2 -0
  72. data/lib/polytexnic/literal.rb +299 -0
  73. data/lib/polytexnic/postprocessor.rb +28 -0
  74. data/lib/polytexnic/postprocessors/html.rb +1139 -0
  75. data/lib/polytexnic/postprocessors/latex.rb +18 -0
  76. data/lib/polytexnic/postprocessors/polytex.rb +44 -0
  77. data/lib/polytexnic/preprocessor.rb +23 -0
  78. data/lib/polytexnic/preprocessors/html.rb +349 -0
  79. data/lib/polytexnic/preprocessors/latex.rb +43 -0
  80. data/lib/polytexnic/preprocessors/polytex.rb +127 -0
  81. data/lib/polytexnic/utils.rb +176 -0
  82. data/lib/polytexnic/version.rb +3 -0
  83. data/lib/polytexnic.rb +92 -0
  84. data/notes/pandoc.md +41 -0
  85. data/polytexnic.gemspec +28 -0
  86. data/polytexnic_commands.sty +5 -0
  87. data/precompiled_binaries/tralics +0 -0
  88. data/spec/fixtures/code_listing.tex +14 -0
  89. data/spec/fixtures/figures.tex +8 -0
  90. data/spec/fixtures/inline_math.html +4 -0
  91. data/spec/fixtures/inline_math.tex +3 -0
  92. data/spec/fixtures/math_environments.html +50 -0
  93. data/spec/fixtures/math_environments.tex +56 -0
  94. data/spec/fixtures/section_xrefs.tex +9 -0
  95. data/spec/fixtures/sidebar.tex +10 -0
  96. data/spec/fixtures/tables.tex +8 -0
  97. data/spec/fixtures/verbatim_environments.html +11 -0
  98. data/spec/fixtures/verbatim_environments.tex +13 -0
  99. data/spec/integration_spec.rb +34 -0
  100. data/spec/markdown_to_polytex_spec.rb +192 -0
  101. data/spec/resemble_matcher_spec.rb +69 -0
  102. data/spec/spec_helper.rb +38 -0
  103. data/spec/support/resemble_matcher.rb +100 -0
  104. data/spec/to_html/asides_spec.rb +42 -0
  105. data/spec/to_html/chapters_and_sections_spec.rb +268 -0
  106. data/spec/to_html/characters_and_punctuation_spec.rb +138 -0
  107. data/spec/to_html/codelistings_spec.rb +70 -0
  108. data/spec/to_html/core_spec.rb +227 -0
  109. data/spec/to_html/eqref_spec.rb +32 -0
  110. data/spec/to_html/footnote_spec.rb +164 -0
  111. data/spec/to_html/graphics_and_figures_spec.rb +358 -0
  112. data/spec/to_html/lists_spec.rb +103 -0
  113. data/spec/to_html/literal_environments/code_spec.rb +141 -0
  114. data/spec/to_html/literal_environments/math_spec.rb +255 -0
  115. data/spec/to_html/literal_environments/unicode_spec.rb +12 -0
  116. data/spec/to_html/literal_environments/verbatim_spec.rb +168 -0
  117. data/spec/to_html/quotations_and_verse_spec.rb +86 -0
  118. data/spec/to_html/table_of_contents_spec.rb +93 -0
  119. data/spec/to_html/table_spec.rb +269 -0
  120. data/spec/to_html/text_formatting_spec.rb +50 -0
  121. data/spec/to_latex_spec.rb +197 -0
  122. data/tasks/bin/ruby_tests +41 -0
  123. data/tasks/run_tests_with_both_rubies.rake +5 -0
  124. data/tmp/.gitkeep +0 -0
  125. metadata +286 -0
@@ -0,0 +1,1139 @@
1
+ # encoding=utf-8
2
+ module Polytexnic
3
+ module Postprocessor
4
+ module Html
5
+
6
# Converts Tralics XML output to HTML.
# Parses the XML string, runs every postprocessing step in a fixed order,
# and returns the result of convert_to_html on the final document.
def xml_to_html(xml)
  doc = Nokogiri::XML(xml)
  # Steps applied to the freshly parsed document, in order.
  %w[emphasis boldface small_caps typewriter skips verbatim code metacode
     quote verse itemize enumerate item remove_errors set_ids
     chapters_and_sections subsection subsubsection headings sout kode
     filepath codelistings backslash_break spaces asides center
     title].each { |step| __send__(step, doc) }
  # smart_single_quotes returns a re-parsed document, so rebind doc here.
  doc = smart_single_quotes(doc)
  # Steps applied to the re-parsed document, in order.
  %w[tex_logos restore_literal restore_inline_verbatim make_cross_references
     hrefs graphics_and_figures images_and_imageboxes tables math frontmatter
     mainmatter footnotes table_of_contents].each { |step| __send__(step, doc) }
  convert_to_html(doc)
end
53
+
54
+ private
55
+
56
# Handles output of \emph{} and \textit{}.
# Every <hi rend="it"> element becomes an <em> without the rend attribute.
def emphasis(doc)
  italic_nodes = doc.xpath('//hi[@rend="it"]')
  italic_nodes.each do |hi|
    hi.remove_attribute('rend')
    hi.name = 'em'
  end
end
63
+
64
# Handles output of \textbf{}.
# Every <hi rend="bold"> element becomes a <strong> without the rend attribute.
def boldface(doc)
  bold_nodes = doc.xpath('//hi[@rend="bold"]')
  bold_nodes.each do |hi|
    hi.remove_attribute('rend')
    hi.name = 'strong'
  end
end
71
+
72
# Handles output of \textsc{}.
# Every <hi rend="sc"> element becomes <span class="sc">.
def small_caps(doc)
  doc.xpath('//hi[@rend="sc"]').each do |hi|
    hi.remove_attribute('rend')
    hi['class'] = 'sc'
    hi.name = 'span'
  end
end
80
+
81
# Handles \bigskip, etc.
# The value of a paragraph's spacebefore attribute is moved into an inline
# "margin-top" style, and the spacebefore attribute is dropped.
def skips(doc)
  doc.xpath('//p[@spacebefore]').each do |paragraph|
    margin = paragraph['spacebefore']
    paragraph.remove_attribute('spacebefore')
    paragraph['style'] = "margin-top: #{margin}"
  end
end
88
+
89
# Handles output of \texttt{}.
# Every <hi rend="tt"> element becomes <span class="tt">.
def typewriter(doc)
  doc.xpath('//hi[@rend="tt"]').each do |hi|
    hi.remove_attribute('rend')
    hi['class'] = 'tt'
    hi.name = 'span'
  end
end
97
+
98
# Handles verbatim and Verbatim environments.
#   \begin{verbatim}
#   <stuff>
#   \end{verbatim}
# and
#   \begin{Verbatim}
#   <stuff>
#   \end{Verbatim}
# Note that verbatim is a built-in LaTeX environment, whereas
# Verbatim is loaded by the Verbatim package (and used by the
# code environment).
# Both element names end up as <pre class="verbatim">.
def verbatim(doc)
  to_pre = lambda do |element|
    element.name = 'pre'
    element['class'] = 'verbatim'
  end
  doc.xpath('//verbatim').each(&to_pre)
  doc.xpath('//Verbatim').each(&to_pre)
end
119
+
120
# Handles code environments.
#   \begin{code}
#   <code>
#   \end{code}
# Each <code> element becomes <div class="code">.
def code(doc)
  doc.xpath('//code').each do |listing|
    listing['class'] = 'code'
    listing.name = 'div'
  end
end
130
+
131
# Handles metacode environments.
#   \begin{metacode}
#   <code>
#   \end{metacode}
# Each <metacode> element becomes <div class="code">.
def metacode(doc)
  doc.xpath('//metacode').each do |listing|
    listing['class'] = 'code'
    listing.name = 'div'
  end
end
141
+
142
# Handles math environments.
# Included are
#   \begin{equation}
#   <equation>
#   \end{equation}
# and all the AMS-LaTeX variants defined in
# Preprocessor#math_environments.
# We also handle inline/display math of the form \(x\) and \[y\].
def math(doc)
  # Display math. Numbered equations are processed first and starred ones
  # second, so the order of operations matches two separate passes.
  %w[equation equation*].each do |textype|
    doc.xpath(%(//equation//texmath[@textype="#{textype}"])).each do |node|
      node.name = 'div'
      node['class'] = 'equation'
      # The node's text is a key into literal_cache; swap in the cached value.
      node.content = literal_cache[node.content.strip] + "\n"
      clean_node node, ['textype', 'type']
      node.parent.replace(node)
      # Mimic default Tralics behavior of giving paragraph tags after
      # math a 'noindent' class. This allows the HTML to be styled with
      # CSS in a way that replicates the default behavior of LaTeX, where
      # math can be included in a paragraph. In such a case, paragraphs
      # are indented by default, but text after math environments isn't
      # indented. In HTML, including a math div inside a p tag is illegal,
      # so the next best thing is to add a 'noindent' class to the p tag
      # following the math. Most documents won't use this, as the HTML
      # convention is not to indent paragraphs anyway, but we want to
      # support that case for completeness (mainly because Tralics does).
      next_paragraph = node.next_sibling
      # The math may not be followed by anything, so guard against nil.
      next_paragraph['noindent'] = 'true' unless next_paragraph.nil?
    end
  end

  # Paragraphs with noindent
  # See the long comment above.
  doc.xpath('//p[@noindent="true"]').each do |node|
    node['class'] = 'noindent'
    node.remove_attribute('noindent')
  end

  # inline math
  doc.xpath('//inline').each do |node|
    node.name = 'span'
    node.content = literal_cache[node.content.strip]
    node['class'] = 'inline_math'
    clean_node node, ['textype', 'type']
  end
end
216
+
217
# Handles frontmatter (if any).
# Each <frontmatter> element becomes <div id="frontmatter"> with a
# data-number of 0.
def frontmatter(doc)
  doc.xpath('//frontmatter').each do |front|
    front['id'] = 'frontmatter'
    front['data-number'] = 0
    front.name = 'div'
  end
end
225
+
226
# Handles mainmatter.
# The children of each <mainmatter> element are appended to its parent,
# and the now-empty wrapper is removed.
def mainmatter(doc)
  doc.xpath('//mainmatter').each do |wrapper|
    container = wrapper.parent
    container << wrapper.children
    wrapper.remove
  end
end
233
+
234
# Processes and places footnotes.
# Footnotes are grouped by the chapter number of the node they appear in.
# For the chapter div at index i, the footnotes grouped under number i are
# placed via make_footnotes with that chapter as the anchor; the group for
# the final chapter number is placed with no anchor. Finally each footnote
# is rewritten into its reference mark.
def footnotes(doc)
  by_chapter = Hash.new { |hash, key| hash[key] = [] }
  doc.xpath('//note[@place="foot"]').each do |note|
    by_chapter[chapter_number(note)] << note
  end
  # Handle chapters 1 through n-1.
  doc.xpath('//div[@class="chapter"]').each_with_index do |chapter, index|
    make_footnotes(by_chapter, index, chapter)
  end
  # Place the footnotes for Chapter n (if any).
  chapter_count = doc.xpath('//div[@class="chapter"]').length
  make_footnotes(by_chapter, chapter_count)
  rewrite_contents(by_chapter)
end
249
+
250
# Returns a unique CSS id for the footnotes of a given chapter,
# e.g. "cha-3_footnotes" for chapter 3.
def footnotes_id(chapter_number)
  format('cha-%s_footnotes', chapter_number)
end
254
+
255
# Returns a unique CSS id for footnote n in the given chapter,
# e.g. "cha-2_footnote-5".
def footnote_id(chapter_number, n)
  format('cha-%s_footnote-%s', chapter_number, n)
end
259
+
260
# Returns the href needed to link to footnote n:
# a "#" followed by the footnote's CSS id.
def footnote_href(chapter_number, n)
  '#' + footnote_id(chapter_number, n)
end
264
+
265
# Returns a unique CSS id for the reference to footnote n in the given
# chapter, e.g. "cha-2_footnote-ref-5".
def footnote_ref_id(chapter_number, n)
  format('cha-%s_footnote-ref-%s', chapter_number, n)
end
269
+
270
# Returns the href needed to link to the reference for footnote n:
# a "#" followed by the reference's CSS id.
def footnote_ref_href(chapter_number, n)
  '#' + footnote_ref_id(chapter_number, n)
end
274
+
275
# Builds and places the footnote list for one chapter number.
# Does nothing (and returns nil) when no footnotes were collected under
# previous_chapter_number. Otherwise the list built by footnotes_list is
# handed to place_footnotes together with the optional chapter node.
def make_footnotes(footnotes, previous_chapter_number, chapter = nil)
  return if footnotes[previous_chapter_number].empty?

  footnotes_node = footnotes_list(footnotes, previous_chapter_number)
  place_footnotes(footnotes_node, previous_chapter_number, chapter)
end
282
+
283
# Returns a list of footnotes ready for placement.
# The list holds one <li> per footnote of the given chapter; each item
# contains the footnote's inner HTML followed by an arrow link back to
# the footnote reference.
def footnotes_list(footnotes, chapter_number)
  doc = footnotes.values.first.first.document
  # For symbolic footnotes, we want to suppress numbers, which can be
  # done in CSS, but it doesn't work in many EPUB & MOBI readers.
  # As a kludge, we switch to ul in this case, which looks nicer.
  list_type = footnote_symbols? ? 'ul' : 'ol'
  footnotes_node = Nokogiri::XML::Node.new(list_type, doc)
  footnotes_node['class'] = 'footnotes'
  footnotes_node['class'] += ' nonumbers' if footnote_symbols?
  footnotes[chapter_number].each.with_index(1) do |footnote, n|
    reflink = Nokogiri::XML::Node.new('a', doc)
    reflink['class'] = 'arrow'
    reflink.content = "↑"
    reflink['href'] = footnote_ref_href(chapter_number, n)
    html = "#{footnote.inner_html} #{reflink.to_xhtml}"
    # fnsymbol is zero-based, whereas n counts from one.
    html = "<sup>#{fnsymbol(n - 1)}</sup> #{html}" if footnote_symbols?
    note = Nokogiri::XML::Node.new('li', doc)
    note['id'] = footnote_id(chapter_number, n)
    note.inner_html = html
    footnotes_node.add_child note
  end
  footnotes_node
end
308
+
309
# Places footnotes for Chapter n-1 just before Chapter n.
# The list is wrapped in a <div> whose id comes from footnotes_id. With no
# chapter given, the wrapper is appended to the document's last top-level
# child; otherwise it is inserted immediately before the chapter node.
def place_footnotes(footnotes_node, chapter_number, chapter = nil)
  doc = footnotes_node.document
  wrapper = Nokogiri::XML::Node.new('div', doc)
  wrapper['id'] = footnotes_id(chapter_number)
  wrapper.add_child footnotes_node
  return doc.children.last.add_child(wrapper) if chapter.nil?

  chapter.add_previous_sibling(wrapper)
end
321
+
322
# Rewrites contents of each footnote with its corresponding number.
# Each footnote node becomes a <sup class="footnote"> holding a link to
# the footnote itself; the link text is the footnote's number, or its
# symbol when footnote_symbols? is true.
def rewrite_contents(footnotes)
  footnotes.each do |chapter_number, chapter_footnotes|
    chapter_footnotes.each_with_index do |node, i|
      n = i + 1
      node.name = 'sup'
      clean_node node, %w{place id id-text data-tralics-id data-number}
      node['id'] = footnote_ref_id(chapter_number, n)
      node['class'] = 'footnote'
      link = Nokogiri::XML::Node.new('a', node.document)
      link['href'] = footnote_href(chapter_number, n)
      link.content = footnote_symbols? ? fnsymbol(i) : n.to_s
      node.inner_html = link
      # Add an inter-sentence space if appropriate.
      # A footnote that is the first node of its parent has no previous
      # sibling, so there is no preceding character to inspect.
      before = node.previous_sibling
      previous_character = before.nil? ? nil : before.content[-1]
      end_of_sentence = %w[. ! ?].include?(previous_character)
      after = node.next_sibling
      end_of_paragraph = after.nil? || after.content.strip.empty?
      if end_of_sentence && !end_of_paragraph
        space = Nokogiri::XML::Node.new('span', node.document)
        space['class'] = 'intersentencespace'
        node['class'] += ' intersentence'
        node.add_next_sibling(space)
      end
    end
  end
end
350
+
351
# Returns the nth footnote symbol for use in non-numerical footnotes.
# By using the modulus operator %, we arrange to loop around to the
# front if the number of footnotes exceeds the number of symbols.
def fnsymbol(n)
  marks = ['*', '†', '‡', '§', '¶', '‖', '**', '††', '‡‡']
  marks.fetch(n % marks.length)
end
358
+
359
# Returns the chapter number for a given node.
# Every node is inside some div that has a 'data-number' attribute,
# so walk up through the parents until a non-empty one is found.
# Then return the first number in the value, e.g., 1 for "1.2".
def chapter_number(node)
  current = node
  loop do
    number = current['data-number']
    return number.split('.').first.to_i if number && !number.empty?

    current = current.parent
  end
end
371
+
372
# Handles logos for TeX and LaTeX.
# Each <TeX> and <LaTeX> element is replaced by a fragment parsed from the
# styled HTML returned by tex and latex respectively.
def tex_logos(doc)
  doc.xpath('//TeX').each do |logo|
    logo.replace(Nokogiri::XML.fragment(tex))
  end
  doc.xpath('//LaTeX').each do |logo|
    logo.replace(Nokogiri::XML.fragment(latex))
  end
end
381
+
382
# Returns HTML for a nicely styled TeX logo.
# The markup is assembled from the shared font stack and the inline style
# that lowers the "E".
def tex
  font_family = "'CMU Serif', cmr10, LMRoman10-Regular, 'Times New Roman', 'Nimbus Roman No9 L', Times, serif"
  lowered_e = 'text-transform: uppercase; vertical-align: -0.5ex; margin-left: -0.1667em; margin-right: -0.125em;'
  %(<span class="texhtml" style="font-family: #{font_family};">T<span style="#{lowered_e}">E</span>X</span>)
end
386
+
387
# Returns HTML for a nicely styled LaTeX logo.
# The markup is assembled from the shared font stack and the inline styles
# that raise the "A" and lower the "E".
def latex
  font_family = "'CMU Serif', cmr10, LMRoman10-Regular, 'Times New Roman', 'Nimbus Roman No9 L', Times, serif"
  raised_a = 'text-transform: uppercase; font-size: 70%; margin-left: -0.36em; vertical-align: 0.3em; line-height: 0; margin-right: -0.15em;'
  lowered_e = 'text-transform: uppercase; margin-left: -0.1667em; vertical-align: -0.5ex; line-height: 0; margin-right: -0.125em;'
  %(<span class="texhtml" style="font-family: #{font_family};">L<span style="#{raised_a}">A</span>T<span style="#{lowered_e}">E</span>X</span>)
end
391
+
392
# Handles \begin{quote} ... \end{quote}.
# Each <p rend="quoted"> becomes <blockquote class="quote">.
def quote(doc)
  doc.xpath('//p[@rend="quoted"]').each do |paragraph|
    clean_node paragraph, 'rend'
    paragraph['class'] = 'quote'
    paragraph.name = 'blockquote'
  end
end
400
+
401
# Handles \begin{verse} ... \end{verse}.
# Each <p rend="verse"> becomes <blockquote class="verse">.
def verse(doc)
  doc.xpath('//p[@rend="verse"]').each do |paragraph|
    clean_node paragraph, %w{rend noindent}
    paragraph['class'] = 'verse'
    paragraph.name = 'blockquote'
  end
end
409
+
410
# Converts itemized lists to uls.
# Matches <list type="simple"> elements.
def itemize(doc)
  simple_lists = doc.xpath('//list[@type="simple"]')
  simple_lists.each do |list|
    clean_node list, 'type'
    list.name = 'ul'
  end
end
417
+
418
# Converts enumerated lists to ols.
# Matches <list type="ordered"> elements.
def enumerate(doc)
  ordered_lists = doc.xpath('//list[@type="ordered"]')
  ordered_lists.each do |list|
    clean_node list, 'type'
    list.name = 'ol'
  end
end
425
+
426
# Converts list items to <li> elements.
# The item's content is replaced by the inner HTML of its first <p>.
# An item without any <p> keeps its existing content instead of raising
# on a nil paragraph.
def item(doc)
  doc.xpath('//item').each do |node|
    clean_node node, %w{id-text id label}
    node.name = 'li'
    paragraph = node.at_css('p')
    node.inner_html = paragraph.inner_html unless paragraph.nil?
  end
end
434
+
435
# Removes remaining errors.
# Every <error> element is removed from the document.
def remove_errors(doc)
  error_nodes = doc.xpath('//error')
  error_nodes.map { |error| error.remove }
end
439
+
440
# Set the Tralics ids.
# The original id of every element is preserved in 'data-tralics-id', and
# ids are then taken from <data-label> elements where present.
def set_ids(doc)
  doc.xpath('//*[@id]').each do |node|
    # TODO: make whitelist of non-tralics id's
    next if node['id'] =~ /footnote/

    node['data-tralics-id'] = node['id']
    convert_labels(node)
    clean_node node, %w{data-label}
  end
  # Replace '<unexpected>' tags with their children.
  doc.xpath('//unexpected').each do |node|
    node.parent.children = node.children
    node.remove
  end
  # Figures and tables are labelled in exactly the same way.
  relabel = lambda do |node|
    if (unexpected = node.at_css('unexpected'))
      # Tralics puts in an 'unexpected' tag sometimes.
      label = node.at_css('data-label')
      node['id'] = pipeline_label(label)
      unexpected.remove
      clean_node node, %w{data-label}
    elsif (label = node.at_css('data-label'))
      node['id'] = pipeline_label(label)
      label.remove
      clean_node node, %w{data-label}
    end
  end
  doc.xpath('//figure').each(&relabel)
  doc.xpath('//table').each(&relabel)
  # For equations, the id goes on the inner <texmath> element.
  doc.xpath('//equation').each do |node|
    if (label = node.at_css('data-label'))
      node.at_css('texmath')['id'] = pipeline_label(label)
      label.remove
    end
  end
end
488
+
489
# Convert data-labels to valid CSS ids.
# The first direct child named 'data-label' supplies the node's id (via
# pipeline_label) and is then removed; nodes without one are left alone.
def convert_labels(node)
  label = node.children.find { |child| child.name == 'data-label' }
  return if label.nil?

  node['id'] = pipeline_label(label)
  label.remove
end
499
+
500
# Restores the label.
# Tralics does weird stuff with underscores, so they are subbed out
# so that they can be passed through the pipeline intact. This is where
# we restore them: each occurrence of underscore_digest in the node's
# inner HTML is replaced by '_'.
def pipeline_label(node)
  digested = node.inner_html
  digested.gsub(underscore_digest, '_')
end
507
+
508
# Processes the <head> tag given a section node.
# Supports chapter, section, and subsection.
# The node's first child is renamed to the given heading tag, and its
# contents are wrapped in an <a class="heading"> that links to the node's
# id when the node has one.
def make_headings(doc, node, name)
  head = node.children.first
  head.name = name
  anchor = doc.create_element 'a'
  section_id = node['id']
  anchor['href'] = "##{section_id}" unless section_id.nil?
  anchor['class'] = 'heading'
  anchor << head.children
  head << anchor
end
519
+
520
# Converts div0 to chapters and sections depending on node type.
# Chapters get an h1 heading and sections an h2; a rend of 'nonumber'
# appends '-star' to the class.
def chapters_and_sections(doc)
  doc.xpath('//div0').each do |node|
    node.name = 'div'
    css_class, heading = node['type'] == 'chapter' ? %w[chapter h1] : %w[section h2]
    css_class += '-star' if node['rend'] == 'nonumber'
    node['class'] = css_class
    clean_node node, %w{type rend}
    make_headings(doc, node, heading)
  end
end
538
+
539
# Converts div1 to subsections.
# A rend of 'nonumber' yields the class 'subsection-star'; headings are h3.
def subsection(doc)
  doc.xpath('//div1').each do |node|
    node.name = 'div'
    starred = node['rend'] == 'nonumber'
    node['class'] = starred ? 'subsection-star' : 'subsection'
    clean_node node, %w{rend}
    make_headings(doc, node, 'h3')
  end
end
551
+
552
# Converts div2 to subsubsections.
# Headings are h4. The rend attribute is removed without being consulted.
def subsubsection(doc)
  doc.xpath('//div2').each do |section|
    section.name = 'div'
    section['class'] = 'subsubsection'
    clean_node section, %w{rend}
    make_headings(doc, section, 'h4')
  end
end
561
+
562
# Converts heading elements to the proper spans.
# Headings are used in theorem-like environments like asides.
# Each <heading> becomes <span class="description">.
def headings(doc)
  doc.xpath('//heading').each do |heading|
    heading['class'] = 'description'
    heading.name = 'span'
  end
end
570
+
571
# Converts strikeout text (\sout) to the proper tag.
# Each <sout> element is renamed to <del>.
def sout(doc)
  struck = doc.xpath('//sout')
  struck.each { |element| element.name = 'del' }
end
577
+
578
# Converts inline code (\kode) to the proper tag.
# Each <kode> element is renamed to <code>.
def kode(doc)
  inline_code = doc.xpath('//kode')
  inline_code.each { |element| element.name = 'code' }
end
584
+
585
# Converts filesystem path (\filepath) to the proper tag.
# Each <filepath> element becomes <span class="filepath">.
def filepath(doc)
  doc.xpath('//filepath').each do |path|
    path['class'] = 'filepath'
    path.name = 'span'
  end
end
592
+
593
# Builds the full heading for codelisting-like environments.
# The full heading, such as "Listing 1.1: Foo bars." needs to be
# extracted and manipulated to produce the right tags and classes.
# Returns the heading node (the environment's first <p>, converted to
# <div class="heading">).
def build_heading(node, css_class)
  node.name = 'div'
  node['class'] = css_class

  heading = node.at_css('p')
  # Move every attribute of the heading paragraph up to the environment.
  heading.attributes.each do |key, value|
    node.set_attribute(key, value)
    heading.remove_attribute(key)
  end
  heading.name = 'div'
  heading['class'] = 'heading'

  number = heading.at_css('strong')
  number.name = 'span'
  number['class'] = 'number'
  # Code listings end their number with a colon, everything else a period.
  number.content += (css_class == 'codelisting' ? ':' : '.')

  heading
end
619
+
620
# Processes codelisting environments.
# After the heading is built, the div.code found inside the heading is
# re-added as a child of the listing itself.
def codelistings(doc)
  doc.xpath('//codelisting').each do |listing|
    heading = build_heading(listing, 'codelisting')
    listing.add_child(heading.at_css('div.code'))
  end
end
628
+
629
# Add in breaks from '\\'.
# We use a span instead of '<br />' because breaks can't be styled
# easily, and are also invalid in some contexts where we want a
# break (e.g., inside h1 tags).
# Each <backslashbreak> becomes <span class="break">.
def backslash_break(doc)
  doc.xpath('//backslashbreak').each do |line_break|
    line_break['class'] = 'break'
    line_break.name = 'span'
  end
end
639
+
640
# Handles normal, thin, and intersentence spaces.
#   <thinspace>          becomes <span class="thinspace">&thinsp;</span>
#   <normalspace>        is replaced by a single space character
#   <intersentencespace> becomes <span class="intersentencespace">
def spaces(doc)
  doc.xpath('//thinspace').each do |thin|
    thin.name = 'span'
    thin['class'] = 'thinspace'
    thin.inner_html = '&thinsp;'
  end
  doc.xpath('//normalspace').each { |normal| normal.replace(' ') }
  doc.xpath('//intersentencespace').each do |gap|
    gap.name = 'span'
    gap['class'] = 'intersentencespace'
  end
end
655
+
656
# Processes boxes/asides.
# Every <aside> element is handed to build_heading with the class 'aside'.
def asides(doc)
  aside_nodes = doc.xpath('//aside')
  aside_nodes.each { |aside| build_heading(aside, 'aside') }
end
662
+
663
# Processes centered elements.
# Each <center> element becomes <div class="center">.
def center(doc)
  doc.xpath('//center').each do |centered|
    centered['class'] = 'center'
    centered.name = 'div'
  end
end
670
+
671
# Handles the title, author, date, etc., produced by \maketitle.
# Each <maketitle> element becomes <div id="title_page">. For every field
# present in maketitle_elements, its value is run through the pipeline and
# the contents of the first resulting <p> become an h1 (title, subtitle)
# or h2 (author, date) with the field name as its class.
def title(doc)
  doc.xpath('//maketitle').each do |node|
    node.name = 'div'
    node['id'] = 'title_page'
    %w{title subtitle author date}.each do |field|
      title_element = maketitle_elements[field]
      if title_element
        raw = Polytexnic::Pipeline.new(title_element).to_html
        content = Nokogiri::HTML.fragment(raw).at_css('p')
        # A field that renders to no paragraph has nothing to show.
        # Skip it for every field, not just the date, so that an empty
        # title or author does not raise on a nil paragraph.
        next if content.nil?

        type = %w{title subtitle}.include?(field) ? 'h1' : 'h2'
        el = Nokogiri::XML::Node.new(type, doc)
        el.inner_html = content.inner_html.strip
        el['class'] = field
        node.add_child el
      elsif field == 'date'
        # Date is missing, so insert today's date.
        el = Nokogiri::XML::Node.new('h2', doc)
        el['class'] = field
        el.inner_html = Date.today.strftime("%A, %b %e")
        node.add_child el
      end
    end
  end
end
698
+
699
# Converts text to smart single quotes and apostrophes.
# This means `foo bar' and "don't" are converted to use nice curly
# "smart" quotes and apostrophes.
# We don't bother with double quotes because Tralics already handles
# those.
# The document is serialized, rewritten as a string, and parsed again, so
# a new document is returned.
def smart_single_quotes(doc)
  curled = doc.to_xml.gsub('`', '‘').gsub("'", '’')
  Nokogiri::XML(curled)
end
710
+
711
# Restores literal environments (verbatim, code, math, etc.).
# These environments are hashed and passed through the pipeline
# so that Tralics doesn't process them.
# The text of each placeholder element is used as a key into literal_cache.
def restore_literal(doc)
  doc.xpath('//literal').each do |placeholder|
    raw_content = literal_cache[placeholder.content]
    placeholder.parent.content = escape_backslashes(raw_content)
    placeholder.remove
  end
  # Restore equation references.
  doc.xpath('//eqref').each do |reference|
    reference.content = literal_cache[reference.content]
    reference.name = 'span'
    reference['class'] = 'eqref'
  end
  # Restore non-ASCII unicode
  doc.xpath('//unicode').each do |character|
    character.content = literal_cache[character.content]
    character.name = 'span'
    character['class'] = 'unicode'
  end
end
733
+
734
# Restores things inside \verb+...+
# Each <inlineverbatim> element gets its cached content back and becomes
# <span class="inline_verbatim">.
def restore_inline_verbatim(doc)
  doc.xpath('//inlineverbatim').each do |placeholder|
    key = placeholder.content
    placeholder.content = literal_cache[key]
    placeholder.name = 'span'
    placeholder['class'] = 'inline_verbatim'
  end
end
742
+
743
# Creates linked cross-references.
# Three passes: number every element that has a Tralics id (adding a
# number span to h1-h3 heading links), turn each <ref> into a span showing
# its target's number, and turn every remaining element with a 'target'
# attribute into a hyperref.
def make_cross_references(doc)
  # build numbering tree
  doc.xpath('//*[@data-tralics-id]').each do |node|
    node['data-number'] = formatted_number(node)
    clean_node node, 'id-text'
    head = node.css('h1 a, h2 a, h3 a').first
    next unless head

    # Add number span
    number = node['data-number']
    prefix = (@cha.nil? || number.match(/\./)) ? '' : 'Chapter '
    el = doc.create_element 'span'
    el.content = prefix + number + ' '
    el['class'] = 'number'
    first_child = head.children.first
    if first_child.nil?
      head.add_child(el)
    else
      first_child.add_previous_sibling(el)
    end
  end

  # Index the numbered elements by their Tralics id.
  targets = doc.xpath("//*[@data-tralics-id]")
  target_cache = targets.each_with_object({}) do |target, cache|
    cache[target['data-tralics-id']] = target
  end

  doc.xpath('//ref').each do |node|
    node.name = 'span'
    target = target_cache[node['target']]
    if target.nil?
      node['class'] = 'undefined_ref'
      node.content = node['target']
    else
      node['class'] = 'ref'
      node.content = target['data-number']
    end
    clean_node node, 'target'
  end

  doc.xpath('//*[@target]').each do |node|
    node['href'] = "##{node['target'].gsub(':', '-')}"
    node['class'] = 'hyperref'
    clean_node node, 'target'
  end
end
790
+
791
# Returns the formatted number appropriate for the node.
# E.g., "2.1" for a section.
# Returns nil for a node that matches none of the known kinds.
# Note: sets @cha (and the other counters) as a side-effect. Yes, this is
# gross.
def formatted_number(node)
  klass = node['class']
  id_text = node['id-text']
  case
  when klass == 'chapter'
    # Tralics numbers figures & equations
    # overall, not per chapter, so we need
    # counters.
    @equation = 0
    @figure = 0
    @cha = id_text
  when klass == 'section'
    @sec = id_text
    label_number(@cha, @sec)
  when klass == 'subsection'
    @subsec = id_text
    label_number(@cha, @sec, @subsec)
  when klass == 'subsubsection'
    @ssubsec = id_text
    label_number(@cha, @sec, @subsec, @ssubsec)
  when node['textype'] == 'equation'
    # Without chapters the Tralics number is used as is.
    if @cha.nil?
      @equation = id_text
    else
      @equation += 1
    end
    label_number(@cha, @equation)
  when klass == 'codelisting', klass == 'aside'
    id_text
  when node.name == 'table' && id_text
    @table = id_text
    label_number(@cha, @table)
  when node.name == 'figure'
    # Without chapters the Tralics number is used as is.
    if @cha.nil?
      @figure = id_text
    else
      @figure += 1
    end
    label_number(@cha, @figure)
  end
end
834
+
835
# Builds a dotted label number from its parts, skipping any that are nil.
# For example, label_number("1", "2") gives "1.2" and
# label_number(nil, "3") gives "3".
def label_number(*args)
  args.reject(&:nil?).join('.')
end
840
+
841
# Converts every <xref> element into an <a> link.
# The href is looked up in literal_cache using the element's 'url'
# attribute, which is removed afterwards.
def hrefs(doc)
  doc.xpath('//xref').each do |link|
    link.name = 'a'
    link['href'] = literal_cache[link['url']]
    # Put a class on hrefs containing TeX to allow a style override.
    link.traverse do |inner|
      next unless inner['class'] == 'texhtml'

      link['class'] = 'tex'
      break
    end
    clean_node(link, 'url')
  end
end
855
+
856
# Processes every <figure> element in the document.
# Tralics uses the <figure> tag for both \includegraphics and figure
# environments, so a single pass handles both cases.
def graphics_and_figures(doc)
  doc.xpath('//figure').each { |figure| process_graphic(figure, klass: 'figure') }
end
864
+
865
# Turns a graphic node into a <div> containing the image and, unless the
# graphic is inline, a caption.
#
# node    - the element to convert; its 'rend', 'class', 'file', and
#           'extension' attributes are read.
# options - a hash whose :klass entry is the CSS class added to non-inline
#           graphics.
def process_graphic(node, options={})
  klass = options[:klass]
  node.name = 'div'
  inline = node['rend'] == 'inline'
  unless inline
    existing = node['class']
    node['class'] = existing ? existing + " #{klass}" : klass
  end

  paragraph = node.at_css('p')
  clean_node(paragraph, 'rend') if paragraph

  if node['file'] && node['extension']
    # Support PDF images in PDF documents and PNGs in HTML.
    extension = node['extension'] == 'pdf' ? 'png' : node['extension']
    source = "#{node['file']}.#{extension}"
    alt_text = File.basename(node['file'])
    markup = %(<div class="graphics"><img src="#{source}" alt="#{alt_text}" /></div>)
    graphic_node = Nokogiri::HTML.fragment(markup)
    # The image goes ahead of any existing content (the description).
    first_child = node.children.first
    if first_child
      first_child.add_previous_sibling(graphic_node)
    else
      node.add_child(graphic_node)
    end
    clean_node node, %w[file extension rend]
  end

  add_caption(node, name: 'figure') unless inline
end
898
+
899
# Handles the \image and \imagebox commands.
# All <image> elements are processed first, then all <imagebox> elements,
# each with the CSS class that goes with its kind.
def images_and_imageboxes(doc)
  { '//image' => 'image', '//imagebox' => 'image box' }.each do |query, klass|
    doc.xpath(query).each { |element| handle_image(element, klass: klass) }
  end
end
909
+
910
# Converts a custom image element into an <img> tag and turns its parent
# into a <div> with the class "graphics" plus the given :klass option.
# The image source is the element's text with every underscore_digest
# occurrence replaced by '_'; the alt text is the source up to its first
# period. The element's text content is emptied afterwards.
def handle_image(node, options={})
  wrapper = node.parent
  wrapper.name = 'div'
  wrapper['class'] = 'graphics ' + options[:klass]

  node.name = 'img'
  source = node.content.gsub(underscore_digest, '_')
  node['src'] = source
  node['alt'] = node['src'].split('.').first
  node.content = ""
end
921
+
922
# Appends a caption <div class="caption"> to a node.
# This works for figures and tables (at the least).
#
# The caption header is the capitalized :name option followed by the node's
# 'data-number' attribute. If the node contains a <head> element, its inner
# HTML becomes the caption description and the <head> element is removed.
# The node's 'id-text' attribute is removed at the end.
def add_caption(node, options={})
  label = options[:name].to_s.capitalize
  number = node['data-number']
  caption = Nokogiri::XML::Node.new('div', node.document)
  caption['class'] = 'caption'

  description = node.at_css('head')
  if description
    header = %(<span class="header">#{label} #{number}: </span>)
    text = %(<span class="description">#{description.inner_html}</span>)
    description.remove
    caption.inner_html = Nokogiri::HTML.fragment(header + text)
  else
    caption.inner_html = %(<span class="header">#{label} #{number}</span>)
  end

  node.add_child(caption)
  clean_node node, ['id-text']
end
942
+
943
# Converts XML tables to HTML tables in three passes:
#   1. <cell> elements become <td>, with 'cols' copied to 'colspan';
#   2. <row> elements become <tr>, with border attributes set to 'true'
#      turned into CSS classes;
#   3. each <table> is handled either as a tabular (kept as a table) or as
#      a table environment (turned into a <div class="table"> with a caption).
def tables(doc)
  doc.xpath('//table/row/cell').each do |cell|
    cell.name = 'td'
    span = cell['cols']
    cell['colspan'] = span if span
  end

  doc.xpath('//table/row').each do |row|
    row.name = 'tr'
    border_classes = %w[top bottom].each_with_object([]) do |side, classes|
      attribute = "#{side}-border"
      next unless row[attribute] == 'true'

      classes << "#{side}_border"
      clean_node row, [attribute]
    end
    row['class'] = border_classes.join(' ') unless border_classes.empty?
  end

  # Index of the next tabular, passed on to add_cell_alignment.
  tabular_count = 0
  doc.xpath('//table').each do |table|
    if tabular?(table)
      table['class'] = 'tabular'
      clean_node table, %w[rend]
      add_cell_alignment(table, tabular_count)
      tabular_count += 1
      next
    end
    next unless table?(table)

    table.name = 'div'
    table['class'] = 'table'
    unless table.at_css('table')
      # No nested table element: wrap the current children in a new
      # tabular table.
      inner_table = Nokogiri::XML::Node.new('table', doc)
      inner_table['class'] = 'tabular'
      inner_table.children = table.children
      add_cell_alignment(inner_table, tabular_count)
      tabular_count += 1
      table.add_child(inner_table)
    end
    clean_node table, %w[rend]
    add_caption(table, name: 'table')
  end
end
987
+
988
# Adds the alignment (left, center, right) plus the border (if any) to every
# cell of a table.
#
# table         - the table element whose 'tr'/'td' descendants are styled.
# tabular_count - index into @tabular_alignment_cache selecting the column
#                 specification string (e.g., "|l|c|r|") for this table.
#
# Cells are matched to column specifications by column position rather than
# by cell position: a multicolumn cell (one with a 'cols' attribute) takes
# its class from custom_class and advances the column position by its span,
# so the cells after it still pick up the alignment of the column they
# actually occupy. A cell that falls beyond the parsed specifications gets
# no alignment class instead of raising an error.
def add_cell_alignment(table, tabular_count)
  alignments = @tabular_alignment_cache[tabular_count]
  cell_alignments = alignments.scan(/(\|*(?:l|c|r)\|*)/).flatten
  table.css('tr').each do |row|
    column = 0
    row.css('td').each do |cell|
      if custom_alignment?(cell)
        cell['class'] = custom_class(cell)
        span = cell['cols'].to_i
        # Treat a missing or non-numeric span as a single column.
        column += span > 0 ? span : 1
      else
        alignment = cell_alignments[column]
        cell['class'] = alignment_class(alignment) if alignment
        column += 1
      end
      clean_node cell, %w[halign right-border left-border cols]
    end
  end
end
1003
+
1004
# Tells whether a cell carries its own alignment, which is the case for a
# multicolumn cell. The result is the value of the cell's 'cols' attribute,
# so it is truthy when the attribute is present and nil otherwise.
def custom_alignment?(cell)
  cell['cols']
end
1009
+
1010
# Returns the CSS class string for a cell that carries its own alignment.
#
# The classes are derived from the cell's 'left-border', 'halign',
# 'right-border', and 'top-border' attributes, in that order, and joined
# with spaces. Every border class uses an underscore ("top_border"), the
# same spelling used for the other border classes in this file.
# Returns an empty string when none of the attributes is present.
def custom_class(cell)
  classes = []
  classes << 'left_border' if cell['left-border']
  classes << "align_#{cell['halign']}" if cell['halign']
  classes << 'right_border' if cell['right-border']
  classes << 'top_border' if cell['top-border']
  classes.join(' ')
end
1019
+
1020
# Returns the CSS class string for one column specification.
#
# alignment - a column specification such as "l", "|c", or "|r|": an
#             alignment letter (l, c, or r) optionally surrounded by '|'
#             border markers.
#
# The letter maps to align_left / align_center / align_right; leading
# bars add "left_border" and trailing bars add "right_border". Any number
# of bars on a side (e.g., "||l") yields a single border class, so no
# stray '|' characters end up in the class attribute.
def alignment_class(alignment)
  letter_classes = { 'l' => 'align_left', 'c' => 'align_center', 'r' => 'align_right' }
  classes = []
  classes << 'left_border' if alignment.start_with?('|')
  # The lookup gives nil (dropped by compact) if no alignment letter exists.
  classes << letter_classes[alignment[/[lcr]/]]
  classes << 'right_border' if alignment.end_with?('|')
  classes.compact.join(' ')
end
1028
+
1029
# Tells whether a table node comes from a 'tabular' environment.
# Tralics converts both
#   \begin{table}...
# and
#   \begin{tabular}
# to <table> tags; a node counts as tabular when its 'rend' attribute
# is 'inline'.
def tabular?(table)
  rend = table['rend']
  rend == 'inline'
end
1038
+
1039
# Tells whether a table node comes from a 'table' environment.
# Such tables are detected by the presence of a 'data-number' attribute,
# which (per the original note) is added by make_cross_references.
def table?(table)
  number = table['data-number']
  !number.nil?
end
1045
+
1046
# Removes empty paragraphs (<p></p>, possibly containing whitespace) from
# the given string in place. Such paragraphs sometimes creep in due to
# idiosyncrasies of the Tralics conversion.
# Returns the string, or nil if nothing was removed.
def trim_empty_paragraphs(string)
  string.gsub!(%r{<p>\s*</p>}, '')
end
1052
+
1053
# Converts a document to an HTML string.
#
# Source code is highlighted first, and the highlight cache is written
# (as MessagePack) to @highlight_cache_filename.
#
# Because there's no way to know which elements are block-level (and hence
# can't be nested inside a paragraph tag), the children of the <document>
# element are first serialized to XHTML and then re-read with Nokogiri's
# HTML.fragment method, which emits valid markup. This transforms, e.g.,
# the invalid
#   <p>Preformatted text: <pre>text</pre> foo</p>
# to the valid
#   <p>Preformatted text:</p> <pre>text</pre> <p>foo</p>
# Empty paragraphs are trimmed from the result before it is returned.
def convert_to_html(doc)
  highlight_source_code(doc)
  File.open(@highlight_cache_filename, 'wb') { |file| file.write(highlight_cache.to_msgpack) }

  body = doc.at_css('document').children.to_xhtml
  html = Nokogiri::HTML.fragment(body).to_xhtml
  trim_empty_paragraphs(html)
  html
end
1072
+
1073
# Builds the table of contents (if the document has one).
#
# The <tableofcontents> element becomes <div id="table_of_contents">,
# preceded by a "Contents" heading, and is filled with nested lists built
# from every div whose class is exactly 'chapter', 'section', or
# 'subsection' (nesting levels 1, 2, and 3).
def table_of_contents(doc)
  toc = doc.at_css('tableofcontents')
  return if toc.nil?

  toc.add_previous_sibling('<h1 class="contents">Contents</h1>')
  toc.name = 'div'
  toc['id'] = 'table_of_contents'
  toc.remove_attribute 'depth'

  levels = { 'chapter' => 1, 'section' => 2, 'subsection' => 3 }
  html = []
  current_depth = 0
  doc.css('div').each do |node|
    level = levels[node['class']]
    next if level.nil?

    if level == 1
      # The outermost list is opened once, by the first chapter.
      html << '<ul>' if current_depth == 0
    elsif current_depth == level - 1
      # Going exactly one level deeper opens a nested list.
      open_list(html)
    end
    # Coming back up closes one list per level (no-op when not positive).
    (current_depth - level).times { close_list(html) }
    current_depth = level
    insert_li(html, node)
  end
  toc.add_child(Nokogiri::HTML::DocumentFragment.parse(html.join))
end
1115
+
1116
# Appends the markup that opens a nested list to html: '<ul>', preceded by
# '<li>' unless li is false. Returns html.
def open_list(html, li=true)
  li ? html << '<li>' << '<ul>' : html << '<ul>'
end
1120
+
1121
# Appends the markup that closes a nested list to html: '</ul>', followed
# by '</li>' unless li is false. Returns html when the '</li>' is appended
# and nil otherwise.
def close_list(html, li=true)
  html << '</ul>'
  return unless li

  html << '</li>'
end
1125
+
1126
# Appends a list item for the given node to html.
# The item has the node's class and contains the XHTML of the node's
# 'a.heading' link. Note that ' hyperref' is appended to the class of that
# link element itself before it is serialized. Returns html.
def insert_li(html, node)
  opening_tag = %(<li class="#{node['class']}">)
  heading = node.at_css('a.heading')
  heading['class'] = heading['class'] + ' hyperref'
  html << opening_tag
  html << heading.to_xhtml
  html << '</li>'
end
1132
+
1133
# Removes the given attributes from a node.
# attributes may be a single attribute name or an array of names.
def clean_node(node, attributes)
  [*attributes].each do |attribute|
    node.remove_attribute(attribute)
  end
end
1137
+ end
1138
+ end
1139
+ end