polytexnic 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/.pull_requests/1371777257 +0 -0
- data/.pull_requests/1371927975 +0 -0
- data/.pull_requests/1372804345 +0 -0
- data/.pull_requests/1374784075 +0 -0
- data/.pull_requests/1375304853 +0 -0
- data/.pull_requests/1375408308 +0 -0
- data/.pull_requests/1375409462 +0 -0
- data/.pull_requests/1375410668 +0 -0
- data/.pull_requests/1375472132 +0 -0
- data/.pull_requests/1375485496 +0 -0
- data/.pull_requests/1375487548 +0 -0
- data/.pull_requests/1375492835 +0 -0
- data/.pull_requests/1375497765 +0 -0
- data/.pull_requests/1375559547 +0 -0
- data/.pull_requests/1375589063 +0 -0
- data/.pull_requests/1375841786 +0 -0
- data/.pull_requests/1376352634 +0 -0
- data/.pull_requests/1376353299 +0 -0
- data/.pull_requests/1376449284 +0 -0
- data/.pull_requests/1376452696 +0 -0
- data/.pull_requests/1376454166 +0 -0
- data/.pull_requests/1376532291 +0 -0
- data/.pull_requests/1376625487 +0 -0
- data/.pull_requests/1376690108 +0 -0
- data/.pull_requests/1376699046 +0 -0
- data/.pull_requests/1376707642 +0 -0
- data/.pull_requests/1377230284 +0 -0
- data/.pull_requests/1379118478 +0 -0
- data/.pull_requests/1379123150 +0 -0
- data/.pull_requests/1380221847 +0 -0
- data/.pull_requests/1380589654 +0 -0
- data/.pull_requests/1380673142 +0 -0
- data/.pull_requests/1380850800 +0 -0
- data/.pull_requests/1381001264 +0 -0
- data/.pull_requests/1381005204 +0 -0
- data/.pull_requests/1381103022 +0 -0
- data/.pull_requests/1381252832 +0 -0
- data/.pull_requests/1381276624 +0 -0
- data/.pull_requests/1381344234 +0 -0
- data/.pull_requests/1381385297 +0 -0
- data/.pull_requests/1381427498 +0 -0
- data/.pull_requests/1381429761 +0 -0
- data/.pull_requests/1381873684 +0 -0
- data/.pull_requests/1382045490 +0 -0
- data/.pull_requests/1382056384 +0 -0
- data/.pull_requests/1382405223 +0 -0
- data/.pull_requests/1382478400 +0 -0
- data/.pull_requests/1382479780 +0 -0
- data/.pull_requests/1382485483 +0 -0
- data/.pull_requests/1382569911 +0 -0
- data/.pull_requests/1382646199 +0 -0
- data/.pull_requests/1382649778 +0 -0
- data/.pull_requests/1382660987 +0 -0
- data/.pull_requests/1382743927 +0 -0
- data/.pull_requests/1382840347 +0 -0
- data/.pull_requests/1383077676 +0 -0
- data/.pull_requests/1383086948 +0 -0
- data/.pull_requests/1383161978 +0 -0
- data/.pull_requests/1383263695 +0 -0
- data/.pull_requests/1383274008 +0 -0
- data/.pull_requests/1383327328 +0 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Guardfile +15 -0
- data/LICENSE.txt +22 -0
- data/README.md +21 -0
- data/Rakefile +2 -0
- data/lib/polytexnic/literal.rb +299 -0
- data/lib/polytexnic/postprocessor.rb +28 -0
- data/lib/polytexnic/postprocessors/html.rb +1139 -0
- data/lib/polytexnic/postprocessors/latex.rb +18 -0
- data/lib/polytexnic/postprocessors/polytex.rb +44 -0
- data/lib/polytexnic/preprocessor.rb +23 -0
- data/lib/polytexnic/preprocessors/html.rb +349 -0
- data/lib/polytexnic/preprocessors/latex.rb +43 -0
- data/lib/polytexnic/preprocessors/polytex.rb +127 -0
- data/lib/polytexnic/utils.rb +176 -0
- data/lib/polytexnic/version.rb +3 -0
- data/lib/polytexnic.rb +92 -0
- data/notes/pandoc.md +41 -0
- data/polytexnic.gemspec +28 -0
- data/polytexnic_commands.sty +5 -0
- data/precompiled_binaries/tralics +0 -0
- data/spec/fixtures/code_listing.tex +14 -0
- data/spec/fixtures/figures.tex +8 -0
- data/spec/fixtures/inline_math.html +4 -0
- data/spec/fixtures/inline_math.tex +3 -0
- data/spec/fixtures/math_environments.html +50 -0
- data/spec/fixtures/math_environments.tex +56 -0
- data/spec/fixtures/section_xrefs.tex +9 -0
- data/spec/fixtures/sidebar.tex +10 -0
- data/spec/fixtures/tables.tex +8 -0
- data/spec/fixtures/verbatim_environments.html +11 -0
- data/spec/fixtures/verbatim_environments.tex +13 -0
- data/spec/integration_spec.rb +34 -0
- data/spec/markdown_to_polytex_spec.rb +192 -0
- data/spec/resemble_matcher_spec.rb +69 -0
- data/spec/spec_helper.rb +38 -0
- data/spec/support/resemble_matcher.rb +100 -0
- data/spec/to_html/asides_spec.rb +42 -0
- data/spec/to_html/chapters_and_sections_spec.rb +268 -0
- data/spec/to_html/characters_and_punctuation_spec.rb +138 -0
- data/spec/to_html/codelistings_spec.rb +70 -0
- data/spec/to_html/core_spec.rb +227 -0
- data/spec/to_html/eqref_spec.rb +32 -0
- data/spec/to_html/footnote_spec.rb +164 -0
- data/spec/to_html/graphics_and_figures_spec.rb +358 -0
- data/spec/to_html/lists_spec.rb +103 -0
- data/spec/to_html/literal_environments/code_spec.rb +141 -0
- data/spec/to_html/literal_environments/math_spec.rb +255 -0
- data/spec/to_html/literal_environments/unicode_spec.rb +12 -0
- data/spec/to_html/literal_environments/verbatim_spec.rb +168 -0
- data/spec/to_html/quotations_and_verse_spec.rb +86 -0
- data/spec/to_html/table_of_contents_spec.rb +93 -0
- data/spec/to_html/table_spec.rb +269 -0
- data/spec/to_html/text_formatting_spec.rb +50 -0
- data/spec/to_latex_spec.rb +197 -0
- data/tasks/bin/ruby_tests +41 -0
- data/tasks/run_tests_with_both_rubies.rake +5 -0
- data/tmp/.gitkeep +0 -0
- metadata +286 -0
@@ -0,0 +1,1139 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
module Polytexnic
|
3
|
+
module Postprocessor
|
4
|
+
module Html
|
5
|
+
|
6
|
+
# Converts Tralics XML output to HTML.
|
7
|
+
# Parses the XML string and runs every conversion step over it in a fixed
# order, returning whatever convert_to_html produces for the final document.
def xml_to_html(xml)
  doc = Nokogiri::XML(xml)

  # Steps applied to the freshly parsed document.
  %w[emphasis boldface small_caps typewriter skips verbatim code metacode
     quote verse itemize enumerate item remove_errors set_ids
     chapters_and_sections subsection subsubsection headings sout kode
     filepath codelistings backslash_break spaces asides center
     title].each { |step| __send__(step, doc) }

  # smart_single_quotes hands back a re-parsed document, so rebind `doc`
  # before running the remaining steps.
  doc = smart_single_quotes(doc)

  %w[tex_logos restore_literal restore_inline_verbatim make_cross_references
     hrefs graphics_and_figures images_and_imageboxes tables math
     frontmatter mainmatter footnotes
     table_of_contents].each { |step| __send__(step, doc) }

  convert_to_html(doc)
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Handles output of \emph{} and \textit{}.
|
57
|
+
# Renames every <hi rend="it"> element to <em> and drops its 'rend' attribute.
def emphasis(doc)
  italics = doc.xpath('//hi[@rend="it"]')
  italics.each do |italic|
    italic.remove_attribute('rend')
    italic.name = 'em'
  end
end
|
63
|
+
|
64
|
+
# Handles output of \textbf{}.
|
65
|
+
# Renames every <hi rend="bold"> element to <strong> and drops 'rend'.
def boldface(doc)
  bold_nodes = doc.xpath('//hi[@rend="bold"]')
  bold_nodes.each do |bold|
    bold.remove_attribute('rend')
    bold.name = 'strong'
  end
end
|
71
|
+
|
72
|
+
# Handles output of \textsc{}.
|
73
|
+
# Turns every <hi rend="sc"> element into <span class="sc">.
def small_caps(doc)
  doc.xpath('//hi[@rend="sc"]').each do |caps|
    caps.remove_attribute('rend')
    caps['class'] = 'sc'
    caps.name = 'span'
  end
end
|
80
|
+
|
81
|
+
# Handles \bigskip, etc.
|
82
|
+
# Moves each paragraph's 'spacebefore' value into an inline
# "margin-top: ..." style and removes the original attribute.
def skips(doc)
  doc.xpath('//p[@spacebefore]').each do |paragraph|
    gap = paragraph['spacebefore']
    paragraph.remove_attribute('spacebefore')
    paragraph['style'] = "margin-top: #{gap}"
  end
end
|
88
|
+
|
89
|
+
# Handles output of \texttt{}.
|
90
|
+
# Turns every <hi rend="tt"> element into <span class="tt">.
def typewriter(doc)
  doc.xpath('//hi[@rend="tt"]').each do |mono|
    mono.remove_attribute('rend')
    mono['class'] = 'tt'
    mono.name = 'span'
  end
end
|
97
|
+
|
98
|
+
# Handles verbatim and Verbatim environments.
|
99
|
+
# \begin{verbatim}
|
100
|
+
# <stuff>
|
101
|
+
# \end{verbatim}
|
102
|
+
# and
|
103
|
+
# \begin{Verbatim}
|
104
|
+
# <stuff>
|
105
|
+
# \end{Verbatim}
|
106
|
+
# Note that verbatim is a built-in LaTeX environment, whereas
|
107
|
+
# Verbatim is loaded by the Verbatim package (and used by the
|
108
|
+
# code environment).
|
109
|
+
# Rewrites <verbatim> and then <Verbatim> elements as <pre class="verbatim">.
def verbatim(doc)
  as_pre = lambda do |block|
    block['class'] = 'verbatim'
    block.name = 'pre'
  end
  doc.xpath('//verbatim').each(&as_pre)
  doc.xpath('//Verbatim').each(&as_pre)
end
|
119
|
+
|
120
|
+
# Handles code environments.
|
121
|
+
# \begin{code}
|
122
|
+
# <code>
|
123
|
+
# \end{code}
|
124
|
+
# Rewrites every <code> element as <div class="code">.
def code(doc)
  doc.xpath('//code').each do |listing|
    listing['class'] = 'code'
    listing.name = 'div'
  end
end
|
130
|
+
|
131
|
+
# Handles metacode environments.
|
132
|
+
# \begin{metacode}
|
133
|
+
# <code>
|
134
|
+
# \end{metacode}
|
135
|
+
# Rewrites every <metacode> element as <div class="code">.
def metacode(doc)
  doc.xpath('//metacode').each do |listing|
    listing['class'] = 'code'
    listing.name = 'div'
  end
end
|
141
|
+
|
142
|
+
# Handles math environments.
|
143
|
+
# Included are
|
144
|
+
# \begin{equation}
|
145
|
+
# <equation>
|
146
|
+
# \end{equation}
|
147
|
+
# and all the AMS-LaTeX variants defined in
|
148
|
+
# Preprocessor#math_environments.
|
149
|
+
# We also handle inline/display math of the form \(x\) and \[y\].
|
150
|
+
# Converts math placeholders in +doc+ into HTML containers.
#
# * <texmath> nodes of textype "equation" and "equation*" inside an
#   <equation> become <div class="equation"> whose text is looked up in
#   literal_cache (keyed by the node's stripped content); the div then
#   replaces its parent element.
# * The element following each such div is flagged with noindent="true",
#   and every <p noindent="true"> is then given class "noindent".
# * <inline> nodes become <span class="inline_math"> with cached content.
def math(doc)
  # Numbered and starred equations are handled identically, numbered first.
  %w[equation equation*].each do |textype|
    doc.xpath(%(//equation//texmath[@textype="#{textype}"])).each do |node|
      node.name = 'div'
      node['class'] = 'equation'
      node.content = literal_cache[node.content.strip] + "\n"
      clean_node node, ['textype', 'type']
      node.parent.replace(node)
      # Mimic default Tralics behavior of flagging the paragraph after math
      # as 'noindent'. HTML forbids a math div inside a p tag, so the next
      # best thing is to mark the following paragraph so CSS can suppress
      # its indentation, replicating LaTeX's treatment of text after math.
      # The math may be the last node (no sibling) or be followed by a
      # non-element node; both cases are skipped explicitly rather than by
      # rescuing every error.
      following = node.next_sibling
      following['noindent'] = 'true' if following && following.element?
    end
  end

  # Turn the temporary noindent flag into a CSS class.
  doc.xpath('//p[@noindent="true"]').each do |node|
    node['class'] = 'noindent'
    node.remove_attribute('noindent')
  end

  # inline math
  doc.xpath('//inline').each do |node|
    node.name = 'span'
    node.content = literal_cache[node.content.strip]
    node['class'] = 'inline_math'
    clean_node node, ['textype', 'type']
  end
end
|
216
|
+
|
217
|
+
# Handles frontmatter (if any).
|
218
|
+
# Rewrites each <frontmatter> element as <div id="frontmatter"> and sets
# its data-number attribute to 0.
def frontmatter(doc)
  doc.xpath('//frontmatter').each do |front|
    front.name = 'div'
    { 'id' => 'frontmatter', 'data-number' => 0 }.each do |attribute, value|
      front[attribute] = value
    end
  end
end
|
225
|
+
|
226
|
+
# Handles mainmatter.
|
227
|
+
# Unwraps each <mainmatter> element: its children are appended to its
# parent and the now-empty wrapper is removed.
# NOTE(review): the children are appended at the END of the parent, not at
# the wrapper's position -- confirm mainmatter is always the last child.
def mainmatter(doc)
  doc.xpath('//mainmatter').each do |wrapper|
    container = wrapper.parent
    container << wrapper.children
    wrapper.remove
  end
end
|
233
|
+
|
234
|
+
# Processes and places footnotes.
|
235
|
+
# Groups footnote nodes by chapter number, places each group's list, and
# finally rewrites the in-text footnote nodes.
def footnotes(doc)
  by_chapter = Hash.new { |hash, key| hash[key] = [] }
  doc.xpath('//note[@place="foot"]').each do |note|
    by_chapter[chapter_number(note)] << note
  end

  chapter_query = '//div[@class="chapter"]'
  # The i-th chapter div (0-based) receives the notes collected under key i,
  # i.e. those of everything before it.
  doc.xpath(chapter_query).each_with_index do |chapter, index|
    make_footnotes(by_chapter, index, chapter)
  end
  # Notes for the final chapter (if any) have no following chapter div.
  make_footnotes(by_chapter, doc.xpath(chapter_query).length)

  rewrite_contents(by_chapter)
end
|
249
|
+
|
250
|
+
# Returns a unique CSS id for the footnotes of a given chapter.
|
251
|
+
# Builds the id string "cha-<chapter_number>_footnotes".
def footnotes_id(chapter_number)
  format('cha-%s_footnotes', chapter_number)
end
|
254
|
+
|
255
|
+
# Returns a unique CSS id for footnote n in given chapter.
|
256
|
+
# Builds the id string "cha-<chapter_number>_footnote-<n>".
def footnote_id(chapter_number, n)
  format('cha-%s_footnote-%s', chapter_number, n)
end
|
259
|
+
|
260
|
+
# Returns the href needed to link to footnote n.
|
261
|
+
# Prefixes the id from footnote_id with '#' to form a fragment link.
def footnote_href(chapter_number, n)
  ['#', footnote_id(chapter_number, n)].join
end
|
264
|
+
|
265
|
+
# Returns a unique CSS id for the footnote reference.
|
266
|
+
# Builds the id string "cha-<chapter_number>_footnote-ref-<n>".
def footnote_ref_id(chapter_number, n)
  format('cha-%s_footnote-ref-%s', chapter_number, n)
end
|
269
|
+
|
270
|
+
# Returns the href needed to link to reference for footnote n.
|
271
|
+
# Prefixes the id from footnote_ref_id with '#' to form a fragment link.
def footnote_ref_href(chapter_number, n)
  ['#', footnote_ref_id(chapter_number, n)].join
end
|
274
|
+
|
275
|
+
# Builds and places the footnote list for one chapter.
#
# footnotes               - Hash mapping chapter number to an Array of
#                           footnote nodes.
# previous_chapter_number - key of the chapter whose notes are placed.
# chapter                 - node passed through to place_footnotes
#                           (default nil).
#
# Returns nil without doing anything when the chapter has no footnotes;
# otherwise returns the result of place_footnotes.
def make_footnotes(footnotes, previous_chapter_number, chapter = nil)
  return if footnotes[previous_chapter_number].empty?

  footnotes_node = footnotes_list(footnotes, previous_chapter_number)
  place_footnotes(footnotes_node, previous_chapter_number, chapter)
end
|
282
|
+
|
283
|
+
# Returns a list of footnotes ready for placement.
|
284
|
+
# Builds the list node (class "footnotes") holding one <li> per footnote of
# the given chapter. Each item carries the footnote's inner HTML followed by
# an arrow link pointing back to the in-text reference.
def footnotes_list(footnotes, chapter_number)
  doc = footnotes.values.first.first.document
  # For symbolic footnotes, numbers should be suppressed. CSS can do that,
  # but it doesn't work in many EPUB & MOBI readers, so as a kludge the
  # list becomes a ul in that case.
  tag = footnote_symbols? ? 'ul' : 'ol'
  list = Nokogiri::XML::Node.new(tag, doc)
  list['class'] = 'footnotes'
  list['class'] += ' nonumbers' if footnote_symbols?

  footnotes[chapter_number].each_with_index do |footnote, index|
    position = index + 1
    entry = Nokogiri::XML::Node.new('li', doc)
    entry['id'] = footnote_id(chapter_number, position)

    back_link = Nokogiri::XML::Node.new('a', doc)
    back_link['class'] = 'arrow'
    back_link.content = "↑"
    back_link['href'] = footnote_ref_href(chapter_number, position)

    markup = "#{footnote.inner_html} #{back_link.to_xhtml}"
    markup = "<sup>#{fnsymbol(index)}</sup> #{markup}" if footnote_symbols?
    entry.inner_html = markup
    list.add_child entry
  end
  list
end
|
308
|
+
|
309
|
+
# Places footnotes for Chapter n-1 just before Chapter n.
|
310
|
+
# Wraps the footnote list in a <div> whose id comes from footnotes_id, then
# inserts the wrapper just before +chapter+, or appends it to the last
# top-level node of the document when no chapter is given.
def place_footnotes(footnotes_node, chapter_number, chapter = nil)
  doc = footnotes_node.document
  wrapper = Nokogiri::XML::Node.new('div', doc)
  wrapper['id'] = footnotes_id(chapter_number)
  wrapper.add_child footnotes_node

  return doc.children.last.add_child(wrapper) if chapter.nil?

  chapter.add_previous_sibling(wrapper)
end
|
321
|
+
|
322
|
+
# Rewrites contents of each footnote with its corresponding number.
|
323
|
+
# Replaces each in-text footnote node with a linked superscript marker.
#
# footnotes - Hash mapping chapter number to an Array of footnote nodes.
#
# Every node becomes <sup class="footnote"> with an id from footnote_ref_id
# and a single <a> child linking to footnote_href. The link text is the
# 1-based footnote number, or a symbol from fnsymbol when footnote_symbols?
# is true. When the marker follows sentence-ending punctuation and is not
# at the end of its paragraph, an 'intersentencespace' span is added after.
def rewrite_contents(footnotes)
  footnotes.each do |chapter_number, chapter_footnotes|
    chapter_footnotes.each_with_index do |node, i|
      n = i + 1
      node.name = 'sup'
      clean_node node, %w{place id id-text data-tralics-id data-number}
      node['id'] = footnote_ref_id(chapter_number, n)
      node['class'] = 'footnote'
      link = Nokogiri::XML::Node.new('a', node.document)
      link['href'] = footnote_href(chapter_number, n)
      link.content = footnote_symbols? ? fnsymbol(i) : n.to_s
      node.inner_html = link

      # Add an inter-sentence space if appropriate.
      # A footnote can be the first child of its parent, in which case
      # there is no previous sibling and hence no preceding character.
      before = node.previous_sibling
      previous_character = before ? before.content[-1] : nil
      end_of_sentence = %w[. ! ?].include?(previous_character)
      after = node.next_sibling
      end_of_paragraph = after.nil? || after.content.strip.empty?
      next unless end_of_sentence && !end_of_paragraph

      space = Nokogiri::XML::Node.new('span', node.document)
      space['class'] = 'intersentencespace'
      node['class'] += ' intersentence'
      node.add_next_sibling(space)
    end
  end
end
|
350
|
+
|
351
|
+
# Returns the nth footnote symbol for use in non-numerical footnotes.
|
352
|
+
# By using the modulus operator %, we arrange to loop around to the
|
353
|
+
# front if the number of footnotes exceeds the number of symbols.
|
354
|
+
# Picks the footnote mark for index n, cycling through the list of marks.
def fnsymbol(n)
  marks = ['*', '†', '‡', '§', '¶', '‖', '**', '††', '‡‡']
  marks[n % marks.length]
end
|
358
|
+
|
359
|
+
# Returns the chapter number for a given node.
|
360
|
+
# Every node is inside some div that has a 'data-number' attribute,
|
361
|
+
# so recursively search the parents to find it.
|
362
|
+
# Then return the first number in the value, e.g., "1" in "1.2".
|
363
|
+
# Returns the chapter number for +node+ as an Integer.
#
# Walks from the node up through its ancestors until one carries a
# non-empty 'data-number' attribute, then returns the first dot-separated
# component of that value (e.g. 1 for "1.2").
#
# Returns 0 when no ancestor carries a number instead of raising
# NoMethodError on the missing parent. The footnotes method files notes
# under this key and places key 0 before the first chapter div (or at the
# end of the document when there are no chapter divs).
def chapter_number(node)
  current = node
  while current
    number = current['data-number']
    return number.split('.').first.to_i if number && !number.empty?
    current = current.parent
  end
  0
end
|
371
|
+
|
372
|
+
# Handles logos for TeX and LaTeX.
|
373
|
+
# Replaces each <TeX> and then each <LaTeX> element with a fragment parsed
# from the markup returned by tex and latex respectively.
def tex_logos(doc)
  doc.xpath('//TeX').each do |placeholder|
    logo = Nokogiri::XML::fragment(tex)
    placeholder.replace(logo)
  end
  doc.xpath('//LaTeX').each do |placeholder|
    logo = Nokogiri::XML::fragment(latex)
    placeholder.replace(logo)
  end
end
|
381
|
+
|
382
|
+
# Returns HTML for a nicely styled TeX logo.
|
383
|
+
# Assembles the styled TeX logo markup: an outer span with the serif font
# stack wrapping "T", a lowered "E", and "X".
def tex
  font_stack = %('CMU Serif', cmr10, LMRoman10-Regular, 'Times New Roman', 'Nimbus Roman No9 L', Times, serif)
  lowered_e = %(<span style="text-transform: uppercase; vertical-align: -0.5ex; margin-left: -0.1667em; margin-right: -0.125em;">E</span>)
  %(<span class="texhtml" style="font-family: #{font_stack};">T#{lowered_e}X</span>)
end
|
386
|
+
|
387
|
+
# Returns HTML for a nicely styled LaTeX logo.
|
388
|
+
# Assembles the styled LaTeX logo markup: an outer span with the serif font
# stack wrapping "L", a raised small "A", "T", a lowered "E", and "X".
def latex
  font_stack = %('CMU Serif', cmr10, LMRoman10-Regular, 'Times New Roman', 'Nimbus Roman No9 L', Times, serif)
  raised_a = %(<span style="text-transform: uppercase; font-size: 70%; margin-left: -0.36em; vertical-align: 0.3em; line-height: 0; margin-right: -0.15em;">A</span>)
  lowered_e = %(<span style="text-transform: uppercase; margin-left: -0.1667em; vertical-align: -0.5ex; line-height: 0; margin-right: -0.125em;">E</span>)
  %(<span class="texhtml" style="font-family: #{font_stack};">L#{raised_a}T#{lowered_e}X</span>)
end
|
391
|
+
|
392
|
+
# Handles \begin{quote} ... \end{quote}.
|
393
|
+
# Rewrites every <p rend="quoted"> as <blockquote class="quote">.
def quote(doc)
  doc.xpath('//p[@rend="quoted"]').each do |paragraph|
    clean_node paragraph, 'rend'
    paragraph['class'] = 'quote'
    paragraph.name = 'blockquote'
  end
end
|
400
|
+
|
401
|
+
# Handles \begin{verse} ... \end{verse}.
|
402
|
+
# Rewrites every <p rend="verse"> as <blockquote class="verse">.
def verse(doc)
  doc.xpath('//p[@rend="verse"]').each do |paragraph|
    clean_node paragraph, %w{rend noindent}
    paragraph['class'] = 'verse'
    paragraph.name = 'blockquote'
  end
end
|
409
|
+
|
410
|
+
# Converts itemized lists to uls.
|
411
|
+
# Renames every <list type="simple"> to <ul> after cleaning its 'type'.
def itemize(doc)
  bullet_lists = doc.xpath('//list[@type="simple"]')
  bullet_lists.each do |list|
    clean_node list, 'type'
    list.name = 'ul'
  end
end
|
417
|
+
|
418
|
+
# Converts enumerated lists to ols.
|
419
|
+
# Renames every <list type="ordered"> to <ol> after cleaning its 'type'.
def enumerate(doc)
  numbered_lists = doc.xpath('//list[@type="ordered"]')
  numbered_lists.each do |list|
    clean_node list, 'type'
    list.name = 'ol'
  end
end
|
425
|
+
|
426
|
+
# Returns the node for a list item (li).
|
427
|
+
# Converts every <item> element to <li>.
#
# The item's id-text, id and label attributes are cleaned, and its content
# is replaced by the inner HTML of the first <p> found inside it. An item
# that contains no <p> keeps its existing content instead of raising
# NoMethodError on nil.
def item(doc)
  doc.xpath('//item').each do |node|
    clean_node node, %w{id-text id label}
    node.name = 'li'
    paragraph = node.at_css('p')
    node.inner_html = paragraph.inner_html if paragraph
  end
end
|
434
|
+
|
435
|
+
# Removes remaining errors.
|
436
|
+
# Removes every <error> element from the document.
def remove_errors(doc)
  error_nodes = doc.xpath('//error')
  error_nodes.map { |error_node| error_node.remove }
end
|
439
|
+
|
440
|
+
# Set the Tralics ids.
|
441
|
+
# Records Tralics ids and converts data-label elements into element ids.
def set_ids(doc)
  # Copy each id into data-tralics-id, skipping ids that mention "footnote".
  doc.xpath('//*[@id]').each do |element|
    # TODO: make whitelist of non-tralics id's
    next if element['id'] =~ /footnote/

    element['data-tralics-id'] = element['id']
    convert_labels(element)
    clean_node element, %w{data-label}
  end

  # Replace the children of each <unexpected> tag's parent with the tag's
  # own children, then drop the tag.
  doc.xpath('//unexpected').each do |stray|
    stray.parent.children = stray.children
    stray.remove
  end

  # Figures first, then tables: take the id from the data-label element.
  # When an <unexpected> tag is present (Tralics puts one in sometimes) it
  # is removed instead of the label.
  %w[figure table].each do |float_tag|
    doc.xpath("//#{float_tag}").each do |float|
      stray = float.at_css('unexpected')
      label = float.at_css('data-label')
      next unless stray || label

      float['id'] = pipeline_label(label)
      (stray || label).remove
      clean_node float, %w{data-label}
    end
  end

  # Equations carry the id on their texmath child.
  doc.xpath('//equation').each do |equation|
    label = equation.at_css('data-label')
    next unless label

    equation.at_css('texmath')['id'] = pipeline_label(label)
    label.remove
  end
end
|
488
|
+
|
489
|
+
# Convert data-labels to valid CSS ids.
|
490
|
+
# Uses the first direct <data-label> child of +node+ (if any) to set the
# node's id via pipeline_label, then removes that child.
def convert_labels(node)
  node.children.each do |candidate|
    next unless candidate.name == 'data-label'

    node['id'] = pipeline_label(candidate)
    candidate.remove
    break
  end
end
|
499
|
+
|
500
|
+
# Restores the label.
|
501
|
+
# Tralics does weird stuff with underscores, so they are subbed out
|
502
|
+
# so that they can be passed through the pipeline intact. This is where
|
503
|
+
# we restore them.
|
504
|
+
# Returns the node's inner HTML with every occurrence of underscore_digest
# replaced by a literal underscore.
def pipeline_label(node)
  escaped_label = node.inner_html
  escaped_label.gsub(underscore_digest, '_')
end
|
507
|
+
|
508
|
+
# Processes the <head> tag given a section node.
|
509
|
+
# Supports chapter, section, and subsection.
|
510
|
+
# Renames the first child of +node+ to +name+ and wraps that child's
# contents in <a class="heading">, linking to the node's id when it has one.
def make_headings(doc, node, name)
  head_node = node.children.first
  head_node.name = name

  anchor = doc.create_element('a')
  target_id = node['id']
  anchor['href'] = "##{target_id}" unless target_id.nil?
  anchor['class'] = 'heading'

  anchor << head_node.children
  head_node << anchor
end
|
519
|
+
|
520
|
+
# Converts div0 to chapters and sections depending on node type.
|
521
|
+
# Rewrites each <div0> as a chapter div (h1 heading) when its type is
# 'chapter', otherwise as a section div (h2 heading). Divisions rendered
# 'nonumber' get a '-star' suffix on the class.
def chapters_and_sections(doc)
  doc.xpath('//div0').each do |division|
    division.name = 'div'
    is_chapter = division['type'] == 'chapter'
    division['class'] = is_chapter ? 'chapter' : 'section'
    heading_tag = is_chapter ? 'h1' : 'h2'
    division['class'] += '-star' if division['rend'] == 'nonumber'
    clean_node division, %w{type rend}
    make_headings(doc, division, heading_tag)
  end
end
|
538
|
+
|
539
|
+
# Converts div1 to subsections.
|
540
|
+
# Rewrites each <div1> as <div class="subsection"> with an h3 heading;
# divisions rendered 'nonumber' get the class 'subsection-star'.
def subsection(doc)
  doc.xpath('//div1').each do |division|
    division.name = 'div'
    division['class'] = 'subsection'
    division['class'] += '-star' if division['rend'] == 'nonumber'
    clean_node division, %w{rend}
    make_headings(doc, division, 'h3')
  end
end
|
551
|
+
|
552
|
+
# Converts div2 to subsubsections.
|
553
|
+
# Rewrites each <div2> as <div class="subsubsection"> with an h4 heading.
def subsubsection(doc)
  doc.xpath('//div2').each do |division|
    division['class'] = 'subsubsection'
    division.name = 'div'
    clean_node division, %w{rend}
    make_headings(doc, division, 'h4')
  end
end
|
561
|
+
|
562
|
+
# Converts heading elements to the proper spans.
|
563
|
+
# Headings are used in theorem-like environments like asides.
|
564
|
+
# Rewrites every <heading> element as <span class="description">.
def headings(doc)
  doc.xpath('//heading').each do |heading|
    heading['class'] = 'description'
    heading.name = 'span'
  end
end
|
570
|
+
|
571
|
+
# Converts strikeout text (\sout) to the proper tag.
|
572
|
+
# Renames every <sout> element to <del>.
def sout(doc)
  struck = doc.xpath('//sout')
  struck.each { |strikeout| strikeout.name = 'del' }
end
|
577
|
+
|
578
|
+
# Converts inline code (\kode) to the proper tag.
|
579
|
+
# Renames every <kode> element to <code>.
def kode(doc)
  inline_code = doc.xpath('//kode')
  inline_code.each { |snippet| snippet.name = 'code' }
end
|
584
|
+
|
585
|
+
# Converts filesystem path (\filepath) to the proper tag.
|
586
|
+
# Rewrites every <filepath> element as <span class="filepath">.
def filepath(doc)
  doc.xpath('//filepath').each do |path_node|
    path_node['class'] = 'filepath'
    path_node.name = 'span'
  end
end
|
592
|
+
|
593
|
+
# Builds the full heading for codelisting-like environments.
|
594
|
+
# The full heading, such as "Listing 1.1: Foo bars." needs to be
|
595
|
+
# extracted and manipulated to produce the right tags and classes.
|
596
|
+
# Turns +node+ into <div class=css_class> and its first <p> into the
# heading div, which is returned. The paragraph's attributes are moved
# onto the outer node, and the heading's first <strong> becomes
# <span class="number"> with ':' appended for codelistings and '.' for
# anything else.
def build_heading(node, css_class)
  node.name = 'div'
  node['class'] = css_class

  heading = node.at_css('p')
  # Hoist the paragraph's attributes onto the enclosing div.
  heading.attributes.each do |attribute_name, attribute|
    node.set_attribute(attribute_name, attribute)
    heading.remove_attribute(attribute_name)
  end
  heading.name = 'div'
  heading['class'] = 'heading'

  number = heading.at_css('strong')
  number.name = 'span'
  number['class'] = 'number'
  terminator = css_class == 'codelisting' ? ':' : '.'
  number.content += terminator

  heading
end
|
619
|
+
|
620
|
+
# Processes codelisting environments.
|
621
|
+
# Builds the heading for each <codelisting>, then moves the div.code found
# inside that heading to the end of the listing node.
def codelistings(doc)
  doc.xpath('//codelisting').each do |listing|
    heading = build_heading(listing, 'codelisting')
    code_block = heading.at_css('div.code')
    listing.add_child(code_block)
  end
end
|
628
|
+
|
629
|
+
# Add in breaks from '\\'.
|
630
|
+
# We use a span instead of '<br />' because breaks can't be styled
|
631
|
+
# easily, and are also invalid in some contexts where we want a
|
632
|
+
# break (e.g., inside h1 tags).
|
633
|
+
# Rewrites every <backslashbreak> element as <span class="break">.
def backslash_break(doc)
  doc.xpath('//backslashbreak').each do |line_break|
    line_break['class'] = 'break'
    line_break.name = 'span'
  end
end
|
639
|
+
|
640
|
+
# Handles normal, thin, and intersentence spaces.
|
641
|
+
def spaces(doc)
|
642
|
+
doc.xpath('//thinspace').each do |node|
|
643
|
+
node.name = 'span'
|
644
|
+
node['class'] = 'thinspace'
|
645
|
+
node.inner_html = ' '
|
646
|
+
end
|
647
|
+
doc.xpath('//normalspace').each do |node|
|
648
|
+
node.replace(' ')
|
649
|
+
end
|
650
|
+
doc.xpath('//intersentencespace').each do |node|
|
651
|
+
node.name = 'span'
|
652
|
+
node['class'] = 'intersentencespace'
|
653
|
+
end
|
654
|
+
end
|
655
|
+
|
656
|
+
# Processes boxes/asides.
|
657
|
+
# Runs build_heading with the 'aside' class on every <aside> element.
def asides(doc)
  aside_nodes = doc.xpath('//aside')
  aside_nodes.each { |aside| build_heading(aside, 'aside') }
end
|
662
|
+
|
663
|
+
# Processes centered elements.
|
664
|
+
# Rewrites every <center> element as <div class="center">.
def center(doc)
  doc.xpath('//center').each do |centered|
    centered['class'] = 'center'
    centered.name = 'div'
  end
end
|
670
|
+
|
671
|
+
# Handles the title, author, date, etc., produced by \maketitle.
|
672
|
+
# Builds the title page inside each <maketitle> element.
#
# The element becomes <div id="title_page">. For each of title, subtitle,
# author and date (in that order) found in maketitle_elements, the value is
# run through Polytexnic::Pipeline and the inner HTML of the first <p> of
# the result is placed in a heading: h1 for title and subtitle, h2
# otherwise, with the field name as its class.
#
# A field whose rendered HTML contains no <p> is skipped; previously only a
# date was skipped and any other such field raised NoMethodError on nil.
# When no date is supplied at all, today's date is inserted.
def title(doc)
  doc.xpath('//maketitle').each do |node|
    node.name = 'div'
    node['id'] = 'title_page'
    %w{title subtitle author date}.each do |field|
      title_element = maketitle_elements[field]
      if title_element
        type = %w{title subtitle}.include?(field) ? 'h1' : 'h2'
        el = Nokogiri::XML::Node.new(type, doc)
        raw = Polytexnic::Pipeline.new(title_element).to_html
        content = Nokogiri::HTML.fragment(raw).at_css('p')
        # Nothing renderable for this field, so emit no heading for it.
        next if content.nil?

        el.inner_html = content.inner_html.strip
        el['class'] = field
        node.add_child el
      elsif field == 'date'
        # Date is missing, so insert today's date.
        el = Nokogiri::XML::Node.new('h2', doc)
        el['class'] = field
        el.inner_html = Date.today.strftime("%A, %b %e")
        node.add_child el
      end
    end
  end
end
|
698
|
+
|
699
|
+
# Converts text to smart single quotes and apostrophes.
|
700
|
+
# This means `foo bar' and "don't" are converted to use nice curly
|
701
|
+
# "smart" quotes and apostrophes.
|
702
|
+
# We don't bother with double quotes because Tralics already handles
|
703
|
+
# those.
|
704
|
+
# Serializes the document, swaps every backtick for ‘ and every straight
# single quote for ’, and returns the re-parsed result as a new document.
def smart_single_quotes(doc)
  curled = doc.to_xml.gsub('`', '‘').gsub("'", '’')
  Nokogiri::XML(curled)
end
|
710
|
+
|
711
|
+
# Restores literal environments (verbatim, code, math, etc.).
|
712
|
+
# These environments are hashed and passed through the pipeline
|
713
|
+
# so that Tralics doesn't process them.
|
714
|
+
# Swaps cached literal text back in for the placeholder elements.
def restore_literal(doc)
  # A <literal> placeholder's cached text (with backslashes escaped)
  # becomes the entire content of its parent.
  doc.xpath('//literal').each do |placeholder|
    cached_text = literal_cache[placeholder.content]
    placeholder.parent.content = escape_backslashes(cached_text)
    placeholder.remove
  end

  # <eqref> and <unicode> placeholders are restored in place as spans
  # carrying the matching class.
  respan = lambda do |node, css_class|
    node.content = literal_cache[node.content]
    node.name = 'span'
    node['class'] = css_class
  end
  # Restore equation references.
  doc.xpath('//eqref').each { |node| respan.call(node, 'eqref') }
  # Restore non-ASCII unicode
  doc.xpath('//unicode').each { |node| respan.call(node, 'unicode') }
end
|
733
|
+
|
734
|
+
# Restores things inside \verb+...+
# Each inlineverbatim element gets its cached text back from
# literal_cache and becomes a span with class 'inline_verbatim'.
def restore_inline_verbatim(doc)
  doc.xpath('//inlineverbatim').each do |verbatim|
    cached_text = literal_cache[verbatim.content]
    verbatim.content = cached_text
    verbatim.name = 'span'
    verbatim['class'] = 'inline_verbatim'
  end
end
|
742
|
+
|
743
|
+
# Creates linked cross-references.
# Works in three passes over the document:
#   1. number every element carrying a data-tralics-id and prepend a
#      number span to its heading link (if it has one);
#   2. turn each <ref> into a span showing the target's number, or the
#      raw target name when the target is unknown;
#   3. turn every remaining element with a target attribute into a
#      hyperref pointing at the corresponding anchor.
def make_cross_references(doc)
  # Pass 1: build the numbering tree.
  doc.xpath('//*[@data-tralics-id]').each do |numbered|
    numbered['data-number'] = formatted_number(numbered)
    clean_node numbered, 'id-text'

    heading_link = numbered.css('h1 a, h2 a, h3 a').first
    next unless heading_link

    # Add the number span in front of the heading text.
    number_span = doc.create_element 'span'
    number = numbered['data-number']
    # Only top-level numbers (no dot) inside a chaptered document get
    # the 'Chapter ' prefix.
    prefix = (@cha.nil? || number.match(/\./)) ? '' : 'Chapter '
    number_span.content = prefix + numbered['data-number'] + ' '
    number_span['class'] = 'number'

    first_child = heading_link.children.first
    if first_child.nil?
      heading_link.add_child(number_span)
    else
      first_child.add_previous_sibling(number_span)
    end
  end

  # Index the numbered elements by their Tralics id.
  labeled = doc.xpath("//*[@data-tralics-id]")
  target_cache = labeled.each_with_object({}) do |element, cache|
    cache[element['data-tralics-id']] = element
  end

  # Pass 2: resolve references.
  doc.xpath('//ref').each do |reference|
    reference.name = 'span'
    referent = target_cache[reference['target']]
    if referent
      reference['class'] = 'ref'
      reference.content = referent['data-number']
    else
      reference['class'] = 'undefined_ref'
      reference.content = reference['target']
    end
    clean_node reference, 'target'
  end

  # Pass 3: anything still carrying a target becomes a hyperref.
  doc.xpath('//*[@target]').each do |link|
    link['href'] = "##{link['target'].gsub(':', '-')}"
    link['class'] = 'hyperref'
    clean_node link, 'target'
  end
end
|
790
|
+
|
791
|
+
# Returns the formatted number appropriate for the node.
# E.g., "2.1" for a section.
# Returns nil for nodes that match none of the known kinds.
# Note: records the current chapter/section/etc. in instance variables
# (@cha, @sec, @subsec, @ssubsec, @equation, @figure, @table) as a
# side-effect, so the result depends on the nodes seen so far.
def formatted_number(node)
  kind = node['class']
  case
  when kind == 'chapter'
    # Tralics numbers figures & equations overall, not per chapter,
    # so per-chapter counters are restarted here.
    @equation = 0
    @figure = 0
    @cha = node['id-text']
  when kind == 'section'
    @sec = node['id-text']
    label_number(@cha, @sec)
  when kind == 'subsection'
    @subsec = node['id-text']
    label_number(@cha, @sec, @subsec)
  when kind == 'subsubsection'
    @ssubsec = node['id-text']
    label_number(@cha, @sec, @subsec, @ssubsec)
  when node['textype'] == 'equation'
    # Without a chapter, use the node's own number; otherwise count.
    @equation = @cha.nil? ? node['id-text'] : @equation + 1
    label_number(@cha, @equation)
  when kind == 'codelisting', kind == 'aside'
    node['id-text']
  when node.name == 'table' && node['id-text']
    @table = node['id-text']
    label_number(@cha, @table)
  when node.name == 'figure'
    # Same scheme as equations.
    @figure = @cha.nil? ? node['id-text'] : @figure + 1
    label_number(@cha, @figure)
  end
end
|
834
|
+
|
835
|
+
# Returns a label number for use in headings.
# The given parts are joined with dots, skipping any that are nil.
# For example, label_number("1", "2") returns "1.2".
def label_number(*args)
  present_parts = args.reject(&:nil?)
  present_parts.join('.')
end
|
840
|
+
|
841
|
+
# Converts xref elements into anchor tags.
# The href is looked up in literal_cache using the element's url
# attribute, which is removed afterwards.
def hrefs(doc)
  doc.xpath('//xref').each do |anchor|
    anchor.name = 'a'
    anchor['href'] = literal_cache[anchor['url']]
    # Put a class on hrefs containing TeX to allow a style override.
    anchor.traverse do |inner|
      next unless inner['class'] == 'texhtml'
      anchor['class'] = 'tex'
      break
    end
    clean_node anchor, 'url'
  end
end
|
855
|
+
|
856
|
+
# Handles both \includegraphics and figure environments.
# Tralics uses the <figure> tag in both cases, so every figure element
# is passed to process_graphic with the 'figure' class.
def graphics_and_figures(doc)
  doc.xpath('//figure').each { |figure| process_graphic(figure, klass: 'figure') }
end
|
864
|
+
|
865
|
+
# Processes a graphic, including the description.
# The node becomes a div. Unless it is an inline ("raw") graphic it
# also receives the class given in options[:klass] and a caption.
# When the node names a file and extension, an <img> wrapped in a
# 'graphics' div is inserted as the node's first child.
def process_graphic(node, options={})
  klass = options[:klass]
  node.name = 'div'
  inline = (node['rend'] == 'inline')

  unless inline
    existing = node['class']
    node['class'] = existing ? existing + " #{klass}" : klass
  end

  paragraph = node.at_css('p')
  clean_node paragraph, 'rend' if paragraph

  if node['file'] && node['extension']
    # Support PDF images in PDF documents and PNGs in HTML.
    extension = node['extension'] == 'pdf' ? 'png' : node['extension']
    filename = "#{node['file']}.#{extension}"
    alt = File.basename(node['file'])
    img = %(<img src="#{filename}" alt="#{alt}" />)
    graphic = %(<div class="graphics">#{img}</div>)
    graphic_node = Nokogiri::HTML.fragment(graphic)

    # The graphic goes ahead of any existing content (the description).
    first_child = node.children.first
    if first_child
      first_child.add_previous_sibling(graphic_node)
    else
      node.add_child(graphic_node)
    end
    clean_node node, %w[file extension rend]
  end

  add_caption(node, name: 'figure') unless inline
end
|
898
|
+
|
899
|
+
# Handles \image and \imagebox commands.
# Image elements get the class 'image'; imagebox elements get
# 'image box'.
def images_and_imageboxes(doc)
  doc.xpath('//image').each { |image| handle_image(image, klass: 'image') }
  doc.xpath('//imagebox').each { |box| handle_image(box, klass: 'image box') }
end
|
909
|
+
|
910
|
+
# Processes custom image environment to use a div and the right class.
# The node's parent becomes a div with class 'graphics <klass>', and
# the node itself becomes an <img> whose src is the node's text (with
# underscore_digest turned back into '_') and whose alt is the part of
# src before the first dot.
def handle_image(node, options={})
  klass = options[:klass]

  wrapper = node.parent
  wrapper.name = 'div'
  wrapper['class'] = 'graphics ' + klass

  source = node.content.gsub(underscore_digest, '_')
  node.name = 'img'
  node['src'] = source
  node['alt'] = node['src'].split('.').first
  node.content = ""
end
|
921
|
+
|
922
|
+
# Adds a caption to a node.
# This works for figures and tables (at the least).
# The caption is a div of class 'caption' appended to the node. Its
# header reads "<Name> <number>", where the name is options[:name]
# capitalized and the number is the node's data-number. If the node
# has a <head> element, that element is removed and its contents become
# the caption's description.
def add_caption(node, options={})
  name = options[:name].to_s.capitalize
  caption = Nokogiri::XML::Node.new('div', node.document)
  caption['class'] = 'caption'
  number = node['data-number']

  head = node.at_css('head')
  if head
    header_html = %(<span class="header">#{name} #{number}: </span>)
    description_html = %(<span class="description">#{head.inner_html}</span>)
    head.remove
    caption.inner_html = Nokogiri::HTML.fragment(header_html + description_html)
  else
    caption.inner_html = %(<span class="header">#{name} #{number}</span>)
  end

  node.add_child(caption)
  clean_node node, ['id-text']
end
|
942
|
+
|
943
|
+
# Converts XML to HTML tables.
# Cells become <td> (with colspan copied from cols), rows become <tr>
# (with border classes), and each <table> is treated either as a
# tabular environment or as a table environment with a caption.
def tables(doc)
  doc.xpath('//table/row/cell').each do |cell|
    cell.name = 'td'
    cell['colspan'] = cell['cols'] if cell['cols']
  end

  doc.xpath('//table/row').each do |row|
    row.name = 'tr'
    border_classes = []
    %w[top bottom].each do |side|
      attribute = "#{side}-border"
      next unless row[attribute] == 'true'
      border_classes << "#{side}_border"
      clean_node row, [attribute]
    end
    row['class'] = border_classes.join(' ') unless border_classes.empty?
  end

  # Index into the alignment cache; advanced once per tabular processed.
  tabular_count = 0
  doc.xpath('//table').each do |table|
    if tabular?(table)
      table['class'] = 'tabular'
      clean_node table, %w[rend]
      add_cell_alignment(table, tabular_count)
      tabular_count += 1
    elsif table?(table)
      table.name = 'div'
      table['class'] = 'table'
      if table.at_css('table').nil?
        # No nested tabular: wrap the existing children in one.
        wrapped = Nokogiri::XML::Node.new('table', doc)
        wrapped['class'] = 'tabular'
        wrapped.children = table.children
        add_cell_alignment(wrapped, tabular_count)
        tabular_count += 1
        table.add_child(wrapped)
      end
      clean_node table, %w[rend]
      add_caption(table, name: 'table')
    end
  end
end
|
987
|
+
|
988
|
+
# Adds the alignment (left, center, right) plus the border (if any).
#
# table         - node whose 'tr'/'td' descendants receive CSS classes.
# tabular_count - index of this tabular in @tabular_alignment_cache,
#                 which holds the column spec string (e.g. "|l|c|r|").
#
# Cells with custom alignment (multicolumn cells) take their class from
# custom_class; all others take the class of the column they occupy.
# The column position advances by each cell's 'cols' span, so cells that
# follow a multicolumn cell still pick up the spec of their real column.
# Cells beyond the last column spec are left without an alignment class.
def add_cell_alignment(table, tabular_count)
  alignments = @tabular_alignment_cache[tabular_count]
  cell_alignments = alignments.scan(/(\|*(?:l|c|r)\|*)/).flatten
  table.css('tr').each do |row|
    column = 0
    row.css('td').each do |cell|
      alignment = cell_alignments[column]
      # Read the span before clean_node strips the 'cols' attribute.
      span = cell['cols'].to_i
      column += span > 0 ? span : 1

      if custom_alignment?(cell)
        cell['class'] = custom_class(cell)
      elsif alignment
        cell['class'] = alignment_class(alignment)
      end
      clean_node cell, %w[halign right-border left-border cols]
    end
  end
end
|
1003
|
+
|
1004
|
+
# Tells whether the cell comes with its own alignment.
# This is the case with a multicolumn row, which is detected through
# the cell's 'cols' attribute. The attribute value itself is returned
# (nil when absent).
def custom_alignment?(cell)
  column_span = cell['cols']
  column_span
end
|
1009
|
+
|
1010
|
+
# Returns the custom class for a cell.
# The result is a space-separated list built, in order, from the cell's
# left-border, halign, right-border, and top-border attributes;
# attributes that are absent contribute nothing.
def custom_class(cell)
  [
    ('left_border' if cell['left-border']),
    ("align_#{cell['halign']}" if cell['halign']),
    ('right_border' if cell['right-border']),
    ('top-border' if cell['top-border'])
  ].compact.join(' ')
end
|
1019
|
+
|
1020
|
+
# Returns the CSS class corresponding to the given table alignment.
# The alignment is a column spec such as "|l", "c", or "r|": the letter
# maps to align_left/align_center/align_right, a leading bar adds
# left_border, and a trailing bar adds right_border.
def alignment_class(alignment)
  # Applied in this order, each replacing only its first match.
  substitutions = [
    ['l', 'align_left'],
    ['r', 'align_right'],
    ['c', 'align_center'],
    [/^\|/, 'left_border '],
    [/\|$/, ' right_border']
  ]
  substitutions.reduce(alignment) do |css, (pattern, replacement)|
    css.sub(pattern, replacement)
  end
end
|
1028
|
+
|
1029
|
+
# Returns true if a table node is from a 'tabular' environment.
# Tralics converts both
#   \begin{table}...
# and
#   \begin{tabular}
# to <table> tags, so they have to be told apart: the tabular ones
# carry rend="inline".
def tabular?(table)
  rendering = table['rend']
  rendering == 'inline'
end
|
1038
|
+
|
1039
|
+
# Returns true if a table node is from a 'table' environment.
# The make_cross_references method tags such tables with a
# 'data-number' attribute, so the presence of that attribute is what
# identifies a 'table' env.
def table?(table)
  number = table['data-number']
  !number.nil?
end
|
1045
|
+
|
1046
|
+
# Trims empty paragraphs.
# Sometimes a <p></p> creeps in due to idiosyncrasies of the
# Tralics conversion.
# The string is modified in place and is also returned. It is returned
# explicitly because gsub! yields nil when there was nothing to
# replace, which would otherwise leak out as the result.
def trim_empty_paragraphs(string)
  string.gsub!(/<p>\s*<\/p>/, '')
  string
end
|
1052
|
+
|
1053
|
+
# Converts a document to HTML.
# Because there's no way to know which elements are block-level
# (and hence can't be nested inside a paragraph tag), the document body
# is first serialized to an HTML fragment and then read back in with
# Nokogiri's HTML.fragment method, which emits valid markup.
# This process transforms, e.g., the invalid
#   <p>Preformatted text: <pre>text</pre> foo</p>
# to the valid
#   <p>Preformatted text:</p> <pre>text</pre> <p>foo</p>
# Source code is highlighted first, and the highlight cache is written
# to @highlight_cache_filename in MessagePack form.
def convert_to_html(doc)
  highlight_source_code(doc)
  File.open(@highlight_cache_filename, 'wb') { |cache_file| cache_file.write(highlight_cache.to_msgpack) }

  body = doc.at_css('document').children.to_xhtml
  html = Nokogiri::HTML.fragment(body).to_xhtml
  trim_empty_paragraphs(html)
  html
end
|
1072
|
+
|
1073
|
+
# Handles table of contents (if present).
# The <tableofcontents> element becomes a div with id
# 'table_of_contents', preceded by a "Contents" heading, and is filled
# with nested lists linking to every chapter, section, and subsection
# div in the document. Does nothing when there is no such element.
def table_of_contents(doc)
  toc = doc.at_css('tableofcontents')
  return if toc.nil?

  toc.add_previous_sibling('<h1 class="contents">Contents</h1>')
  toc.name = 'div'
  toc['id'] = 'table_of_contents'
  toc.remove_attribute 'depth'

  # Nesting depth of each kind of heading in the list.
  levels = { 'chapter' => 1, 'section' => 2, 'subsection' => 3 }
  html = []
  current_depth = 0
  doc.css('div').each do |division|
    level = levels[division['class']]
    next if level.nil?

    # Going one level deeper: start a new list. The outermost list is
    # a bare <ul>; nested ones are wrapped in an <li> by open_list.
    if current_depth == level - 1
      level == 1 ? html << '<ul>' : open_list(html)
    end
    # Coming back up: close one list per level left behind
    # (a non-positive count closes nothing).
    (current_depth - level).times { close_list(html) }

    current_depth = level
    insert_li(html, division)
  end

  toc.add_child(Nokogiri::HTML::DocumentFragment.parse(html.join))
end
|
1115
|
+
|
1116
|
+
# Appends the opening tag of a list to html, preceded by an opening
# <li> when li is true. Returns html.
def open_list(html, li=true)
  opening_tags = li ? ['<li>', '<ul>'] : ['<ul>']
  opening_tags.each { |tag| html << tag }
  html
end
|
1120
|
+
|
1121
|
+
# Appends the closing tag of a list to html, followed by a closing
# </li> when li is true. Returns html when li is true and nil otherwise.
def close_list(html, li=true)
  html << '</ul>'
  return unless li
  html << '</li>'
end
|
1125
|
+
|
1126
|
+
# Appends a list item for the given division to html.
# The item carries the division's class and contains the division's
# 'a.heading' link; ' hyperref' is appended to that link's class
# attribute before it is serialized.
def insert_li(html, node)
  opening_tag = %(<li class="#{node['class']}">)
  heading = node.at_css('a.heading')
  heading['class'] = heading['class'] + ' hyperref'
  html << opening_tag
  html << heading.to_xhtml
  html << '</li>'
end
|
1132
|
+
|
1133
|
+
# Cleans a node by removing all the given attributes.
# The attributes may be given as a single name or as a list of names.
def clean_node(node, attributes)
  Array(attributes).each do |attribute|
    node.remove_attribute attribute
  end
end
|
1137
|
+
end
|
1138
|
+
end
|
1139
|
+
end
|