polytexnic 0.5.0

Files changed (125)
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/.pull_requests/1371777257 +0 -0
  4. data/.pull_requests/1371927975 +0 -0
  5. data/.pull_requests/1372804345 +0 -0
  6. data/.pull_requests/1374784075 +0 -0
  7. data/.pull_requests/1375304853 +0 -0
  8. data/.pull_requests/1375408308 +0 -0
  9. data/.pull_requests/1375409462 +0 -0
  10. data/.pull_requests/1375410668 +0 -0
  11. data/.pull_requests/1375472132 +0 -0
  12. data/.pull_requests/1375485496 +0 -0
  13. data/.pull_requests/1375487548 +0 -0
  14. data/.pull_requests/1375492835 +0 -0
  15. data/.pull_requests/1375497765 +0 -0
  16. data/.pull_requests/1375559547 +0 -0
  17. data/.pull_requests/1375589063 +0 -0
  18. data/.pull_requests/1375841786 +0 -0
  19. data/.pull_requests/1376352634 +0 -0
  20. data/.pull_requests/1376353299 +0 -0
  21. data/.pull_requests/1376449284 +0 -0
  22. data/.pull_requests/1376452696 +0 -0
  23. data/.pull_requests/1376454166 +0 -0
  24. data/.pull_requests/1376532291 +0 -0
  25. data/.pull_requests/1376625487 +0 -0
  26. data/.pull_requests/1376690108 +0 -0
  27. data/.pull_requests/1376699046 +0 -0
  28. data/.pull_requests/1376707642 +0 -0
  29. data/.pull_requests/1377230284 +0 -0
  30. data/.pull_requests/1379118478 +0 -0
  31. data/.pull_requests/1379123150 +0 -0
  32. data/.pull_requests/1380221847 +0 -0
  33. data/.pull_requests/1380589654 +0 -0
  34. data/.pull_requests/1380673142 +0 -0
  35. data/.pull_requests/1380850800 +0 -0
  36. data/.pull_requests/1381001264 +0 -0
  37. data/.pull_requests/1381005204 +0 -0
  38. data/.pull_requests/1381103022 +0 -0
  39. data/.pull_requests/1381252832 +0 -0
  40. data/.pull_requests/1381276624 +0 -0
  41. data/.pull_requests/1381344234 +0 -0
  42. data/.pull_requests/1381385297 +0 -0
  43. data/.pull_requests/1381427498 +0 -0
  44. data/.pull_requests/1381429761 +0 -0
  45. data/.pull_requests/1381873684 +0 -0
  46. data/.pull_requests/1382045490 +0 -0
  47. data/.pull_requests/1382056384 +0 -0
  48. data/.pull_requests/1382405223 +0 -0
  49. data/.pull_requests/1382478400 +0 -0
  50. data/.pull_requests/1382479780 +0 -0
  51. data/.pull_requests/1382485483 +0 -0
  52. data/.pull_requests/1382569911 +0 -0
  53. data/.pull_requests/1382646199 +0 -0
  54. data/.pull_requests/1382649778 +0 -0
  55. data/.pull_requests/1382660987 +0 -0
  56. data/.pull_requests/1382743927 +0 -0
  57. data/.pull_requests/1382840347 +0 -0
  58. data/.pull_requests/1383077676 +0 -0
  59. data/.pull_requests/1383086948 +0 -0
  60. data/.pull_requests/1383161978 +0 -0
  61. data/.pull_requests/1383263695 +0 -0
  62. data/.pull_requests/1383274008 +0 -0
  63. data/.pull_requests/1383327328 +0 -0
  64. data/.rspec +2 -0
  65. data/.ruby-gemset +1 -0
  66. data/.ruby-version +1 -0
  67. data/Gemfile +15 -0
  68. data/Guardfile +15 -0
  69. data/LICENSE.txt +22 -0
  70. data/README.md +21 -0
  71. data/Rakefile +2 -0
  72. data/lib/polytexnic/literal.rb +299 -0
  73. data/lib/polytexnic/postprocessor.rb +28 -0
  74. data/lib/polytexnic/postprocessors/html.rb +1139 -0
  75. data/lib/polytexnic/postprocessors/latex.rb +18 -0
  76. data/lib/polytexnic/postprocessors/polytex.rb +44 -0
  77. data/lib/polytexnic/preprocessor.rb +23 -0
  78. data/lib/polytexnic/preprocessors/html.rb +349 -0
  79. data/lib/polytexnic/preprocessors/latex.rb +43 -0
  80. data/lib/polytexnic/preprocessors/polytex.rb +127 -0
  81. data/lib/polytexnic/utils.rb +176 -0
  82. data/lib/polytexnic/version.rb +3 -0
  83. data/lib/polytexnic.rb +92 -0
  84. data/notes/pandoc.md +41 -0
  85. data/polytexnic.gemspec +28 -0
  86. data/polytexnic_commands.sty +5 -0
  87. data/precompiled_binaries/tralics +0 -0
  88. data/spec/fixtures/code_listing.tex +14 -0
  89. data/spec/fixtures/figures.tex +8 -0
  90. data/spec/fixtures/inline_math.html +4 -0
  91. data/spec/fixtures/inline_math.tex +3 -0
  92. data/spec/fixtures/math_environments.html +50 -0
  93. data/spec/fixtures/math_environments.tex +56 -0
  94. data/spec/fixtures/section_xrefs.tex +9 -0
  95. data/spec/fixtures/sidebar.tex +10 -0
  96. data/spec/fixtures/tables.tex +8 -0
  97. data/spec/fixtures/verbatim_environments.html +11 -0
  98. data/spec/fixtures/verbatim_environments.tex +13 -0
  99. data/spec/integration_spec.rb +34 -0
  100. data/spec/markdown_to_polytex_spec.rb +192 -0
  101. data/spec/resemble_matcher_spec.rb +69 -0
  102. data/spec/spec_helper.rb +38 -0
  103. data/spec/support/resemble_matcher.rb +100 -0
  104. data/spec/to_html/asides_spec.rb +42 -0
  105. data/spec/to_html/chapters_and_sections_spec.rb +268 -0
  106. data/spec/to_html/characters_and_punctuation_spec.rb +138 -0
  107. data/spec/to_html/codelistings_spec.rb +70 -0
  108. data/spec/to_html/core_spec.rb +227 -0
  109. data/spec/to_html/eqref_spec.rb +32 -0
  110. data/spec/to_html/footnote_spec.rb +164 -0
  111. data/spec/to_html/graphics_and_figures_spec.rb +358 -0
  112. data/spec/to_html/lists_spec.rb +103 -0
  113. data/spec/to_html/literal_environments/code_spec.rb +141 -0
  114. data/spec/to_html/literal_environments/math_spec.rb +255 -0
  115. data/spec/to_html/literal_environments/unicode_spec.rb +12 -0
  116. data/spec/to_html/literal_environments/verbatim_spec.rb +168 -0
  117. data/spec/to_html/quotations_and_verse_spec.rb +86 -0
  118. data/spec/to_html/table_of_contents_spec.rb +93 -0
  119. data/spec/to_html/table_spec.rb +269 -0
  120. data/spec/to_html/text_formatting_spec.rb +50 -0
  121. data/spec/to_latex_spec.rb +197 -0
  122. data/tasks/bin/ruby_tests +41 -0
  123. data/tasks/run_tests_with_both_rubies.rake +5 -0
  124. data/tmp/.gitkeep +0 -0
  125. metadata +286 -0
data/lib/polytexnic/postprocessors/latex.rb
@@ -0,0 +1,18 @@
+ require 'polytexnic/literal'
+
+ module Polytexnic
+   module Postprocessor
+     module Latex
+
+       # Restores literal environments (verbatim, code, math, etc.).
+       def replace_hashes(polytex)
+         puts polytex if debug?
+         polytex.tap do
+           literal_cache.each do |key, value|
+             polytex.gsub!(key, escape_backslashes(value))
+           end
+         end
+       end
+     end
+   end
+ end
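The hash-and-restore round trip performed by replace_hashes is easier to see in isolation. The sketch below is illustrative only: the cache key, the sample text, and the escape_backslashes stand-in are invented, whereas in the gem they come from the shared literal cache and the utils module.

    # Earlier in the pipeline each literal environment is swapped for a digest
    # key; the postprocessor swaps the original text back in.
    literal_cache = { 'a1b2c3' => "\\begin{verbatim}x < y\n\\end{verbatim}" }

    # Double each backslash so it survives use as a gsub replacement string.
    def escape_backslashes(string)
      string.gsub('\\') { '\\\\' }
    end

    polytex = "Some text.\n\na1b2c3\n\nMore text."
    literal_cache.each do |key, value|
      polytex.gsub!(key, escape_backslashes(value))
    end
    puts polytex   # the verbatim environment reappears in place of the key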
data/lib/polytexnic/postprocessors/polytex.rb
@@ -0,0 +1,44 @@
+ # encoding=utf-8
+ module Polytexnic
+   module Postprocessor
+     module Polytex
+
+       # Removes references to the hypertarget package.
+       # TODO: Support hypertarget
+       # This isn't a priority, as you get most of what you need
+       # with hyperref.
+       def remove_hypertarget
+         @source.gsub!(/\\hypertarget.*$/, '')
+       end
+
+       # Fixes a kramdown verbatim bug.
+       # When converting code, kramdown outputs
+       # "\begin{verbatim}foo" instead of
+       # "\begin{verbatim}\nfoo".
+       def fix_verbatim_bug
+         @source.gsub!(/\\begin\{verbatim\}/) { |s| s + "\n" }
+       end
+
+       # Writes the PolyTeX code environments based on the code cache.
+       # I.e., code that looks like
+       #     {lang="ruby"}
+       #     def foo
+       #       "bar"
+       #     end
+       # becomes
+       #     %= lang:ruby
+       #     \begin{code}
+       #     def foo
+       #       "bar"
+       #     end
+       #     \end{code}
+       # which reduces syntax highlighting to a previously solved problem.
+       def write_polytex_code
+         code_cache.each do |key, (code, lang, in_codelisting)|
+           latex = "%= lang:#{lang}\n\\begin{code}\n#{code}\n\\end{code}"
+           @source.gsub!(key, latex)
+         end
+       end
+     end
+   end
+ end
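As a concrete example of what write_polytex_code produces, here is a standalone sketch. The digest key and source string are invented for illustration; the real method works on @source and the shared code_cache.

    # Hypothetical cache entry: key => [code, language, in_codelisting]
    code_cache = { 'f9e8d7' => ["def foo\n  \"bar\"\nend", 'ruby', false] }
    source     = "Intro paragraph.\n\nf9e8d7\n\nClosing paragraph."

    code_cache.each do |key, (code, lang, _in_codelisting)|
      latex = "%= lang:#{lang}\n\\begin{code}\n#{code}\n\\end{code}"
      source.gsub!(key, latex)
    end

    puts source
    # Intro paragraph.
    #
    # %= lang:ruby
    # \begin{code}
    # def foo
    #   "bar"
    # end
    # \end{code}
    #
    # Closing paragraph.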
data/lib/polytexnic/preprocessor.rb
@@ -0,0 +1,23 @@
+ # encoding=utf-8
+ require 'polytexnic/literal'
+ require 'polytexnic/preprocessors/html'
+ require 'polytexnic/preprocessors/latex'
+ require 'polytexnic/preprocessors/polytex'
+
+ module Polytexnic
+   module Preprocessor
+     include Literal
+     include Html
+     include Latex
+     include Polytex
+
+     # Preprocesses the input based on output format.
+     def preprocess(format)
+       case format
+       when :html    then to_xml
+       when :latex   then to_processed_latex
+       when :polytex then to_polytex
+       end
+     end
+   end
+ end
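These preprocessor modules are mixed into the gem's conversion pipeline, which calls preprocess with the target format and then hands the result to the matching postprocessor. The pipeline class itself is not shown in the hunks above, so the call below is only illustrative of how the dispatch gets exercised:

    require 'polytexnic'

    polytex = <<~'EOS'
      \chapter{Foo}
      Hello, \emph{world}.
    EOS

    # Roughly: runs preprocess(:html) followed by postprocess(:html).
    html = Polytexnic::Pipeline.new(polytex).to_html
    puts html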
data/lib/polytexnic/preprocessors/html.rb
@@ -0,0 +1,349 @@
+ # encoding=utf-8
+ module Polytexnic
+   module Preprocessor
+     module Html
+
+       # Converts PolyTeX to XML.
+       # The heart of the process is using Tralics to convert the input PolyTeX
+       # to XML. The raw PolyTeX needs to be processed first to make everything
+       # go smoothly, but after that the steps to produce the corresponding
+       # XML are straightforward.
+       def to_xml
+         polytex = process_for_tralics(@polytex)
+         doc = Nokogiri::XML(tralics_xml(polytex))
+         add_document_tag(doc)
+         @xml = doc.to_xml
+       end
+
+       private
+
+       # Processes the input PolyTeX for Tralics.
+       # The key steps are creating a clean document safe for making global
+       # substitutions (gsubs), and then making a bunch of gsubs.
+       def process_for_tralics(polytex)
+         clean_document(polytex).tap do |output|
+           process_spaces(output)
+           remove_commands(output)
+           hyperrefs(output)
+           title_fields(output)
+           maketitle(output)
+           label_names(output)
+           image_names(output)
+           restore_eq_labels(output)
+           convert_figure_centering(output)
+           convert_longtable(output)
+           mark_environments(output)
+           make_tabular_alignment_cache(output)
+         end
+       end
+
+       # Returns a clean document with cached literal environments.
+       # This is a key step: we cache literal environments that should be
+       # passed through the pipeline with no changes (verbatim, code, etc.).
+       # The result is a document that can safely be transformed using
+       # global substitutions.
+       def clean_document(polytex)
+         doc = cache_unicode(cache_literal(add_commands(polytex)))
+         inline_verbatim(doc)
+         cache_hrefs(doc)
+         remove_comments(doc)
+         double_backslashes(cache_display_inline_math(doc))
+       end
+
+       # Prepares spaces to be passed through the pipeline.
+       # Handles thin spaces ('\,') and normal spaces ('\ '), as well as
+       # end-of-sentence spaces.
+       def process_spaces(doc)
+         doc.gsub!(/\\,/, xmlelement('thinspace'))
+         # Match an end of sentence character, while also recognizing
+         # things like (Or otherwise.) and ``Yes, indeed!'' as being the
+         # ends of sentences.
+         end_of_sentence = '[.?!](?:\)|\'+)?'
+         # Handle a forced normal space '\ '.
+         doc.gsub!(/(#{end_of_sentence})\\ /) do
+           $1 + xmlelement('normalspace')
+         end
+         not_a_capital = '[^A-Z]'
+         # Case of "foo. A"
+         doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})[ ]+([^\s])/) do
+           $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
+         end
+         # Case of "foo.\n A"
+         doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})\n[ ]+([^\s])/) do
+           $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
+         end
+         # Case of "foo.\nA"
+         doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})\n([^\n])/) do
+           $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
+         end
+         # Handle the manual override to force an inter-sentence space, '\@',
+         # as in 'Superman II\@. A new sentence'.
+         doc.gsub!(/\\@\. /, '.' + xmlelement('intersentencespace') + ' ')
+       end
+
+       # Removes commands that might screw up Tralics.
+       def remove_commands(doc)
+         # Determine if we're using footnote symbols.
+         symbols_cmd = '\renewcommand{\thefootnote}{\fnsymbol{footnote}}'
+         @footnote_symbols = !!doc.match(/^\s*#{Regexp.escape(symbols_cmd)}/)
+
+         doc.gsub!(/^\s*\\renewcommand.*$/, '')
+       end
+
+       # Returns true if we should use footnote symbols in place of numbers.
+       def footnote_symbols?
+         @footnote_symbols
+       end
+
+       # Handles \verb environments.
+       # LaTeX supports an inline verbatim environment using
+       #   \verb+<stuff>+
+       # The + is arbitrary; any non-letter character is fine as long as it
+       # doesn't appear in <stuff>, so this code has exactly the same effect:
+       #   \verb!<stuff>!
+       #   \verb@<stuff>@
+       #   \verb8<stuff>8
+       # My preference is to use + or - if available.
+       def inline_verbatim(doc)
+         doc.gsub!(/\\verb([^A-Za-z])(.*?)\1/) do
+           key = digest($2)
+           literal_cache[key] = $2
+           xmlelement('inlineverbatim') { key }
+         end
+       end
+
+       # Removes commented-out lines.
+       def remove_comments(output)
+         output.gsub!(/[^\\]%.*$/, '')
+       end
+
+       # Converts LaTeX double backslashes to HTML breaks.
+       def double_backslashes(string)
+         lines = []
+         in_table = false
+         string.split("\n").each do |line|
+           in_table ||= (line =~ /^\s*\\begin{(?:tabular|longtable)}/)
+           line.gsub!('\\\\', xmlelement('backslashbreak')) unless in_table
+           lines << line
+           in_table = (in_table && line !~ /^\s*\\end{tabular}/)
+         end
+         lines.join("\n")
+       end
+
+       # Adds some default commands.
+       def add_commands(polytex)
+         line(custom_commands) + tralics_commands + polytex
+       end
+
+       # Pads a string with newlines.
+       def line(string)
+         "\n#{string}\n"
+       end
+
+       # Handles title fields.
+       def title_fields(string)
+         %w{title subtitle author date}.each do |field|
+           string.gsub! /\\#{field}\{(.*)\}/ do |s|
+             maketitle_elements[field] = $1
+             ''
+           end
+         end
+       end
+
+       # Replaces maketitle with an XML element.
+       def maketitle(string)
+         string.gsub! /\\maketitle/ do |s|
+           xmlelement('maketitle')
+         end
+       end
+
+       # Preserves label names.
+       # Tralics doesn't keep the names of labels, e.g., 'cha:foobar' in
+       # '\label{cha:foobar}'. But Tralics supplies a wide variety of
+       # pseudo-LaTeX commands to add arbitrary XML elements to the final
+       # document. In this case, the \xbox command does the trick. See
+       #   http://www-sop.inria.fr/marelle/tralics/doc-x.html
+       # for more information.
+       def label_names(string)
+         string.gsub! /\\label\{(.*?)\}/ do |s|
+           label = $1.gsub(':', '-').gsub('_', underscore_digest)
+           "#{s}\n\\xbox{data-label}{#{label}}"
+         end
+       end
+
+       # Handles image names with underscores.
+       # This is a terrible kludge, and it's annoying that it's
+       # apparently necessary.
+       def image_names(string)
+         string.gsub! /\\image\{(.*?)\}/ do |s|
+           escaped_filename = $1.gsub('_', underscore_digest)
+           "\\image{#{escaped_filename}}"
+         end
+         string.gsub! /\\imagebox\{(.*?)\}/ do |s|
+           escaped_filename = $1.gsub('_', underscore_digest)
+           "\\imagebox{#{escaped_filename}}"
+         end
+       end
+
+       # Restores the equation labels.
+       def restore_eq_labels(output)
+         math_label_cache.each do |key, label|
+           output.gsub!(key, label)
+         end
+       end
+
+       # Handles centering in figures.
+       # The way we handle generic \begin{center}...\end{center} doesn't
+       # work in figures for some reason. Luckily, the preferred method
+       # is to use \centering anyway, so this kludge is actually better LaTeX.
+       def convert_figure_centering(output)
+         @in_figure = false
+         centered = output.split("\n").map do |line|
+           if line =~ /^\s*\\begin\{figure\}/
+             @in_figure = true
+             line
+           elsif @in_figure && line =~ /^\s*\\begin\{center\}/
+             '\centering'
+           elsif @in_figure && line =~ /^\s*\\end\{center\}/
+             ''
+           elsif @in_figure && line =~ /^\s*\\end\{figure\}/
+             @in_figure = false
+             line
+           else
+             line
+           end
+         end.join("\n")
+         output.replace(centered)
+       end
+
+       # Converts the longtable environment to simple tabular.
+       # This is mainly because kramdown outputs longtables by default,
+       # but as a side-effect you can also use longtables in PolyTeX
+       # input documents.
+       def convert_longtable(output)
+         output.gsub!('\begin{longtable}', '\begin{tabular}')
+         output.gsub!('\end{longtable}', '\end{tabular}')
+       end
+
+       # Marks environments with their types.
+       # Tralics strips some information when processing LaTeX, such as
+       # whether a particular div defines a chapter. We remedy this by
+       # using the \AddAttToCurrent pseudo-LaTeX command to mark such
+       # environments with their types.
+       def mark_environments(string)
+
+         # Marks chapters with a 'chapter' type.
+         # Also handles \chapter*.
+         string.gsub! /^\s*\\chapter\*?\{(.*)\}/ do |s|
+           "#{s}\n\\AddAttToCurrent{type}{chapter}"
+         end
+
+         # Wrap codelistings in a 'codelisting' element.
+         string.gsub! /\\begin{codelisting}/ do |s|
+           "\\begin{xmlelement*}{codelisting}\n#{s}"
+         end
+         string.gsub! /\\end{codelisting}/ do |s|
+           "#{s}\n\\end{xmlelement*}"
+         end
+
+         # Wrap asides in an 'aside' element.
+         string.gsub! /\\begin{aside}/ do |s|
+           "\\begin{xmlelement*}{aside}\n#{s}"
+         end
+         string.gsub! /\\end{aside}/ do |s|
+           "#{s}\n\\end{xmlelement*}"
+         end
+
+         # Replace quotations and verse with corresponding XML elements.
+         string.gsub! /\\begin{quote}/ do |s|
+           quotation = '\AddAttToCurrent{class}{quotation}'
+           "\\begin{xmlelement*}{blockquote}\n#{quotation}"
+         end
+         string.gsub! /\\end{quote}/ do |s|
+           "\\end{xmlelement*}"
+         end
+         string.gsub! /\\begin{verse}/ do |s|
+           "\\begin{xmlelement*}{blockquote}\n\\AddAttToCurrent{class}{verse}"
+         end
+         string.gsub! /\\end{verse}/ do |s|
+           "\\end{xmlelement*}"
+         end
+
+         # Handle \begin{center}...\end{center}
+         string.gsub! /\\begin{center}/, '\begin{xmlelement*}{center}'
+         string.gsub! /\\end{center}/, '\end{xmlelement*}'
+
+         # Handle \centering
+         string.gsub! /\\centering/, '\AddAttToCurrent{class}{center}'
+
+         # # Handle \image
+         # string.gsub! /\\image/, '\includegraphics'
+       end
+
+       # Collects alignment information for tabular environments.
+       # We suck out all the stuff like 'l|l|lr' in
+       #   \begin{tabular}{l|l|lr}
+       # The reason is that we need to work around a couple of bugs in Tralics.
+       # I've tried in vain to figure out WTF is going on in the Tralics
+       # source, but it's easy enough in Ruby so I'm throwing it in here.
+       def make_tabular_alignment_cache(output)
+         alignment_regex = /^\s*\\begin{tabular}{((?:\|*[lcr]+\|*)+)}/
+         @tabular_alignment_cache = output.scan(alignment_regex).flatten
+       end
+
+       # Returns the XML produced by the Tralics program.
+       # There is a lot of ugly file manipulation here, but it's fundamentally
+       # straightforward. The heart of it is
+       #
+       #   system("#{tralics} -nomathml #{file.path} > log/tralics.log")
+       #
+       # which writes the converted PolyTeX file as XML, which then gets
+       # read in and lightly processed.
+       def tralics_xml(polytex)
+         file = Tempfile.new(['polytex', '.tex'])
+         puts polytex if debug?
+         file.write(polytex)
+         file.close
+         Dir.mkdir 'log' unless File.directory?('log')
+         system("#{tralics} -nomathml #{file.path} > log/tralics.log")
+         dirname = File.dirname(file.path)
+         xml_filename = File.basename(file.path, '.tex') + '.xml'
+         raw_xml = File.read(File.join(dirname, xml_filename))
+         xml = clean_xml(raw_xml)
+         puts xml if debug?
+         xml
+       ensure
+         xmlfile = file.path.sub('.tex', '.xml')
+         logfile = file.path.sub('.tex', '.log')
+         [xmlfile, logfile].each do |file|
+           File.delete(file) if File.exist?(file)
+         end
+         file.delete
+       end
+
+       # Wraps the whole document in <document></document>.
+       # Fragmentary documents come wrapped in 'unknown' tags.
+       # Full documents are wrapped in 'std' tags.
+       # Change either to 'document' for consistency.
+       def add_document_tag(doc)
+         %w[unknown std].each do |parent_tag|
+           node = doc.at_css(parent_tag)
+           node.name = 'document' unless node.nil?
+         end
+       end
+
+       def clean_xml(raw_xml)
+         nokogiri_ellipsis_workaround(raw_xml)
+       end
+
+       # Fixes a Nokogiri bug.
+       # As of this writing, the latest version of Nokogiri (1.5.6) doesn't
+       # handle the horizontal ellipsis character '&#133;' correctly in Ruby 2.
+       # The kludgy solution is to replace it with '…' in the raw XML,
+       # which does work.
+       def nokogiri_ellipsis_workaround(raw_xml)
+         raw_xml.gsub('&#133;', '…')
+       end
+     end
+   end
+ end
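The trickiest part of this file is the sentence-boundary handling in process_spaces. A self-contained sketch of the same regex logic, with a visible placeholder standing in for xmlelement('intersentencespace') and an invented sample sentence, shows what the "foo. A" case does:

    end_of_sentence = '[.?!](?:\)|\'+)?'
    not_a_capital   = '[^A-Z]'
    tag = '<intersentencespace/>'

    text = "It works. (Or otherwise.) J. Smith disagrees."
    # Same pattern as the "foo. A" case above: a sentence-ending character not
    # preceded by a capital letter (so "J. Smith" is left alone), followed by
    # whitespace and the start of the next sentence.
    text.gsub!(/(#{not_a_capital})(#{end_of_sentence})[ ]+([^\s])/) do
      $1 + $2 + tag + ' ' + $3
    end
    puts text
    # It works.<intersentencespace/> (Or otherwise.)<intersentencespace/> J. Smith disagrees.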
data/lib/polytexnic/preprocessors/latex.rb
@@ -0,0 +1,43 @@
+ module Polytexnic
+   module Preprocessor
+     module Latex
+
+       def to_processed_latex
+         @polytex = polish_tables(process_asides(clean_latex_document))
+       end
+
+       # Returns LaTeX with hashed versions of literal environments.
+       # Literal environments are hashed and passed through the pipeline
+       # so that we can process things like refs to hyperrefs using gsubs.
+       def clean_latex_document
+         cache_literal(@polytex, :latex)
+       end
+
+       def polish_tables(text)
+         text.tap do
+           text.gsub!(/^\s*(\\begin\{table\})/) do
+             "#{$1}\n\\begin{center}\n\\small\n"
+           end
+           text.gsub!(/^\s*(\\end\{table\})/) { "\\end{center}\n#{$1}" }
+         end
+       end
+
+       # Processes aside environments.
+       # In order to get nice framed & shaded aside boxes, we need to
+       # transform the default aside into a new environment.
+       def process_asides(text)
+         # Transform asides with headings and labels.
+         aside_regex = /\\begin{aside}\n\s*
+                        \\heading{(.*?)}\s*
+                        \\label{(.*?)}\n
+                        (.*?)
+                        \\end{aside}/mx
+         text.tap do
+           text.gsub!(aside_regex) do
+             %(\\begin{shaded_aside}{#{$1}}{#{$2}}\n#{$3}\n\\end{shaded_aside})
+           end
+         end
+       end
+     end
+   end
+ end
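For reference, this is what the process_asides substitution does to a concrete aside. The regex is copied from the method above; the sample aside and its heading and label are invented for illustration.

    aside_regex = /\\begin{aside}\n\s*
                   \\heading{(.*?)}\s*
                   \\label{(.*?)}\n
                   (.*?)
                   \\end{aside}/mx

    text = <<~'EOS'
      \begin{aside}
      \heading{A helpful tip}
      \label{aside:tip}
      Write a little every day.
      \end{aside}
    EOS

    converted = text.gsub(aside_regex) do
      %(\\begin{shaded_aside}{#{$1}}{#{$2}}\n#{$3}\n\\end{shaded_aside})
    end
    puts converted
    # \begin{shaded_aside}{A helpful tip}{aside:tip}
    # Write a little every day.
    #
    # \end{shaded_aside}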
data/lib/polytexnic/preprocessors/polytex.rb
@@ -0,0 +1,127 @@
+ # encoding=utf-8
+ module Polytexnic
+   module Preprocessor
+     module Polytex
+
+       # Converts Markdown to PolyTeX.
+       # We adopt a unified approach: rather than convert "Markdown" (I use
+       # the term loosely*) directly to HTML, we convert it to PolyTeX and
+       # then run everything through the PolyTeX pipeline. Happily, kramdown
+       # comes equipped with a `to_latex` method that does most of the heavy
+       # lifting. The output isn't as clean as that produced by Pandoc (our
+       # previous choice), but it comes with significant advantages: (1) It's
+       # written in Ruby, available as a gem, so its use eliminates an external
+       # dependency. (2) It's the foundation for the "Markdown" interpreter
+       # used by Leanpub, so by using it ourselves we ensure greater
+       # compatibility with Leanpub books.
+       #
+       # * <rant>The number of mutually incompatible markup languages going
+       # by the name "Markdown" is truly mind-boggling. Most of them add things
+       # to John Gruber's original Markdown language in an ever-expanding
+       # attempt to bolt on the functionality needed to write longer documents.
+       # At this point, I fear that "Markdown" has become little more than a
+       # marketing term.</rant>
+       def to_polytex
+         require 'kramdown'
+         cleaned_markdown = cache_code_environments
+         cleaned_markdown.tap do |markdown|
+           convert_code_inclusion(markdown)
+         end
+         math_cache = cache_math(cleaned_markdown)
+         # Override the header ordering, which starts with 'section' by default.
+         lh = 'chapter,section,subsection,subsubsection,paragraph,subparagraph'
+         kramdown = Kramdown::Document.new(cleaned_markdown, latex_headers: lh)
+         @source = restore_inclusion(restore_math(kramdown.to_latex, math_cache))
+       end
+
+       def cache_code_environments
+         output = []
+         lines = @source.split("\n")
+         indentation = ' ' * 4
+         while (line = lines.shift)
+           if line =~ /\{lang="(.*?)"\}/
+             language = $1
+             code = []
+             while (line = lines.shift) && line.match(/^#{indentation}(.*)$/) do
+               code << $1
+             end
+             code = code.join("\n")
+             key = digest(code)
+             code_cache[key] = [code, language]
+             output << key
+             output << line
+           elsif line =~ /^```\s*$/ # basic code fences
+             while (line = lines.shift) && !line.match(/^```\s*$/)
+               output << indentation + line
+             end
+             output << "\n"
+           elsif line =~ /^```(\w+)\s*$/ # syntax-highlighted code fences
+             language = $1
+             code = []
+             while (line = lines.shift) && !line.match(/^```\s*$/) do
+               code << line
+             end
+             code = code.join("\n")
+             key = digest(code)
+             code_cache[key] = [code, language]
+             output << key
+           else
+             output << line
+           end
+         end
+         output.join("\n")
+       end
+
+       # Caches Leanpub-style math.
+       # Leanpub uses the notation {$$}...{/$$} for both inline and block math,
+       # with the only difference being the presence of newlines:
+       #   {$$} x^2 {/$$}   % inline
+       # and
+       #   {$$}
+       #   x^2              % block
+       #   {/$$}
+       # I personally hate this notation and convention, but anyone who really
+       # cares should just use PolyTeX instead of Markdown.
+       def cache_math(text)
+         cache = {}
+         text.gsub!(/\{\$\$\}\n(.*?)\n\{\/\$\$\}/) do
+           key = digest($1)
+           cache[[:block, key]] = $1
+           key
+         end
+         text.gsub!(/\{\$\$\}(.*?)\{\/\$\$\}/) do
+           key = digest($1)
+           cache[[:inline, key]] = $1
+           key
+         end
+         cache
+       end
+
+       # Restores the Markdown math.
+       # This is easy because we're running everything through our LaTeX
+       # pipeline.
+       def restore_math(text, cache)
+         cache.each do |(kind, key), value|
+           case kind
+           when :inline
+             open = '\('
+             close = '\)'
+           when :block
+             open = '\[' + "\n"
+             close = "\n" + '\]'
+           end
+           text.gsub!(key, open + value + close)
+         end
+         text
+       end
+     end
+
+     # Adds support for <<(path/to/code) inclusion.
+     def convert_code_inclusion(text)
+       text.gsub!(/^\s*<<(\(.*?\))/) { "<!-- inclusion= <<#{$1}-->" }
+     end
+     def restore_inclusion(text)
+       text.gsub(/% <!-- inclusion= (.*?)-->/) { "%= #{$1}" }
+     end
+   end
+ end
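Putting cache_math and restore_math together, Leanpub's {$$}...{/$$} notation round-trips through the kramdown conversion roughly as follows. This is a standalone sketch: the digest helper is a stand-in for the pipeline's own, and the sample sentence is invented.

    require 'digest/sha1'

    # Stand-in for the pipeline's digest helper.
    def digest(string)
      Digest::SHA1.hexdigest(string)
    end

    text  = "The identity {$$}e^{i\\pi} + 1 = 0{/$$} is famous."
    cache = {}

    # Cache: replace the inline math with an opaque key so kramdown leaves it alone.
    text.gsub!(/\{\$\$\}(.*?)\{\/\$\$\}/) do
      key = digest($1)
      cache[[:inline, key]] = $1
      key
    end

    # ... kramdown's to_latex conversion would run here ...

    # Restore: wrap the cached math in \( ... \) (or \[ ... \] for block math).
    cache.each do |(kind, key), value|
      open, close = (kind == :inline) ? ['\(', '\)'] : ["\\[\n", "\n\\]"]
      text.gsub!(key, open + value + close)
    end

    puts text   # => The identity \(e^{i\pi} + 1 = 0\) is famous.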