polytexnic 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/.pull_requests/1371777257 +0 -0
  4. data/.pull_requests/1371927975 +0 -0
  5. data/.pull_requests/1372804345 +0 -0
  6. data/.pull_requests/1374784075 +0 -0
  7. data/.pull_requests/1375304853 +0 -0
  8. data/.pull_requests/1375408308 +0 -0
  9. data/.pull_requests/1375409462 +0 -0
  10. data/.pull_requests/1375410668 +0 -0
  11. data/.pull_requests/1375472132 +0 -0
  12. data/.pull_requests/1375485496 +0 -0
  13. data/.pull_requests/1375487548 +0 -0
  14. data/.pull_requests/1375492835 +0 -0
  15. data/.pull_requests/1375497765 +0 -0
  16. data/.pull_requests/1375559547 +0 -0
  17. data/.pull_requests/1375589063 +0 -0
  18. data/.pull_requests/1375841786 +0 -0
  19. data/.pull_requests/1376352634 +0 -0
  20. data/.pull_requests/1376353299 +0 -0
  21. data/.pull_requests/1376449284 +0 -0
  22. data/.pull_requests/1376452696 +0 -0
  23. data/.pull_requests/1376454166 +0 -0
  24. data/.pull_requests/1376532291 +0 -0
  25. data/.pull_requests/1376625487 +0 -0
  26. data/.pull_requests/1376690108 +0 -0
  27. data/.pull_requests/1376699046 +0 -0
  28. data/.pull_requests/1376707642 +0 -0
  29. data/.pull_requests/1377230284 +0 -0
  30. data/.pull_requests/1379118478 +0 -0
  31. data/.pull_requests/1379123150 +0 -0
  32. data/.pull_requests/1380221847 +0 -0
  33. data/.pull_requests/1380589654 +0 -0
  34. data/.pull_requests/1380673142 +0 -0
  35. data/.pull_requests/1380850800 +0 -0
  36. data/.pull_requests/1381001264 +0 -0
  37. data/.pull_requests/1381005204 +0 -0
  38. data/.pull_requests/1381103022 +0 -0
  39. data/.pull_requests/1381252832 +0 -0
  40. data/.pull_requests/1381276624 +0 -0
  41. data/.pull_requests/1381344234 +0 -0
  42. data/.pull_requests/1381385297 +0 -0
  43. data/.pull_requests/1381427498 +0 -0
  44. data/.pull_requests/1381429761 +0 -0
  45. data/.pull_requests/1381873684 +0 -0
  46. data/.pull_requests/1382045490 +0 -0
  47. data/.pull_requests/1382056384 +0 -0
  48. data/.pull_requests/1382405223 +0 -0
  49. data/.pull_requests/1382478400 +0 -0
  50. data/.pull_requests/1382479780 +0 -0
  51. data/.pull_requests/1382485483 +0 -0
  52. data/.pull_requests/1382569911 +0 -0
  53. data/.pull_requests/1382646199 +0 -0
  54. data/.pull_requests/1382649778 +0 -0
  55. data/.pull_requests/1382660987 +0 -0
  56. data/.pull_requests/1382743927 +0 -0
  57. data/.pull_requests/1382840347 +0 -0
  58. data/.pull_requests/1383077676 +0 -0
  59. data/.pull_requests/1383086948 +0 -0
  60. data/.pull_requests/1383161978 +0 -0
  61. data/.pull_requests/1383263695 +0 -0
  62. data/.pull_requests/1383274008 +0 -0
  63. data/.pull_requests/1383327328 +0 -0
  64. data/.rspec +2 -0
  65. data/.ruby-gemset +1 -0
  66. data/.ruby-version +1 -0
  67. data/Gemfile +15 -0
  68. data/Guardfile +15 -0
  69. data/LICENSE.txt +22 -0
  70. data/README.md +21 -0
  71. data/Rakefile +2 -0
  72. data/lib/polytexnic/literal.rb +299 -0
  73. data/lib/polytexnic/postprocessor.rb +28 -0
  74. data/lib/polytexnic/postprocessors/html.rb +1139 -0
  75. data/lib/polytexnic/postprocessors/latex.rb +18 -0
  76. data/lib/polytexnic/postprocessors/polytex.rb +44 -0
  77. data/lib/polytexnic/preprocessor.rb +23 -0
  78. data/lib/polytexnic/preprocessors/html.rb +349 -0
  79. data/lib/polytexnic/preprocessors/latex.rb +43 -0
  80. data/lib/polytexnic/preprocessors/polytex.rb +127 -0
  81. data/lib/polytexnic/utils.rb +176 -0
  82. data/lib/polytexnic/version.rb +3 -0
  83. data/lib/polytexnic.rb +92 -0
  84. data/notes/pandoc.md +41 -0
  85. data/polytexnic.gemspec +28 -0
  86. data/polytexnic_commands.sty +5 -0
  87. data/precompiled_binaries/tralics +0 -0
  88. data/spec/fixtures/code_listing.tex +14 -0
  89. data/spec/fixtures/figures.tex +8 -0
  90. data/spec/fixtures/inline_math.html +4 -0
  91. data/spec/fixtures/inline_math.tex +3 -0
  92. data/spec/fixtures/math_environments.html +50 -0
  93. data/spec/fixtures/math_environments.tex +56 -0
  94. data/spec/fixtures/section_xrefs.tex +9 -0
  95. data/spec/fixtures/sidebar.tex +10 -0
  96. data/spec/fixtures/tables.tex +8 -0
  97. data/spec/fixtures/verbatim_environments.html +11 -0
  98. data/spec/fixtures/verbatim_environments.tex +13 -0
  99. data/spec/integration_spec.rb +34 -0
  100. data/spec/markdown_to_polytex_spec.rb +192 -0
  101. data/spec/resemble_matcher_spec.rb +69 -0
  102. data/spec/spec_helper.rb +38 -0
  103. data/spec/support/resemble_matcher.rb +100 -0
  104. data/spec/to_html/asides_spec.rb +42 -0
  105. data/spec/to_html/chapters_and_sections_spec.rb +268 -0
  106. data/spec/to_html/characters_and_punctuation_spec.rb +138 -0
  107. data/spec/to_html/codelistings_spec.rb +70 -0
  108. data/spec/to_html/core_spec.rb +227 -0
  109. data/spec/to_html/eqref_spec.rb +32 -0
  110. data/spec/to_html/footnote_spec.rb +164 -0
  111. data/spec/to_html/graphics_and_figures_spec.rb +358 -0
  112. data/spec/to_html/lists_spec.rb +103 -0
  113. data/spec/to_html/literal_environments/code_spec.rb +141 -0
  114. data/spec/to_html/literal_environments/math_spec.rb +255 -0
  115. data/spec/to_html/literal_environments/unicode_spec.rb +12 -0
  116. data/spec/to_html/literal_environments/verbatim_spec.rb +168 -0
  117. data/spec/to_html/quotations_and_verse_spec.rb +86 -0
  118. data/spec/to_html/table_of_contents_spec.rb +93 -0
  119. data/spec/to_html/table_spec.rb +269 -0
  120. data/spec/to_html/text_formatting_spec.rb +50 -0
  121. data/spec/to_latex_spec.rb +197 -0
  122. data/tasks/bin/ruby_tests +41 -0
  123. data/tasks/run_tests_with_both_rubies.rake +5 -0
  124. data/tmp/.gitkeep +0 -0
  125. metadata +286 -0
@@ -0,0 +1,18 @@
1
+ require 'polytexnic/literal'
2
+
module Polytexnic
  module Postprocessor
    module Latex

      # Restores literal environments (verbatim, code, math, etc.).
      #
      # Every key of `literal_cache` found in `polytex` is replaced, in place,
      # by its cached value. The value is passed through `escape_backslashes`
      # (defined elsewhere) before being used as the `gsub!` replacement
      # string.
      #
      # polytex - the String to restore; it is mutated.
      #
      # Returns the same (mutated) `polytex` object.
      def replace_hashes(polytex)
        puts polytex if debug?
        polytex.tap do
          literal_cache.each do |key, value|
            polytex.gsub!(key, escape_backslashes(value))
          end
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
# encoding=utf-8
module Polytexnic
  module Postprocessor
    module Polytex

      # Removes references to the hypertarget package.
      # Everything from '\hypertarget' to the end of that line is deleted
      # from @source in place.
      # TODO: Support hypertarget
      # This isn't a priority, as you get most of what you need
      # with hyperref.
      def remove_hypertarget
        @source.gsub!(/\\hypertarget.*$/, '')
      end

      # Fixes a kramdown verbatim bug.
      # When converting code, kramdown outputs
      #   "\begin{verbatim}foo" instead of
      #   "\begin{verbatim}\nfoo".
      # A newline is appended after every '\begin{verbatim}' in @source.
      def fix_verbatim_bug
        @source.gsub!(/\\begin\{verbatim\}/) { |s| s + "\n" }
      end

      # Writes the PolyTeX code environments based on the code cache.
      # I.e., code that looks like
      #   {lang="ruby"}
      #       def foo
      #         "bar"
      #       end
      # becomes
      #   %= lang:ruby
      #   \begin{code}
      #   def foo
      #     "bar"
      #   end
      #   \end{code}
      # which reduces syntax highlighting to a previously solved problem.
      #
      # Each cache entry maps a key found in @source to an array whose first
      # two elements are the code and its language; any third element is
      # ignored here.
      def write_polytex_code
        code_cache.each do |key, (code, lang, _in_codelisting)|
          latex = "%= lang:#{lang}\n\\begin{code}\n#{code}\n\\end{code}"
          # Use the block form: with a replacement *string*, gsub! would
          # interpret backslash sequences in the cached code ('\\', '\0',
          # "\'", ...) and silently corrupt it.
          @source.gsub!(key) { latex }
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # encoding=utf-8
2
+ require 'polytexnic/literal'
3
+ require 'polytexnic/preprocessors/html'
4
+ require 'polytexnic/preprocessors/latex'
5
+ require 'polytexnic/preprocessors/polytex'
6
+
module Polytexnic
  module Preprocessor
    include Literal
    include Html
    include Latex
    include Polytex

    # Preprocesses the input based on output format.
    #
    # format - one of :html, :latex, or :polytex.
    #
    # Dispatches to `to_xml`, `to_processed_latex`, or `to_polytex`
    # respectively and returns that method's result. Any other format
    # falls through the `case` and yields nil.
    def preprocess(format)
      case format
      when :html    then to_xml
      when :latex   then to_processed_latex
      when :polytex then to_polytex
      end
    end
  end
end
@@ -0,0 +1,349 @@
# encoding=utf-8
module Polytexnic
  module Preprocessor
    module Html

      # Converts PolyTeX to XML.
      # The heart of the process is using Tralics to convert the input PolyTeX
      # to XML. The raw PolyTeX needs to be processed first to make everything
      # go smoothly, but after that the steps to producing the corresponding
      # XML are straightforward.
      #
      # Reads @polytex, stores the resulting XML string in @xml, and
      # returns it.
      def to_xml
        polytex = process_for_tralics(@polytex)
        doc = Nokogiri::XML(tralics_xml(polytex))
        add_document_tag(doc)
        @xml = doc.to_xml
      end

      private

        # Processes the input PolyTeX for Tralics.
        # The key steps are creating a clean document safe for making global
        # substitutions (gsubs), and then making a bunch of gsubs.
        # Each step mutates the cleaned document in place; the cleaned
        # document is returned.
        def process_for_tralics(polytex)
          clean_document(polytex).tap do |output|
            process_spaces(output)
            remove_commands(output)
            hyperrefs(output)
            title_fields(output)
            maketitle(output)
            label_names(output)
            image_names(output)
            restore_eq_labels(output)
            convert_figure_centering(output)
            convert_longtable(output)
            mark_environments(output)
            make_tabular_alignment_cache(output)
          end
        end

        # Returns a clean document with cached literal environments.
        # This is a key step: we cache literal environments that should be
        # passed through the pipeline with no changes (verbatim, code, etc.).
        # The result is a document that can safely be transformed using
        # global substitutions.
        def clean_document(polytex)
          doc = cache_unicode(cache_literal(add_commands(polytex)))
          inline_verbatim(doc)
          cache_hrefs(doc)
          remove_comments(doc)
          double_backslashes(cache_display_inline_math(doc))
        end

        # Prepares spaces to be passed through the pipeline.
        # Handles thin spaces ('\,') and normal spaces ('\ '), as well as
        # end-of-sentence spaces. Mutates `doc` in place.
        def process_spaces(doc)
          doc.gsub!(/\\,/, xmlelement('thinspace'))
          # Match an end of sentence character, while also recognizing
          # things like (Or otherwise.) and ``Yes, indeed!'' as being the
          # ends of sentences.
          end_of_sentence = '[.?!](?:\)|\'+)?'
          # Handle a forced normal space '\ '.
          doc.gsub!(/(#{end_of_sentence})\\ /) do
            $1 + xmlelement('normalspace')
          end
          not_a_capital = '[^A-Z]'
          # Case of "foo. A"
          doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})[ ]+([^\s])/) do
            $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
          end
          # Case of "foo.\n A"
          doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})\n[ ]+([^\s])/) do
            $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
          end
          # Case of "foo.\nA"
          doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})\n([^\n])/) do
            $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
          end
          # Handle the manual override to force an inter-sentence space, '\@',
          # as in 'Superman II\@. A new sentence'.
          doc.gsub!(/\\@\. /, '.' + xmlelement('intersentencespace') + ' ')
        end

        # Removes commands that might screw up Tralics.
        # Before stripping every line that starts with '\renewcommand',
        # records in @footnote_symbols whether the footnote-symbol
        # \renewcommand was present.
        def remove_commands(doc)
          # Determine if we're using footnote symbols.
          symbols_cmd = '\renewcommand{\thefootnote}{\fnsymbol{footnote}}'
          @footnote_symbols = !!doc.match(/^\s*#{Regexp.escape(symbols_cmd)}/)

          doc.gsub!(/^\s*\\renewcommand.*$/, '')
        end

        # Returns true if we should use footnote symbols in place of numbers.
        def footnote_symbols?
          @footnote_symbols
        end

        # Handles \verb environments.
        # LaTeX supports an inline verbatim environment using
        #   \verb+<stuff>+
        # The + is arbitrary; any non-letter character is fine as long as it
        # doesn't appear in <stuff>, so this code has exactly the same effect:
        #   \verb!<stuff>!
        #   \verb@<stuff>@
        #   \verb8<stuff>8
        # My preference is to use + or - if available.
        #
        # The verbatim content is stored in `literal_cache` under its digest
        # and replaced by an 'inlineverbatim' element wrapping that key.
        def inline_verbatim(doc)
          doc.gsub!(/\\verb([^A-Za-z])(.*?)\1/) do
            key = digest($2)
            literal_cache[key] = $2
            xmlelement('inlineverbatim') { key }
          end
        end

        # Removes comments (an unescaped '%' through the end of the line).
        # When the character before the '%' is whitespace (including the
        # newline that precedes a whole-line comment), it is removed along
        # with the comment, so commented-out lines disappear entirely. Any
        # other preceding character is kept: "foo% bar" becomes "foo".
        def remove_comments(output)
          output.gsub!(/([^\\])%.*$/) do
            preceding = $1
            preceding.strip.empty? ? '' : preceding
          end
        end

        # Converts LaTeX double backslashes to HTML breaks.
        # Lines inside tabular or longtable environments are left alone.
        # Returns a new string.
        def double_backslashes(string)
          lines = []
          in_table = false
          string.split("\n").each do |line|
            in_table ||= (line =~ /^\s*\\begin{(?:tabular|longtable)}/)
            line.gsub!('\\\\', xmlelement('backslashbreak')) unless in_table
            lines << line
            # Leave table mode at the end of either kind of table; checking
            # only for tabular would leave us "in a table" forever after a
            # longtable.
            in_table = (in_table &&
                        line !~ /^\s*\\end{(?:tabular|longtable)}/)
          end
          lines.join("\n")
        end

        # Adds some default commands.
        def add_commands(polytex)
          line(custom_commands) + tralics_commands + polytex
        end

        # Pads a string with newlines.
        def line(string)
          "\n#{string}\n"
        end

        # Handles title fields.
        # Each \title, \subtitle, \author, and \date command is removed
        # from the string and its argument is stored in `maketitle_elements`
        # under the field name.
        def title_fields(string)
          %w{title subtitle author date}.each do |field|
            string.gsub!(/\\#{field}\{(.*)\}/) do |s|
              maketitle_elements[field] = $1
              ''
            end
          end
        end

        # Replaces maketitle with an XML element.
        def maketitle(string)
          string.gsub!(/\\maketitle/) do |s|
            xmlelement('maketitle')
          end
        end

        # Preserves label names.
        # Tralics doesn't keep the names of labels, e.g., 'cha:foobar' in
        # '\label{cha:foobar}'. But Tralics supplies a wide variety of
        # pseudo-LaTeX commands to add arbitrary XML elements to the final
        # document. In this case, the \xbox command does the trick. See
        #   http://www-sop.inria.fr/marelle/tralics/doc-x.html
        # for more information.
        def label_names(string)
          string.gsub!(/\\label\{(.*?)\}/) do |s|
            label = $1.gsub(':', '-').gsub('_', underscore_digest)
            "#{s}\n\\xbox{data-label}{#{label}}"
          end
        end

        # Handles image names with underscores.
        # This is a terrible kludge, and it's annoying that it's
        # apparently necessary.
        def image_names(string)
          string.gsub!(/\\image\{(.*?)\}/) do |s|
            escaped_filename = $1.gsub('_', underscore_digest)
            "\\image{#{escaped_filename}}"
          end
          string.gsub!(/\\imagebox\{(.*?)\}/) do |s|
            escaped_filename = $1.gsub('_', underscore_digest)
            "\\imagebox{#{escaped_filename}}"
          end
        end

        # Restores the equation labels.
        # Every key of `math_label_cache` found in the output is replaced
        # by its label.
        def restore_eq_labels(output)
          math_label_cache.each do |key, label|
            output.gsub!(key, label)
          end
        end

        # Handles centering in figures.
        # The way we handle generic \begin{center}...\end{center} doesn't
        # work in figures for some reason. Luckily, the preferred method
        # is to use \centering anyway, so this kludge is actually better LaTeX.
        # The output string is replaced in place with the converted text.
        def convert_figure_centering(output)
          @in_figure = false
          centered = output.split("\n").map do |line|
            if line =~ /^\s*\\begin\{figure\}/
              @in_figure = true
              line
            elsif @in_figure && line =~ /^\s*\\begin\{center\}/
              '\centering'
            elsif @in_figure && line =~ /^\s*\\end\{center\}/
              ''
            elsif @in_figure && line =~ /^\s*\\end\{figure\}/
              @in_figure = false
              line
            else
              line
            end
          end.join("\n")
          output.replace(centered)
        end

        # Converts the longtable environment to simple tabular.
        # This is mainly because kramdown outputs longtables by default,
        # but as a side-effect you can also use longtables in PolyTeX
        # input documents.
        def convert_longtable(output)
          output.gsub!('\begin{longtable}', '\begin{tabular}')
          output.gsub!('\end{longtable}', '\end{tabular}')
        end

        # Marks environments with their types.
        # Tralics strips some information when processing LaTeX, such as
        # whether a particular div defines a chapter. We remedy this by
        # using the \AddAttToCurrent pseudo-LaTeX command to mark such
        # environments with their types.
        def mark_environments(string)

          # Marks chapters with a 'chapter' type.
          # Also handles \chapter*.
          string.gsub!(/^\s*\\chapter\*?\{(.*)\}/) do |s|
            "#{s}\n\\AddAttToCurrent{type}{chapter}"
          end

          # Wrap codelistings in a 'codelisting' element.
          string.gsub!(/\\begin{codelisting}/) do |s|
            "\\begin{xmlelement*}{codelisting}\n#{s}"
          end
          string.gsub!(/\\end{codelisting}/) do |s|
            "#{s}\n\\end{xmlelement*}"
          end

          # Wrap asides in an 'aside' element.
          string.gsub!(/\\begin{aside}/) do |s|
            "\\begin{xmlelement*}{aside}\n#{s}"
          end
          string.gsub!(/\\end{aside}/) do |s|
            "#{s}\n\\end{xmlelement*}"
          end

          # Replace quotations and verse with corresponding XML elements.
          string.gsub!(/\\begin{quote}/) do |s|
            quotation = '\AddAttToCurrent{class}{quotation}'
            "\\begin{xmlelement*}{blockquote}\n#{quotation}"
          end
          string.gsub!(/\\end{quote}/) do |s|
            "\\end{xmlelement*}"
          end
          string.gsub!(/\\begin{verse}/) do |s|
            "\\begin{xmlelement*}{blockquote}\n\\AddAttToCurrent{class}{verse}"
          end
          string.gsub!(/\\end{verse}/) do |s|
            "\\end{xmlelement*}"
          end

          # Handle \begin{center}...\end{center}
          string.gsub!(/\\begin{center}/, '\begin{xmlelement*}{center}')
          string.gsub!(/\\end{center}/, '\end{xmlelement*}')

          # Handle \centering
          string.gsub!(/\\centering/, '\AddAttToCurrent{class}{center}')

          # # Handle \image
          # string.gsub! /\\image/, '\includegraphics'
        end

        # Collects alignment information for tabular environments.
        # We suck out all the stuff like 'l|l|lr' in
        #   \begin{tabular}{l|l|lr}
        # The reason is that we need to work around a couple of bugs in Tralics.
        # I've tried in vain to figure out WTF is going on in the Tralics
        # source, but it's easy enough in Ruby so I'm throwing it in here.
        # The alignment strings are stored, in document order, in
        # @tabular_alignment_cache.
        def make_tabular_alignment_cache(output)
          alignment_regex = /^\s*\\begin{tabular}{((?:\|*[lcr]+\|*)+)}/
          @tabular_alignment_cache = output.scan(alignment_regex).flatten
        end

        # Returns the XML produced by the Tralics program.
        # There is a lot of ugly file manipulation here, but it's fundamentally
        # straightforward. The heart of it is
        #
        #   system("#{tralics} -nomathml #{file.path} > log/tralics.log")
        #
        # which writes the converted PolyTeX file as XML, which then gets
        # read in and lightly processed.
        #
        # The PolyTeX is written to a temporary .tex file; the .xml file is
        # read from the same directory. The temporary file and any
        # sibling .xml/.log files are removed afterwards, even on error.
        def tralics_xml(polytex)
          file = Tempfile.new(['polytex', '.tex'])
          puts polytex if debug?
          file.write(polytex)
          file.close
          Dir.mkdir 'log' unless File.directory?('log')
          system("#{tralics} -nomathml #{file.path} > log/tralics.log")
          dirname = File.dirname(file.path)
          xml_filename = File.basename(file.path, '.tex') + '.xml'
          raw_xml = File.read(File.join(dirname, xml_filename))
          xml = clean_xml(raw_xml)
          puts xml if debug?
          xml
        ensure
          # `file` is nil if Tempfile.new itself raised; nothing to clean up.
          if file
            # Anchor on the extension so a '.tex' elsewhere in the path
            # (e.g. in the temp directory name) is never rewritten.
            generated = %w[.xml .log].map do |extension|
              file.path.sub(/\.tex\z/, extension)
            end
            generated.each do |path|
              File.delete(path) if File.exist?(path)
            end
            file.delete
          end
        end

        # Wraps the whole document in <document></document>.
        # Fragmentary documents come wrapped in 'unknown' tags.
        # Full documents are wrapped in 'std' tags.
        # Change either to 'document' for consistency.
        def add_document_tag(doc)
          %w[unknown std].each do |parent_tag|
            node = doc.at_css(parent_tag)
            node.name = 'document' unless node.nil?
          end
        end

        # Cleans the raw XML; currently this only applies the ellipsis
        # workaround.
        def clean_xml(raw_xml)
          nokogiri_ellipsis_workaround(raw_xml)
        end

        # Fixes a Nokogiri bug.
        # As of this writing, the latest version of Nokogiri (1.5.6) doesn't
        # handle the horizontal ellipsis character '&#133;' correctly in Ruby 2.
        # The kludgy solution is to replace it with '…' in the raw XML,
        # which does work.
        def nokogiri_ellipsis_workaround(raw_xml)
          raw_xml.gsub('&#133;', '…')
        end
    end
  end
end
@@ -0,0 +1,43 @@
module Polytexnic
  module Preprocessor
    module Latex

      # Prepares @polytex for LaTeX output: literal environments are
      # cached, asides are transformed, and tables are polished.
      # The result is stored back in @polytex and returned.
      def to_processed_latex
        @polytex = polish_tables(process_asides(clean_latex_document))
      end

      # Returns LaTeX with hashed versions of literal environments.
      # Literal environments are hashed and passed through the pipeline
      # so that we can process things like refs to hyperrefs using gsubs.
      def clean_latex_document
        cache_literal(@polytex, :latex)
      end

      # Centers table contents and sets them in a small font.
      # '\begin{center}' and '\small' are inserted after each
      # '\begin{table}', and '\end{center}' before each '\end{table}'.
      # Note that leading whitespace before either command is consumed.
      #
      # text - the String to polish; it is mutated and returned.
      def polish_tables(text)
        text.tap do
          text.gsub!(/^\s*(\\begin\{table\})/) do
            "#{$1}\n\\begin{center}\n\\small\n"
          end
          text.gsub!(/^\s*(\\end\{table\})/) { "\\end{center}\n#{$1}" }
        end
      end

      # Processes aside environments.
      # In order to get nice framed & shaded aside boxes, we need to
      # transform the default aside into a new environment.
      # Only asides that open with a \heading followed by a \label are
      # transformed; the heading and label become the two arguments of
      # the shaded_aside environment.
      #
      # text - the String to process; it is mutated and returned.
      def process_asides(text)
        # Transform asides with headings and labels.
        aside_regex = /\\begin\{aside\}\n\s*
                       \\heading\{(.*?)\}\s*
                       \\label\{(.*?)\}\n
                       (.*?)
                       \\end\{aside\}/mx
        text.tap do
          text.gsub!(aside_regex) do
            %(\\begin{shaded_aside}{#{$1}}{#{$2}}\n#{$3}\n\\end{shaded_aside})
          end
        end
      end
    end
  end
end
@@ -0,0 +1,127 @@
# encoding=utf-8
module Polytexnic
  module Preprocessor
    module Polytex

      # Converts Markdown to PolyTeX.
      # We adopt a unified approach: rather than convert "Markdown" (I use
      # the term loosely*) directly to HTML, we convert it to PolyTeX and
      # then run everything through the PolyTeX pipeline. Happily, kramdown
      # comes equipped with a `to_latex` method that does most of the heavy
      # lifting. The output isn't as clean as that produced by Pandoc (our
      # previous choice), but it comes with significant advantages: (1) It's
      # written in Ruby, available as a gem, so its use eliminates an external
      # dependency. (2) It's the foundation for the "Markdown" interpreter
      # used by Leanpub, so by using it ourselves we ensure greater
      # compatibility with Leanpub books.
      #
      # * <rant>The number of mutually incompatible markup languages going
      # by the name "Markdown" is truly mind-boggling. Most of them add things
      # to John Gruber's original Markdown language in an ever-expanding
      # attempt to bolt on the functionality needed to write longer documents.
      # At this point, I fear that "Markdown" has become little more than a
      # marketing term.</rant>
      #
      # The converted PolyTeX is stored in @source and returned.
      def to_polytex
        # Loaded lazily. The feature name must be lowercase: 'Kramdown'
        # only resolves on case-insensitive filesystems.
        require 'kramdown'
        cleaned_markdown = cache_code_environments
        convert_code_inclusion(cleaned_markdown)
        math_cache = cache_math(cleaned_markdown)
        # Override the header ordering, which starts with 'section' by default.
        lh = 'chapter,section,subsection,subsubsection,paragraph,subparagraph'
        kramdown = Kramdown::Document.new(cleaned_markdown, latex_headers: lh)
        @source = restore_inclusion(restore_math(kramdown.to_latex, math_cache))
      end

      # Caches code environments found in @source.
      # Three forms are recognized:
      #   * a {lang="..."} line followed by lines indented four spaces;
      #   * plain ``` code fences, whose contents are re-emitted indented;
      #   * ```lang code fences.
      # Language-tagged code is stored in `code_cache` as [code, language]
      # under its digest, and the digest takes its place in the output.
      #
      # Returns the resulting Markdown as a new String.
      def cache_code_environments
        output = []
        lines = @source.split("\n")
        indentation = ' ' * 4
        while (line = lines.shift)
          if line =~ /\{lang="(.*?)"\}/
            language = $1
            code = []
            while (line = lines.shift) && line.match(/^#{indentation}(.*)$/) do
              code << $1
            end
            code = code.join("\n")
            key = digest(code)
            code_cache[key] = [code, language]
            output << key
            # The first line that wasn't part of the code (nil at the end
            # of the input) still belongs in the output.
            output << line
          elsif line =~ /^```\s*$/        # basic code fences
            while (line = lines.shift) && !line.match(/^```\s*$/)
              output << indentation + line
            end
            output << "\n"
          elsif line =~ /^```(\w+)\s*$/   # syntax-highlighted code fences
            language = $1
            code = []
            while (line = lines.shift) && !line.match(/^```\s*$/) do
              code << line
            end
            code = code.join("\n")
            key = digest(code)
            code_cache[key] = [code, language]
            output << key
          else
            output << line
          end
        end
        output.join("\n")
      end

      # Caches Leanpub-style math.
      # Leanpub uses the notation {$$}...{/$$} for both inline and block math,
      # with the only difference being the presence of newlines:
      #   {$$} x^2 {/$$}  % inline
      # and
      #   {$$}
      #   x^2             % block
      #   {/$$}
      # I personally hate this notation and convention, but anyone who really
      # cares should just use PolyTeX instead of Markdown.
      #
      # Each math expression in `text` is replaced, in place, by its digest.
      # Returns a Hash mapping [kind, digest] to the math content, where
      # kind is :block or :inline.
      def cache_math(text)
        cache = {}
        text.gsub!(/\{\$\$\}\n(.*?)\n\{\/\$\$\}/) do
          key = digest($1)
          cache[[:block, key]] = $1
          key
        end
        text.gsub!(/\{\$\$\}(.*?)\{\/\$\$\}/) do
          key = digest($1)
          cache[[:inline, key]] = $1
          key
        end
        cache
      end

      # Restores the Markdown math.
      # This is easy because we're running everything through our LaTeX
      # pipeline. Inline math is wrapped in \( \) and block math in \[ \].
      #
      # text  - the String containing digests; it is mutated and returned.
      # cache - the Hash produced by `cache_math`.
      def restore_math(text, cache)
        cache.each do |(kind, key), value|
          case kind
          when :inline
            open  = '\('
            close = '\)'
          when :block
            open  = '\[' + "\n"
            close = "\n" + '\]'
          end
          # Block form, so backslash sequences in the math (notably the
          # '\\' line break) are inserted literally rather than being
          # interpreted as gsub replacement escapes.
          text.gsub!(key) { open + value + close }
        end
        text
      end

      # Adds support for <<(path/to/code) inclusion.
      # The inclusion line is hidden inside an HTML comment so that it
      # survives the Markdown conversion; see `restore_inclusion`.
      def convert_code_inclusion(text)
        text.gsub!(/^\s*<<(\(.*?\))/) { "<!-- inclusion= <<#{$1}-->" }
      end

      # Turns the commented-out inclusion markers back into PolyTeX
      # '%= <<(path/to/code)' lines. Returns a new String.
      def restore_inclusion(text)
        text.gsub(/% <!-- inclusion= (.*?)-->/) { "%= #{$1}" }
      end
    end
  end
end