polytexnic 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/.pull_requests/1371777257 +0 -0
- data/.pull_requests/1371927975 +0 -0
- data/.pull_requests/1372804345 +0 -0
- data/.pull_requests/1374784075 +0 -0
- data/.pull_requests/1375304853 +0 -0
- data/.pull_requests/1375408308 +0 -0
- data/.pull_requests/1375409462 +0 -0
- data/.pull_requests/1375410668 +0 -0
- data/.pull_requests/1375472132 +0 -0
- data/.pull_requests/1375485496 +0 -0
- data/.pull_requests/1375487548 +0 -0
- data/.pull_requests/1375492835 +0 -0
- data/.pull_requests/1375497765 +0 -0
- data/.pull_requests/1375559547 +0 -0
- data/.pull_requests/1375589063 +0 -0
- data/.pull_requests/1375841786 +0 -0
- data/.pull_requests/1376352634 +0 -0
- data/.pull_requests/1376353299 +0 -0
- data/.pull_requests/1376449284 +0 -0
- data/.pull_requests/1376452696 +0 -0
- data/.pull_requests/1376454166 +0 -0
- data/.pull_requests/1376532291 +0 -0
- data/.pull_requests/1376625487 +0 -0
- data/.pull_requests/1376690108 +0 -0
- data/.pull_requests/1376699046 +0 -0
- data/.pull_requests/1376707642 +0 -0
- data/.pull_requests/1377230284 +0 -0
- data/.pull_requests/1379118478 +0 -0
- data/.pull_requests/1379123150 +0 -0
- data/.pull_requests/1380221847 +0 -0
- data/.pull_requests/1380589654 +0 -0
- data/.pull_requests/1380673142 +0 -0
- data/.pull_requests/1380850800 +0 -0
- data/.pull_requests/1381001264 +0 -0
- data/.pull_requests/1381005204 +0 -0
- data/.pull_requests/1381103022 +0 -0
- data/.pull_requests/1381252832 +0 -0
- data/.pull_requests/1381276624 +0 -0
- data/.pull_requests/1381344234 +0 -0
- data/.pull_requests/1381385297 +0 -0
- data/.pull_requests/1381427498 +0 -0
- data/.pull_requests/1381429761 +0 -0
- data/.pull_requests/1381873684 +0 -0
- data/.pull_requests/1382045490 +0 -0
- data/.pull_requests/1382056384 +0 -0
- data/.pull_requests/1382405223 +0 -0
- data/.pull_requests/1382478400 +0 -0
- data/.pull_requests/1382479780 +0 -0
- data/.pull_requests/1382485483 +0 -0
- data/.pull_requests/1382569911 +0 -0
- data/.pull_requests/1382646199 +0 -0
- data/.pull_requests/1382649778 +0 -0
- data/.pull_requests/1382660987 +0 -0
- data/.pull_requests/1382743927 +0 -0
- data/.pull_requests/1382840347 +0 -0
- data/.pull_requests/1383077676 +0 -0
- data/.pull_requests/1383086948 +0 -0
- data/.pull_requests/1383161978 +0 -0
- data/.pull_requests/1383263695 +0 -0
- data/.pull_requests/1383274008 +0 -0
- data/.pull_requests/1383327328 +0 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Guardfile +15 -0
- data/LICENSE.txt +22 -0
- data/README.md +21 -0
- data/Rakefile +2 -0
- data/lib/polytexnic/literal.rb +299 -0
- data/lib/polytexnic/postprocessor.rb +28 -0
- data/lib/polytexnic/postprocessors/html.rb +1139 -0
- data/lib/polytexnic/postprocessors/latex.rb +18 -0
- data/lib/polytexnic/postprocessors/polytex.rb +44 -0
- data/lib/polytexnic/preprocessor.rb +23 -0
- data/lib/polytexnic/preprocessors/html.rb +349 -0
- data/lib/polytexnic/preprocessors/latex.rb +43 -0
- data/lib/polytexnic/preprocessors/polytex.rb +127 -0
- data/lib/polytexnic/utils.rb +176 -0
- data/lib/polytexnic/version.rb +3 -0
- data/lib/polytexnic.rb +92 -0
- data/notes/pandoc.md +41 -0
- data/polytexnic.gemspec +28 -0
- data/polytexnic_commands.sty +5 -0
- data/precompiled_binaries/tralics +0 -0
- data/spec/fixtures/code_listing.tex +14 -0
- data/spec/fixtures/figures.tex +8 -0
- data/spec/fixtures/inline_math.html +4 -0
- data/spec/fixtures/inline_math.tex +3 -0
- data/spec/fixtures/math_environments.html +50 -0
- data/spec/fixtures/math_environments.tex +56 -0
- data/spec/fixtures/section_xrefs.tex +9 -0
- data/spec/fixtures/sidebar.tex +10 -0
- data/spec/fixtures/tables.tex +8 -0
- data/spec/fixtures/verbatim_environments.html +11 -0
- data/spec/fixtures/verbatim_environments.tex +13 -0
- data/spec/integration_spec.rb +34 -0
- data/spec/markdown_to_polytex_spec.rb +192 -0
- data/spec/resemble_matcher_spec.rb +69 -0
- data/spec/spec_helper.rb +38 -0
- data/spec/support/resemble_matcher.rb +100 -0
- data/spec/to_html/asides_spec.rb +42 -0
- data/spec/to_html/chapters_and_sections_spec.rb +268 -0
- data/spec/to_html/characters_and_punctuation_spec.rb +138 -0
- data/spec/to_html/codelistings_spec.rb +70 -0
- data/spec/to_html/core_spec.rb +227 -0
- data/spec/to_html/eqref_spec.rb +32 -0
- data/spec/to_html/footnote_spec.rb +164 -0
- data/spec/to_html/graphics_and_figures_spec.rb +358 -0
- data/spec/to_html/lists_spec.rb +103 -0
- data/spec/to_html/literal_environments/code_spec.rb +141 -0
- data/spec/to_html/literal_environments/math_spec.rb +255 -0
- data/spec/to_html/literal_environments/unicode_spec.rb +12 -0
- data/spec/to_html/literal_environments/verbatim_spec.rb +168 -0
- data/spec/to_html/quotations_and_verse_spec.rb +86 -0
- data/spec/to_html/table_of_contents_spec.rb +93 -0
- data/spec/to_html/table_spec.rb +269 -0
- data/spec/to_html/text_formatting_spec.rb +50 -0
- data/spec/to_latex_spec.rb +197 -0
- data/tasks/bin/ruby_tests +41 -0
- data/tasks/run_tests_with_both_rubies.rake +5 -0
- data/tmp/.gitkeep +0 -0
- metadata +286 -0
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'polytexnic/literal'
|
2
|
+
|
3
|
+
module Polytexnic
|
4
|
+
module Postprocessor
|
5
|
+
module Latex
|
6
|
+
|
7
|
+
# Restores literal environments (verbatim, code, math, etc.).
|
8
|
+
def replace_hashes(polytex)
|
9
|
+
puts polytex if debug?
|
10
|
+
polytex.tap do
|
11
|
+
literal_cache.each do |key, value|
|
12
|
+
polytex.gsub!(key, escape_backslashes(value))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
module Polytexnic
|
3
|
+
module Postprocessor
|
4
|
+
module Polytex
|
5
|
+
|
6
|
+
# Removes references to the hypertarget package.
|
7
|
+
# TODO: Support hypertarget
|
8
|
+
# This isn't a priority, as you get most of what you need
|
9
|
+
# with hyperref.
|
10
|
+
def remove_hypertarget
|
11
|
+
@source.gsub!(/\\hypertarget.*$/, '')
|
12
|
+
end
|
13
|
+
|
14
|
+
# Fixes a kramdown verbatim bug.
|
15
|
+
# When converting code, kramdown outputs
|
16
|
+
# "\begin{verbatim}foo" instead of
|
17
|
+
# "\begin{verbatim}\nfoo".
|
18
|
+
def fix_verbatim_bug
|
19
|
+
@source.gsub!(/\\begin\{verbatim\}/) { |s| s + "\n" }
|
20
|
+
end
|
21
|
+
|
22
|
+
# Writes the PolyTeX code environments based on the code cache.
|
23
|
+
# I.e., code that looks like
|
24
|
+
# {lang="ruby"}
|
25
|
+
# def foo
|
26
|
+
# "bar"
|
27
|
+
# end
|
28
|
+
# becomes
|
29
|
+
# %= lang:ruby
|
30
|
+
# \begin{code}
|
31
|
+
# def foo
|
32
|
+
# "bar"
|
33
|
+
# end
|
34
|
+
# \end{code}
|
35
|
+
# which reduces syntax highlighting to a previously solved problem.
|
36
|
+
def write_polytex_code
|
37
|
+
code_cache.each do |key, (code, lang, in_codelisting)|
|
38
|
+
latex = "%= lang:#{lang}\n\\begin{code}\n#{code}\n\\end{code}"
|
39
|
+
@source.gsub!(key, latex)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
require 'polytexnic/literal'
|
3
|
+
require 'polytexnic/preprocessors/html'
|
4
|
+
require 'polytexnic/preprocessors/latex'
|
5
|
+
require 'polytexnic/preprocessors/polytex'
|
6
|
+
|
7
|
+
module Polytexnic
|
8
|
+
module Preprocessor
|
9
|
+
include Literal
|
10
|
+
include Html
|
11
|
+
include Latex
|
12
|
+
include Polytex
|
13
|
+
|
14
|
+
# Preprocesses the input based on output format.
|
15
|
+
def preprocess(format)
|
16
|
+
case format
|
17
|
+
when :html then to_xml
|
18
|
+
when :latex then to_processed_latex
|
19
|
+
when :polytex then to_polytex
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,349 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
module Polytexnic
|
3
|
+
module Preprocessor
|
4
|
+
module Html
|
5
|
+
|
6
|
+
# Converts HTML to XML.
|
7
|
+
# The heart of the process is using Tralics to convert the input PolyTeX
|
8
|
+
# to XML. The raw PolyTeX needs to be processed first to make everything
|
9
|
+
# go smoothly, but after that the steps to producing the corresponding
|
10
|
+
# XML is straightforward.
|
11
|
+
def to_xml
|
12
|
+
polytex = process_for_tralics(@polytex)
|
13
|
+
doc = Nokogiri::XML(tralics_xml(polytex))
|
14
|
+
add_document_tag(doc)
|
15
|
+
@xml = doc.to_xml
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Processes the input PolyTeX for Tralics.
|
21
|
+
# The key steps are creating a clean document safe for making global
|
22
|
+
# substitutions (gsubs), and then making a bunch of gsubs.
|
23
|
+
def process_for_tralics(polytex)
|
24
|
+
clean_document(polytex).tap do |output|
|
25
|
+
process_spaces(output)
|
26
|
+
remove_commands(output)
|
27
|
+
hyperrefs(output)
|
28
|
+
title_fields(output)
|
29
|
+
maketitle(output)
|
30
|
+
label_names(output)
|
31
|
+
image_names(output)
|
32
|
+
restore_eq_labels(output)
|
33
|
+
convert_figure_centering(output)
|
34
|
+
convert_longtable(output)
|
35
|
+
mark_environments(output)
|
36
|
+
make_tabular_alignment_cache(output)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns a clean document with cached literal environments.
|
41
|
+
# This is a key step: we cache literal environments that should be
|
42
|
+
# passed through the pipeline with no changes (verbatim, code, etc.).
|
43
|
+
# The result is a document that can safely be transformed using
|
44
|
+
# global substitutions.
|
45
|
+
def clean_document(polytex)
|
46
|
+
doc = cache_unicode(cache_literal(add_commands(polytex)))
|
47
|
+
inline_verbatim(doc)
|
48
|
+
cache_hrefs(doc)
|
49
|
+
remove_comments(doc)
|
50
|
+
double_backslashes(cache_display_inline_math(doc))
|
51
|
+
end
|
52
|
+
|
53
|
+
# Prepares spaces to be passed through the pipeline.
|
54
|
+
# Handles thin spaces ('\,') and normal spaces ('\ '), as well as
|
55
|
+
# end-of-sentence spaces.
|
56
|
+
def process_spaces(doc)
|
57
|
+
doc.gsub!(/\\,/, xmlelement('thinspace'))
|
58
|
+
# Match an end of sentence character, while also recognizing
|
59
|
+
# things like (Or otherwise.) and ``Yes, indeed!'' as being the
|
60
|
+
# ends of sentences.
|
61
|
+
end_of_sentence = '[.?!](?:\)|\'+)?'
|
62
|
+
# Handle a forced normal space '\ '.
|
63
|
+
doc.gsub!(/(#{end_of_sentence})\\ /) do
|
64
|
+
$1 + xmlelement('normalspace')
|
65
|
+
end
|
66
|
+
not_a_capital = '[^A-Z]'
|
67
|
+
# Case of "foo. A"
|
68
|
+
doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})[ ]+([^\s])/) do
|
69
|
+
$1 + $2 + xmlelement('intersentencespace') + ' ' + $3
|
70
|
+
end
|
71
|
+
# Case of "foo.\n A"
|
72
|
+
doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})\n[ ]+([^\s])/) do
|
73
|
+
$1 + $2 + xmlelement('intersentencespace') + ' ' + $3
|
74
|
+
end
|
75
|
+
# Case of "foo.\nA"
|
76
|
+
doc.gsub!(/(#{not_a_capital})(#{end_of_sentence})\n([^\n])/) do
|
77
|
+
$1 + $2 + xmlelement('intersentencespace') + ' ' + $3
|
78
|
+
end
|
79
|
+
# Handle the manual override to force an inter-sentence space, '\@',
|
80
|
+
# as in 'Superman II\@. A new sentence'.
|
81
|
+
doc.gsub!(/\\@\. /, '.' + xmlelement('intersentencespace') + ' ')
|
82
|
+
end
|
83
|
+
|
84
|
+
# Removes commands that might screw up Tralics.
|
85
|
+
def remove_commands(doc)
|
86
|
+
# Determine if we're using footnote symbols.
|
87
|
+
symbols_cmd = '\renewcommand{\thefootnote}{\fnsymbol{footnote}}'
|
88
|
+
@footnote_symbols = !!doc.match(/^\s*#{Regexp.escape(symbols_cmd)}/)
|
89
|
+
|
90
|
+
doc.gsub!(/^\s*\\renewcommand.*$/, '')
|
91
|
+
end
|
92
|
+
|
93
|
+
# Returns true if we should use footnote symbols in place of numbers.
|
94
|
+
def footnote_symbols?
|
95
|
+
@footnote_symbols
|
96
|
+
end
|
97
|
+
|
98
|
+
# Handles \verb environments.
|
99
|
+
# LaTeX supports an inline verbatim environment using
|
100
|
+
# \verb+<stuff>+
|
101
|
+
# The + is arbitrary; any non-letter character is fine as long as it
|
102
|
+
# doesn't appear in <stuff>, so this code has exactly the same effect:
|
103
|
+
# \verb!<stuff>!
|
104
|
+
# \verb@<stuff>@
|
105
|
+
# \verb8<stuff>8
|
106
|
+
# My preference is to use + or - if available.
|
107
|
+
def inline_verbatim(doc)
|
108
|
+
doc.gsub!(/\\verb([^A-Za-z])(.*?)\1/) do
|
109
|
+
key = digest($2)
|
110
|
+
literal_cache[key] = $2
|
111
|
+
xmlelement('inlineverbatim') { key }
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# Removes commented-out lines.
|
116
|
+
def remove_comments(output)
|
117
|
+
output.gsub!(/[^\\]%.*$/, '')
|
118
|
+
end
|
119
|
+
|
120
|
+
# Converts LaTeX double backslashes to HTML breaks.
|
121
|
+
def double_backslashes(string)
|
122
|
+
lines = []
|
123
|
+
in_table = false
|
124
|
+
string.split("\n").each do |line|
|
125
|
+
in_table ||= (line =~ /^\s*\\begin{(?:tabular|longtable)}/)
|
126
|
+
line.gsub!('\\\\', xmlelement('backslashbreak')) unless in_table
|
127
|
+
lines << line
|
128
|
+
in_table = (in_table && line !~ /^\s*\\end{tabular}/)
|
129
|
+
end
|
130
|
+
lines.join("\n")
|
131
|
+
end
|
132
|
+
|
133
|
+
# Adds some default commands.
|
134
|
+
def add_commands(polytex)
|
135
|
+
line(custom_commands) + tralics_commands + polytex
|
136
|
+
end
|
137
|
+
|
138
|
+
# Pads a string with newlines.
|
139
|
+
def line(string)
|
140
|
+
"\n#{string}\n"
|
141
|
+
end
|
142
|
+
|
143
|
+
# Handles title fields.
|
144
|
+
def title_fields(string)
|
145
|
+
%w{title subtitle author date}.each do |field|
|
146
|
+
string.gsub! /\\#{field}\{(.*)\}/ do |s|
|
147
|
+
maketitle_elements[field] = $1
|
148
|
+
''
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
# Replaces maketitle with an XML element.
|
154
|
+
def maketitle(string)
|
155
|
+
string.gsub! /\\maketitle/ do |s|
|
156
|
+
xmlelement('maketitle')
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
# Preserves label names.
|
161
|
+
# Tralics doesn't keep the names of labels, e.g., 'cha:foobar' in
|
162
|
+
# '\label{cha:foobar}'. But Tralics supplies a wide variety of
|
163
|
+
# pseudo-LaTeX commands to add arbitrary XML elements to the final
|
164
|
+
# document. In this case, the \xbox command does the trick. See
|
165
|
+
# http://www-sop.inria.fr/marelle/tralics/doc-x.html
|
166
|
+
# for more information.
|
167
|
+
def label_names(string)
|
168
|
+
string.gsub! /\\label\{(.*?)\}/ do |s|
|
169
|
+
label = $1.gsub(':', '-').gsub('_', underscore_digest)
|
170
|
+
"#{s}\n\\xbox{data-label}{#{label}}"
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
# Handles image names with underscores.
|
175
|
+
# This is a terrible kludge, and it's annoying that it's
|
176
|
+
# apparently necessary.
|
177
|
+
def image_names(string)
|
178
|
+
string.gsub! /\\image\{(.*?)\}/ do |s|
|
179
|
+
escaped_filename = $1.gsub('_', underscore_digest)
|
180
|
+
"\\image{#{escaped_filename}}"
|
181
|
+
end
|
182
|
+
string.gsub! /\\imagebox\{(.*?)\}/ do |s|
|
183
|
+
escaped_filename = $1.gsub('_', underscore_digest)
|
184
|
+
"\\imagebox{#{escaped_filename}}"
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
# Restores the equation labels.
|
189
|
+
def restore_eq_labels(output)
|
190
|
+
math_label_cache.each do |key, label|
|
191
|
+
output.gsub!(key, label)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
# Handles centering in figures.
|
196
|
+
# The way we handle generic \begin{center}...\end{center} doesn't
|
197
|
+
# work in figures for some reason. Luckily, the preferred method
|
198
|
+
# is to use \centering anyway, so this kludge is actually better LaTeX.
|
199
|
+
def convert_figure_centering(output)
|
200
|
+
@in_figure = false
|
201
|
+
centered = output.split("\n").map do |line|
|
202
|
+
if line =~ /^\s*\\begin\{figure\}/
|
203
|
+
@in_figure = true
|
204
|
+
line
|
205
|
+
elsif @in_figure && line =~ /^\s*\\begin\{center\}/
|
206
|
+
'\centering'
|
207
|
+
elsif @in_figure && line =~ /^\s*\\end\{center\}/
|
208
|
+
''
|
209
|
+
elsif @in_figure && line =~ /^\s*\\end\{figure\}/
|
210
|
+
@in_figure = false
|
211
|
+
line
|
212
|
+
else
|
213
|
+
line
|
214
|
+
end
|
215
|
+
end.join("\n")
|
216
|
+
output.replace(centered)
|
217
|
+
end
|
218
|
+
|
219
|
+
# Converts the longtable environment to simple tabular.
|
220
|
+
# This is mainly because kramdown outputs longtables by default,
|
221
|
+
# but as a side-effect you can also use longtables in PolyTeX
|
222
|
+
# input documents.
|
223
|
+
def convert_longtable(output)
|
224
|
+
output.gsub!('\begin{longtable}', '\begin{tabular}')
|
225
|
+
output.gsub!('\end{longtable}', '\end{tabular}')
|
226
|
+
end
|
227
|
+
|
228
|
+
# Marks environments with their types.
|
229
|
+
# Tralics strips some information when processing LaTeX, such as
|
230
|
+
# whether a particular div defines a chapter. We remedy this by
|
231
|
+
# using the \AddAttToCurrent pseudo-LaTeX command to mark such
|
232
|
+
# environments with their types.
|
233
|
+
def mark_environments(string)
|
234
|
+
|
235
|
+
# Marks chapters with a 'chapter' type.
|
236
|
+
# Also handles \chapter*.
|
237
|
+
string.gsub! /^\s*\\chapter\*?\{(.*)\}/ do |s|
|
238
|
+
"#{s}\n\\AddAttToCurrent{type}{chapter}"
|
239
|
+
end
|
240
|
+
|
241
|
+
# Wrap codelistings in a 'codelisting' element.
|
242
|
+
string.gsub! /\\begin{codelisting}/ do |s|
|
243
|
+
"\\begin{xmlelement*}{codelisting}\n#{s}"
|
244
|
+
end
|
245
|
+
string.gsub! /\\end{codelisting}/ do |s|
|
246
|
+
"#{s}\n\\end{xmlelement*}"
|
247
|
+
end
|
248
|
+
|
249
|
+
# Wrap asides in an 'aside' element.
|
250
|
+
string.gsub! /\\begin{aside}/ do |s|
|
251
|
+
"\\begin{xmlelement*}{aside}\n#{s}"
|
252
|
+
end
|
253
|
+
string.gsub! /\\end{aside}/ do |s|
|
254
|
+
"#{s}\n\\end{xmlelement*}"
|
255
|
+
end
|
256
|
+
|
257
|
+
# Replace quotations and verse with corresponding XML elements.
|
258
|
+
string.gsub! /\\begin{quote}/ do |s|
|
259
|
+
quotation = '\AddAttToCurrent{class}{quotation}'
|
260
|
+
"\\begin{xmlelement*}{blockquote}\n#{quotation}"
|
261
|
+
end
|
262
|
+
string.gsub! /\\end{quote}/ do |s|
|
263
|
+
"\\end{xmlelement*}"
|
264
|
+
end
|
265
|
+
string.gsub! /\\begin{verse}/ do |s|
|
266
|
+
"\\begin{xmlelement*}{blockquote}\n\\AddAttToCurrent{class}{verse}"
|
267
|
+
end
|
268
|
+
string.gsub! /\\end{verse}/ do |s|
|
269
|
+
"\\end{xmlelement*}"
|
270
|
+
end
|
271
|
+
|
272
|
+
# Handle \begin{center}...\end{center}
|
273
|
+
string.gsub! /\\begin{center}/, '\begin{xmlelement*}{center}'
|
274
|
+
string.gsub! /\\end{center}/, '\end{xmlelement*}'
|
275
|
+
|
276
|
+
# Handle \centering
|
277
|
+
string.gsub! /\\centering/, '\AddAttToCurrent{class}{center}'
|
278
|
+
|
279
|
+
# # Handle \image
|
280
|
+
# string.gsub! /\\image/, '\includegraphics'
|
281
|
+
end
|
282
|
+
|
283
|
+
# Collects alignment information for tabular environments.
|
284
|
+
# We suck out all the stuff like 'l|l|lr' in
|
285
|
+
# \begin{tabular}{l|l|lr}
|
286
|
+
# The reason is that we need to work around a couple of bugs in Tralics.
|
287
|
+
# I've tried in vain to figure out WTF is going on in the Tralics
|
288
|
+
# source, but it's easy enough in Ruby so I'm throwing it in here.
|
289
|
+
def make_tabular_alignment_cache(output)
|
290
|
+
alignment_regex = /^\s*\\begin{tabular}{((?:\|*[lcr]+\|*)+)}/
|
291
|
+
@tabular_alignment_cache = output.scan(alignment_regex).flatten
|
292
|
+
end
|
293
|
+
|
294
|
+
# Returns the XML produced by the Tralics program.
|
295
|
+
# There is a lot of ugly file manipulation here, but it's fundamentally
|
296
|
+
# straightforward. The heart of it is
|
297
|
+
#
|
298
|
+
# system("#{tralics} -nomathml #{file.path} > log/tralics.log")
|
299
|
+
#
|
300
|
+
# which writes the converted PolyTeX file as XML, which then gets
|
301
|
+
# read in and lightly processed.
|
302
|
+
def tralics_xml(polytex)
|
303
|
+
file = Tempfile.new(['polytex', '.tex'])
|
304
|
+
puts polytex if debug?
|
305
|
+
file.write(polytex)
|
306
|
+
file.close
|
307
|
+
Dir.mkdir 'log' unless File.directory?('log')
|
308
|
+
system("#{tralics} -nomathml #{file.path} > log/tralics.log")
|
309
|
+
dirname = File.dirname(file.path)
|
310
|
+
xml_filename = File.basename(file.path, '.tex') + '.xml'
|
311
|
+
raw_xml = File.read(File.join(dirname, xml_filename))
|
312
|
+
xml = clean_xml(raw_xml)
|
313
|
+
puts xml if debug?
|
314
|
+
xml
|
315
|
+
ensure
|
316
|
+
xmlfile = file.path.sub('.tex', '.xml')
|
317
|
+
logfile = file.path.sub('.tex', '.log')
|
318
|
+
[xmlfile, logfile].each do |file|
|
319
|
+
File.delete(file) if File.exist?(file)
|
320
|
+
end
|
321
|
+
file.delete
|
322
|
+
end
|
323
|
+
|
324
|
+
# Wraps the whole document in <document></document>.
|
325
|
+
# Fragmentary documents come wrapped in 'unknown' tags.
|
326
|
+
# Full documents are wrapped in 'std' tags.
|
327
|
+
# Change either to 'document' for consistency.
|
328
|
+
def add_document_tag(doc)
|
329
|
+
%w[unknown std].each do |parent_tag|
|
330
|
+
node = doc.at_css(parent_tag)
|
331
|
+
node.name = 'document' unless node.nil?
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
def clean_xml(raw_xml)
|
336
|
+
nokogiri_ellipsis_workaround(raw_xml)
|
337
|
+
end
|
338
|
+
|
339
|
+
# Fixes a Nokogiri bug.
|
340
|
+
# As of this writing, the latest version of Nokogiri (1.5.6) doesn't
|
341
|
+
# handle the horizontal ellipsis character '…' correctly in Ruby 2.
|
342
|
+
# The kludgy solution is to replace it with '…' in the raw XML,
|
343
|
+
# which does work.
|
344
|
+
def nokogiri_ellipsis_workaround(raw_xml)
|
345
|
+
raw_xml.gsub('…', '…')
|
346
|
+
end
|
347
|
+
end
|
348
|
+
end
|
349
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Polytexnic
|
2
|
+
module Preprocessor
|
3
|
+
module Latex
|
4
|
+
|
5
|
+
def to_processed_latex
|
6
|
+
@polytex = polish_tables(process_asides(clean_latex_document))
|
7
|
+
end
|
8
|
+
|
9
|
+
# Returns LaTeX with hashed versions of literal environments.
|
10
|
+
# Literal environments are hashed and passed through the pipeline
|
11
|
+
# so that we can process things like refs to hyperrefs using gsubs.
|
12
|
+
def clean_latex_document
|
13
|
+
cache_literal(@polytex, :latex)
|
14
|
+
end
|
15
|
+
|
16
|
+
def polish_tables(text)
|
17
|
+
text.tap do
|
18
|
+
text.gsub!(/^\s*(\\begin\{table\})/) do
|
19
|
+
"#{$1}\n\\begin{center}\n\\small\n"
|
20
|
+
end
|
21
|
+
text.gsub!(/^\s*(\\end\{table\})/) { "\\end{center}\n#{$1}" }
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# Processes aside environments.
|
26
|
+
# In order to get nice framed & shaded aside boxes, we need to
|
27
|
+
# transform the default aside into a new environment.
|
28
|
+
def process_asides(text)
|
29
|
+
# Transform asides with headings and labels.
|
30
|
+
aside_regex = /\\begin{aside}\n\s*
|
31
|
+
\\heading{(.*?)}\s*
|
32
|
+
\\label{(.*?)}\n
|
33
|
+
(.*?)
|
34
|
+
\\end{aside}/mx
|
35
|
+
text.tap do
|
36
|
+
text.gsub!(aside_regex) do
|
37
|
+
%(\\begin{shaded_aside}{#{$1}}{#{$2}}\n#{$3}\n\\end{shaded_aside})
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
module Polytexnic
|
3
|
+
module Preprocessor
|
4
|
+
module Polytex
|
5
|
+
|
6
|
+
# Converts Markdown to PolyTeX.
|
7
|
+
# We adopt a unified approach: rather than convert "Markdown" (I use
|
8
|
+
# the term loosely*) directly to HTML, we convert it to PolyTeX and
|
9
|
+
# then run everything through the PolyTeX pipeline. Happily, kramdown
|
10
|
+
# comes equipped with a `to_latex` method that does most of the heavy
|
11
|
+
# lifting. The ouput isn't as clean as that produced by Pandoc (our
|
12
|
+
# previous choice), but it comes with significant advantages: (1) It's
|
13
|
+
# written in Ruby, available as a gem, so its use eliminates an external
|
14
|
+
# dependency. (2) It's the foundation for the "Markdown" interpreter
|
15
|
+
# used by Leanpub, so by using it ourselves we ensure greater
|
16
|
+
# compatibility with Leanpub books.
|
17
|
+
#
|
18
|
+
# * <rant>The number of mutually incompatible markup languages going
|
19
|
+
# by the name "Markdown" is truly mind-boggling. Most of them add things
|
20
|
+
# to John Gruber's original Markdown language in an ever-expanding
|
21
|
+
# attempt to bolt on the functionality needed to write longer documents.
|
22
|
+
# At this point, I fear that "Markdown" has become little more than a
|
23
|
+
# marketing term.</rant>
|
24
|
+
def to_polytex
|
25
|
+
require 'Kramdown'
|
26
|
+
cleaned_markdown = cache_code_environments
|
27
|
+
cleaned_markdown.tap do |markdown|
|
28
|
+
convert_code_inclusion(markdown)
|
29
|
+
end
|
30
|
+
math_cache = cache_math(cleaned_markdown)
|
31
|
+
# Override the header ordering, which starts with 'section' by default.
|
32
|
+
lh = 'chapter,section,subsection,subsubsection,paragraph,subparagraph'
|
33
|
+
kramdown = Kramdown::Document.new(cleaned_markdown, latex_headers: lh)
|
34
|
+
@source = restore_inclusion(restore_math(kramdown.to_latex, math_cache))
|
35
|
+
end
|
36
|
+
|
37
|
+
def cache_code_environments
|
38
|
+
output = []
|
39
|
+
lines = @source.split("\n")
|
40
|
+
indentation = ' ' * 4
|
41
|
+
while (line = lines.shift)
|
42
|
+
if line =~ /\{lang="(.*?)"\}/
|
43
|
+
language = $1
|
44
|
+
code = []
|
45
|
+
while (line = lines.shift) && line.match(/^#{indentation}(.*)$/) do
|
46
|
+
code << $1
|
47
|
+
end
|
48
|
+
code = code.join("\n")
|
49
|
+
key = digest(code)
|
50
|
+
code_cache[key] = [code, language]
|
51
|
+
output << key
|
52
|
+
output << line
|
53
|
+
elsif line =~ /^```\s*$/ # basic code fences
|
54
|
+
while (line = lines.shift) && !line.match(/^```\s*$/)
|
55
|
+
output << indentation + line
|
56
|
+
end
|
57
|
+
output << "\n"
|
58
|
+
elsif line =~ /^```(\w+)\s*$/ # syntax-highlighted code fences
|
59
|
+
language = $1
|
60
|
+
code = []
|
61
|
+
while (line = lines.shift) && !line.match(/^```\s*$/) do
|
62
|
+
code << line
|
63
|
+
end
|
64
|
+
code = code.join("\n")
|
65
|
+
key = digest(code)
|
66
|
+
code_cache[key] = [code, language]
|
67
|
+
output << key
|
68
|
+
else
|
69
|
+
output << line
|
70
|
+
end
|
71
|
+
end
|
72
|
+
output.join("\n")
|
73
|
+
end
|
74
|
+
|
75
|
+
# Caches Leanpub-style math.
|
76
|
+
# Leanpub uses the notation {$$}...{/$$} for both inline and block math,
|
77
|
+
# with the only difference being the presences of newlines:
|
78
|
+
# {$$} x^2 {/$$} % inline
|
79
|
+
# and
|
80
|
+
# {$$}
|
81
|
+
# x^2 % block
|
82
|
+
# {/$$}
|
83
|
+
# I personally hate this notation and convention, but anyone who really
|
84
|
+
# cares should just use PolyTeX instead of Markdown.
|
85
|
+
def cache_math(text)
|
86
|
+
cache = {}
|
87
|
+
text.gsub!(/\{\$\$\}\n(.*?)\n\{\/\$\$\}/) do
|
88
|
+
key = digest($1)
|
89
|
+
cache[[:block, key]] = $1
|
90
|
+
key
|
91
|
+
end
|
92
|
+
text.gsub!(/\{\$\$\}(.*?)\{\/\$\$\}/) do
|
93
|
+
key = digest($1)
|
94
|
+
cache[[:inline, key]] = $1
|
95
|
+
key
|
96
|
+
end
|
97
|
+
cache
|
98
|
+
end
|
99
|
+
|
100
|
+
# Restores the Markdown math.
|
101
|
+
# This is easy because we're running everything through our LaTeX
|
102
|
+
# pipeline.
|
103
|
+
def restore_math(text, cache)
|
104
|
+
cache.each do |(kind, key), value|
|
105
|
+
case kind
|
106
|
+
when :inline
|
107
|
+
open = '\('
|
108
|
+
close = '\)'
|
109
|
+
when :block
|
110
|
+
open = '\[' + "\n"
|
111
|
+
close = "\n" + '\]'
|
112
|
+
end
|
113
|
+
text.gsub!(key, open + value + close)
|
114
|
+
end
|
115
|
+
text
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# Adds support for <<(path/to/code) inclusion.
|
120
|
+
def convert_code_inclusion(text)
|
121
|
+
text.gsub!(/^\s*<<(\(.*?\))/) { "<!-- inclusion= <<#{$1}-->" }
|
122
|
+
end
|
123
|
+
def restore_inclusion(text)
|
124
|
+
text.gsub(/% <!-- inclusion= (.*?)-->/) { "%= #{$1}" }
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|