polytexnic 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/.pull_requests/1371777257 +0 -0
- data/.pull_requests/1371927975 +0 -0
- data/.pull_requests/1372804345 +0 -0
- data/.pull_requests/1374784075 +0 -0
- data/.pull_requests/1375304853 +0 -0
- data/.pull_requests/1375408308 +0 -0
- data/.pull_requests/1375409462 +0 -0
- data/.pull_requests/1375410668 +0 -0
- data/.pull_requests/1375472132 +0 -0
- data/.pull_requests/1375485496 +0 -0
- data/.pull_requests/1375487548 +0 -0
- data/.pull_requests/1375492835 +0 -0
- data/.pull_requests/1375497765 +0 -0
- data/.pull_requests/1375559547 +0 -0
- data/.pull_requests/1375589063 +0 -0
- data/.pull_requests/1375841786 +0 -0
- data/.pull_requests/1376352634 +0 -0
- data/.pull_requests/1376353299 +0 -0
- data/.pull_requests/1376449284 +0 -0
- data/.pull_requests/1376452696 +0 -0
- data/.pull_requests/1376454166 +0 -0
- data/.pull_requests/1376532291 +0 -0
- data/.pull_requests/1376625487 +0 -0
- data/.pull_requests/1376690108 +0 -0
- data/.pull_requests/1376699046 +0 -0
- data/.pull_requests/1376707642 +0 -0
- data/.pull_requests/1377230284 +0 -0
- data/.pull_requests/1379118478 +0 -0
- data/.pull_requests/1379123150 +0 -0
- data/.pull_requests/1380221847 +0 -0
- data/.pull_requests/1380589654 +0 -0
- data/.pull_requests/1380673142 +0 -0
- data/.pull_requests/1380850800 +0 -0
- data/.pull_requests/1381001264 +0 -0
- data/.pull_requests/1381005204 +0 -0
- data/.pull_requests/1381103022 +0 -0
- data/.pull_requests/1381252832 +0 -0
- data/.pull_requests/1381276624 +0 -0
- data/.pull_requests/1381344234 +0 -0
- data/.pull_requests/1381385297 +0 -0
- data/.pull_requests/1381427498 +0 -0
- data/.pull_requests/1381429761 +0 -0
- data/.pull_requests/1381873684 +0 -0
- data/.pull_requests/1382045490 +0 -0
- data/.pull_requests/1382056384 +0 -0
- data/.pull_requests/1382405223 +0 -0
- data/.pull_requests/1382478400 +0 -0
- data/.pull_requests/1382479780 +0 -0
- data/.pull_requests/1382485483 +0 -0
- data/.pull_requests/1382569911 +0 -0
- data/.pull_requests/1382646199 +0 -0
- data/.pull_requests/1382649778 +0 -0
- data/.pull_requests/1382660987 +0 -0
- data/.pull_requests/1382743927 +0 -0
- data/.pull_requests/1382840347 +0 -0
- data/.pull_requests/1383077676 +0 -0
- data/.pull_requests/1383086948 +0 -0
- data/.pull_requests/1383161978 +0 -0
- data/.pull_requests/1383263695 +0 -0
- data/.pull_requests/1383274008 +0 -0
- data/.pull_requests/1383327328 +0 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Guardfile +15 -0
- data/LICENSE.txt +22 -0
- data/README.md +21 -0
- data/Rakefile +2 -0
- data/lib/polytexnic/literal.rb +299 -0
- data/lib/polytexnic/postprocessor.rb +28 -0
- data/lib/polytexnic/postprocessors/html.rb +1139 -0
- data/lib/polytexnic/postprocessors/latex.rb +18 -0
- data/lib/polytexnic/postprocessors/polytex.rb +44 -0
- data/lib/polytexnic/preprocessor.rb +23 -0
- data/lib/polytexnic/preprocessors/html.rb +349 -0
- data/lib/polytexnic/preprocessors/latex.rb +43 -0
- data/lib/polytexnic/preprocessors/polytex.rb +127 -0
- data/lib/polytexnic/utils.rb +176 -0
- data/lib/polytexnic/version.rb +3 -0
- data/lib/polytexnic.rb +92 -0
- data/notes/pandoc.md +41 -0
- data/polytexnic.gemspec +28 -0
- data/polytexnic_commands.sty +5 -0
- data/precompiled_binaries/tralics +0 -0
- data/spec/fixtures/code_listing.tex +14 -0
- data/spec/fixtures/figures.tex +8 -0
- data/spec/fixtures/inline_math.html +4 -0
- data/spec/fixtures/inline_math.tex +3 -0
- data/spec/fixtures/math_environments.html +50 -0
- data/spec/fixtures/math_environments.tex +56 -0
- data/spec/fixtures/section_xrefs.tex +9 -0
- data/spec/fixtures/sidebar.tex +10 -0
- data/spec/fixtures/tables.tex +8 -0
- data/spec/fixtures/verbatim_environments.html +11 -0
- data/spec/fixtures/verbatim_environments.tex +13 -0
- data/spec/integration_spec.rb +34 -0
- data/spec/markdown_to_polytex_spec.rb +192 -0
- data/spec/resemble_matcher_spec.rb +69 -0
- data/spec/spec_helper.rb +38 -0
- data/spec/support/resemble_matcher.rb +100 -0
- data/spec/to_html/asides_spec.rb +42 -0
- data/spec/to_html/chapters_and_sections_spec.rb +268 -0
- data/spec/to_html/characters_and_punctuation_spec.rb +138 -0
- data/spec/to_html/codelistings_spec.rb +70 -0
- data/spec/to_html/core_spec.rb +227 -0
- data/spec/to_html/eqref_spec.rb +32 -0
- data/spec/to_html/footnote_spec.rb +164 -0
- data/spec/to_html/graphics_and_figures_spec.rb +358 -0
- data/spec/to_html/lists_spec.rb +103 -0
- data/spec/to_html/literal_environments/code_spec.rb +141 -0
- data/spec/to_html/literal_environments/math_spec.rb +255 -0
- data/spec/to_html/literal_environments/unicode_spec.rb +12 -0
- data/spec/to_html/literal_environments/verbatim_spec.rb +168 -0
- data/spec/to_html/quotations_and_verse_spec.rb +86 -0
- data/spec/to_html/table_of_contents_spec.rb +93 -0
- data/spec/to_html/table_spec.rb +269 -0
- data/spec/to_html/text_formatting_spec.rb +50 -0
- data/spec/to_latex_spec.rb +197 -0
- data/tasks/bin/ruby_tests +41 -0
- data/tasks/run_tests_with_both_rubies.rake +5 -0
- data/tmp/.gitkeep +0 -0
- metadata +286 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'polytexnic/literal'
|
|
2
|
+
|
|
3
|
+
module Polytexnic
|
|
4
|
+
module Postprocessor
|
|
5
|
+
module Latex
|
|
6
|
+
|
|
7
|
+
# Restores literal environments (verbatim, code, math, etc.).
# Each key of the literal cache that occurs in +polytex+ is replaced, in
# place, by its cached value after that value has been passed through
# escape_backslashes. The text is printed first when debugging is on.
# Returns the same (mutated) +polytex+ string.
def replace_hashes(polytex)
  puts polytex if debug?
  literal_cache.each do |digest_key, literal|
    polytex.gsub!(digest_key, escape_backslashes(literal))
  end
  polytex
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# encoding=utf-8
|
|
2
|
+
module Polytexnic
|
|
3
|
+
module Postprocessor
|
|
4
|
+
module Polytex
|
|
5
|
+
|
|
6
|
+
# Removes references to the hypertarget package.
# Everything from each '\hypertarget' up to the end of its line is deleted
# from @source in place.
# TODO: Support hypertarget
# This isn't a priority, as you get most of what you need
# with hyperref.
def remove_hypertarget
  hypertarget_to_eol = /\\hypertarget.*$/
  @source.gsub!(hypertarget_to_eol) { '' }
end
|
|
13
|
+
|
|
14
|
+
# Fixes a kramdown verbatim bug.
# When converting code, kramdown outputs
#   "\begin{verbatim}foo"
# instead of
#   "\begin{verbatim}\nfoo".
# A newline is appended after every '\begin{verbatim}' in @source (in place).
def fix_verbatim_bug
  @source.gsub!(/\\begin\{verbatim\}/) { |opening| "#{opening}\n" }
end
|
|
21
|
+
|
|
22
|
+
# Writes the PolyTeX code environments based on the code cache.
# I.e., code that looks like
#   {lang="ruby"}
#       def foo
#         "bar"
#       end
# becomes
#   %= lang:ruby
#   \begin{code}
#   def foo
#     "bar"
#   end
#   \end{code}
# which reduces syntax highlighting to a previously solved problem.
#
# Each cache key found in @source is replaced in place. The block form of
# gsub! is used so the cached code is inserted verbatim: with a replacement
# *string*, sequences such as '\\' or '\1' inside the code would be
# interpreted as substitution escapes and silently corrupt the listing.
def write_polytex_code
  code_cache.each do |key, (code, lang, _in_codelisting)|
    latex = "%= lang:#{lang}\n\\begin{code}\n#{code}\n\\end{code}"
    @source.gsub!(key) { latex }
  end
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# encoding=utf-8
|
|
2
|
+
require 'polytexnic/literal'
|
|
3
|
+
require 'polytexnic/preprocessors/html'
|
|
4
|
+
require 'polytexnic/preprocessors/latex'
|
|
5
|
+
require 'polytexnic/preprocessors/polytex'
|
|
6
|
+
|
|
7
|
+
module Polytexnic
|
|
8
|
+
module Preprocessor
|
|
9
|
+
include Literal
|
|
10
|
+
include Html
|
|
11
|
+
include Latex
|
|
12
|
+
include Polytex
|
|
13
|
+
|
|
14
|
+
# Preprocesses the input based on output format.
# :html, :latex and :polytex dispatch to to_xml, to_processed_latex and
# to_polytex respectively; any other format does nothing and returns nil.
def preprocess(format)
  handler = {
    html:    :to_xml,
    latex:   :to_processed_latex,
    polytex: :to_polytex
  }[format]
  send(handler) if handler
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
# encoding=utf-8
|
|
2
|
+
module Polytexnic
|
|
3
|
+
module Preprocessor
|
|
4
|
+
module Html
|
|
5
|
+
|
|
6
|
+
# Converts PolyTeX to XML.
# The heart of the process is using Tralics to convert the input PolyTeX
# to XML. The raw PolyTeX (@polytex) is first run through
# process_for_tralics; the Tralics output is parsed with Nokogiri, its root
# element is renamed by add_document_tag, and the serialized result is
# stored in @xml (and returned).
def to_xml
  tralics_ready = process_for_tralics(@polytex)
  document = Nokogiri::XML(tralics_xml(tralics_ready))
  add_document_tag(document)
  @xml = document.to_xml
end
|
|
17
|
+
|
|
18
|
+
private
|
|
19
|
+
|
|
20
|
+
# Processes the input PolyTeX for Tralics.
# The key steps are creating a clean document safe for making global
# substitutions (gsubs), and then making a bunch of gsubs. Each step below
# receives the cleaned document and is expected to modify it in place; the
# steps run in the listed order and the cleaned document is returned.
def process_for_tralics(polytex)
  output = clean_document(polytex)
  %w[
    process_spaces
    remove_commands
    hyperrefs
    title_fields
    maketitle
    label_names
    image_names
    restore_eq_labels
    convert_figure_centering
    convert_longtable
    mark_environments
    make_tabular_alignment_cache
  ].each { |step| send(step, output) }
  output
end
|
|
39
|
+
|
|
40
|
+
# Returns a clean document with cached literal environments.
# This is a key step: we cache literal environments that should be
# passed through the pipeline with no changes (verbatim, code, etc.).
# The result is a document that can safely be transformed using
# global substitutions.
# Order: add default commands, cache literal and unicode text, then handle
# inline verbatim, hrefs and comments on the working document, and finally
# cache display/inline math and convert double backslashes.
def clean_document(polytex)
  with_commands = add_commands(polytex)
  doc = cache_unicode(cache_literal(with_commands))
  [:inline_verbatim, :cache_hrefs, :remove_comments].each do |step|
    send(step, doc)
  end
  math_cached = cache_display_inline_math(doc)
  double_backslashes(math_cached)
end
|
|
52
|
+
|
|
53
|
+
# Prepares spaces to be passed through the pipeline.
# Handles thin spaces ('\,') and normal spaces ('\ '), as well as
# end-of-sentence spaces. All substitutions are made on +doc+ in place.
def process_spaces(doc)
  doc.gsub!(/\\,/, xmlelement('thinspace'))

  # An end-of-sentence character, optionally followed by a closing paren or
  # closing quotes, so that (Or otherwise.) and ``Yes, indeed!'' count as
  # sentence endings too.
  sentence_end = '[.?!](?:\)|\'+)?'

  # A forced normal space '\ ' after a sentence ending.
  doc.gsub!(/(#{sentence_end})\\ /) { $1 + xmlelement('normalspace') }

  # A sentence ending only counts when it is not preceded by a capital.
  before_end = '[^A-Z]'
  head = "(#{before_end})(#{sentence_end})"
  [
    /#{head}[ ]+([^\s])/,   # Case of "foo. A"
    /#{head}\n[ ]+([^\s])/, # Case of "foo.\n A"
    /#{head}\n([^\n])/      # Case of "foo.\nA"
  ].each do |boundary|
    doc.gsub!(boundary) do
      $1 + $2 + xmlelement('intersentencespace') + ' ' + $3
    end
  end

  # The manual override to force an inter-sentence space, '\@',
  # as in 'Superman II\@. A new sentence'.
  doc.gsub!(/\\@\. /, '.' + xmlelement('intersentencespace') + ' ')
end
|
|
83
|
+
|
|
84
|
+
# Removes commands that might screw up Tralics.
# Before stripping every line that starts with '\renewcommand', records in
# @footnote_symbols whether the document switches footnotes to symbols.
def remove_commands(doc)
  symbols_cmd = '\renewcommand{\thefootnote}{\fnsymbol{footnote}}'
  symbols_regex = /^\s*#{Regexp.escape(symbols_cmd)}/
  @footnote_symbols = (doc =~ symbols_regex) ? true : false

  doc.gsub!(/^\s*\\renewcommand.*$/, '')
end

# Returns true if we should use footnote symbols in place of numbers.
# The flag is set by remove_commands.
def footnote_symbols?
  @footnote_symbols
end
|
|
97
|
+
|
|
98
|
+
# Handles \verb environments.
# LaTeX supports an inline verbatim environment using
#   \verb+<stuff>+
# The + is arbitrary; any non-letter character is fine as long as it
# doesn't appear in <stuff>, so this code has exactly the same effect:
#   \verb!<stuff>!
#   \verb@<stuff>@
#   \verb8<stuff>8
# My preference is to use + or - if available.
#
# Each occurrence is replaced in +doc+ by an 'inlineverbatim' XML element
# wrapping a digest key; the literal content is stored in literal_cache
# under that key.
def inline_verbatim(doc)
  doc.gsub!(/\\verb([^A-Za-z])(.*?)\1/) do
    content = $2
    key = digest(content)
    literal_cache[key] = content
    xmlelement('inlineverbatim') { key }
  end
end
|
|
114
|
+
|
|
115
|
+
# Removes LaTeX comments (an unescaped '%' through the end of its line).
# The substitution is made on +output+ in place; returns nil when nothing
# was removed (gsub! semantics).
#
# A whitespace character directly before the '%' (including the newline
# that precedes a whole-line comment) is removed together with the comment,
# so a commented-out line disappears without leaving a blank line behind.
# Any other character before the '%' is kept: the previous pattern,
# /[^\\]%.*$/, consumed it, turning "foo% note" into "fo".
# An escaped percent sign ('\%') is never treated as a comment.
def remove_comments(output)
  output.gsub!(/(?:\s|(?<=[^\\\s]))%.*$/, '')
end
|
|
119
|
+
|
|
120
|
+
# Converts LaTeX double backslashes to HTML breaks.
# Every '\\' outside of a table becomes a 'backslashbreak' XML element;
# inside tabular/longtable environments '\\' is a row terminator and is
# left alone. Returns a new string (lines re-joined with "\n").
#
# Table mode begins on a line starting with \begin{tabular} or
# \begin{longtable} and ends on the matching \end line. Both environment
# names are recognized on the closing side as well: this method runs before
# longtables are converted to tabular, so matching only \end{tabular} would
# leave table mode switched on for the rest of the document after the first
# longtable.
def double_backslashes(string)
  table_begin = /^\s*\\begin\{(?:tabular|longtable)\}/
  table_end   = /^\s*\\end\{(?:tabular|longtable)\}/
  in_table = false
  converted = string.split("\n").map do |line|
    in_table = true if line =~ table_begin
    result = in_table ? line : line.gsub('\\\\', xmlelement('backslashbreak'))
    in_table = false if line =~ table_end
    result
  end
  converted.join("\n")
end
|
|
132
|
+
|
|
133
|
+
# Adds some default commands.
# The custom commands (on their own padded line) and the Tralics commands
# are prepended to the PolyTeX source.
def add_commands(polytex)
  preamble = line(custom_commands)
  preamble + tralics_commands + polytex
end

# Pads a string with newlines (one before, one after).
def line(string)
  format("\n%s\n", string)
end
|
|
142
|
+
|
|
143
|
+
# Handles title fields.
# For each of \title, \subtitle, \author and \date, the command is removed
# from +string+ (in place) and its argument is recorded in
# maketitle_elements under the field name. The argument match is greedy, so
# it extends to the last closing brace on the line.
def title_fields(string)
  %w{title subtitle author date}.each do |field|
    field_command = /\\#{field}\{(.*)\}/
    string.gsub!(field_command) do
      maketitle_elements[field] = Regexp.last_match(1)
      ''
    end
  end
end
|
|
152
|
+
|
|
153
|
+
# Replaces maketitle with an XML element.
# Every '\maketitle' in +string+ is swapped (in place) for the 'maketitle'
# XML element.
def maketitle(string)
  string.gsub!(/\\maketitle/) { xmlelement('maketitle') }
end
|
|
159
|
+
|
|
160
|
+
# Preserves label names.
# Tralics doesn't keep the names of labels, e.g., 'cha:foobar' in
# '\label{cha:foobar}'. But Tralics supplies a wide variety of
# pseudo-LaTeX commands to add arbitrary XML elements to the final
# document. In this case, the \xbox command does the trick. See
#   http://www-sop.inria.fr/marelle/tralics/doc-x.html
# for more information.
#
# Each \label{...} is kept and followed by an \xbox{data-label}{...} whose
# name has ':' turned into '-' and '_' replaced by underscore_digest.
def label_names(string)
  string.gsub!(/\\label\{(.*?)\}/) do |label_command|
    name = $1
    safe_name = name.tr(':', '-').gsub('_', underscore_digest)
    "#{label_command}\n\\xbox{data-label}{#{safe_name}}"
  end
end
|
|
173
|
+
|
|
174
|
+
# Handles image names with underscores.
# This is a terrible kludge, and it's annoying that it's
# apparently necessary.
# Underscores inside the argument of \image{...} and then \imagebox{...}
# are replaced (in place) by underscore_digest. The return value is that
# of the final (imagebox) substitution.
def image_names(string)
  %w[image imagebox].map do |command|
    string.gsub!(/\\#{command}\{(.*?)\}/) do
      "\\#{command}{#{$1.gsub('_', underscore_digest)}}"
    end
  end.last
end
|
|
187
|
+
|
|
188
|
+
# Restores the equation labels.
# Every key of math_label_cache found in +output+ is replaced (in place)
# by the label cached under it.
def restore_eq_labels(output)
  math_label_cache.each { |digest_key, label| output.gsub!(digest_key, label) }
end
|
|
194
|
+
|
|
195
|
+
# Handles centering in figures.
# The way we handle generic \begin{center}...\end{center} doesn't
# work in figures for some reason. Luckily, the preferred method
# is to use \centering anyway, so this kludge is actually better LaTeX.
#
# Inside a figure, a '\begin{center}' line becomes '\centering' and an
# '\end{center}' line becomes empty; everything else is left as is.
# +output+ is rewritten in place; @in_figure tracks the scan state.
def convert_figure_centering(output)
  @in_figure = false
  rewritten = []
  output.split("\n").each do |line|
    case line
    when /^\s*\\begin\{figure\}/
      @in_figure = true
    when /^\s*\\begin\{center\}/
      line = '\centering' if @in_figure
    when /^\s*\\end\{center\}/
      line = '' if @in_figure
    when /^\s*\\end\{figure\}/
      @in_figure = false
    end
    rewritten << line
  end
  output.replace(rewritten.join("\n"))
end
|
|
218
|
+
|
|
219
|
+
# Converts the longtable environment to simple tabular.
# This is mainly because kramdown outputs longtables by default,
# but as a side-effect you can also use longtables in PolyTeX
# input documents.
# The \begin and \end delimiters are rewritten in place, in that order;
# the return value is that of the \end substitution.
def convert_longtable(output)
  %w[begin end].map do |delimiter|
    output.gsub!("\\#{delimiter}{longtable}", "\\#{delimiter}{tabular}")
  end.last
end
|
|
227
|
+
|
|
228
|
+
# Marks environments with their types.
# Tralics strips some information when processing LaTeX, such as
# whether a particular div defines a chapter. We remedy this by
# using the \AddAttToCurrent pseudo-LaTeX command to mark such
# environments with their types. All changes are made to +string+ in place.
def mark_environments(string)
  # Marks chapters with a 'chapter' type.
  # Also handles \chapter*.
  string.gsub!(/^\s*\\chapter\*?\{(.*)\}/) do |heading|
    "#{heading}\n\\AddAttToCurrent{type}{chapter}"
  end

  # Wrap codelistings and asides in an XML element named after them,
  # keeping the original \begin/\end inside the wrapper.
  %w[codelisting aside].each do |env|
    string.gsub!(/\\begin\{#{env}\}/) do |opening|
      "\\begin{xmlelement*}{#{env}}\n#{opening}"
    end
    string.gsub!(/\\end\{#{env}\}/) do |closing|
      "#{closing}\n\\end{xmlelement*}"
    end
  end

  # Replace quotations and verse with blockquote elements carrying a class.
  [%w[quote quotation], %w[verse verse]].each do |env, css_class|
    string.gsub!(/\\begin\{#{env}\}/) do
      "\\begin{xmlelement*}{blockquote}\n\\AddAttToCurrent{class}{#{css_class}}"
    end
    string.gsub!(/\\end\{#{env}\}/) { "\\end{xmlelement*}" }
  end

  # Handle \begin{center}...\end{center}
  string.gsub!(/\\begin\{center\}/, '\begin{xmlelement*}{center}')
  string.gsub!(/\\end\{center\}/, '\end{xmlelement*}')

  # Handle \centering
  string.gsub!(/\\centering/, '\AddAttToCurrent{class}{center}')

  # # Handle \image
  # string.gsub! /\\image/, '\includegraphics'
end
|
|
282
|
+
|
|
283
|
+
# Collects alignment information for tabular environments.
# We suck out all the stuff like 'l|l|lr' in
#   \begin{tabular}{l|l|lr}
# The reason is that we need to work around a couple of bugs in Tralics.
# I've tried in vain to figure out WTF is going on in the Tralics
# source, but it's easy enough in Ruby so I'm throwing it in here.
#
# Stores (and returns) the list of alignment specs in
# @tabular_alignment_cache, one entry per matching \begin{tabular} line.
# Only specs made up solely of 'l', 'c', 'r' and '|' (with at least one
# letter) are collected.
#
# The spec pattern is written as "pipes, then one or more letter+pipes
# groups" so that every spec can be matched in exactly one way. It accepts
# the same strings as the earlier /(?:\|*[lcr]+\|*)+/ form, whose nested
# quantifiers could backtrack exponentially on a long spec that ultimately
# fails to match (e.g. many 'l' columns followed by 'p{3cm}').
def make_tabular_alignment_cache(output)
  alignment_regex = /^\s*\\begin\{tabular\}\{(\|*(?:[lcr]\|*)+)\}/
  @tabular_alignment_cache = output.scan(alignment_regex).flatten
end
|
|
293
|
+
|
|
294
|
+
# Returns the XML produced by the Tralics program.
# There is a lot of ugly file manipulation here, but it's fundamentally
# straightforward. The heart of it is
#
#   system("#{tralics} -nomathml #{file.path} > log/tralics.log")
#
# which writes the converted PolyTeX file as XML, which then gets
# read in and lightly processed.
#
# The PolyTeX is written to a temporary '.tex' file; the '.xml' file with
# the same base name is read back and passed through clean_xml. The
# temporary file and the generated '.xml'/'.log' siblings are removed
# afterwards, whether or not the conversion succeeded.
def tralics_xml(polytex)
  file = Tempfile.new(['polytex', '.tex'])
  puts polytex if debug?
  file.write(polytex)
  file.close
  Dir.mkdir 'log' unless File.directory?('log')
  system("#{tralics} -nomathml #{file.path} > log/tralics.log")
  # Strip only the trailing extension: a plain sub('.tex', ...) would
  # rewrite the first '.tex' anywhere in the path (e.g. in a directory name).
  base = file.path.sub(/\.tex\z/, '')
  xml = clean_xml(File.read("#{base}.xml"))
  puts xml if debug?
  xml
ensure
  # +file+ is nil if creating the temporary file itself failed.
  if file
    base = file.path.sub(/\.tex\z/, '')
    ["#{base}.xml", "#{base}.log"].each do |generated|
      File.delete(generated) if File.exist?(generated)
    end
    file.delete
  end
end
|
|
323
|
+
|
|
324
|
+
# Wraps the whole document in <document></document>.
# Fragmentary documents come wrapped in 'unknown' tags.
# Full documents are wrapped in 'std' tags.
# Change either to 'document' for consistency.
# The first element found for each tag name (via doc.at_css) is renamed.
def add_document_tag(doc)
  %w[unknown std].each do |parent_tag|
    wrapper = doc.at_css(parent_tag)
    next if wrapper.nil?
    wrapper.name = 'document'
  end
end
|
|
334
|
+
|
|
335
|
+
# Cleans up the raw XML produced by Tralics before it is parsed.
# Currently this only applies the Nokogiri ellipsis workaround.
def clean_xml(raw_xml)
  nokogiri_ellipsis_workaround(raw_xml)
end

# Fixes a Nokogiri bug.
# As of this writing, the latest version of Nokogiri (1.5.6) doesn't
# handle the horizontal ellipsis character (U+2026) correctly in Ruby 2.
# The kludgy solution is to replace it with the numeric character
# reference '&#8230;' in the raw XML, which does work.
# Returns a new string; +raw_xml+ is not modified.
def nokogiri_ellipsis_workaround(raw_xml)
  # Replacing the character with itself would be a no-op; the replacement
  # must be the escaped reference.
  raw_xml.gsub("\u2026", '&#8230;')
end
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
module Polytexnic
|
|
2
|
+
module Preprocessor
|
|
3
|
+
module Latex
|
|
4
|
+
|
|
5
|
+
# Produces the processed LaTeX and stores it back in @polytex.
# The document is cleaned (literal environments hashed), asides are
# transformed, and tables are polished, in that order.
def to_processed_latex
  cleaned = clean_latex_document
  with_asides = process_asides(cleaned)
  @polytex = polish_tables(with_asides)
end

# Returns LaTeX with hashed versions of literal environments.
# Literal environments are hashed and passed through the pipeline
# so that we can process things like refs to hyperrefs using gsubs.
def clean_latex_document
  output_format = :latex
  cache_literal(@polytex, output_format)
end
|
|
15
|
+
|
|
16
|
+
# Centers tables and sets them in a smaller font.
# '\begin{center}' and '\small' are inserted after every line-leading
# \begin{table}, and '\end{center}' before every line-leading \end{table}.
# Whitespace preceding those commands is dropped. +text+ is modified in
# place and returned.
def polish_tables(text)
  text.gsub!(/^\s*(\\begin\{table\})/) { "#{$1}\n\\begin{center}\n\\small\n" }
  text.gsub!(/^\s*(\\end\{table\})/) do
    "\\end{center}\n#{$1}"
  end
  text
end
|
|
24
|
+
|
|
25
|
+
# Processes aside environments.
# In order to get nice framed & shaded aside boxes, we need to
# transform the default aside into a new environment.
# Only asides that open with a \heading followed by a \label are
# transformed; they become a shaded_aside taking the heading and label as
# arguments. +text+ is modified in place and returned.
def process_asides(text)
  # Transform asides with headings and labels.
  aside_regex = /\\begin{aside}\n\s*
                 \\heading{(.*?)}\s*
                 \\label{(.*?)}\n
                 (.*?)
                 \\end{aside}/mx
  text.gsub!(aside_regex) do
    heading, label, body = $1, $2, $3
    "\\begin{shaded_aside}{#{heading}}{#{label}}\n#{body}\n\\end{shaded_aside}"
  end
  text
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# encoding=utf-8
|
|
2
|
+
module Polytexnic
|
|
3
|
+
module Preprocessor
|
|
4
|
+
module Polytex
|
|
5
|
+
|
|
6
|
+
# Converts Markdown to PolyTeX.
# We adopt a unified approach: rather than convert "Markdown" (I use
# the term loosely*) directly to HTML, we convert it to PolyTeX and
# then run everything through the PolyTeX pipeline. Happily, kramdown
# comes equipped with a `to_latex` method that does most of the heavy
# lifting. The output isn't as clean as that produced by Pandoc (our
# previous choice), but it comes with significant advantages: (1) It's
# written in Ruby, available as a gem, so its use eliminates an external
# dependency. (2) It's the foundation for the "Markdown" interpreter
# used by Leanpub, so by using it ourselves we ensure greater
# compatibility with Leanpub books.
#
# * <rant>The number of mutually incompatible markup languages going
# by the name "Markdown" is truly mind-boggling. Most of them add things
# to John Gruber's original Markdown language in an ever-expanding
# attempt to bolt on the functionality needed to write longer documents.
# At this point, I fear that "Markdown" has become little more than a
# marketing term.</rant>
#
# Steps: cache code environments, mark code inclusions, cache math, run
# kramdown, then restore math and inclusions. The result is stored in
# @source (and returned).
def to_polytex
  # Loaded lazily so kramdown is only needed for Markdown input. The
  # library file is lowercase; requiring 'Kramdown' only resolves on
  # case-insensitive filesystems.
  require 'kramdown'
  cleaned_markdown = cache_code_environments
  convert_code_inclusion(cleaned_markdown)
  math_cache = cache_math(cleaned_markdown)
  # Override the header ordering, which starts with 'section' by default.
  lh = 'chapter,section,subsection,subsubsection,paragraph,subparagraph'
  kramdown = Kramdown::Document.new(cleaned_markdown, latex_headers: lh)
  @source = restore_inclusion(restore_math(kramdown.to_latex, math_cache))
end
|
|
36
|
+
|
|
37
|
+
# Caches code environments found in the Markdown source (@source).
# Three forms are recognized:
#   * a line containing {lang="..."} followed by lines indented four spaces:
#     the de-indented code is stored in code_cache (with its language) and
#     replaced by its digest key; the first line that is not indented that
#     way is passed through after the key;
#   * a plain ``` fence: the fenced lines are re-emitted indented by four
#     spaces, followed by a blank separator;
#   * a ```lang fence: the fenced code is cached like the first form.
# All other lines pass through unchanged. Returns the rewritten text.
def cache_code_environments
  indentation = ' ' * 4
  fence_close = /^```\s*$/
  remaining = @source.split("\n")
  result = []
  until (current = remaining.shift).nil?
    case current
    when /\{lang="(.*?)"\}/
      language = $1
      body = []
      while (current = remaining.shift) && current.match(/^#{indentation}(.*)$/)
        body << $1
      end
      snippet = body.join("\n")
      key = digest(snippet)
      code_cache[key] = [snippet, language]
      result << key
      result << current
    when fence_close # basic code fences
      while (current = remaining.shift) && !current.match(fence_close)
        result << indentation + current
      end
      result << "\n"
    when /^```(\w+)\s*$/ # syntax-highlighted code fences
      language = $1
      body = []
      while (current = remaining.shift) && !current.match(fence_close)
        body << current
      end
      snippet = body.join("\n")
      key = digest(snippet)
      code_cache[key] = [snippet, language]
      result << key
    else
      result << current
    end
  end
  result.join("\n")
end
|
|
74
|
+
|
|
75
|
+
# Caches Leanpub-style math.
# Leanpub uses the notation {$$}...{/$$} for both inline and block math,
# with the only difference being the presence of newlines:
#   {$$} x^2 {/$$}  % inline
# and
#   {$$}
#   x^2             % block
#   {/$$}
# I personally hate this notation and convention, but anyone who really
# cares should just use PolyTeX instead of Markdown.
#
# Each math span in +text+ is replaced (in place) by a digest key. Returns
# a hash mapping [kind, key] to the math source, where kind is :block or
# :inline.
def cache_math(text)
  cache = {}
  # Block math may span several lines (e.g. multi-line equations), so the
  # body has to be allowed to cross newlines (/m); without it only
  # single-line blocks were ever recognized.
  text.gsub!(/\{\$\$\}\n(.*?)\n\{\/\$\$\}/m) do
    body = $1
    key = digest(body)
    cache[[:block, key]] = body
    key
  end
  text.gsub!(/\{\$\$\}(.*?)\{\/\$\$\}/) do
    body = $1
    key = digest(body)
    cache[[:inline, key]] = body
    key
  end
  cache
end
|
|
99
|
+
|
|
100
|
+
# Restores the Markdown math.
# This is easy because we're running everything through our LaTeX
# pipeline.
# +cache+ maps [kind, key] to the math source (as built by cache_math).
# Each key in +text+ is replaced in place by the math wrapped in \( \) for
# :inline or \[ \] (on their own lines) for :block. Returns +text+.
def restore_math(text, cache)
  cache.each do |(kind, key), value|
    case kind
    when :inline
      opening = '\('
      closing = '\)'
    when :block
      opening = '\[' + "\n"
      closing = "\n" + '\]'
    end
    restored = opening + value + closing
    # Block form: the math must be inserted verbatim. As a replacement
    # string, sequences such as '\\' (LaTeX line breaks) or '\&' would be
    # interpreted as substitution escapes and corrupt the formula.
    text.gsub!(key) { restored }
  end
  text
end
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Adds support for <<(path/to/code) inclusion.
# A line-leading '<<(...)' (with any preceding whitespace) is rewritten, in
# place, as an HTML comment of the form '<!-- inclusion= <<(...)-->'.
def convert_code_inclusion(text)
  text.gsub!(/^\s*<<(\(.*?\))/) do
    path = Regexp.last_match(1)
    "<!-- inclusion= <<#{path}-->"
  end
end
|
|
123
|
+
# Turns the commented-out inclusion markers ('% <!-- inclusion= ...-->')
# back into PolyTeX inclusion lines of the form '%= ...'.
# Returns a new string; +text+ is not modified.
def restore_inclusion(text)
  text.gsub(/% <!-- inclusion= (.*?)-->/) do
    inclusion = Regexp.last_match(1)
    "%= #{inclusion}"
  end
end
|
|
126
|
+
end
|
|
127
|
+
end
|