typohero 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
# CPU-profiling harness: reads bench.txt once, then runs TypoHero.enhance over
# it ten times while PerfTools::CpuProfiler records into /tmp/profile.
require 'typohero'
require 'perftools'

sample = File.read('bench.txt')

PerfTools::CpuProfiler.start('/tmp/profile') do
  10.times do
    TypoHero.enhance(sample)
  end
end
Binary file
@@ -0,0 +1,16 @@
# Prints, on STDOUT, Ruby source that defines TypoHero::LATEX: a hash mapping
# LaTeX macros (with their leading backslash) to Ruby "\uXXXX" string escapes.
# The macro tables come from LaTeX::Decode::Data.
# NOTE(review): presumably the output is redirected into typohero/latex.rb.
use strict;
use warnings;
use LaTeX::Decode::Data;

# Returns $text with every character rendered as a Ruby double-quoted-string
# escape. Ruby's "\uXXXX" form takes exactly four hex digits, so code points
# above U+FFFF need the braced "\u{...}" form. All characters of the value are
# encoded, not only the first one.
sub ruby_escape {
    my ($text) = @_;
    return join q{}, map {
        my $code_point = ord($_);
        $code_point > 0xFFFF
            ? sprintf('\\u{%x}', $code_point)
            : sprintf('\\u%04x', $code_point);
    } split //, $text;
}

# Later tables win when the same macro name appears in more than one of them.
my %char_for = (
    %WORDMACROS, %WORDMACROSEXTRA, %PUNCTUATION, %SYMBOLS, %GREEK,
);

print "module TypoHero\n LATEX = {\n";

# Keys are sorted so that regenerating the file yields a stable, diffable
# result instead of Perl's randomized hash order.
for my $macro (sort keys %char_for) {
    my $escaped = ruby_escape($char_for{$macro});
    print " '\\\\$macro' => \"$escaped\",\n";
}

# Negated symbols are emitted under the two-macro spelling \not\<name>.
for my $macro (sort keys %NEGATEDSYMBOLS) {
    my $escaped = ruby_escape($NEGATEDSYMBOLS{$macro});
    print " '\\\\not\\\\$macro' => \"$escaped\",\n";
}

print " }\nend\n";
@@ -1,106 +1,123 @@
1
- module TypoHero
2
- VERSION = '0.0.1'
1
+ require 'typohero/version'
2
+ require 'typohero/latex'
3
3
 
4
+ module TypoHero
4
5
  extend self
5
6
 
6
7
  EXCLUDED_TAGS = %w(head pre code kbd math script textarea)
7
- EXCLUDED_TAGS_RE = /\A<(\/)?(?:#{EXCLUDED_TAGS.join('|')})/im
8
+ EXCLUDED_TAGS_RE = /\A<(\/)?(?:#{EXCLUDED_TAGS.join('|')})[\p{Space}\/>]/im
8
9
 
9
- TOKENIZER_RE = /<[^>]+>|[^<]+|\\[\(\[\)\]]|\$\$/im
10
+ TOKENIZER_RE = /<[^>]+>|\\[\(\)\[\]]|\$\$|(?:[^<\$\\]|\$(?:[^$]|\Z)|\\(?:[^\(\)\[\]]|\Z))+/im
10
11
 
# Backslash-escaped characters and the numeric HTML entity each one is
# replaced with. The keys are the literal two-character sequences (backslash
# plus the escaped character); ESCAPE_RE is built from them.
# Every value must be a complete entity, including the terminating semicolon.
ESCAPE = {
  '\\\\' => '&#92;',
  '\"'   => '&#34;',
  "\\\'" => '&#39;',
  '\.'   => '&#46;',
  '\,'   => '&#44;',
  '\-'   => '&#45;',
  '\`'   => '&#96;',
  '\('   => '&#40;',
}
19
22
  ESCAPE_RE = Regexp.union(*ESCAPE.keys)
20
23
 
21
- EM_DASH = '&#8212;'
22
- EN_DASH = '&#8211;'
23
- ELLIPSIS = '&#8230;'
24
- LEFT_DQUOTE = '&#8220;'
25
- RIGHT_DQUOTE = '&#8221;'
26
- LEFT_QUOTE = '&#8216;'
27
- RIGHT_QUOTE = '&#8217;'
24
+ NBSP = "\u00a0"
25
+ MDASH = "\u2014"
26
+ NDASH = "\u2013"
27
+ LDQUO = "\u201C"
28
+ RDQUO = "\u201D"
29
+ LSQUO = "\u2018"
30
+ RSQUO = "\u2019"
31
+ BDQUO = "\u201E"
28
32
 
29
33
  SPECIAL = {
30
34
  # enhance!
31
- '---' => EM_DASH,
32
- '--' => EN_DASH,
33
- '...' => ELLIPSIS,
34
- '. . .' => ELLIPSIS,
35
- '``' => LEFT_DQUOTE,
36
- "''" => RIGHT_DQUOTE,
37
- '`' => LEFT_QUOTE,
38
- ',,' => '&#8222;',
39
- '-&gt;' => '&rarr;',
40
- '&lt;-' => '&larr;',
41
- '=&gt;' => '&rArr;',
42
- '&lt;=' => '&lArr;',
43
- '&gt;&gt;' => '&raquo;',
44
- '&lt;&lt;' => '&laquo;',
45
- '(c)' => '&copy;',
46
- '(C)' => '&copy;',
47
- '(r)' => '&reg;',
48
- '(R)' => '&reg;',
49
- '(tm)' => '&trade;',
50
- '(TM)' => '&trade;',
35
+ '---' => MDASH,
36
+ '--' => NDASH,
37
+ '...' => "\u2026",
38
+ '. . .' => "\u2026",
39
+ '``' => LDQUO,
40
+ "''" => RDQUO,
41
+ '`' => LSQUO,
42
+ ',,' => BDQUO,
43
+ '(c)' => "\u00A9",
44
+ '(C)' => "\u00A9",
45
+ '(r)' => "\u00AE",
46
+ '(R)' => "\u00AE",
47
+ '(tm)' => "\u2122",
48
+ '(TM)' => "\u2122",
51
49
  # normalize for further processing
52
- '&ldquo;' => LEFT_DQUOTE,
53
- '&lsquo;' => LEFT_QUOTE,
54
- '&rdquo;' => RIGHT_DQUOTE,
55
- '&rsquo;' => RIGHT_QUOTE,
56
- '&#160;' => '&nbsp',
57
- '&#xA0;' => '&nbsp',
58
- '&#x2013;' => EN_DASH,
59
- '&#x2014;' => EM_DASH,
60
- '&mdash;' => EM_DASH,
61
- '&ndash;' => EN_DASH,
50
+ '&ldquo;' => LDQUO,
51
+ '&#8220;' => LDQUO,
52
+ '&#x201C;' => LDQUO,
53
+ '&rdquo;' => RDQUO,
54
+ '&#8221;' => RDQUO,
55
+ '&#x201D;' => RDQUO,
56
+ '&lsquo;' => LSQUO,
57
+ '&#8216;' => LSQUO,
58
+ '&#x2018;' => LSQUO,
59
+ '&rsquo;' => RSQUO,
60
+ '&#8217;' => RSQUO,
61
+ '&#x2019;' => RSQUO,
62
+ '&#160;' => NBSP,
63
+ '&#xA0;' => NBSP,
64
+ '&nbsp;' => NBSP,
65
+ '&ndash;' => NDASH,
66
+ '&#x2013;' => NDASH,
67
+ '&#8211;' => NDASH,
68
+ '&#x2014;' => MDASH,
69
+ '&mdash;' => MDASH,
70
+ '&#8212;' => MDASH,
62
71
  '&#38;' => '&amp;',
63
72
  '&#x26;' => '&amp;',
64
73
  }
65
74
  SPECIAL_RE = Regexp.union(*SPECIAL.keys)
75
+ LATEX_RE = /(#{Regexp.union *LATEX.keys})(?=\p{Space}|$)/
66
76
 
67
- SPACE_RE = '\s|&nbsp;|&#8201;'
68
- DASH_RE = '&#821[12];'
77
+ DASH_RE = "[#{MDASH}#{NDASH}]"
69
78
  AMP_RE = '&(?:amp;)?'
79
+ LEFT_QUOTE_RE = "[#{LDQUO}#{LSQUO}#{BDQUO}]"
70
80
 
# Matches one to three straight single quotes that directly follow a digit and
# are followed by whitespace, another digit or end of line (e.g. 6'2'').
# The upper bound is three so that the triple-prime entry in PRIMES is
# reachable; with only `''?` a run of three quotes never matched at all.
PRIME_RE = /(?<=\d)('{1,3})(?=\p{Space}|\d|$)/

# Replacement for each run of quotes matched by PRIME_RE, keyed by the matched
# text: prime, double prime and triple prime.
PRIMES = {
  "'"   => "\u2032",
  "''"  => "\u2033",
  "'''" => "\u2034",
}
76
- ORDINAL_RE = /(?<=\d)(st|nd|rd|th)(?=#{SPACE_RE}|$)/
87
+ ORDINAL_RE = /(?<=\d)(st|nd|rd|th)(?=\p{Space}|$)/
77
88
 
78
- EM_DASH_SPACE_RE = /\s*(#{EM_DASH})\s*/
79
- EN_DASH_SPACE_RE = /\s*(#{EN_DASH})\s*/
89
+ MDASH_SPACE_RE = /\p{Space}*(#{MDASH})\p{Space}*/
90
+ NDASH_SPACE_RE = /\p{Space}*(#{NDASH})\p{Space}*/
80
91
 
81
- REPLACE_AMP_RE = /(?<=#{SPACE_RE})#{AMP_RE}(?=#{SPACE_RE})/m
92
+ REPLACE_AMP_RE = /(?<=\p{Space})#{AMP_RE}(?=\p{Space})/m
82
93
 
83
- CAPS_BEGIN_RE = "(^|#{SPACE_RE}|#{LEFT_DQUOTE}|#{LEFT_QUOTE})"
84
- CAPS_INNER_RE = "(?:#{AMP_RE}|[A-Z\\d\\.]|#{RIGHT_QUOTE})*" # right quote for posession (e.g. JIMMY'S)
94
+ CAPS_BEGIN_RE = "(^|\\p{Space}|#{LEFT_QUOTE_RE})"
95
+ CAPS_INNER_RE = "(?:#{AMP_RE}|[A-Z\\d\\.]|#{RSQUO})*" # right quote for possession (e.g. JIMMY'S)
85
96
  REPLACE_CAPS_RE = /#{CAPS_BEGIN_RE}([A-Z\d]#{CAPS_INNER_RE}[A-Z]#{CAPS_INNER_RE}|[A-Z]#{CAPS_INNER_RE}[A-Z\d]#{CAPS_INNER_RE})/m
86
97
 
87
98
  PUNCT_CLASS = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
88
- PUNCT_QUOTE_RE = /^['"](?=#{PUNCT_CLASS})\B/m
89
- RIGHT_QUOTE_RE = /(?<!^|#{DASH_RE}|#{SPACE_RE}|[\[\{\(\-])['"]|['"](?=\s|s\b|$)|(?<=#{DASH_RE})['"](?=#{PUNCT_CLASS})/m
# Matches every straight quote that has to become a closing (right) quote.
# The pattern is used with a hash replacement keyed by the matched text, so
# each alternative must match the quote character alone; context is expressed
# with lookarounds only. An alternative that consumed more than the quote
# would look up a missing key and replace the match with an empty string.
RIGHT_QUOTE_RE = %r{
  ^['"](?=#{PUNCT_CLASS})\B|                   # Very first character is a closing quote followed by punctuation at a non-word-break
  (?<!^|#{DASH_RE}|\p{Space}|[\[\{\(\-])['"]|  # Not after dash, space or opening parentheses
  ['"](?=\p{Space}|$)|                         # Followed by space or end of line
  '(?=s\b)|                                    # Apostrophe before a possessive s; the s stays outside the match
  (?<=#{DASH_RE})['"](?=#{PUNCT_CLASS})|       # Dash quote punctuation (e.g. --'!), for quotations
  '(?=(\d\d(?:s|\p{Space}|$)))                 # Decade abbreviations (the '80s)
}xm
90
107
 
91
108
  LEFT_QUOTES = {
92
- "'" => LEFT_QUOTE,
93
- '"' => LEFT_DQUOTE,
109
+ "'" => LSQUO,
110
+ '"' => LDQUO,
94
111
  }
95
112
 
96
113
  RIGHT_QUOTES = {
97
- "'" => RIGHT_QUOTE,
98
- '"' => RIGHT_DQUOTE,
114
+ "'" => RSQUO,
115
+ '"' => RDQUO,
99
116
  }
100
117
 
101
118
  TWO_QUOTES = {
102
- '"\'' => LEFT_DQUOTE + LEFT_QUOTE,
103
- '\'"' => LEFT_QUOTE + LEFT_DQUOTE
119
+ '"\'' => LDQUO + LSQUO,
120
+ '\'"' => LSQUO + LDQUO
104
121
  }
105
122
 
106
123
  PARAGRAPH_RE = 'h[1-6]|p|li|dt|dd|div'
@@ -108,11 +125,13 @@ module TypoHero
108
125
 
109
126
  WIDONT_PARAGRAPH_RE = /\A<\/(?:#{PARAGRAPH_RE})>\Z/im
110
127
  WIDONT_INLINE_RE = /\A<\/?(?:#{INLINE_RE})[^>]*>\Z/im
128
+ WIDONT_NBSP_RE = /#{NBSP}|<|>/
111
129
 
112
- INITIAL_QUOTE_RE = /(?=(?:<(?:#{PARAGRAPH_RE})[^>]*>|^)(?:<(?:#{INLINE_RE})[^>]*>|\s)*)&#(8216|8220);/
130
+ INITIAL_QUOTE_RE = /(?=(?:<(?:#{PARAGRAPH_RE})[^>]*>|^)(?:<(?:#{INLINE_RE})[^>]*>|\p{Space})*)#{LEFT_QUOTE_RE}/
113
131
  INITIAL_QUOTES = {
114
- LEFT_QUOTE => "<span class=\"quo\">#{LEFT_QUOTE}</span>",
115
- LEFT_DQUOTE => "<span class=\"dquo\">#{LEFT_DQUOTE}</span>",
132
+ LSQUO => "<span class=\"quo\">#{LSQUO}</span>",
133
+ LDQUO => "<span class=\"dquo\">#{LDQUO}</span>",
134
+ BDQUO => "<span class=\"bdquo\">#{BDQUO}</span>",
116
135
  }
117
136
 
118
137
  def tokenize(input)
@@ -143,6 +162,7 @@ module TypoHero
143
162
  escape(s)
144
163
  primes(s)
145
164
  special(s)
165
+ latex(s)
146
166
  quotes(s, prev_last_char)
147
167
  dash_spaces(s)
148
168
  prev_last_char = last_char
@@ -166,12 +186,12 @@ module TypoHero
166
186
  if tokens[i] =~ WIDONT_PARAGRAPH_RE
167
187
  state = 1
168
188
  elsif tokens[i] !~ WIDONT_INLINE_RE
169
- if tokens[i] =~ /&nbsp;|<|>/
189
+ if tokens[i] =~ WIDONT_NBSP_RE
170
190
  state = 0
171
191
  elsif state == 1 || state == 3
172
- if tokens[i] =~ (state == 1 ? /(\S+)?(\s+)?(\S+\s*)\Z/m : /(\S+)?(\s+)(\S*)\Z/m)
192
+ if tokens[i] =~ (state == 1 ? /(\P{Space}+)?(\p{Space}+)?(\P{Space}+\p{Space}*)\Z/m : /(\P{Space}+)?(\p{Space}+)(\P{Space}*)\Z/m)
173
193
  if $1 && $2
174
- tokens[i].replace "#{$`}#{$1}&nbsp;#{$3}"
194
+ tokens[i].replace "#{$`}#{$1}#{NBSP}#{$3}"
175
195
  state = 0
176
196
  elsif $2
177
197
  state = 2
@@ -180,8 +200,8 @@ module TypoHero
180
200
  state = 3
181
201
  end
182
202
  end
183
- elsif state == 2 && tokens[i] =~ /(\S+\s*)\Z/m
184
- widow.sub!(/\A\s*/, '&nbsp;')
203
+ elsif state == 2 && tokens[i] =~ /(\P{Space}+\p{Space}*)\Z/m
204
+ widow.sub!(/\A\p{Space}*/, NBSP)
185
205
  state = 0
186
206
  end
187
207
  end
@@ -197,9 +217,13 @@ module TypoHero
197
217
  s.gsub!(SPECIAL_RE, SPECIAL)
198
218
  end
199
219
 
# Replaces, in place, every LaTeX macro in +s+ matched by LATEX_RE with the
# string stored for it in the LATEX table.
# Returns what String#gsub! returns: +s+, or nil when nothing was replaced.
def latex(s)
  s.gsub!(LATEX_RE) { |macro| LATEX[macro] }
end
223
+
# Normalizes, in place, the whitespace around dashes in +s+: whatever
# surrounds an em dash becomes a thin space (U+2009) on each side, and
# whatever surrounds an en dash becomes a single space on each side.
# Returns the result of the en-dash substitution (+s+, or nil if unchanged).
def dash_spaces(s)
  thin_space = "\u2009"
  s.gsub!(MDASH_SPACE_RE) { "#{thin_space}#{$1}#{thin_space}" }
  s.gsub!(NDASH_SPACE_RE) { " #{$1} " }
end
204
228
 
205
229
  def amp(s)
@@ -225,21 +249,13 @@ module TypoHero
225
249
 
# Converts the straight quotes in the text token +s+ to curly quotes, in place.
#
# s              - the String to rewrite (mutated).
# prev_last_char - matched against /\P{Space}/ to decide how a lone quote is
#                  treated; presumably the last character of the previous
#                  token (confirm against the caller).
#
# Returns nil when +s+ is a lone quote, otherwise the result of the final
# gsub! (+s+, or nil if that last substitution changed nothing).
def quotes(s, prev_last_char)
  unless s =~ /\A['"]\Z/
    # Special case for double sets of quotes, e.g.
    # <p>He said, "'Quoted' words in a larger quote."</p>
    s.gsub!(/(?:"'|'")(?=\p{Word})/, TWO_QUOTES)

    # Closing quotes first; every quote still straight afterwards is opening.
    s.gsub!(RIGHT_QUOTE_RE, RIGHT_QUOTES)
    return s.gsub!(/['"]/, LEFT_QUOTES)
  end

  # The token is nothing but a quote: it closes if the preceding text ended
  # in a non-space character, and opens otherwise.
  table = prev_last_char =~ /\P{Space}/ ? RIGHT_QUOTES : LEFT_QUOTES
  s.replace(table[s])
  nil
end
@@ -0,0 +1,537 @@
1
+ module TypoHero
2
+ LATEX = {
3
+ '\\textasteriskcentered' => "\u002a",
4
+ '\\nless' => "\u226e",
5
+ '\\textquoteright' => "\u2019",
6
+ '\\subset' => "\u2282",
7
+ '\\guilsinglright' => "\u203a",
8
+ '\\OHORN' => "\u01a0",
9
+ '\\textltailn' => "\u0272",
10
+ '\\bumpeq' => "\u224f",
11
+ '\\textcloserevepsilon' => "\u025e",
12
+ '\\lneqq' => "\u2268",
13
+ '\\prec' => "\u227a",
14
+ '\\gnsim' => "\u22e7",
15
+ '\\forall' => "\u2200",
16
+ '\\textrtaild' => "\u0256",
17
+ '\\textampersand' => "\u0026",
18
+ '\\rightarrow' => "\u2192",
19
+ '\\textbari' => "\u0268",
20
+ '\\surd' => "\u221a",
21
+ '\\sqcup' => "\u2294",
22
+ '\\textdong' => "\u20ab",
23
+ '\\tone5' => "\u02e5",
24
+ '\\textdollar' => "\u0024",
25
+ '\\Epsilon' => "\u0395",
26
+ '\\uparrow' => "\u2191",
27
+ '\\guilsinglleft' => "\u2039",
28
+ '\\dh' => "\u00f0",
29
+ '\\Thorn' => "\u00de",
30
+ '\\chi' => "\u03c7",
31
+ '\\textrhookrevepsilon' => "\u025d",
32
+ '\\oe' => "\u0153",
33
+ '\\AA' => "\u00c5",
34
+ '\\fallingdotseq' => "\u2252",
35
+ '\\textendash' => "\u2013",
36
+ '\\hamza' => "\u02be",
37
+ '\\ayn' => "\u02bf",
38
+ '\\texthtq' => "\u02a0",
39
+ '\\varphi' => "\u03c6",
40
+ '\\unlhd' => "\u22b4",
41
+ '\\textturnscripta' => "\u0252",
42
+ '\\nabla' => "\u2207",
43
+ '\\textnumero' => "\u2116",
44
+ '\\textbackslash' => "\u005c",
45
+ '\\exists' => "\u2203",
46
+ '\\textthornvariii' => "\u00fe",
47
+ '\\otimes' => "\u2297",
48
+ '\\odot' => "\u2299",
49
+ '\\textrtailn' => "\u0273",
50
+ '\\texthtd' => "\u0257",
51
+ '\\kappa' => "\u03ba",
52
+ '\\copyright' => "\u00a9",
53
+ '\\in' => "\u2208",
54
+ '\\Nu' => "\u039d",
55
+ '\\textTstroke' => "\u0166",
56
+ '\\texththeng' => "\u0267",
57
+ '\\lessgtr' => "\u2276",
58
+ '\\lesseqgtr' => "\u22da",
59
+ '\\textkhook' => "\u0199",
60
+ '\\textTbar' => "\u0166",
61
+ '\\doteqdot' => "\u2251",
62
+ '\\textquotedbl' => "\u0022",
63
+ '\\quotedblbase' => "\u201e",
64
+ '\\diamond' => "\u22c4",
65
+ '\\textscn' => "\u0274",
66
+ '\\mid' => "\u2223",
67
+ '\\setminus' => "\u2216",
68
+ '\\sqcap' => "\u2293",
69
+ '\\hv' => "\u0195",
70
+ '\\textEopen' => "\u0190",
71
+ '\\euro' => "\u20ac",
72
+ '\\varrho' => "\u03c1",
73
+ '\\texthbar' => "\u0127",
74
+ '\\doteq' => "\u2250",
75
+ '\\eta' => "\u03b7",
76
+ '\\guillemotright' => "\u00bb",
77
+ '\\rightthreetimes' => "\u22cc",
78
+ '\\textemdash' => "\u2014",
79
+ '\\ngtr' => "\u226f",
80
+ '\\precnsim' => "\u22e8",
81
+ '\\textthornvari' => "\u00fe",
82
+ '\\textasciimacron' => "\u00af",
83
+ '\\iota' => "\u03b9",
84
+ '\\textcolonmonetary' => "\u20a1",
85
+ '\\neq' => "\u2260",
86
+ '\\clubsuit' => "\u2663",
87
+ '\\gamma' => "\u03b3",
88
+ '\\top' => "\u22a4",
89
+ '\\textthreesuperior' => "\u00b3",
90
+ '\\textHbar' => "\u0126",
91
+ '\\curlyeqprec' => "\u22de",
92
+ '\\Alpha' => "\u0391",
93
+ '\\Tau' => "\u03a4",
94
+ '\\bigcup' => "\u22c3",
95
+ '\\textsch' => "\u029c",
96
+ '\\int' => "\u222b",
97
+ '\\textGammaafrican' => "\u0194",
98
+ '\\textquotedblleft' => "\u201c",
99
+ '\\textmu' => "\u00b5",
100
+ '\\texttretroflexhook' => "\u0288",
101
+ '\\Pi' => "\u03a0",
102
+ '\\textkra' => "\u0138",
103
+ '\\Delta' => "\u0394",
104
+ '\\textlonglegr' => "\u027c",
105
+ '\\Rightarrow' => "\u21d2",
106
+ '\\circ' => "\u2218",
107
+ '\\textKhook' => "\u0198",
108
+ '\\unrhd' => "\u22b5",
109
+ '\\Rho' => "\u03a1",
110
+ '\\multimap' => "\u22b8",
111
+ '\\propto' => "\u221d",
112
+ '\\textperiodcentered' => "\u00b7",
113
+ '\\rfloor' => "\u230b",
114
+ '\\vartheta' => "\u03b8",
115
+ '\\textgreater' => "\u003e",
116
+ '\\textcrh' => "\u0127",
117
+ '\\varsigma' => "\u03c2",
118
+ '\\textbar' => "\u007c",
119
+ '\\simeq' => "\u2243",
120
+ '\\textchook' => "\u0188",
121
+ '\\textpertenthousand' => "\u2031",
122
+ '\\textbardotlessj' => "\u025f",
123
+ '\\textonequarter' => "\u00bc",
124
+ '\\texthtp' => "\u01a5",
125
+ '\\flat' => "\u266d",
126
+ '\\texthtscg' => "\u029b",
127
+ '\\therefore' => "\u2234",
128
+ '\\leftarrow' => "\u2190",
129
+ '\\textEsh' => "\u01a9",
130
+ '\\vdots' => "\u22ee",
131
+ '\\textless' => "\u003c",
132
+ '\\texteuro' => "\u20ac",
133
+ '\\Cap' => "\u22d2",
134
+ '\\textgamma' => "\u0263",
135
+ '\\textcloseomega' => "\u0277",
136
+ '\\textDhook' => "\u018a",
137
+ '\\texttbar' => "\u0167",
138
+ '\\sphericalangle' => "\u2222",
139
+ '\\textlogicalnot' => "\u00ac",
140
+ '\\textphi' => "\u0278",
141
+ '\\dj' => "\u0111",
142
+ '\\S' => "\u00a7",
143
+ '\\textiotalatin' => "\u0269",
144
+ '\\textopeno' => "\u0254",
145
+ '\\IJ' => "\u0132",
146
+ '\\textctyogh' => "\u0293",
147
+ '\\nparallel' => "\u2226",
148
+ '\\textlyoghlig' => "\u026e",
149
+ '\\textthorn' => "\u00fe",
150
+ '\\textordfeminine' => "\u00aa",
151
+ '\\ll' => "\u226a",
152
+ '\\texthtg' => "\u0260",
153
+ '\\downarrow' => "\u2193",
154
+ '\\texteopen' => "\u025b",
155
+ '\\ncong' => "\u2247",
156
+ '\\guillemotleft' => "\u00ab",
157
+ '\\textinterrobang' => "\u203d",
158
+ '\\boxdot' => "\u22a1",
159
+ '\\texttstroke' => "\u0167",
160
+ '\\textteshlig' => "\u02a7",
161
+ '\\ESH' => "\u01a9",
162
+ '\\textflorin' => "\u0192",
163
+ '\\langle' => "\u27e8",
164
+ '\\textrevglotstop' => "\u0295",
165
+ '\\textltilde' => "\u026b",
166
+ '\\textiota' => "\u0269",
167
+ '\\ng' => "\u014b",
168
+ '\\textBhook' => "\u0181",
169
+ '\\texttrademark' => "\u2122",
170
+ '\\nsubseteq' => "\u2288",
171
+ '\\ltimes' => "\u22c9",
172
+ '\\textturna' => "\u0250",
173
+ '\\textquotesingle' => "\u0027",
174
+ '\\oplus' => "\u2295",
175
+ '\\varepsilon' => "\u03b5",
176
+ '\\boxplus' => "\u229e",
177
+ '\\textscg' => "\u0262",
178
+ '\\textquotedblright' => "\u201d",
179
+ '\\Beta' => "\u0392",
180
+ '\\textdctzlig' => "\u02a5",
181
+ '\\textctesh' => "\u0286",
182
+ '\\iint' => "\u222c",
183
+ '\\textPhook' => "\u01a4",
184
+ '\\textturnh' => "\u0265",
185
+ '\\textnaira' => "\u20a6",
186
+ '\\pm' => "\u00b1",
187
+ '\\ggg' => "\u22d9",
188
+ '\\texteturned' => "\u01dd",
189
+ '\\textscripta' => "\u0251",
190
+ '\\OE' => "\u0152",
191
+ '\\wedge' => "\u2227",
192
+ '\\ominus' => "\u2296",
193
+ '\\textordmasculine' => "\u00ba",
194
+ '\\nexists' => "\u2204",
195
+ '\\ntrianglerighteq' => "\u22ed",
196
+ '\\textdzlig' => "\u02a3",
197
+ '\\Theta' => "\u0398",
198
+ '\\textturnv' => "\u028c",
199
+ '\\lesssim' => "\u2272",
200
+ '\\Psi' => "\u03a8",
201
+ '\\tone3' => "\u02e7",
202
+ '\\lnsim' => "\u22e6",
203
+ '\\nprec' => "\u2280",
204
+ '\\gtreqless' => "\u22db",
205
+ '\\texttctclig' => "\u02a8",
206
+ '\\bigwedge' => "\u22c0",
207
+ '\\textscriptv' => "\u028b",
208
+ '\\upsilon' => "\u03c5",
209
+ '\\subseteq' => "\u2286",
210
+ '\\lessdot' => "\u22d6",
211
+ '\\bullet' => "\u2219",
212
+ '\\textpm' => "\u00b1",
213
+ '\\textrevepsilon' => "\u025c",
214
+ '\\textYhook' => "\u01b3",
215
+ '\\textphook' => "\u01a5",
216
+ '\\alpha' => "\u03b1",
217
+ '\\textglotstop' => "\u0294",
218
+ '\\mu' => "\u03bc",
219
+ '\\L' => "\u0141",
220
+ '\\set' => "\u2205",
221
+ '\\nobreakspace' => "\u00a0",
222
+ '\\angle' => "\u2220",
223
+ '\\ddag' => "\u2021",
224
+ '\\updownarrow' => "\u2195",
225
+ '\\psi' => "\u03c8",
226
+ '\\asymp' => "\u224d",
227
+ '\\cdot' => "\u22c5",
228
+ '\\supset' => "\u2283",
229
+ '\\nsim' => "\u2241",
230
+ '\\supseteq' => "\u2287",
231
+ '\\ast' => "\u2217",
232
+ '\\lhd' => "\u22b2",
233
+ '\\ss' => "\u00df",
234
+ '\\textasciitilde' => "\u007e",
235
+ '\\textdegree' => "\u00b0",
236
+ '\\textTretroflexhook' => "\u01ae",
237
+ '\\textasciicedilla' => "\u00b8",
238
+ '\\spadesuit' => "\u2660",
239
+ '\\th' => "\u00fe",
240
+ '\\ntrianglelefteq' => "\u22ec",
241
+ '\\textyen' => "\u00a5",
242
+ '\\cup' => "\u222a",
243
+ '\\textoopen' => "\u0254",
244
+ '\\Upsilon' => "\u03a5",
245
+ '\\leftrightarrow' => "\u2194",
246
+ '\\textasciiacute' => "\u00b4",
247
+ '\\textinvscr' => "\u0281",
248
+ '\\textcrd' => "\u0111",
249
+ '\\sqsubset' => "\u228f",
250
+ '\\precsim' => "\u227e",
251
+ '\\textIotaafrican' => "\u0196",
252
+ '\\textonesuperior' => "\u00b9",
253
+ '\\textDafrican' => "\u0189",
254
+ '\\textbarl' => "\u0142",
255
+ '\\between' => "\u226c",
256
+ '\\textThook' => "\u01ac",
257
+ '\\textrtaill' => "\u026d",
258
+ '\\complement' => "\u2201",
259
+ '\\lfloor' => "\u230a",
260
+ '\\succ' => "\u227b",
261
+ '\\textcloseepsilon' => "\u029a",
262
+ '\\Zeta' => "\u0396",
263
+ '\\textcent' => "\u00a2",
264
+ '\\l' => "\u0142",
265
+ '\\texttslig' => "\u02a6",
266
+ '\\gtrless' => "\u2277",
267
+ '\\ij' => "\u0133",
268
+ '\\textoverline' => "\u203e",
269
+ '\\Subset' => "\u22d0",
270
+ '\\gg' => "\u226b",
271
+ '\\DJ' => "\u0110",
272
+ '\\parallel' => "\u2225",
273
+ '\\textthornvarii' => "\u00fe",
274
+ '\\lll' => "\u22d8",
275
+ '\\texthash' => "\u0023",
276
+ '\\leftthreetimes' => "\u22cb",
277
+ '\\textequals' => "\u003d",
278
+ '\\leqq' => "\u2266",
279
+ '\\vdash' => "\u22a2",
280
+ '\\textschwa' => "\u0259",
281
+ '\\rtimes' => "\u22ca",
282
+ '\\delta' => "\u03b4",
283
+ '\\textdiv' => "\u00f7",
284
+ '\\equiv' => "\u2261",
285
+ '\\textdyoghlig' => "\u02a4",
286
+ '\\curlywedge' => "\u22cf",
287
+ '\\textEreversed' => "\u018e",
288
+ '\\sigma' => "\u03c3",
289
+ '\\ohorn' => "\u01a1",
290
+ '\\texthtk' => "\u0199",
291
+ '\\nmid' => "\u2224",
292
+ '\\textOopen' => "\u0186",
293
+ '\\leq' => "\u2264",
294
+ '\\tone2' => "\u02e8",
295
+ '\\diamondsuit' => "\u2662",
296
+ '\\omicron' => "\u03bf",
297
+ '\\textupsilon' => "\u028a",
298
+ '\\AE' => "\u00c6",
299
+ '\\succsim' => "\u227f",
300
+ '\\TH' => "\u00de",
301
+ '\\curlyvee' => "\u22ce",
302
+ '\\Leftrightarrow' => "\u21d4",
303
+ '\\infty' => "\u221e",
304
+ '\\Iota' => "\u0399",
305
+ '\\textbhook' => "\u0253",
306
+ '\\zeta' => "\u03b6",
307
+ '\\NG' => "\u014a",
308
+ '\\textwon' => "\u20a9",
309
+ '\\textscoelig' => "\u0276",
310
+ '\\Xi' => "\u039e",
311
+ '\\bigvee' => "\u22c1",
312
+ '\\UHORN' => "\u01af",
313
+ '\\textturnk' => "\u029e",
314
+ '\\circeq' => "\u2257",
315
+ '\\div' => "\u00f7",
316
+ '\\textturnlonglegr' => "\u027a",
317
+ '\\Box' => "\u25a1",
318
+ '\\textrtailt' => "\u0288",
319
+ '\\textdtail' => "\u0256",
320
+ '\\Phi' => "\u03a6",
321
+ '\\textlooptoprevesh' => "\u01aa",
322
+ '\\textthornvariv' => "\u00fe",
323
+ '\\textcopyright' => "\u00a9",
324
+ '\\approx' => "\u2248",
325
+ '\\subsetneq' => "\u228a",
326
+ '\\textstretchc' => "\u0297",
327
+ '\\textturnt' => "\u0287",
328
+ '\\wr' => "\u2240",
329
+ '\\ae' => "\u00e6",
330
+ '\\textbeltl' => "\u026c",
331
+ '\\dag' => "\u2020",
332
+ '\\o' => "\u00f8",
333
+ '\\notin' => "\u2209",
334
+ '\\Supset' => "\u22d1",
335
+ '\\textscriptg' => "\u0261",
336
+ '\\supsetneq' => "\u228b",
337
+ '\\Cup' => "\u22d3",
338
+ '\\triangleq' => "\u225c",
339
+ '\\textepsilon' => "\u025b",
340
+ '\\textreferencemark' => "\u203b",
341
+ '\\nsupseteq' => "\u2289",
342
+ '\\textquestiondown' => "\u00bf",
343
+ '\\textonehalf' => "\u00bd",
344
+ '\\ntriangleright' => "\u22eb",
345
+ '\\oslash' => "\u2298",
346
+ '\\dotplus' => "\u2214",
347
+ '\\succcurlyeq' => "\u227d",
348
+ '\\textbarrevglotstop' => "\u02a2",
349
+ '\\P' => "\u00b6",
350
+ '\\cdots' => "\u22ef",
351
+ '\\texttesh' => "\u02a7",
352
+ '\\coprod' => "\u2210",
353
+ '\\textsection' => "\u00a7",
354
+ '\\sim' => "\u223c",
355
+ '\\textpercent' => "\u0025",
356
+ '\\pounds' => "\u00a3",
357
+ '\\textturnmrleg' => "\u0270",
358
+ '\\tau' => "\u03c4",
359
+ '\\Lambda' => "\u039b",
360
+ '\\textezh' => "\u0292",
361
+ '\\textctc' => "\u0255",
362
+ '\\textlira' => "\u20a4",
363
+ '\\textthook' => "\u01ad",
364
+ '\\lnot' => "\u00ac",
365
+ '\\gneqq' => "\u2269",
366
+ '\\textsterling' => "\u00a3",
367
+ '\\textpalhookbelow' => "\u01ab",
368
+ '\\tone1' => "\u02e9",
369
+ '\\textregistered' => "\u00ae",
370
+ '\\textbraceright' => "\u007d",
371
+ '\\risingdotseq' => "\u2253",
372
+ '\\textdhook' => "\u0257",
373
+ '\\textbrokenbar' => "\u00a6",
374
+ '\\circledast' => "\u229b",
375
+ '\\textcrb' => "\u0180",
376
+ '\\nsucc' => "\u2281",
377
+ '\\textcurrency' => "\u00a4",
378
+ '\\textminus' => "\u2212",
379
+ '\\gtrdot' => "\u22d7",
380
+ '\\textthreequarters' => "\u00be",
381
+ '\\textbarglotstop' => "\u02a1",
382
+ '\\textrtails' => "\u0282",
383
+ '\\textesh' => "\u0283",
384
+ '\\circledcirc' => "\u229a",
385
+ '\\Sigma' => "\u03a3",
386
+ '\\bowtie' => "\u22c8",
387
+ '\\texttwosuperior' => "\u00b2",
388
+ '\\textrtailz' => "\u0290",
389
+ '\\boxtimes' => "\u22a0",
390
+ '\\rhd' => "\u22b3",
391
+ '\\xi' => "\u03be",
392
+ '\\nu' => "\u03bd",
393
+ '\\textnhookleft' => "\u0272",
394
+ '\\omega' => "\u03c9",
395
+ '\\textcrlambda' => "\u019b",
396
+ '\\ain' => "\u02bf",
397
+ '\\textctz' => "\u0291",
398
+ '\\dots' => "\u2026",
399
+ '\\textturny' => "\u028e",
400
+ '\\textscy' => "\u028f",
401
+ '\\textrtailr' => "\u027d",
402
+ '\\Omega' => "\u03a9",
403
+ '\\textbraceleft' => "\u007b",
404
+ '\\textsci' => "\u026a",
405
+ '\\textasciigrave' => "\u0060",
406
+ '\\rceil' => "\u2309",
407
+ '\\textbullseye' => "\u0298",
408
+ '\\textrhookschwa' => "\u025a",
409
+ '\\textcentereddot' => "\u00b7",
410
+ '\\uplus' => "\u228e",
411
+ '\\tone4' => "\u02e6",
412
+ '\\approxeq' => "\u224a",
413
+ '\\texttimes' => "\u00d7",
414
+ '\\measuredangle' => "\u2221",
415
+ '\\textraisevibyi' => "\u0285",
416
+ '\\Bumpeq' => "\u224e",
417
+ '\\succnsim' => "\u22e9",
418
+ '\\vee' => "\u2228",
419
+ '\\textparagraph' => "\u00b6",
420
+ '\\geq' => "\u2265",
421
+ '\\natural' => "\u266e",
422
+ '\\textturnrrtail' => "\u027b",
423
+ '\\textbaru' => "\u0289",
424
+ '\\textquoteleft' => "\u2018",
425
+ '\\textunderscore' => "\u005f",
426
+ '\\heartsuit' => "\u2661",
427
+ '\\leadsto' => "\u219d",
428
+ '\\lceil' => "\u2308",
429
+ '\\rightleftharpoons' => "\u21cc",
430
+ '\\star' => "\u22c6",
431
+ '\\Kappa' => "\u039a",
432
+ '\\textfishhookr' => "\u027e",
433
+ '\\quotesinglbase' => "\u201a",
434
+ '\\circleddash' => "\u229d",
435
+ '\\textctj' => "\u029d",
436
+ '\\uhorn' => "\u01b0",
437
+ '\\textlhti' => "\u027f",
438
+ '\\textChook' => "\u0187",
439
+ '\\textvhook' => "\u028b",
440
+ '\\veebar' => "\u22bb",
441
+ '\\textturnm' => "\u026f",
442
+ '\\Vvdash' => "\u22aa",
443
+ '\\textg' => "\u0067",
444
+ '\\partial' => "\u2202",
445
+ '\\Vdash' => "\u22a9",
446
+ '\\sqsubseteq' => "\u2291",
447
+ '\\textFhook' => "\u0191",
448
+ '\\textexclamdown' => "\u00a1",
449
+ '\\Eta' => "\u0397",
450
+ '\\textturnr' => "\u0279",
451
+ '\\backsimeq' => "\u22cd",
452
+ '\\Omicron' => "\u039f",
453
+ '\\texthtb' => "\u0253",
454
+ '\\i' => "\u0131",
455
+ '\\textreve' => "\u0258",
456
+ '\\eqcirc' => "\u2256",
457
+ '\\textNhookleft' => "\u019d",
458
+ '\\textscr' => "\u0280",
459
+ '\\because' => "\u2235",
460
+ '\\texthtt' => "\u01ad",
461
+ '\\bot' => "\u22a5",
462
+ '\\ngeq' => "\u2271",
463
+ '\\sharp' => "\u266f",
464
+ '\\aa' => "\u00e5",
465
+ '\\sum' => "\u2211",
466
+ '\\divideontimes' => "\u22c7",
467
+ '\\dashv' => "\u22a3",
468
+ '\\cap' => "\u2229",
469
+ '\\textasciicircum' => "\u005e",
470
+ '\\pitchfork' => "\u22d4",
471
+ '\\textramshorns' => "\u0264",
472
+ '\\lambda' => "\u03bb",
473
+ '\\mp' => "\u2213",
474
+ '\\sqsupset' => "\u2290",
475
+ '\\textbullet' => "\u2022",
476
+ '\\rangle' => "\u27e9",
477
+ '\\DH' => "\u00d0",
478
+ '\\ni' => "\u220b",
479
+ '\\texthth' => "\u0266",
480
+ '\\textscl' => "\u029f",
481
+ '\\textyogh' => "\u0292",
482
+ '\\beta' => "\u03b2",
483
+ '\\textyhook' => "\u01b4",
484
+ '\\textasciidieresis' => "\u00a8",
485
+ '\\ntriangleleft' => "\u22ea",
486
+ '\\curlyeqsucc' => "\u22df",
487
+ '\\prod' => "\u220f",
488
+ '\\Gamma' => "\u0393",
489
+ '\\textbaro' => "\u0275",
490
+ '\\gtrsim' => "\u2273",
491
+ '\\bigcap' => "\u22c2",
492
+ '\\times' => "\u00d7",
493
+ '\\backsim' => "\u223d",
494
+ '\\Mu' => "\u039c",
495
+ '\\texthvlig' => "\u0195",
496
+ '\\textperthousand' => "\u2030",
497
+ '\\oint' => "\u222e",
498
+ '\\nVdash' => "\u22ae",
499
+ '\\Chi' => "\u03a7",
500
+ '\\textltailm' => "\u0271",
501
+ '\\cong' => "\u2245",
502
+ '\\texthtbardotlessj' => "\u0284",
503
+ '\\iiint' => "\u222d",
504
+ '\\textinvglotstop' => "\u0296",
505
+ '\\nleq' => "\u2270",
506
+ '\\preccurlyeq' => "\u227c",
507
+ '\\intercal' => "\u22ba",
508
+ '\\texthtc' => "\u0188",
509
+ '\\textscb' => "\u0299",
510
+ '\\sqsupseteq' => "\u2292",
511
+ '\\textprimstress' => "\u02c8",
512
+ '\\textturnw' => "\u028d",
513
+ '\\textVhook' => "\u01b2",
514
+ '\\boxminus' => "\u229f",
515
+ '\\pi' => "\u03c0",
516
+ '\\geqq' => "\u2267",
517
+ '\\O' => "\u00d8",
518
+ '\\textEzh' => "\u01b7",
519
+ '\\barwedge' => "\u22bc",
520
+ '\\textgammalatinsmall' => "\u0263",
521
+ '\\ddots' => "\u22f1",
522
+ '\\textlengthmark' => "\u02d0",
523
+ '\\not\\equiv' => "\u2262",
524
+ '\\not\\approx' => "\u2249",
525
+ '\\not\\simeq' => "\u2244",
526
+ '\\not\\sqsupseteq' => "\u22e3",
527
+ '\\not\\sqsubseteq' => "\u22e2",
528
+ '\\not\\preccurlyeq' => "\u22e0",
529
+ '\\not\\subset' => "\u2284",
530
+ '\\not\\gtrsim' => "\u2275",
531
+ '\\not\\asymp' => "\u226d",
532
+ '\\not\\lesssim' => "\u2274",
533
+ '\\not\\succcurlyeq' => "\u22e1",
534
+ '\\not\\ni' => "\u220c",
535
+ '\\not\\supset' => "\u2285",
536
+ }
537
+ end