temml 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +44 -0
- package/contrib/auto-render/README.md +89 -0
- package/contrib/auto-render/auto-render.js +128 -0
- package/contrib/auto-render/dist/auto-render.js +217 -0
- package/contrib/auto-render/dist/auto-render.min.js +1 -0
- package/contrib/auto-render/splitAtDelimiters.js +84 -0
- package/contrib/auto-render/test/auto-render-spec.js +234 -0
- package/contrib/auto-render/test/auto-render.js +217 -0
- package/contrib/auto-render/test/test_page.html +59 -0
- package/contrib/mhchem/README.md +26 -0
- package/contrib/mhchem/mhchem.js +1705 -0
- package/contrib/mhchem/mhchem.min.js +1 -0
- package/contrib/physics/README.md +20 -0
- package/contrib/physics/physics.js +131 -0
- package/contrib/texvc/README.md +23 -0
- package/contrib/texvc/texvc.js +61 -0
- package/dist/Temml-Asana.css +201 -0
- package/dist/Temml-Latin-Modern.css +216 -0
- package/dist/Temml-Libertinus.css +214 -0
- package/dist/Temml-Local.css +194 -0
- package/dist/Temml-STIX2.css +203 -0
- package/dist/Temml.woff2 +0 -0
- package/dist/temml.cjs +13122 -0
- package/dist/temml.js +11225 -0
- package/dist/temml.min.js +1 -0
- package/dist/temml.mjs +13120 -0
- package/dist/temmlPostProcess.js +70 -0
- package/package.json +34 -0
- package/src/Lexer.js +121 -0
- package/src/MacroExpander.js +437 -0
- package/src/Namespace.js +107 -0
- package/src/ParseError.js +64 -0
- package/src/Parser.js +977 -0
- package/src/Settings.js +49 -0
- package/src/SourceLocation.js +29 -0
- package/src/Style.js +144 -0
- package/src/Token.js +40 -0
- package/src/buildMathML.js +235 -0
- package/src/constants.js +25 -0
- package/src/defineEnvironment.js +25 -0
- package/src/defineFunction.js +69 -0
- package/src/defineMacro.js +11 -0
- package/src/domTree.js +185 -0
- package/src/environments/array.js +791 -0
- package/src/environments/cd.js +252 -0
- package/src/environments.js +8 -0
- package/src/functions/accent.js +127 -0
- package/src/functions/accentunder.js +38 -0
- package/src/functions/arrow.js +204 -0
- package/src/functions/cancelto.js +36 -0
- package/src/functions/char.js +33 -0
- package/src/functions/color.js +253 -0
- package/src/functions/cr.js +46 -0
- package/src/functions/def.js +259 -0
- package/src/functions/delimsizing.js +304 -0
- package/src/functions/enclose.js +193 -0
- package/src/functions/envTag.js +38 -0
- package/src/functions/environment.js +59 -0
- package/src/functions/font.js +123 -0
- package/src/functions/genfrac.js +333 -0
- package/src/functions/hbox.js +29 -0
- package/src/functions/horizBrace.js +32 -0
- package/src/functions/href.js +90 -0
- package/src/functions/html.js +95 -0
- package/src/functions/includegraphics.js +131 -0
- package/src/functions/kern.js +75 -0
- package/src/functions/label.js +29 -0
- package/src/functions/lap.js +75 -0
- package/src/functions/math.js +40 -0
- package/src/functions/mathchoice.js +41 -0
- package/src/functions/mclass.js +201 -0
- package/src/functions/multiscript.js +91 -0
- package/src/functions/not.js +46 -0
- package/src/functions/op.js +338 -0
- package/src/functions/operatorname.js +139 -0
- package/src/functions/ordgroup.js +9 -0
- package/src/functions/phantom.js +73 -0
- package/src/functions/pmb.js +31 -0
- package/src/functions/raise.js +68 -0
- package/src/functions/ref.js +28 -0
- package/src/functions/relax.js +16 -0
- package/src/functions/rule.js +52 -0
- package/src/functions/sizing.js +64 -0
- package/src/functions/smash.js +66 -0
- package/src/functions/sqrt.js +31 -0
- package/src/functions/styling.js +58 -0
- package/src/functions/supsub.js +135 -0
- package/src/functions/symbolsOp.js +53 -0
- package/src/functions/symbolsOrd.js +102 -0
- package/src/functions/symbolsSpacing.js +53 -0
- package/src/functions/tag.js +8 -0
- package/src/functions/text.js +75 -0
- package/src/functions/tip.js +63 -0
- package/src/functions/toggle.js +13 -0
- package/src/functions/verb.js +33 -0
- package/src/functions.js +57 -0
- package/src/linebreaking.js +159 -0
- package/src/macros.js +708 -0
- package/src/mathMLTree.js +175 -0
- package/src/parseNode.js +42 -0
- package/src/parseTree.js +40 -0
- package/src/postProcess.js +57 -0
- package/src/replace.js +225 -0
- package/src/stretchy.js +66 -0
- package/src/svg.js +110 -0
- package/src/symbols.js +972 -0
- package/src/tree.js +50 -0
- package/src/unicodeAccents.js +16 -0
- package/src/unicodeScripts.js +119 -0
- package/src/unicodeSupOrSub.js +108 -0
- package/src/unicodeSymbolBuilder.js +31 -0
- package/src/unicodeSymbols.js +320 -0
- package/src/units.js +109 -0
- package/src/utils.js +109 -0
- package/src/variant.js +103 -0
- package/temml.js +181 -0
package/src/Parser.js
ADDED
|
@@ -0,0 +1,977 @@
|
|
|
1
|
+
/* eslint no-constant-condition:0 */
|
|
2
|
+
import functions from "./functions";
|
|
3
|
+
import MacroExpander, { implicitCommands } from "./MacroExpander";
|
|
4
|
+
import symbols, { ATOMS } from "./symbols";
|
|
5
|
+
import { validUnit } from "./units";
|
|
6
|
+
import { supportedCodepoint } from "./unicodeScripts";
|
|
7
|
+
import ParseError from "./ParseError";
|
|
8
|
+
import { combiningDiacriticalMarksEndRegex } from "./Lexer";
|
|
9
|
+
import { uSubsAndSups, unicodeSubRegEx } from "./unicodeSupOrSub"
|
|
10
|
+
import SourceLocation from "./SourceLocation";
|
|
11
|
+
import { Token } from "./Token";
|
|
12
|
+
|
|
13
|
+
// Pre-evaluate both modules as unicodeSymbols require String.normalize()
|
|
14
|
+
import unicodeAccents from /*preval*/ "./unicodeAccents";
|
|
15
|
+
import unicodeSymbols from /*preval*/ "./unicodeSymbols";
|
|
16
|
+
|
|
17
|
+
const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in symbolsOrd.js
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* This file contains the parser used to parse out a TeX expression from the
|
|
21
|
+
* input. Since TeX isn't context-free, standard parsers don't work particularly
|
|
22
|
+
* well.
|
|
23
|
+
*
|
|
24
|
+
* The strategy of this parser is as such:
|
|
25
|
+
*
|
|
26
|
+
* The main functions (the `.parse...` ones) take a position in the current
|
|
27
|
+
* parse string to parse tokens from. The lexer (found in Lexer.js, stored at
|
|
28
|
+
* this.gullet.lexer) also supports pulling out tokens at arbitrary places. When
|
|
29
|
+
* individual tokens are needed at a position, the lexer is called to pull out a
|
|
30
|
+
* token, which is then used.
|
|
31
|
+
*
|
|
32
|
+
* The parser has a property called "mode" indicating the mode that
|
|
33
|
+
* the parser is currently in. Currently it has to be one of "math" or
|
|
34
|
+
* "text", which denotes whether the current environment is a math-y
|
|
35
|
+
* one or a text-y one (e.g. inside \text). Currently, this serves to
|
|
36
|
+
* limit the functions which can be used in text mode.
|
|
37
|
+
*
|
|
38
|
+
* The main functions then return an object which contains the useful data that
|
|
39
|
+
* was parsed at its given point, and a new position at the end of the parsed
|
|
40
|
+
* data. The main functions can call each other and continue the parsing by
|
|
41
|
+
* using the returned position as a new starting point.
|
|
42
|
+
*
|
|
43
|
+
* There are also extra `.handle...` functions, which pull out some reused
|
|
44
|
+
* functionality into self-contained functions.
|
|
45
|
+
*
|
|
46
|
+
* The functions return ParseNodes.
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
export default class Parser {
|
|
50
|
+
constructor(input, settings, isPreamble = false) {
|
|
51
|
+
// Start in math mode
|
|
52
|
+
this.mode = "math";
|
|
53
|
+
// Create a new macro expander (gullet) and (indirectly via that) also a
|
|
54
|
+
// new lexer (mouth) for this parser (stomach, in the language of TeX)
|
|
55
|
+
this.gullet = new MacroExpander(input, settings, this.mode);
|
|
56
|
+
// Store the settings for use in parsing
|
|
57
|
+
this.settings = settings;
|
|
58
|
+
// Are we defining a preamble?
|
|
59
|
+
this.isPreamble = isPreamble;
|
|
60
|
+
// Count leftright depth (for \middle errors)
|
|
61
|
+
this.leftrightDepth = 0;
|
|
62
|
+
this.prevAtomType = "";
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Checks a result to make sure it has the right type, and throws an
|
|
67
|
+
* appropriate error otherwise.
|
|
68
|
+
*/
|
|
69
|
+
expect(text, consume = true) {
|
|
70
|
+
if (this.fetch().text !== text) {
|
|
71
|
+
throw new ParseError(`Expected '${text}', got '${this.fetch().text}'`, this.fetch());
|
|
72
|
+
}
|
|
73
|
+
if (consume) {
|
|
74
|
+
this.consume();
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Discards the current lookahead token, considering it consumed.
|
|
80
|
+
*/
|
|
81
|
+
consume() {
|
|
82
|
+
this.nextToken = null;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Return the current lookahead token, or if there isn't one (at the
|
|
87
|
+
* beginning, or if the previous lookahead token was consume()d),
|
|
88
|
+
* fetch the next token as the new lookahead token and return it.
|
|
89
|
+
*/
|
|
90
|
+
fetch() {
|
|
91
|
+
if (this.nextToken == null) {
|
|
92
|
+
this.nextToken = this.gullet.expandNextToken();
|
|
93
|
+
}
|
|
94
|
+
return this.nextToken;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Switches between "text" and "math" modes.
|
|
99
|
+
*/
|
|
100
|
+
switchMode(newMode) {
|
|
101
|
+
this.mode = newMode;
|
|
102
|
+
this.gullet.switchMode(newMode);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Main parsing function, which parses an entire input.
|
|
107
|
+
*/
|
|
108
|
+
parse() {
|
|
109
|
+
// Create a group namespace for every $...$, $$...$$, \[...\].)
|
|
110
|
+
// A \def is then valid only within that pair of delimiters.
|
|
111
|
+
this.gullet.beginGroup();
|
|
112
|
+
|
|
113
|
+
if (this.settings.colorIsTextColor) {
|
|
114
|
+
// Use old \color behavior (same as LaTeX's \textcolor) if requested.
|
|
115
|
+
// We do this within the group for the math expression, so it doesn't
|
|
116
|
+
// pollute settings.macros.
|
|
117
|
+
this.gullet.macros.set("\\color", "\\textcolor");
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Try to parse the input
|
|
121
|
+
const parse = this.parseExpression(false);
|
|
122
|
+
|
|
123
|
+
// If we succeeded, make sure there's an EOF at the end
|
|
124
|
+
this.expect("EOF");
|
|
125
|
+
|
|
126
|
+
if (this.isPreamble) {
|
|
127
|
+
const macros = Object.create(null)
|
|
128
|
+
Object.entries(this.gullet.macros.current).forEach(([key, value]) => {
|
|
129
|
+
macros[key] = value
|
|
130
|
+
})
|
|
131
|
+
this.gullet.endGroup();
|
|
132
|
+
return macros
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// The only local macro that we want to save is from \tag.
|
|
136
|
+
const tag = this.gullet.macros.get("\\df@tag")
|
|
137
|
+
|
|
138
|
+
// End the group namespace for the expression
|
|
139
|
+
this.gullet.endGroup();
|
|
140
|
+
|
|
141
|
+
if (tag) { this.gullet.macros.current["\\df@tag"] = tag }
|
|
142
|
+
|
|
143
|
+
return parse;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
static get endOfExpression() {
|
|
147
|
+
return ["}", "\\endgroup", "\\end", "\\right", "\\endtoggle", "&"];
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Fully parse a separate sequence of tokens as a separate job.
|
|
152
|
+
* Tokens should be specified in reverse order, as in a MacroDefinition.
|
|
153
|
+
*/
|
|
154
|
+
subparse(tokens) {
|
|
155
|
+
// Save the next token from the current job.
|
|
156
|
+
const oldToken = this.nextToken;
|
|
157
|
+
this.consume();
|
|
158
|
+
|
|
159
|
+
// Run the new job, terminating it with an excess '}'
|
|
160
|
+
this.gullet.pushToken(new Token("}"));
|
|
161
|
+
this.gullet.pushTokens(tokens);
|
|
162
|
+
const parse = this.parseExpression(false);
|
|
163
|
+
this.expect("}");
|
|
164
|
+
|
|
165
|
+
// Restore the next token from the current job.
|
|
166
|
+
this.nextToken = oldToken;
|
|
167
|
+
|
|
168
|
+
return parse;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Parses an "expression", which is a list of atoms.
|
|
173
|
+
*
|
|
174
|
+
* `breakOnInfix`: Should the parsing stop when we hit infix nodes? This
|
|
175
|
+
* happens when functions have higher precedence than infix
|
|
176
|
+
* nodes in implicit parses.
|
|
177
|
+
*
|
|
178
|
+
* `breakOnTokenText`: The text of the token that the expression should end
|
|
179
|
+
* with, or `null` if something else should end the
|
|
180
|
+
* expression.
|
|
181
|
+
*/
|
|
182
|
+
parseExpression(breakOnInfix, breakOnTokenText) {
|
|
183
|
+
const body = [];
|
|
184
|
+
// Keep adding atoms to the body until we can't parse any more atoms (either
|
|
185
|
+
// we reached the end, a }, or a \right)
|
|
186
|
+
while (true) {
|
|
187
|
+
// Ignore spaces in math mode
|
|
188
|
+
if (this.mode === "math") {
|
|
189
|
+
this.consumeSpaces();
|
|
190
|
+
}
|
|
191
|
+
const lex = this.fetch();
|
|
192
|
+
if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
|
|
193
|
+
break;
|
|
194
|
+
}
|
|
195
|
+
if (breakOnTokenText && lex.text === breakOnTokenText) {
|
|
196
|
+
break;
|
|
197
|
+
}
|
|
198
|
+
if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
const atom = this.parseAtom(breakOnTokenText);
|
|
202
|
+
if (!atom) {
|
|
203
|
+
break;
|
|
204
|
+
} else if (atom.type === "internal") {
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
body.push(atom);
|
|
208
|
+
// Keep a record of the atom type, so that op.js can set correct spacing.
|
|
209
|
+
this.prevAtomType = atom.type === "atom" ? atom.family : atom.type;
|
|
210
|
+
}
|
|
211
|
+
if (this.mode === "text") {
|
|
212
|
+
this.formLigatures(body);
|
|
213
|
+
}
|
|
214
|
+
return this.handleInfixNodes(body);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Rewrites infix operators such as \over with corresponding commands such
|
|
219
|
+
* as \frac.
|
|
220
|
+
*
|
|
221
|
+
* There can only be one infix operator per group. If there's more than one
|
|
222
|
+
* then the expression is ambiguous. This can be resolved by adding {}.
|
|
223
|
+
*/
|
|
224
|
+
handleInfixNodes(body) {
|
|
225
|
+
let overIndex = -1;
|
|
226
|
+
let funcName;
|
|
227
|
+
|
|
228
|
+
for (let i = 0; i < body.length; i++) {
|
|
229
|
+
if (body[i].type === "infix") {
|
|
230
|
+
if (overIndex !== -1) {
|
|
231
|
+
throw new ParseError("only one infix operator per group", body[i].token);
|
|
232
|
+
}
|
|
233
|
+
overIndex = i;
|
|
234
|
+
funcName = body[i].replaceWith;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
if (overIndex !== -1 && funcName) {
|
|
239
|
+
let numerNode;
|
|
240
|
+
let denomNode;
|
|
241
|
+
|
|
242
|
+
const numerBody = body.slice(0, overIndex);
|
|
243
|
+
const denomBody = body.slice(overIndex + 1);
|
|
244
|
+
|
|
245
|
+
if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
|
|
246
|
+
numerNode = numerBody[0];
|
|
247
|
+
} else {
|
|
248
|
+
numerNode = { type: "ordgroup", mode: this.mode, body: numerBody };
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
|
|
252
|
+
denomNode = denomBody[0];
|
|
253
|
+
} else {
|
|
254
|
+
denomNode = { type: "ordgroup", mode: this.mode, body: denomBody };
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
let node;
|
|
258
|
+
if (funcName === "\\\\abovefrac") {
|
|
259
|
+
node = this.callFunction(funcName, [numerNode, body[overIndex], denomNode], []);
|
|
260
|
+
} else {
|
|
261
|
+
node = this.callFunction(funcName, [numerNode, denomNode], []);
|
|
262
|
+
}
|
|
263
|
+
return [node];
|
|
264
|
+
} else {
|
|
265
|
+
return body;
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
/**
|
|
270
|
+
* Handle a subscript or superscript with nice errors.
|
|
271
|
+
*/
|
|
272
|
+
handleSupSubscript(
|
|
273
|
+
name // For error reporting.
|
|
274
|
+
) {
|
|
275
|
+
const symbolToken = this.fetch();
|
|
276
|
+
const symbol = symbolToken.text;
|
|
277
|
+
this.consume();
|
|
278
|
+
this.consumeSpaces(); // ignore spaces before sup/subscript argument
|
|
279
|
+
const group = this.parseGroup(name);
|
|
280
|
+
|
|
281
|
+
if (!group) {
|
|
282
|
+
throw new ParseError("Expected group after '" + symbol + "'", symbolToken);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return group;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Converts the textual input of an unsupported command into a text node
|
|
290
|
+
* contained within a color node whose color is determined by errorColor
|
|
291
|
+
*/
|
|
292
|
+
formatUnsupportedCmd(text) {
|
|
293
|
+
const textordArray = [];
|
|
294
|
+
|
|
295
|
+
for (let i = 0; i < text.length; i++) {
|
|
296
|
+
textordArray.push({ type: "textord", mode: "text", text: text[i] });
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
const textNode = {
|
|
300
|
+
type: "text",
|
|
301
|
+
mode: this.mode,
|
|
302
|
+
body: textordArray
|
|
303
|
+
};
|
|
304
|
+
|
|
305
|
+
const colorNode = {
|
|
306
|
+
type: "color",
|
|
307
|
+
mode: this.mode,
|
|
308
|
+
color: this.settings.errorColor,
|
|
309
|
+
body: [textNode]
|
|
310
|
+
};
|
|
311
|
+
|
|
312
|
+
return colorNode;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* Parses a group with optional super/subscripts.
|
|
317
|
+
*/
|
|
318
|
+
parseAtom(breakOnTokenText) {
|
|
319
|
+
// The body of an atom is an implicit group, so that things like
|
|
320
|
+
// \left(x\right)^2 work correctly.
|
|
321
|
+
const base = this.parseGroup("atom", breakOnTokenText);
|
|
322
|
+
|
|
323
|
+
// In text mode, we don't have superscripts or subscripts
|
|
324
|
+
if (this.mode === "text") {
|
|
325
|
+
return base;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
// Note that base may be empty (i.e. null) at this point.
|
|
329
|
+
|
|
330
|
+
let superscript;
|
|
331
|
+
let subscript;
|
|
332
|
+
while (true) {
|
|
333
|
+
// Guaranteed in math mode, so eat any spaces first.
|
|
334
|
+
this.consumeSpaces();
|
|
335
|
+
|
|
336
|
+
// Lex the first token
|
|
337
|
+
const lex = this.fetch();
|
|
338
|
+
|
|
339
|
+
if (lex.text === "\\limits" || lex.text === "\\nolimits") {
|
|
340
|
+
// We got a limit control
|
|
341
|
+
if (base && base.type === "op") {
|
|
342
|
+
const limits = lex.text === "\\limits";
|
|
343
|
+
base.limits = limits;
|
|
344
|
+
base.alwaysHandleSupSub = true;
|
|
345
|
+
} else if (base && base.type === "operatorname") {
|
|
346
|
+
if (base.alwaysHandleSupSub) {
|
|
347
|
+
base.limits = lex.text === "\\limits"
|
|
348
|
+
}
|
|
349
|
+
} else {
|
|
350
|
+
throw new ParseError("Limit controls must follow a math operator", lex);
|
|
351
|
+
}
|
|
352
|
+
this.consume();
|
|
353
|
+
} else if (lex.text === "^") {
|
|
354
|
+
// We got a superscript start
|
|
355
|
+
if (superscript) {
|
|
356
|
+
throw new ParseError("Double superscript", lex);
|
|
357
|
+
}
|
|
358
|
+
superscript = this.handleSupSubscript("superscript");
|
|
359
|
+
} else if (lex.text === "_") {
|
|
360
|
+
// We got a subscript start
|
|
361
|
+
if (subscript) {
|
|
362
|
+
throw new ParseError("Double subscript", lex);
|
|
363
|
+
}
|
|
364
|
+
subscript = this.handleSupSubscript("subscript");
|
|
365
|
+
} else if (lex.text === "'") {
|
|
366
|
+
// We got a prime
|
|
367
|
+
if (superscript) {
|
|
368
|
+
throw new ParseError("Double superscript", lex);
|
|
369
|
+
}
|
|
370
|
+
const prime = { type: "textord", mode: this.mode, text: "\\prime" };
|
|
371
|
+
|
|
372
|
+
// Many primes can be grouped together, so we handle this here
|
|
373
|
+
const primes = [prime];
|
|
374
|
+
this.consume();
|
|
375
|
+
// Keep lexing tokens until we get something that's not a prime
|
|
376
|
+
while (this.fetch().text === "'") {
|
|
377
|
+
// For each one, add another prime to the list
|
|
378
|
+
primes.push(prime);
|
|
379
|
+
this.consume();
|
|
380
|
+
}
|
|
381
|
+
// If there's a superscript following the primes, combine that
|
|
382
|
+
// superscript in with the primes.
|
|
383
|
+
if (this.fetch().text === "^") {
|
|
384
|
+
primes.push(this.handleSupSubscript("superscript"));
|
|
385
|
+
}
|
|
386
|
+
// Put everything into an ordgroup as the superscript
|
|
387
|
+
superscript = { type: "ordgroup", mode: this.mode, body: primes };
|
|
388
|
+
} else if (uSubsAndSups[lex.text]) {
|
|
389
|
+
// A Unicode subscript or superscript character.
|
|
390
|
+
// We treat these similarly to the unicode-math package.
|
|
391
|
+
// So we render a string of Unicode (sub|super)scripts the
|
|
392
|
+
// same as a (sub|super)script of regular characters.
|
|
393
|
+
const isSub = unicodeSubRegEx.test(lex.text)
|
|
394
|
+
const subsupTokens = [];
|
|
395
|
+
subsupTokens.push(new Token(uSubsAndSups[lex.text]))
|
|
396
|
+
this.consume()
|
|
397
|
+
// Continue fetching tokens to fill out the group.
|
|
398
|
+
while (true) {
|
|
399
|
+
const token = this.fetch().text
|
|
400
|
+
if (!(uSubsAndSups[token])) { break }
|
|
401
|
+
if (unicodeSubRegEx.test(token) !== isSub) { break }
|
|
402
|
+
subsupTokens.unshift(new Token(uSubsAndSups[token]))
|
|
403
|
+
this.consume()
|
|
404
|
+
}
|
|
405
|
+
// Now create a (sub|super)script.
|
|
406
|
+
const body = this.subparse(subsupTokens)
|
|
407
|
+
if (isSub) {
|
|
408
|
+
subscript = { type: "ordgroup", mode: "math", body }
|
|
409
|
+
} else {
|
|
410
|
+
superscript = { type: "ordgroup", mode: "math", body }
|
|
411
|
+
}
|
|
412
|
+
} else {
|
|
413
|
+
// If it wasn't ^, _, a Unicode (sub|super)script, or ', stop parsing super/subscripts
|
|
414
|
+
break;
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
if (superscript || subscript) {
|
|
419
|
+
if (base && base.type === "multiscript" && !base.postscripts) {
|
|
420
|
+
// base is the result of a \prescript function.
|
|
421
|
+
// Write the sub- & superscripts into the multiscript element.
|
|
422
|
+
base.postscripts = { sup: superscript, sub: subscript }
|
|
423
|
+
return base
|
|
424
|
+
} else {
|
|
425
|
+
// We got either a superscript or subscript, create a supsub
|
|
426
|
+
return {
|
|
427
|
+
type: "supsub",
|
|
428
|
+
mode: this.mode,
|
|
429
|
+
base: base,
|
|
430
|
+
sup: superscript,
|
|
431
|
+
sub: subscript
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
} else {
|
|
435
|
+
// Otherwise return the original body
|
|
436
|
+
return base;
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
/**
|
|
441
|
+
* Parses an entire function, including its base and all of its arguments.
|
|
442
|
+
*/
|
|
443
|
+
parseFunction(
|
|
444
|
+
breakOnTokenText,
|
|
445
|
+
name // For determining its context
|
|
446
|
+
) {
|
|
447
|
+
const token = this.fetch();
|
|
448
|
+
const func = token.text;
|
|
449
|
+
const funcData = functions[func];
|
|
450
|
+
if (!funcData) {
|
|
451
|
+
return null;
|
|
452
|
+
}
|
|
453
|
+
this.consume(); // consume command token
|
|
454
|
+
|
|
455
|
+
if (name && name !== "atom" && !funcData.allowedInArgument) {
|
|
456
|
+
throw new ParseError(
|
|
457
|
+
"Got function '" + func + "' with no arguments" + (name ? " as " + name : ""),
|
|
458
|
+
token
|
|
459
|
+
);
|
|
460
|
+
} else if (this.mode === "text" && !funcData.allowedInText) {
|
|
461
|
+
throw new ParseError("Can't use function '" + func + "' in text mode", token);
|
|
462
|
+
} else if (this.mode === "math" && funcData.allowedInMath === false) {
|
|
463
|
+
throw new ParseError("Can't use function '" + func + "' in math mode", token);
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
const prevAtomType = this.prevAtomType;
|
|
467
|
+
const { args, optArgs } = this.parseArguments(func, funcData);
|
|
468
|
+
this.prevAtomType = prevAtomType;
|
|
469
|
+
return this.callFunction(func, args, optArgs, token, breakOnTokenText);
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
/**
|
|
473
|
+
* Call a function handler with a suitable context and arguments.
|
|
474
|
+
*/
|
|
475
|
+
callFunction(name, args, optArgs, token, breakOnTokenText) {
|
|
476
|
+
const context = {
|
|
477
|
+
funcName: name,
|
|
478
|
+
parser: this,
|
|
479
|
+
token,
|
|
480
|
+
breakOnTokenText
|
|
481
|
+
};
|
|
482
|
+
const func = functions[name];
|
|
483
|
+
if (func && func.handler) {
|
|
484
|
+
return func.handler(context, args, optArgs);
|
|
485
|
+
} else {
|
|
486
|
+
throw new ParseError(`No function handler for ${name}`);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
/**
|
|
491
|
+
* Parses the arguments of a function or environment
|
|
492
|
+
*/
|
|
493
|
+
parseArguments(
|
|
494
|
+
func, // Should look like "\name" or "\begin{name}".
|
|
495
|
+
funcData
|
|
496
|
+
) {
|
|
497
|
+
const totalArgs = funcData.numArgs + funcData.numOptionalArgs;
|
|
498
|
+
if (totalArgs === 0) {
|
|
499
|
+
return { args: [], optArgs: [] };
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
const args = [];
|
|
503
|
+
const optArgs = [];
|
|
504
|
+
|
|
505
|
+
for (let i = 0; i < totalArgs; i++) {
|
|
506
|
+
let argType = funcData.argTypes && funcData.argTypes[i];
|
|
507
|
+
const isOptional = i < funcData.numOptionalArgs;
|
|
508
|
+
|
|
509
|
+
if (
|
|
510
|
+
(funcData.primitive && argType == null) ||
|
|
511
|
+
// \sqrt expands into primitive if optional argument doesn't exist
|
|
512
|
+
(funcData.type === "sqrt" && i === 1 && optArgs[0] == null)
|
|
513
|
+
) {
|
|
514
|
+
argType = "primitive";
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
const arg = this.parseGroupOfType(`argument to '${func}'`, argType, isOptional);
|
|
518
|
+
if (isOptional) {
|
|
519
|
+
optArgs.push(arg);
|
|
520
|
+
} else if (arg != null) {
|
|
521
|
+
args.push(arg);
|
|
522
|
+
} else {
|
|
523
|
+
// should be unreachable
|
|
524
|
+
throw new ParseError("Null argument, please report this as a bug");
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
return { args, optArgs };
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
/**
|
|
532
|
+
* Parses a group when the mode is changing.
|
|
533
|
+
*/
|
|
534
|
+
parseGroupOfType(name, type, optional) {
|
|
535
|
+
switch (type) {
|
|
536
|
+
case "size":
|
|
537
|
+
return this.parseSizeGroup(optional);
|
|
538
|
+
case "url":
|
|
539
|
+
return this.parseUrlGroup(optional);
|
|
540
|
+
case "math":
|
|
541
|
+
case "text":
|
|
542
|
+
return this.parseArgumentGroup(optional, type);
|
|
543
|
+
case "hbox": {
|
|
544
|
+
// hbox argument type wraps the argument in the equivalent of
|
|
545
|
+
// \hbox, which is like \text but switching to \textstyle size.
|
|
546
|
+
const group = this.parseArgumentGroup(optional, "text");
|
|
547
|
+
return group != null
|
|
548
|
+
? {
|
|
549
|
+
type: "styling",
|
|
550
|
+
mode: group.mode,
|
|
551
|
+
body: [group],
|
|
552
|
+
scriptLevel: "text" // simulate \textstyle
|
|
553
|
+
}
|
|
554
|
+
: null;
|
|
555
|
+
}
|
|
556
|
+
case "raw": {
|
|
557
|
+
const token = this.parseStringGroup("raw", optional);
|
|
558
|
+
return token != null
|
|
559
|
+
? {
|
|
560
|
+
type: "raw",
|
|
561
|
+
mode: "text",
|
|
562
|
+
string: token.text
|
|
563
|
+
}
|
|
564
|
+
: null;
|
|
565
|
+
}
|
|
566
|
+
case "primitive": {
|
|
567
|
+
if (optional) {
|
|
568
|
+
throw new ParseError("A primitive argument cannot be optional");
|
|
569
|
+
}
|
|
570
|
+
const group = this.parseGroup(name);
|
|
571
|
+
if (group == null) {
|
|
572
|
+
throw new ParseError("Expected group as " + name, this.fetch());
|
|
573
|
+
}
|
|
574
|
+
return group;
|
|
575
|
+
}
|
|
576
|
+
case "original":
|
|
577
|
+
case null:
|
|
578
|
+
case undefined:
|
|
579
|
+
return this.parseArgumentGroup(optional);
|
|
580
|
+
default:
|
|
581
|
+
throw new ParseError("Unknown group type as " + name, this.fetch());
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
/**
|
|
586
|
+
* Discard any space tokens, fetching the next non-space token.
|
|
587
|
+
*/
|
|
588
|
+
consumeSpaces() {
|
|
589
|
+
while (true) {
|
|
590
|
+
const ch = this.fetch().text
|
|
591
|
+
// \ufe0e is the Unicode variation selector to supress emoji. Ignore it.
|
|
592
|
+
if (ch === " " || ch === "\ufe0e") {
|
|
593
|
+
this.consume()
|
|
594
|
+
} else {
|
|
595
|
+
break
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
/**
|
|
601
|
+
* Parses a group, essentially returning the string formed by the
|
|
602
|
+
* brace-enclosed tokens plus some position information.
|
|
603
|
+
*/
|
|
604
|
+
parseStringGroup(
|
|
605
|
+
modeName, // Used to describe the mode in error messages.
|
|
606
|
+
optional
|
|
607
|
+
) {
|
|
608
|
+
const argToken = this.gullet.scanArgument(optional);
|
|
609
|
+
if (argToken == null) {
|
|
610
|
+
return null;
|
|
611
|
+
}
|
|
612
|
+
let str = "";
|
|
613
|
+
let nextToken;
|
|
614
|
+
while ((nextToken = this.fetch()).text !== "EOF") {
|
|
615
|
+
str += nextToken.text;
|
|
616
|
+
this.consume();
|
|
617
|
+
}
|
|
618
|
+
this.consume(); // consume the end of the argument
|
|
619
|
+
argToken.text = str;
|
|
620
|
+
return argToken;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
/**
|
|
624
|
+
* Parses a regex-delimited group: the largest sequence of tokens
|
|
625
|
+
* whose concatenated strings match `regex`. Returns the string
|
|
626
|
+
* formed by the tokens plus some position information.
|
|
627
|
+
*/
|
|
628
|
+
parseRegexGroup(
|
|
629
|
+
regex,
|
|
630
|
+
modeName // Used to describe the mode in error messages.
|
|
631
|
+
) {
|
|
632
|
+
const firstToken = this.fetch();
|
|
633
|
+
let lastToken = firstToken;
|
|
634
|
+
let str = "";
|
|
635
|
+
let nextToken;
|
|
636
|
+
while ((nextToken = this.fetch()).text !== "EOF" && regex.test(str + nextToken.text)) {
|
|
637
|
+
lastToken = nextToken;
|
|
638
|
+
str += lastToken.text;
|
|
639
|
+
this.consume();
|
|
640
|
+
}
|
|
641
|
+
if (str === "") {
|
|
642
|
+
throw new ParseError("Invalid " + modeName + ": '" + firstToken.text + "'", firstToken);
|
|
643
|
+
}
|
|
644
|
+
return firstToken.range(lastToken, str);
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
/**
|
|
648
|
+
* Parses a size specification, consisting of magnitude and unit.
|
|
649
|
+
*/
|
|
650
|
+
parseSizeGroup(optional) {
|
|
651
|
+
let res;
|
|
652
|
+
let isBlank = false;
|
|
653
|
+
// don't expand before parseStringGroup
|
|
654
|
+
this.gullet.consumeSpaces();
|
|
655
|
+
if (!optional && this.gullet.future().text !== "{") {
|
|
656
|
+
res = this.parseRegexGroup(/^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$/, "size");
|
|
657
|
+
} else {
|
|
658
|
+
res = this.parseStringGroup("size", optional);
|
|
659
|
+
}
|
|
660
|
+
if (!res) {
|
|
661
|
+
return null;
|
|
662
|
+
}
|
|
663
|
+
if (!optional && res.text.length === 0) {
|
|
664
|
+
// Because we've tested for what is !optional, this block won't
|
|
665
|
+
// affect \kern, \hspace, etc. It will capture the mandatory arguments
|
|
666
|
+
// to \genfrac and \above.
|
|
667
|
+
res.text = "0pt"; // Enable \above{}
|
|
668
|
+
isBlank = true; // This is here specifically for \genfrac
|
|
669
|
+
}
|
|
670
|
+
const match = /([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/.exec(res.text);
|
|
671
|
+
if (!match) {
|
|
672
|
+
throw new ParseError("Invalid size: '" + res.text + "'", res);
|
|
673
|
+
}
|
|
674
|
+
const data = {
|
|
675
|
+
number: +(match[1] + match[2]), // sign + magnitude, cast to number
|
|
676
|
+
unit: match[3]
|
|
677
|
+
};
|
|
678
|
+
if (!validUnit(data)) {
|
|
679
|
+
throw new ParseError("Invalid unit: '" + data.unit + "'", res);
|
|
680
|
+
}
|
|
681
|
+
return {
|
|
682
|
+
type: "size",
|
|
683
|
+
mode: this.mode,
|
|
684
|
+
value: data,
|
|
685
|
+
isBlank
|
|
686
|
+
};
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
/**
|
|
690
|
+
* Parses a URL, checking escaped letters and allowed protocols,
|
|
691
|
+
* and setting the catcode of % as an active character (as in \hyperref).
|
|
692
|
+
*/
|
|
693
|
+
parseUrlGroup(optional) {
|
|
694
|
+
this.gullet.lexer.setCatcode("%", 13); // active character
|
|
695
|
+
this.gullet.lexer.setCatcode("~", 12); // other character
|
|
696
|
+
const res = this.parseStringGroup("url", optional);
|
|
697
|
+
this.gullet.lexer.setCatcode("%", 14); // comment character
|
|
698
|
+
this.gullet.lexer.setCatcode("~", 13); // active character
|
|
699
|
+
if (res == null) {
|
|
700
|
+
return null;
|
|
701
|
+
}
|
|
702
|
+
// hyperref package allows backslashes alone in href, but doesn't
|
|
703
|
+
// generate valid links in such cases; we interpret this as
|
|
704
|
+
// "undefined" behaviour, and keep them as-is. Some browser will
|
|
705
|
+
// replace backslashes with forward slashes.
|
|
706
|
+
let url = res.text.replace(/\\([#$%&~_^{}])/g, "$1");
|
|
707
|
+
url = res.text.replace(/{\u2044}/g, "/");
|
|
708
|
+
return {
|
|
709
|
+
type: "url",
|
|
710
|
+
mode: this.mode,
|
|
711
|
+
url
|
|
712
|
+
};
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
/**
|
|
716
|
+
* Parses an argument with the mode specified.
|
|
717
|
+
*/
|
|
718
|
+
parseArgumentGroup(optional, mode) {
|
|
719
|
+
const argToken = this.gullet.scanArgument(optional);
|
|
720
|
+
if (argToken == null) {
|
|
721
|
+
return null;
|
|
722
|
+
}
|
|
723
|
+
const outerMode = this.mode;
|
|
724
|
+
if (mode) {
|
|
725
|
+
// Switch to specified mode
|
|
726
|
+
this.switchMode(mode);
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
this.gullet.beginGroup();
|
|
730
|
+
const expression = this.parseExpression(false, "EOF");
|
|
731
|
+
// TODO: find an alternative way to denote the end
|
|
732
|
+
this.expect("EOF"); // expect the end of the argument
|
|
733
|
+
this.gullet.endGroup();
|
|
734
|
+
const result = {
|
|
735
|
+
type: "ordgroup",
|
|
736
|
+
mode: this.mode,
|
|
737
|
+
loc: argToken.loc,
|
|
738
|
+
body: expression
|
|
739
|
+
};
|
|
740
|
+
|
|
741
|
+
if (mode) {
|
|
742
|
+
// Switch mode back
|
|
743
|
+
this.switchMode(outerMode);
|
|
744
|
+
}
|
|
745
|
+
return result;
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
/**
|
|
749
|
+
* Parses an ordinary group, which is either a single nucleus (like "x")
|
|
750
|
+
* or an expression in braces (like "{x+y}") or an implicit group, a group
|
|
751
|
+
* that starts at the current position, and ends right before a higher explicit
|
|
752
|
+
* group ends, or at EOF.
|
|
753
|
+
*/
|
|
754
|
+
parseGroup(
|
|
755
|
+
name, // For error reporting.
|
|
756
|
+
breakOnTokenText
|
|
757
|
+
) {
|
|
758
|
+
const firstToken = this.fetch();
|
|
759
|
+
const text = firstToken.text;
|
|
760
|
+
|
|
761
|
+
let result;
|
|
762
|
+
// Try to parse an open brace or \begingroup
|
|
763
|
+
if (text === "{" || text === "\\begingroup" || text === "\\toggle") {
|
|
764
|
+
this.consume();
|
|
765
|
+
const groupEnd = text === "{"
|
|
766
|
+
? "}"
|
|
767
|
+
: text === "\\begingroup"
|
|
768
|
+
? "\\endgroup"
|
|
769
|
+
: "\\endtoggle"
|
|
770
|
+
|
|
771
|
+
this.gullet.beginGroup();
|
|
772
|
+
// If we get a brace, parse an expression
|
|
773
|
+
const expression = this.parseExpression(false, groupEnd);
|
|
774
|
+
const lastToken = this.fetch();
|
|
775
|
+
this.expect(groupEnd); // Check that we got a matching closing brace
|
|
776
|
+
this.gullet.endGroup();
|
|
777
|
+
result = {
|
|
778
|
+
type: (lastToken.text === "\\endtoggle" ? "toggle" : "ordgroup"),
|
|
779
|
+
mode: this.mode,
|
|
780
|
+
loc: SourceLocation.range(firstToken, lastToken),
|
|
781
|
+
body: expression,
|
|
782
|
+
// A group formed by \begingroup...\endgroup is a semi-simple group
|
|
783
|
+
// which doesn't affect spacing in math mode, i.e., is transparent.
|
|
784
|
+
// https://tex.stackexchange.com/questions/1930/when-should-one-
|
|
785
|
+
// use-begingroup-instead-of-bgroup
|
|
786
|
+
semisimple: text === "\\begingroup" || undefined
|
|
787
|
+
};
|
|
788
|
+
} else {
|
|
789
|
+
// If there exists a function with this name, parse the function.
|
|
790
|
+
// Otherwise, just return a nucleus
|
|
791
|
+
result = this.parseFunction(breakOnTokenText, name) || this.parseSymbol();
|
|
792
|
+
if (result == null && text[0] === "\\" &&
|
|
793
|
+
!Object.prototype.hasOwnProperty.call(implicitCommands, text )) {
|
|
794
|
+
result = this.formatUnsupportedCmd(text);
|
|
795
|
+
this.consume();
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
return result;
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
/**
|
|
802
|
+
* Form ligature-like combinations of characters for text mode.
|
|
803
|
+
* This includes inputs like "--", "---", "``" and "''".
|
|
804
|
+
* The result will simply replace multiple textord nodes with a single
|
|
805
|
+
* character in each value by a single textord node having multiple
|
|
806
|
+
* characters in its value. The representation is still ASCII source.
|
|
807
|
+
* The group will be modified in place.
|
|
808
|
+
*/
|
|
809
|
+
formLigatures(group) {
|
|
810
|
+
let n = group.length - 1;
|
|
811
|
+
for (let i = 0; i < n; ++i) {
|
|
812
|
+
const a = group[i];
|
|
813
|
+
const v = a.text;
|
|
814
|
+
if (v === "-" && group[i + 1].text === "-") {
|
|
815
|
+
if (i + 1 < n && group[i + 2].text === "-") {
|
|
816
|
+
group.splice(i, 3, {
|
|
817
|
+
type: "textord",
|
|
818
|
+
mode: "text",
|
|
819
|
+
loc: SourceLocation.range(a, group[i + 2]),
|
|
820
|
+
text: "---"
|
|
821
|
+
});
|
|
822
|
+
n -= 2;
|
|
823
|
+
} else {
|
|
824
|
+
group.splice(i, 2, {
|
|
825
|
+
type: "textord",
|
|
826
|
+
mode: "text",
|
|
827
|
+
loc: SourceLocation.range(a, group[i + 1]),
|
|
828
|
+
text: "--"
|
|
829
|
+
});
|
|
830
|
+
n -= 1;
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
if ((v === "'" || v === "`") && group[i + 1].text === v) {
|
|
834
|
+
group.splice(i, 2, {
|
|
835
|
+
type: "textord",
|
|
836
|
+
mode: "text",
|
|
837
|
+
loc: SourceLocation.range(a, group[i + 1]),
|
|
838
|
+
text: v + v
|
|
839
|
+
});
|
|
840
|
+
n -= 1;
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
/**
|
|
846
|
+
* Parse a single symbol out of the string. Here, we handle single character
|
|
847
|
+
* symbols and special functions like \verb.
|
|
848
|
+
*/
|
|
849
|
+
parseSymbol() {
|
|
850
|
+
const nucleus = this.fetch();
|
|
851
|
+
let text = nucleus.text;
|
|
852
|
+
|
|
853
|
+
if (/^\\verb[^a-zA-Z]/.test(text)) {
|
|
854
|
+
this.consume();
|
|
855
|
+
let arg = text.slice(5);
|
|
856
|
+
const star = arg.charAt(0) === "*";
|
|
857
|
+
if (star) {
|
|
858
|
+
arg = arg.slice(1);
|
|
859
|
+
}
|
|
860
|
+
// Lexer's tokenRegex is constructed to always have matching
|
|
861
|
+
// first/last characters.
|
|
862
|
+
if (arg.length < 2 || arg.charAt(0) !== arg.slice(-1)) {
|
|
863
|
+
throw new ParseError(`\\verb assertion failed --
|
|
864
|
+
please report what input caused this bug`);
|
|
865
|
+
}
|
|
866
|
+
arg = arg.slice(1, -1); // remove first and last char
|
|
867
|
+
return {
|
|
868
|
+
type: "verb",
|
|
869
|
+
mode: "text",
|
|
870
|
+
body: arg,
|
|
871
|
+
star
|
|
872
|
+
};
|
|
873
|
+
}
|
|
874
|
+
// At this point, we should have a symbol, possibly with accents.
|
|
875
|
+
// First expand any accented base symbol according to unicodeSymbols.
|
|
876
|
+
if (Object.prototype.hasOwnProperty.call(unicodeSymbols, text[0]) &&
|
|
877
|
+
!symbols[this.mode][text[0]]) {
|
|
878
|
+
// This behavior is not strict (XeTeX-compatible) in math mode.
|
|
879
|
+
if (this.settings.strict && this.mode === "math") {
|
|
880
|
+
throw new ParseError(`Accented Unicode text character "${text[0]}" used in ` + `math mode`,
|
|
881
|
+
nucleus
|
|
882
|
+
);
|
|
883
|
+
}
|
|
884
|
+
text = unicodeSymbols[text[0]] + text.slice(1);
|
|
885
|
+
}
|
|
886
|
+
// Strip off any combining characters
|
|
887
|
+
const match = combiningDiacriticalMarksEndRegex.exec(text);
|
|
888
|
+
if (match) {
|
|
889
|
+
text = text.substring(0, match.index);
|
|
890
|
+
if (text === "i") {
|
|
891
|
+
text = "\u0131"; // dotless i, in math and text mode
|
|
892
|
+
} else if (text === "j") {
|
|
893
|
+
text = "\u0237"; // dotless j, in math and text mode
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
// Recognize base symbol
|
|
897
|
+
let symbol;
|
|
898
|
+
if (symbols[this.mode][text]) {
|
|
899
|
+
const group = symbols[this.mode][text].group;
|
|
900
|
+
const loc = SourceLocation.range(nucleus);
|
|
901
|
+
let s;
|
|
902
|
+
if (Object.prototype.hasOwnProperty.call(ATOMS, group )) {
|
|
903
|
+
const family = group;
|
|
904
|
+
s = {
|
|
905
|
+
type: "atom",
|
|
906
|
+
mode: this.mode,
|
|
907
|
+
family,
|
|
908
|
+
loc,
|
|
909
|
+
text
|
|
910
|
+
};
|
|
911
|
+
} else {
|
|
912
|
+
s = {
|
|
913
|
+
type: group,
|
|
914
|
+
mode: this.mode,
|
|
915
|
+
loc,
|
|
916
|
+
text
|
|
917
|
+
};
|
|
918
|
+
}
|
|
919
|
+
symbol = s;
|
|
920
|
+
} else if (!this.strict && numberRegEx.test(text)) {
|
|
921
|
+
// A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
|
|
922
|
+
this.consume()
|
|
923
|
+
return {
|
|
924
|
+
type: "textord",
|
|
925
|
+
mode: this.mode,
|
|
926
|
+
loc: SourceLocation.range(nucleus),
|
|
927
|
+
text
|
|
928
|
+
}
|
|
929
|
+
} else if (text.charCodeAt(0) >= 0x80) {
|
|
930
|
+
// no symbol for e.g. ^
|
|
931
|
+
if (this.settings.strict) {
|
|
932
|
+
if (!supportedCodepoint(text.charCodeAt(0))) {
|
|
933
|
+
throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
|
|
934
|
+
` (${text.charCodeAt(0)})`, nucleus);
|
|
935
|
+
} else if (this.mode === "math") {
|
|
936
|
+
throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
|
|
937
|
+
}
|
|
938
|
+
}
|
|
939
|
+
// All nonmathematical Unicode characters are rendered as if they
|
|
940
|
+
// are in text mode (wrapped in \text) because that's what it
|
|
941
|
+
// takes to render them in LaTeX.
|
|
942
|
+
symbol = {
|
|
943
|
+
type: "textord",
|
|
944
|
+
mode: "text",
|
|
945
|
+
loc: SourceLocation.range(nucleus),
|
|
946
|
+
text
|
|
947
|
+
};
|
|
948
|
+
} else {
|
|
949
|
+
return null; // EOF, ^, _, {, }, etc.
|
|
950
|
+
}
|
|
951
|
+
this.consume();
|
|
952
|
+
// Transform combining characters into accents
|
|
953
|
+
if (match) {
|
|
954
|
+
for (let i = 0; i < match[0].length; i++) {
|
|
955
|
+
const accent = match[0][i];
|
|
956
|
+
if (!unicodeAccents[accent]) {
|
|
957
|
+
throw new ParseError(`Unknown accent ' ${accent}'`, nucleus);
|
|
958
|
+
}
|
|
959
|
+
const command = unicodeAccents[accent][this.mode] ||
|
|
960
|
+
unicodeAccents[accent].text;
|
|
961
|
+
if (!command) {
|
|
962
|
+
throw new ParseError(`Accent ${accent} unsupported in ${this.mode} mode`, nucleus);
|
|
963
|
+
}
|
|
964
|
+
symbol = {
|
|
965
|
+
type: "accent",
|
|
966
|
+
mode: this.mode,
|
|
967
|
+
loc: SourceLocation.range(nucleus),
|
|
968
|
+
label: command,
|
|
969
|
+
isStretchy: false,
|
|
970
|
+
isShifty: true,
|
|
971
|
+
base: symbol
|
|
972
|
+
};
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
return symbol;
|
|
976
|
+
}
|
|
977
|
+
}
|