temml 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +44 -0
  3. package/contrib/auto-render/README.md +89 -0
  4. package/contrib/auto-render/auto-render.js +128 -0
  5. package/contrib/auto-render/dist/auto-render.js +217 -0
  6. package/contrib/auto-render/dist/auto-render.min.js +1 -0
  7. package/contrib/auto-render/splitAtDelimiters.js +84 -0
  8. package/contrib/auto-render/test/auto-render-spec.js +234 -0
  9. package/contrib/auto-render/test/auto-render.js +217 -0
  10. package/contrib/auto-render/test/test_page.html +59 -0
  11. package/contrib/mhchem/README.md +26 -0
  12. package/contrib/mhchem/mhchem.js +1705 -0
  13. package/contrib/mhchem/mhchem.min.js +1 -0
  14. package/contrib/physics/README.md +20 -0
  15. package/contrib/physics/physics.js +131 -0
  16. package/contrib/texvc/README.md +23 -0
  17. package/contrib/texvc/texvc.js +61 -0
  18. package/dist/Temml-Asana.css +201 -0
  19. package/dist/Temml-Latin-Modern.css +216 -0
  20. package/dist/Temml-Libertinus.css +214 -0
  21. package/dist/Temml-Local.css +194 -0
  22. package/dist/Temml-STIX2.css +203 -0
  23. package/dist/Temml.woff2 +0 -0
  24. package/dist/temml.cjs +13122 -0
  25. package/dist/temml.js +11225 -0
  26. package/dist/temml.min.js +1 -0
  27. package/dist/temml.mjs +13120 -0
  28. package/dist/temmlPostProcess.js +70 -0
  29. package/package.json +34 -0
  30. package/src/Lexer.js +121 -0
  31. package/src/MacroExpander.js +437 -0
  32. package/src/Namespace.js +107 -0
  33. package/src/ParseError.js +64 -0
  34. package/src/Parser.js +977 -0
  35. package/src/Settings.js +49 -0
  36. package/src/SourceLocation.js +29 -0
  37. package/src/Style.js +144 -0
  38. package/src/Token.js +40 -0
  39. package/src/buildMathML.js +235 -0
  40. package/src/constants.js +25 -0
  41. package/src/defineEnvironment.js +25 -0
  42. package/src/defineFunction.js +69 -0
  43. package/src/defineMacro.js +11 -0
  44. package/src/domTree.js +185 -0
  45. package/src/environments/array.js +791 -0
  46. package/src/environments/cd.js +252 -0
  47. package/src/environments.js +8 -0
  48. package/src/functions/accent.js +127 -0
  49. package/src/functions/accentunder.js +38 -0
  50. package/src/functions/arrow.js +204 -0
  51. package/src/functions/cancelto.js +36 -0
  52. package/src/functions/char.js +33 -0
  53. package/src/functions/color.js +253 -0
  54. package/src/functions/cr.js +46 -0
  55. package/src/functions/def.js +259 -0
  56. package/src/functions/delimsizing.js +304 -0
  57. package/src/functions/enclose.js +193 -0
  58. package/src/functions/envTag.js +38 -0
  59. package/src/functions/environment.js +59 -0
  60. package/src/functions/font.js +123 -0
  61. package/src/functions/genfrac.js +333 -0
  62. package/src/functions/hbox.js +29 -0
  63. package/src/functions/horizBrace.js +32 -0
  64. package/src/functions/href.js +90 -0
  65. package/src/functions/html.js +95 -0
  66. package/src/functions/includegraphics.js +131 -0
  67. package/src/functions/kern.js +75 -0
  68. package/src/functions/label.js +29 -0
  69. package/src/functions/lap.js +75 -0
  70. package/src/functions/math.js +40 -0
  71. package/src/functions/mathchoice.js +41 -0
  72. package/src/functions/mclass.js +201 -0
  73. package/src/functions/multiscript.js +91 -0
  74. package/src/functions/not.js +46 -0
  75. package/src/functions/op.js +338 -0
  76. package/src/functions/operatorname.js +139 -0
  77. package/src/functions/ordgroup.js +9 -0
  78. package/src/functions/phantom.js +73 -0
  79. package/src/functions/pmb.js +31 -0
  80. package/src/functions/raise.js +68 -0
  81. package/src/functions/ref.js +28 -0
  82. package/src/functions/relax.js +16 -0
  83. package/src/functions/rule.js +52 -0
  84. package/src/functions/sizing.js +64 -0
  85. package/src/functions/smash.js +66 -0
  86. package/src/functions/sqrt.js +31 -0
  87. package/src/functions/styling.js +58 -0
  88. package/src/functions/supsub.js +135 -0
  89. package/src/functions/symbolsOp.js +53 -0
  90. package/src/functions/symbolsOrd.js +102 -0
  91. package/src/functions/symbolsSpacing.js +53 -0
  92. package/src/functions/tag.js +8 -0
  93. package/src/functions/text.js +75 -0
  94. package/src/functions/tip.js +63 -0
  95. package/src/functions/toggle.js +13 -0
  96. package/src/functions/verb.js +33 -0
  97. package/src/functions.js +57 -0
  98. package/src/linebreaking.js +159 -0
  99. package/src/macros.js +708 -0
  100. package/src/mathMLTree.js +175 -0
  101. package/src/parseNode.js +42 -0
  102. package/src/parseTree.js +40 -0
  103. package/src/postProcess.js +57 -0
  104. package/src/replace.js +225 -0
  105. package/src/stretchy.js +66 -0
  106. package/src/svg.js +110 -0
  107. package/src/symbols.js +972 -0
  108. package/src/tree.js +50 -0
  109. package/src/unicodeAccents.js +16 -0
  110. package/src/unicodeScripts.js +119 -0
  111. package/src/unicodeSupOrSub.js +108 -0
  112. package/src/unicodeSymbolBuilder.js +31 -0
  113. package/src/unicodeSymbols.js +320 -0
  114. package/src/units.js +109 -0
  115. package/src/utils.js +109 -0
  116. package/src/variant.js +103 -0
  117. package/temml.js +181 -0
package/src/Parser.js ADDED
@@ -0,0 +1,977 @@
1
+ /* eslint no-constant-condition:0 */
2
+ import functions from "./functions";
3
+ import MacroExpander, { implicitCommands } from "./MacroExpander";
4
+ import symbols, { ATOMS } from "./symbols";
5
+ import { validUnit } from "./units";
6
+ import { supportedCodepoint } from "./unicodeScripts";
7
+ import ParseError from "./ParseError";
8
+ import { combiningDiacriticalMarksEndRegex } from "./Lexer";
9
+ import { uSubsAndSups, unicodeSubRegEx } from "./unicodeSupOrSub"
10
+ import SourceLocation from "./SourceLocation";
11
+ import { Token } from "./Token";
12
+
13
+ // Pre-evaluate both modules, because unicodeSymbols requires String.normalize()
14
+ import unicodeAccents from /*preval*/ "./unicodeAccents";
15
+ import unicodeSymbols from /*preval*/ "./unicodeSymbols";
16
+
17
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Matches a string that starts and ends with a digit, with only digits, commas and periods in between. Keep in sync with numberRegEx in symbolsOrd.js
18
+
19
+ /**
20
+ * This file contains the parser used to parse out a TeX expression from the
21
+ * input. Since TeX isn't context-free, standard parsers don't work particularly
22
+ * well.
23
+ *
24
+ * The strategy of this parser is as such:
25
+ *
26
+ * The main functions (the `.parse...` ones) take a position in the current
27
+ * parse string to parse tokens from. The lexer (found in Lexer.js, stored at
28
+ * this.gullet.lexer) also supports pulling out tokens at arbitrary places. When
29
+ * individual tokens are needed at a position, the lexer is called to pull out a
30
+ * token, which is then used.
31
+ *
32
+ * The parser has a property called "mode" indicating the mode that
33
+ * the parser is currently in. Currently it has to be one of "math" or
34
+ * "text", which denotes whether the current environment is a math-y
35
+ * one or a text-y one (e.g. inside \text). Currently, this serves to
36
+ * limit the functions which can be used in text mode.
37
+ *
38
+ * The main functions then return an object which contains the useful data that
39
+ * was parsed at its given point, and a new position at the end of the parsed
40
+ * data. The main functions can call each other and continue the parsing by
41
+ * using the returned position as a new starting point.
42
+ *
43
+ * There are also extra `.handle...` functions, which pull out some reused
44
+ * functionality into self-contained functions.
45
+ *
46
+ * The functions return ParseNodes.
47
+ */
48
+
49
+ export default class Parser {
50
+ constructor(input, settings, isPreamble = false) {
51
+ // Start in math mode
52
+ this.mode = "math";
53
+ // Create a new macro expander (gullet) and (indirectly via that) also a
54
+ // new lexer (mouth) for this parser (stomach, in the language of TeX)
55
+ this.gullet = new MacroExpander(input, settings, this.mode);
56
+ // Store the settings for use in parsing
57
+ this.settings = settings;
58
+ // Are we defining a preamble?
59
+ this.isPreamble = isPreamble;
60
+ // Count leftright depth (for \middle errors)
61
+ this.leftrightDepth = 0;
62
+ this.prevAtomType = "";
63
+ }
64
+
65
+ /**
66
+ * Checks a result to make sure it has the right type, and throws an
67
+ * appropriate error otherwise.
68
+ */
69
+ expect(text, consume = true) {
70
+ if (this.fetch().text !== text) {
71
+ throw new ParseError(`Expected '${text}', got '${this.fetch().text}'`, this.fetch());
72
+ }
73
+ if (consume) {
74
+ this.consume();
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Discards the current lookahead token, considering it consumed.
80
+ */
81
+ consume() {
82
+ this.nextToken = null;
83
+ }
84
+
85
+ /**
86
+ * Return the current lookahead token, or if there isn't one (at the
87
+ * beginning, or if the previous lookahead token was consume()d),
88
+ * fetch the next token as the new lookahead token and return it.
89
+ */
90
+ fetch() {
91
+ if (this.nextToken == null) {
92
+ this.nextToken = this.gullet.expandNextToken();
93
+ }
94
+ return this.nextToken;
95
+ }
96
+
97
+ /**
98
+ * Switches between "text" and "math" modes.
99
+ */
100
+ switchMode(newMode) {
101
+ this.mode = newMode;
102
+ this.gullet.switchMode(newMode);
103
+ }
104
+
105
+ /**
106
+ * Main parsing function, which parses an entire input.
107
+ */
108
+ parse() {
109
+ // Create a group namespace for every $...$, $$...$$, \[...\].)
110
+ // A \def is then valid only within that pair of delimiters.
111
+ this.gullet.beginGroup();
112
+
113
+ if (this.settings.colorIsTextColor) {
114
+ // Use old \color behavior (same as LaTeX's \textcolor) if requested.
115
+ // We do this within the group for the math expression, so it doesn't
116
+ // pollute settings.macros.
117
+ this.gullet.macros.set("\\color", "\\textcolor");
118
+ }
119
+
120
+ // Try to parse the input
121
+ const parse = this.parseExpression(false);
122
+
123
+ // If we succeeded, make sure there's an EOF at the end
124
+ this.expect("EOF");
125
+
126
+ if (this.isPreamble) {
127
+ const macros = Object.create(null)
128
+ Object.entries(this.gullet.macros.current).forEach(([key, value]) => {
129
+ macros[key] = value
130
+ })
131
+ this.gullet.endGroup();
132
+ return macros
133
+ }
134
+
135
+ // The only local macro that we want to save is from \tag.
136
+ const tag = this.gullet.macros.get("\\df@tag")
137
+
138
+ // End the group namespace for the expression
139
+ this.gullet.endGroup();
140
+
141
+ if (tag) { this.gullet.macros.current["\\df@tag"] = tag }
142
+
143
+ return parse;
144
+ }
145
+
146
+ static get endOfExpression() {
147
+ return ["}", "\\endgroup", "\\end", "\\right", "\\endtoggle", "&"];
148
+ }
149
+
150
+ /**
151
+ * Fully parse a separate sequence of tokens as a separate job.
152
+ * Tokens should be specified in reverse order, as in a MacroDefinition.
153
+ */
154
+ subparse(tokens) {
155
+ // Save the next token from the current job.
156
+ const oldToken = this.nextToken;
157
+ this.consume();
158
+
159
+ // Run the new job, terminating it with an excess '}'
160
+ this.gullet.pushToken(new Token("}"));
161
+ this.gullet.pushTokens(tokens);
162
+ const parse = this.parseExpression(false);
163
+ this.expect("}");
164
+
165
+ // Restore the next token from the current job.
166
+ this.nextToken = oldToken;
167
+
168
+ return parse;
169
+ }
170
+
171
+ /**
172
+ * Parses an "expression", which is a list of atoms.
173
+ *
174
+ * `breakOnInfix`: Should the parsing stop when we hit infix nodes? This
175
+ * happens when functions have higher precendence han infix
176
+ * nodes in implicit parses.
177
+ *
178
+ * `breakOnTokenText`: The text of the token that the expression should end
179
+ * with, or `null` if something else should end the
180
+ * expression.
181
+ */
182
+ parseExpression(breakOnInfix, breakOnTokenText) {
183
+ const body = [];
184
+ // Keep adding atoms to the body until we can't parse any more atoms (either
185
+ // we reached the end, a }, or a \right)
186
+ while (true) {
187
+ // Ignore spaces in math mode
188
+ if (this.mode === "math") {
189
+ this.consumeSpaces();
190
+ }
191
+ const lex = this.fetch();
192
+ if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
193
+ break;
194
+ }
195
+ if (breakOnTokenText && lex.text === breakOnTokenText) {
196
+ break;
197
+ }
198
+ if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
199
+ break;
200
+ }
201
+ const atom = this.parseAtom(breakOnTokenText);
202
+ if (!atom) {
203
+ break;
204
+ } else if (atom.type === "internal") {
205
+ continue;
206
+ }
207
+ body.push(atom);
208
+ // Keep a record of the atom type, so that op.js can set correct spacing.
209
+ this.prevAtomType = atom.type === "atom" ? atom.family : atom.type;
210
+ }
211
+ if (this.mode === "text") {
212
+ this.formLigatures(body);
213
+ }
214
+ return this.handleInfixNodes(body);
215
+ }
216
+
217
+ /**
218
+ * Rewrites infix operators such as \over with corresponding commands such
219
+ * as \frac.
220
+ *
221
+ * There can only be one infix operator per group. If there's more than one
222
+ * then the expression is ambiguous. This can be resolved by adding {}.
223
+ */
224
+ handleInfixNodes(body) {
225
+ let overIndex = -1;
226
+ let funcName;
227
+
228
+ for (let i = 0; i < body.length; i++) {
229
+ if (body[i].type === "infix") {
230
+ if (overIndex !== -1) {
231
+ throw new ParseError("only one infix operator per group", body[i].token);
232
+ }
233
+ overIndex = i;
234
+ funcName = body[i].replaceWith;
235
+ }
236
+ }
237
+
238
+ if (overIndex !== -1 && funcName) {
239
+ let numerNode;
240
+ let denomNode;
241
+
242
+ const numerBody = body.slice(0, overIndex);
243
+ const denomBody = body.slice(overIndex + 1);
244
+
245
+ if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
246
+ numerNode = numerBody[0];
247
+ } else {
248
+ numerNode = { type: "ordgroup", mode: this.mode, body: numerBody };
249
+ }
250
+
251
+ if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
252
+ denomNode = denomBody[0];
253
+ } else {
254
+ denomNode = { type: "ordgroup", mode: this.mode, body: denomBody };
255
+ }
256
+
257
+ let node;
258
+ if (funcName === "\\\\abovefrac") {
259
+ node = this.callFunction(funcName, [numerNode, body[overIndex], denomNode], []);
260
+ } else {
261
+ node = this.callFunction(funcName, [numerNode, denomNode], []);
262
+ }
263
+ return [node];
264
+ } else {
265
+ return body;
266
+ }
267
+ }
268
+
269
+ /**
270
+ * Handle a subscript or superscript with nice errors.
271
+ */
272
+ handleSupSubscript(
273
+ name // For error reporting.
274
+ ) {
275
+ const symbolToken = this.fetch();
276
+ const symbol = symbolToken.text;
277
+ this.consume();
278
+ this.consumeSpaces(); // ignore spaces before sup/subscript argument
279
+ const group = this.parseGroup(name);
280
+
281
+ if (!group) {
282
+ throw new ParseError("Expected group after '" + symbol + "'", symbolToken);
283
+ }
284
+
285
+ return group;
286
+ }
287
+
288
+ /**
289
+ * Converts the textual input of an unsupported command into a text node
290
+ * contained within a color node whose color is determined by errorColor
291
+ */
292
+ formatUnsupportedCmd(text) {
293
+ const textordArray = [];
294
+
295
+ for (let i = 0; i < text.length; i++) {
296
+ textordArray.push({ type: "textord", mode: "text", text: text[i] });
297
+ }
298
+
299
+ const textNode = {
300
+ type: "text",
301
+ mode: this.mode,
302
+ body: textordArray
303
+ };
304
+
305
+ const colorNode = {
306
+ type: "color",
307
+ mode: this.mode,
308
+ color: this.settings.errorColor,
309
+ body: [textNode]
310
+ };
311
+
312
+ return colorNode;
313
+ }
314
+
315
+ /**
316
+ * Parses a group with optional super/subscripts.
317
+ */
318
+ parseAtom(breakOnTokenText) {
319
+ // The body of an atom is an implicit group, so that things like
320
+ // \left(x\right)^2 work correctly.
321
+ const base = this.parseGroup("atom", breakOnTokenText);
322
+
323
+ // In text mode, we don't have superscripts or subscripts
324
+ if (this.mode === "text") {
325
+ return base;
326
+ }
327
+
328
+ // Note that base may be empty (i.e. null) at this point.
329
+
330
+ let superscript;
331
+ let subscript;
332
+ while (true) {
333
+ // Guaranteed in math mode, so eat any spaces first.
334
+ this.consumeSpaces();
335
+
336
+ // Lex the first token
337
+ const lex = this.fetch();
338
+
339
+ if (lex.text === "\\limits" || lex.text === "\\nolimits") {
340
+ // We got a limit control
341
+ if (base && base.type === "op") {
342
+ const limits = lex.text === "\\limits";
343
+ base.limits = limits;
344
+ base.alwaysHandleSupSub = true;
345
+ } else if (base && base.type === "operatorname") {
346
+ if (base.alwaysHandleSupSub) {
347
+ base.limits = lex.text === "\\limits"
348
+ }
349
+ } else {
350
+ throw new ParseError("Limit controls must follow a math operator", lex);
351
+ }
352
+ this.consume();
353
+ } else if (lex.text === "^") {
354
+ // We got a superscript start
355
+ if (superscript) {
356
+ throw new ParseError("Double superscript", lex);
357
+ }
358
+ superscript = this.handleSupSubscript("superscript");
359
+ } else if (lex.text === "_") {
360
+ // We got a subscript start
361
+ if (subscript) {
362
+ throw new ParseError("Double subscript", lex);
363
+ }
364
+ subscript = this.handleSupSubscript("subscript");
365
+ } else if (lex.text === "'") {
366
+ // We got a prime
367
+ if (superscript) {
368
+ throw new ParseError("Double superscript", lex);
369
+ }
370
+ const prime = { type: "textord", mode: this.mode, text: "\\prime" };
371
+
372
+ // Many primes can be grouped together, so we handle this here
373
+ const primes = [prime];
374
+ this.consume();
375
+ // Keep lexing tokens until we get something that's not a prime
376
+ while (this.fetch().text === "'") {
377
+ // For each one, add another prime to the list
378
+ primes.push(prime);
379
+ this.consume();
380
+ }
381
+ // If there's a superscript following the primes, combine that
382
+ // superscript in with the primes.
383
+ if (this.fetch().text === "^") {
384
+ primes.push(this.handleSupSubscript("superscript"));
385
+ }
386
+ // Put everything into an ordgroup as the superscript
387
+ superscript = { type: "ordgroup", mode: this.mode, body: primes };
388
+ } else if (uSubsAndSups[lex.text]) {
389
+ // A Unicode subscript or superscript character.
390
+ // We treat these similarly to the unicode-math package.
391
+ // So we render a string of Unicode (sub|super)scripts the
392
+ // same as a (sub|super)script of regular characters.
393
+ const isSub = unicodeSubRegEx.test(lex.text)
394
+ const subsupTokens = [];
395
+ subsupTokens.push(new Token(uSubsAndSups[lex.text]))
396
+ this.consume()
397
+ // Continue fetching tokens to fill out the group.
398
+ while (true) {
399
+ const token = this.fetch().text
400
+ if (!(uSubsAndSups[token])) { break }
401
+ if (unicodeSubRegEx.test(token) !== isSub) { break }
402
+ subsupTokens.unshift(new Token(uSubsAndSups[token]))
403
+ this.consume()
404
+ }
405
+ // Now create a (sub|super)script.
406
+ const body = this.subparse(subsupTokens)
407
+ if (isSub) {
408
+ subscript = { type: "ordgroup", mode: "math", body }
409
+ } else {
410
+ superscript = { type: "ordgroup", mode: "math", body }
411
+ }
412
+ } else {
413
+ // If it wasn't ^, _, a Unicode (sub|super)script, or ', stop parsing super/subscripts
414
+ break;
415
+ }
416
+ }
417
+
418
+ if (superscript || subscript) {
419
+ if (base && base.type === "multiscript" && !base.postscripts) {
420
+ // base is the result of a \prescript function.
421
+ // Write the sub- & superscripts into the multiscript element.
422
+ base.postscripts = { sup: superscript, sub: subscript }
423
+ return base
424
+ } else {
425
+ // We got either a superscript or subscript, create a supsub
426
+ return {
427
+ type: "supsub",
428
+ mode: this.mode,
429
+ base: base,
430
+ sup: superscript,
431
+ sub: subscript
432
+ }
433
+ }
434
+ } else {
435
+ // Otherwise return the original body
436
+ return base;
437
+ }
438
+ }
439
+
440
+ /**
441
+ * Parses an entire function, including its base and all of its arguments.
442
+ */
443
+ parseFunction(
444
+ breakOnTokenText,
445
+ name // For determining its context
446
+ ) {
447
+ const token = this.fetch();
448
+ const func = token.text;
449
+ const funcData = functions[func];
450
+ if (!funcData) {
451
+ return null;
452
+ }
453
+ this.consume(); // consume command token
454
+
455
+ if (name && name !== "atom" && !funcData.allowedInArgument) {
456
+ throw new ParseError(
457
+ "Got function '" + func + "' with no arguments" + (name ? " as " + name : ""),
458
+ token
459
+ );
460
+ } else if (this.mode === "text" && !funcData.allowedInText) {
461
+ throw new ParseError("Can't use function '" + func + "' in text mode", token);
462
+ } else if (this.mode === "math" && funcData.allowedInMath === false) {
463
+ throw new ParseError("Can't use function '" + func + "' in math mode", token);
464
+ }
465
+
466
+ const prevAtomType = this.prevAtomType;
467
+ const { args, optArgs } = this.parseArguments(func, funcData);
468
+ this.prevAtomType = prevAtomType;
469
+ return this.callFunction(func, args, optArgs, token, breakOnTokenText);
470
+ }
471
+
472
+ /**
473
+ * Call a function handler with a suitable context and arguments.
474
+ */
475
+ callFunction(name, args, optArgs, token, breakOnTokenText) {
476
+ const context = {
477
+ funcName: name,
478
+ parser: this,
479
+ token,
480
+ breakOnTokenText
481
+ };
482
+ const func = functions[name];
483
+ if (func && func.handler) {
484
+ return func.handler(context, args, optArgs);
485
+ } else {
486
+ throw new ParseError(`No function handler for ${name}`);
487
+ }
488
+ }
489
+
490
+ /**
491
+ * Parses the arguments of a function or environment
492
+ */
493
+ parseArguments(
494
+ func, // Should look like "\name" or "\begin{name}".
495
+ funcData
496
+ ) {
497
+ const totalArgs = funcData.numArgs + funcData.numOptionalArgs;
498
+ if (totalArgs === 0) {
499
+ return { args: [], optArgs: [] };
500
+ }
501
+
502
+ const args = [];
503
+ const optArgs = [];
504
+
505
+ for (let i = 0; i < totalArgs; i++) {
506
+ let argType = funcData.argTypes && funcData.argTypes[i];
507
+ const isOptional = i < funcData.numOptionalArgs;
508
+
509
+ if (
510
+ (funcData.primitive && argType == null) ||
511
+ // \sqrt expands into primitive if optional argument doesn't exist
512
+ (funcData.type === "sqrt" && i === 1 && optArgs[0] == null)
513
+ ) {
514
+ argType = "primitive";
515
+ }
516
+
517
+ const arg = this.parseGroupOfType(`argument to '${func}'`, argType, isOptional);
518
+ if (isOptional) {
519
+ optArgs.push(arg);
520
+ } else if (arg != null) {
521
+ args.push(arg);
522
+ } else {
523
+ // should be unreachable
524
+ throw new ParseError("Null argument, please report this as a bug");
525
+ }
526
+ }
527
+
528
+ return { args, optArgs };
529
+ }
530
+
531
+ /**
532
+ * Parses a group when the mode is changing.
533
+ */
534
+ parseGroupOfType(name, type, optional) {
535
+ switch (type) {
536
+ case "size":
537
+ return this.parseSizeGroup(optional);
538
+ case "url":
539
+ return this.parseUrlGroup(optional);
540
+ case "math":
541
+ case "text":
542
+ return this.parseArgumentGroup(optional, type);
543
+ case "hbox": {
544
+ // hbox argument type wraps the argument in the equivalent of
545
+ // \hbox, which is like \text but switching to \textstyle size.
546
+ const group = this.parseArgumentGroup(optional, "text");
547
+ return group != null
548
+ ? {
549
+ type: "styling",
550
+ mode: group.mode,
551
+ body: [group],
552
+ scriptLevel: "text" // simulate \textstyle
553
+ }
554
+ : null;
555
+ }
556
+ case "raw": {
557
+ const token = this.parseStringGroup("raw", optional);
558
+ return token != null
559
+ ? {
560
+ type: "raw",
561
+ mode: "text",
562
+ string: token.text
563
+ }
564
+ : null;
565
+ }
566
+ case "primitive": {
567
+ if (optional) {
568
+ throw new ParseError("A primitive argument cannot be optional");
569
+ }
570
+ const group = this.parseGroup(name);
571
+ if (group == null) {
572
+ throw new ParseError("Expected group as " + name, this.fetch());
573
+ }
574
+ return group;
575
+ }
576
+ case "original":
577
+ case null:
578
+ case undefined:
579
+ return this.parseArgumentGroup(optional);
580
+ default:
581
+ throw new ParseError("Unknown group type as " + name, this.fetch());
582
+ }
583
+ }
584
+
585
+ /**
586
+ * Discard any space tokens, fetching the next non-space token.
587
+ */
588
+ consumeSpaces() {
589
+ while (true) {
590
+ const ch = this.fetch().text
591
+ // \ufe0e is the Unicode variation selector to supress emoji. Ignore it.
592
+ if (ch === " " || ch === "\ufe0e") {
593
+ this.consume()
594
+ } else {
595
+ break
596
+ }
597
+ }
598
+ }
599
+
600
+ /**
601
+ * Parses a group, essentially returning the string formed by the
602
+ * brace-enclosed tokens plus some position information.
603
+ */
604
+ parseStringGroup(
605
+ modeName, // Used to describe the mode in error messages.
606
+ optional
607
+ ) {
608
+ const argToken = this.gullet.scanArgument(optional);
609
+ if (argToken == null) {
610
+ return null;
611
+ }
612
+ let str = "";
613
+ let nextToken;
614
+ while ((nextToken = this.fetch()).text !== "EOF") {
615
+ str += nextToken.text;
616
+ this.consume();
617
+ }
618
+ this.consume(); // consume the end of the argument
619
+ argToken.text = str;
620
+ return argToken;
621
+ }
622
+
623
+ /**
624
+ * Parses a regex-delimited group: the largest sequence of tokens
625
+ * whose concatenated strings match `regex`. Returns the string
626
+ * formed by the tokens plus some position information.
627
+ */
628
+ parseRegexGroup(
629
+ regex,
630
+ modeName // Used to describe the mode in error messages.
631
+ ) {
632
+ const firstToken = this.fetch();
633
+ let lastToken = firstToken;
634
+ let str = "";
635
+ let nextToken;
636
+ while ((nextToken = this.fetch()).text !== "EOF" && regex.test(str + nextToken.text)) {
637
+ lastToken = nextToken;
638
+ str += lastToken.text;
639
+ this.consume();
640
+ }
641
+ if (str === "") {
642
+ throw new ParseError("Invalid " + modeName + ": '" + firstToken.text + "'", firstToken);
643
+ }
644
+ return firstToken.range(lastToken, str);
645
+ }
646
+
647
+ /**
648
+ * Parses a size specification, consisting of magnitude and unit.
649
+ */
650
+ parseSizeGroup(optional) {
651
+ let res;
652
+ let isBlank = false;
653
+ // don't expand before parseStringGroup
654
+ this.gullet.consumeSpaces();
655
+ if (!optional && this.gullet.future().text !== "{") {
656
+ res = this.parseRegexGroup(/^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$/, "size");
657
+ } else {
658
+ res = this.parseStringGroup("size", optional);
659
+ }
660
+ if (!res) {
661
+ return null;
662
+ }
663
+ if (!optional && res.text.length === 0) {
664
+ // Because we've tested for what is !optional, this block won't
665
+ // affect \kern, \hspace, etc. It will capture the mandatory arguments
666
+ // to \genfrac and \above.
667
+ res.text = "0pt"; // Enable \above{}
668
+ isBlank = true; // This is here specifically for \genfrac
669
+ }
670
+ const match = /([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/.exec(res.text);
671
+ if (!match) {
672
+ throw new ParseError("Invalid size: '" + res.text + "'", res);
673
+ }
674
+ const data = {
675
+ number: +(match[1] + match[2]), // sign + magnitude, cast to number
676
+ unit: match[3]
677
+ };
678
+ if (!validUnit(data)) {
679
+ throw new ParseError("Invalid unit: '" + data.unit + "'", res);
680
+ }
681
+ return {
682
+ type: "size",
683
+ mode: this.mode,
684
+ value: data,
685
+ isBlank
686
+ };
687
+ }
688
+
689
+ /**
690
+ * Parses an URL, checking escaped letters and allowed protocols,
691
+ * and setting the catcode of % as an active character (as in \hyperref).
692
+ */
693
+ parseUrlGroup(optional) {
694
+ this.gullet.lexer.setCatcode("%", 13); // active character
695
+ this.gullet.lexer.setCatcode("~", 12); // other character
696
+ const res = this.parseStringGroup("url", optional);
697
+ this.gullet.lexer.setCatcode("%", 14); // comment character
698
+ this.gullet.lexer.setCatcode("~", 13); // active character
699
+ if (res == null) {
700
+ return null;
701
+ }
702
+ // hyperref package allows backslashes alone in href, but doesn't
703
+ // generate valid links in such cases; we interpret this as
704
+ // "undefined" behaviour, and keep them as-is. Some browser will
705
+ // replace backslashes with forward slashes.
706
+ let url = res.text.replace(/\\([#$%&~_^{}])/g, "$1");
707
+ url = res.text.replace(/{\u2044}/g, "/");
708
+ return {
709
+ type: "url",
710
+ mode: this.mode,
711
+ url
712
+ };
713
+ }
714
+
715
+ /**
716
+ * Parses an argument with the mode specified.
717
+ */
718
+ parseArgumentGroup(optional, mode) {
719
+ const argToken = this.gullet.scanArgument(optional);
720
+ if (argToken == null) {
721
+ return null;
722
+ }
723
+ const outerMode = this.mode;
724
+ if (mode) {
725
+ // Switch to specified mode
726
+ this.switchMode(mode);
727
+ }
728
+
729
+ this.gullet.beginGroup();
730
+ const expression = this.parseExpression(false, "EOF");
731
+ // TODO: find an alternative way to denote the end
732
+ this.expect("EOF"); // expect the end of the argument
733
+ this.gullet.endGroup();
734
+ const result = {
735
+ type: "ordgroup",
736
+ mode: this.mode,
737
+ loc: argToken.loc,
738
+ body: expression
739
+ };
740
+
741
+ if (mode) {
742
+ // Switch mode back
743
+ this.switchMode(outerMode);
744
+ }
745
+ return result;
746
+ }
747
+
748
+ /**
749
+ * Parses an ordinary group, which is either a single nucleus (like "x")
750
+ * or an expression in braces (like "{x+y}") or an implicit group, a group
751
+ * that starts at the current position, and ends right before a higher explicit
752
+ * group ends, or at EOF.
753
+ */
754
+ parseGroup(
755
+ name, // For error reporting.
756
+ breakOnTokenText
757
+ ) {
758
+ const firstToken = this.fetch();
759
+ const text = firstToken.text;
760
+
761
+ let result;
762
+ // Try to parse an open brace or \begingroup
763
+ if (text === "{" || text === "\\begingroup" || text === "\\toggle") {
764
+ this.consume();
765
+ const groupEnd = text === "{"
766
+ ? "}"
767
+ : text === "\\begingroup"
768
+ ? "\\endgroup"
769
+ : "\\endtoggle"
770
+
771
+ this.gullet.beginGroup();
772
+ // If we get a brace, parse an expression
773
+ const expression = this.parseExpression(false, groupEnd);
774
+ const lastToken = this.fetch();
775
+ this.expect(groupEnd); // Check that we got a matching closing brace
776
+ this.gullet.endGroup();
777
+ result = {
778
+ type: (lastToken.text === "\\endtoggle" ? "toggle" : "ordgroup"),
779
+ mode: this.mode,
780
+ loc: SourceLocation.range(firstToken, lastToken),
781
+ body: expression,
782
+ // A group formed by \begingroup...\endgroup is a semi-simple group
783
+ // which doesn't affect spacing in math mode, i.e., is transparent.
784
+ // https://tex.stackexchange.com/questions/1930/when-should-one-
785
+ // use-begingroup-instead-of-bgroup
786
+ semisimple: text === "\\begingroup" || undefined
787
+ };
788
+ } else {
789
+ // If there exists a function with this name, parse the function.
790
+ // Otherwise, just return a nucleus
791
+ result = this.parseFunction(breakOnTokenText, name) || this.parseSymbol();
792
+ if (result == null && text[0] === "\\" &&
793
+ !Object.prototype.hasOwnProperty.call(implicitCommands, text )) {
794
+ result = this.formatUnsupportedCmd(text);
795
+ this.consume();
796
+ }
797
+ }
798
+ return result;
799
+ }
800
+
801
+ /**
802
+ * Form ligature-like combinations of characters for text mode.
803
+ * This includes inputs like "--", "---", "``" and "''".
804
+ * The result will simply replace multiple textord nodes with a single
805
+ * character in each value by a single textord node having multiple
806
+ * characters in its value. The representation is still ASCII source.
807
+ * The group will be modified in place.
808
+ */
809
+ formLigatures(group) {
810
+ let n = group.length - 1;
811
+ for (let i = 0; i < n; ++i) {
812
+ const a = group[i];
813
+ const v = a.text;
814
+ if (v === "-" && group[i + 1].text === "-") {
815
+ if (i + 1 < n && group[i + 2].text === "-") {
816
+ group.splice(i, 3, {
817
+ type: "textord",
818
+ mode: "text",
819
+ loc: SourceLocation.range(a, group[i + 2]),
820
+ text: "---"
821
+ });
822
+ n -= 2;
823
+ } else {
824
+ group.splice(i, 2, {
825
+ type: "textord",
826
+ mode: "text",
827
+ loc: SourceLocation.range(a, group[i + 1]),
828
+ text: "--"
829
+ });
830
+ n -= 1;
831
+ }
832
+ }
833
+ if ((v === "'" || v === "`") && group[i + 1].text === v) {
834
+ group.splice(i, 2, {
835
+ type: "textord",
836
+ mode: "text",
837
+ loc: SourceLocation.range(a, group[i + 1]),
838
+ text: v + v
839
+ });
840
+ n -= 1;
841
+ }
842
+ }
843
+ }
844
+
845
+ /**
846
+ * Parse a single symbol out of the string. Here, we handle single character
847
+ * symbols and special functions like \verb.
848
+ */
849
+ parseSymbol() {
850
+ const nucleus = this.fetch();
851
+ let text = nucleus.text;
852
+
853
+ if (/^\\verb[^a-zA-Z]/.test(text)) {
854
+ this.consume();
855
+ let arg = text.slice(5);
856
+ const star = arg.charAt(0) === "*";
857
+ if (star) {
858
+ arg = arg.slice(1);
859
+ }
860
+ // Lexer's tokenRegex is constructed to always have matching
861
+ // first/last characters.
862
+ if (arg.length < 2 || arg.charAt(0) !== arg.slice(-1)) {
863
+ throw new ParseError(`\\verb assertion failed --
864
+ please report what input caused this bug`);
865
+ }
866
+ arg = arg.slice(1, -1); // remove first and last char
867
+ return {
868
+ type: "verb",
869
+ mode: "text",
870
+ body: arg,
871
+ star
872
+ };
873
+ }
874
+ // At this point, we should have a symbol, possibly with accents.
875
+ // First expand any accented base symbol according to unicodeSymbols.
876
+ if (Object.prototype.hasOwnProperty.call(unicodeSymbols, text[0]) &&
877
+ !symbols[this.mode][text[0]]) {
878
+ // This behavior is not strict (XeTeX-compatible) in math mode.
879
+ if (this.settings.strict && this.mode === "math") {
880
+ throw new ParseError(`Accented Unicode text character "${text[0]}" used in ` + `math mode`,
881
+ nucleus
882
+ );
883
+ }
884
+ text = unicodeSymbols[text[0]] + text.slice(1);
885
+ }
886
+ // Strip off any combining characters
887
+ const match = combiningDiacriticalMarksEndRegex.exec(text);
888
+ if (match) {
889
+ text = text.substring(0, match.index);
890
+ if (text === "i") {
891
+ text = "\u0131"; // dotless i, in math and text mode
892
+ } else if (text === "j") {
893
+ text = "\u0237"; // dotless j, in math and text mode
894
+ }
895
+ }
896
+ // Recognize base symbol
897
+ let symbol;
898
+ if (symbols[this.mode][text]) {
899
+ const group = symbols[this.mode][text].group;
900
+ const loc = SourceLocation.range(nucleus);
901
+ let s;
902
+ if (Object.prototype.hasOwnProperty.call(ATOMS, group )) {
903
+ const family = group;
904
+ s = {
905
+ type: "atom",
906
+ mode: this.mode,
907
+ family,
908
+ loc,
909
+ text
910
+ };
911
+ } else {
912
+ s = {
913
+ type: group,
914
+ mode: this.mode,
915
+ loc,
916
+ text
917
+ };
918
+ }
919
+ symbol = s;
920
+ } else if (!this.strict && numberRegEx.test(text)) {
921
+ // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
922
+ this.consume()
923
+ return {
924
+ type: "textord",
925
+ mode: this.mode,
926
+ loc: SourceLocation.range(nucleus),
927
+ text
928
+ }
929
+ } else if (text.charCodeAt(0) >= 0x80) {
930
+ // no symbol for e.g. ^
931
+ if (this.settings.strict) {
932
+ if (!supportedCodepoint(text.charCodeAt(0))) {
933
+ throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
934
+ ` (${text.charCodeAt(0)})`, nucleus);
935
+ } else if (this.mode === "math") {
936
+ throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
937
+ }
938
+ }
939
+ // All nonmathematical Unicode characters are rendered as if they
940
+ // are in text mode (wrapped in \text) because that's what it
941
+ // takes to render them in LaTeX.
942
+ symbol = {
943
+ type: "textord",
944
+ mode: "text",
945
+ loc: SourceLocation.range(nucleus),
946
+ text
947
+ };
948
+ } else {
949
+ return null; // EOF, ^, _, {, }, etc.
950
+ }
951
+ this.consume();
952
+ // Transform combining characters into accents
953
+ if (match) {
954
+ for (let i = 0; i < match[0].length; i++) {
955
+ const accent = match[0][i];
956
+ if (!unicodeAccents[accent]) {
957
+ throw new ParseError(`Unknown accent ' ${accent}'`, nucleus);
958
+ }
959
+ const command = unicodeAccents[accent][this.mode] ||
960
+ unicodeAccents[accent].text;
961
+ if (!command) {
962
+ throw new ParseError(`Accent ${accent} unsupported in ${this.mode} mode`, nucleus);
963
+ }
964
+ symbol = {
965
+ type: "accent",
966
+ mode: this.mode,
967
+ loc: SourceLocation.range(nucleus),
968
+ label: command,
969
+ isStretchy: false,
970
+ isShifty: true,
971
+ base: symbol
972
+ };
973
+ }
974
+ }
975
+ return symbol;
976
+ }
977
+ }