temml 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +44 -0
  3. package/contrib/auto-render/README.md +89 -0
  4. package/contrib/auto-render/auto-render.js +128 -0
  5. package/contrib/auto-render/dist/auto-render.js +217 -0
  6. package/contrib/auto-render/dist/auto-render.min.js +1 -0
  7. package/contrib/auto-render/splitAtDelimiters.js +84 -0
  8. package/contrib/auto-render/test/auto-render-spec.js +234 -0
  9. package/contrib/auto-render/test/auto-render.js +217 -0
  10. package/contrib/auto-render/test/test_page.html +59 -0
  11. package/contrib/mhchem/README.md +26 -0
  12. package/contrib/mhchem/mhchem.js +1705 -0
  13. package/contrib/mhchem/mhchem.min.js +1 -0
  14. package/contrib/physics/README.md +20 -0
  15. package/contrib/physics/physics.js +131 -0
  16. package/contrib/texvc/README.md +23 -0
  17. package/contrib/texvc/texvc.js +61 -0
  18. package/dist/Temml-Asana.css +201 -0
  19. package/dist/Temml-Latin-Modern.css +216 -0
  20. package/dist/Temml-Libertinus.css +214 -0
  21. package/dist/Temml-Local.css +194 -0
  22. package/dist/Temml-STIX2.css +203 -0
  23. package/dist/Temml.woff2 +0 -0
  24. package/dist/temml.cjs +13122 -0
  25. package/dist/temml.js +11225 -0
  26. package/dist/temml.min.js +1 -0
  27. package/dist/temml.mjs +13120 -0
  28. package/dist/temmlPostProcess.js +70 -0
  29. package/package.json +34 -0
  30. package/src/Lexer.js +121 -0
  31. package/src/MacroExpander.js +437 -0
  32. package/src/Namespace.js +107 -0
  33. package/src/ParseError.js +64 -0
  34. package/src/Parser.js +977 -0
  35. package/src/Settings.js +49 -0
  36. package/src/SourceLocation.js +29 -0
  37. package/src/Style.js +144 -0
  38. package/src/Token.js +40 -0
  39. package/src/buildMathML.js +235 -0
  40. package/src/constants.js +25 -0
  41. package/src/defineEnvironment.js +25 -0
  42. package/src/defineFunction.js +69 -0
  43. package/src/defineMacro.js +11 -0
  44. package/src/domTree.js +185 -0
  45. package/src/environments/array.js +791 -0
  46. package/src/environments/cd.js +252 -0
  47. package/src/environments.js +8 -0
  48. package/src/functions/accent.js +127 -0
  49. package/src/functions/accentunder.js +38 -0
  50. package/src/functions/arrow.js +204 -0
  51. package/src/functions/cancelto.js +36 -0
  52. package/src/functions/char.js +33 -0
  53. package/src/functions/color.js +253 -0
  54. package/src/functions/cr.js +46 -0
  55. package/src/functions/def.js +259 -0
  56. package/src/functions/delimsizing.js +304 -0
  57. package/src/functions/enclose.js +193 -0
  58. package/src/functions/envTag.js +38 -0
  59. package/src/functions/environment.js +59 -0
  60. package/src/functions/font.js +123 -0
  61. package/src/functions/genfrac.js +333 -0
  62. package/src/functions/hbox.js +29 -0
  63. package/src/functions/horizBrace.js +32 -0
  64. package/src/functions/href.js +90 -0
  65. package/src/functions/html.js +95 -0
  66. package/src/functions/includegraphics.js +131 -0
  67. package/src/functions/kern.js +75 -0
  68. package/src/functions/label.js +29 -0
  69. package/src/functions/lap.js +75 -0
  70. package/src/functions/math.js +40 -0
  71. package/src/functions/mathchoice.js +41 -0
  72. package/src/functions/mclass.js +201 -0
  73. package/src/functions/multiscript.js +91 -0
  74. package/src/functions/not.js +46 -0
  75. package/src/functions/op.js +338 -0
  76. package/src/functions/operatorname.js +139 -0
  77. package/src/functions/ordgroup.js +9 -0
  78. package/src/functions/phantom.js +73 -0
  79. package/src/functions/pmb.js +31 -0
  80. package/src/functions/raise.js +68 -0
  81. package/src/functions/ref.js +28 -0
  82. package/src/functions/relax.js +16 -0
  83. package/src/functions/rule.js +52 -0
  84. package/src/functions/sizing.js +64 -0
  85. package/src/functions/smash.js +66 -0
  86. package/src/functions/sqrt.js +31 -0
  87. package/src/functions/styling.js +58 -0
  88. package/src/functions/supsub.js +135 -0
  89. package/src/functions/symbolsOp.js +53 -0
  90. package/src/functions/symbolsOrd.js +102 -0
  91. package/src/functions/symbolsSpacing.js +53 -0
  92. package/src/functions/tag.js +8 -0
  93. package/src/functions/text.js +75 -0
  94. package/src/functions/tip.js +63 -0
  95. package/src/functions/toggle.js +13 -0
  96. package/src/functions/verb.js +33 -0
  97. package/src/functions.js +57 -0
  98. package/src/linebreaking.js +159 -0
  99. package/src/macros.js +708 -0
  100. package/src/mathMLTree.js +175 -0
  101. package/src/parseNode.js +42 -0
  102. package/src/parseTree.js +40 -0
  103. package/src/postProcess.js +57 -0
  104. package/src/replace.js +225 -0
  105. package/src/stretchy.js +66 -0
  106. package/src/svg.js +110 -0
  107. package/src/symbols.js +972 -0
  108. package/src/tree.js +50 -0
  109. package/src/unicodeAccents.js +16 -0
  110. package/src/unicodeScripts.js +119 -0
  111. package/src/unicodeSupOrSub.js +108 -0
  112. package/src/unicodeSymbolBuilder.js +31 -0
  113. package/src/unicodeSymbols.js +320 -0
  114. package/src/units.js +109 -0
  115. package/src/utils.js +109 -0
  116. package/src/variant.js +103 -0
  117. package/temml.js +181 -0
package/src/Parser.js ADDED
@@ -0,0 +1,977 @@
1
+ /* eslint no-constant-condition:0 */
2
+ import functions from "./functions";
3
+ import MacroExpander, { implicitCommands } from "./MacroExpander";
4
+ import symbols, { ATOMS } from "./symbols";
5
+ import { validUnit } from "./units";
6
+ import { supportedCodepoint } from "./unicodeScripts";
7
+ import ParseError from "./ParseError";
8
+ import { combiningDiacriticalMarksEndRegex } from "./Lexer";
9
+ import { uSubsAndSups, unicodeSubRegEx } from "./unicodeSupOrSub"
10
+ import SourceLocation from "./SourceLocation";
11
+ import { Token } from "./Token";
12
+
13
+ // Pre-evaluate both modules as unicodeSymbols require String.normalize()
14
+ import unicodeAccents from /*preval*/ "./unicodeAccents";
15
+ import unicodeSymbols from /*preval*/ "./unicodeSymbols";
16
+
17
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in symbolsOrd.js
18
+
19
+ /**
20
+ * This file contains the parser used to parse out a TeX expression from the
21
+ * input. Since TeX isn't context-free, standard parsers don't work particularly
22
+ * well.
23
+ *
24
+ * The strategy of this parser is as such:
25
+ *
26
+ * The main functions (the `.parse...` ones) take a position in the current
27
+ * parse string to parse tokens from. The lexer (found in Lexer.js, stored at
28
+ * this.gullet.lexer) also supports pulling out tokens at arbitrary places. When
29
+ * individual tokens are needed at a position, the lexer is called to pull out a
30
+ * token, which is then used.
31
+ *
32
+ * The parser has a property called "mode" indicating the mode that
33
+ * the parser is currently in. Currently it has to be one of "math" or
34
+ * "text", which denotes whether the current environment is a math-y
35
+ * one or a text-y one (e.g. inside \text). Currently, this serves to
36
+ * limit the functions which can be used in text mode.
37
+ *
38
+ * The main functions then return an object which contains the useful data that
39
+ * was parsed at its given point, and a new position at the end of the parsed
40
+ * data. The main functions can call each other and continue the parsing by
41
+ * using the returned position as a new starting point.
42
+ *
43
+ * There are also extra `.handle...` functions, which pull out some reused
44
+ * functionality into self-contained functions.
45
+ *
46
+ * The functions return ParseNodes.
47
+ */
48
+
49
+ export default class Parser {
50
+ constructor(input, settings, isPreamble = false) {
51
+ // Start in math mode
52
+ this.mode = "math";
53
+ // Create a new macro expander (gullet) and (indirectly via that) also a
54
+ // new lexer (mouth) for this parser (stomach, in the language of TeX)
55
+ this.gullet = new MacroExpander(input, settings, this.mode);
56
+ // Store the settings for use in parsing
57
+ this.settings = settings;
58
+ // Are we defining a preamble?
59
+ this.isPreamble = isPreamble;
60
+ // Count leftright depth (for \middle errors)
61
+ this.leftrightDepth = 0;
62
+ this.prevAtomType = "";
63
+ }
64
+
65
+ /**
66
+ * Checks a result to make sure it has the right type, and throws an
67
+ * appropriate error otherwise.
68
+ */
69
+ expect(text, consume = true) {
70
+ if (this.fetch().text !== text) {
71
+ throw new ParseError(`Expected '${text}', got '${this.fetch().text}'`, this.fetch());
72
+ }
73
+ if (consume) {
74
+ this.consume();
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Discards the current lookahead token, considering it consumed.
80
+ */
81
+ consume() {
82
+ this.nextToken = null;
83
+ }
84
+
85
+ /**
86
+ * Return the current lookahead token, or if there isn't one (at the
87
+ * beginning, or if the previous lookahead token was consume()d),
88
+ * fetch the next token as the new lookahead token and return it.
89
+ */
90
+ fetch() {
91
+ if (this.nextToken == null) {
92
+ this.nextToken = this.gullet.expandNextToken();
93
+ }
94
+ return this.nextToken;
95
+ }
96
+
97
+ /**
98
+ * Switches between "text" and "math" modes.
99
+ */
100
+ switchMode(newMode) {
101
+ this.mode = newMode;
102
+ this.gullet.switchMode(newMode);
103
+ }
104
+
105
+ /**
106
+ * Main parsing function, which parses an entire input.
107
+ */
108
+ parse() {
109
+ // Create a group namespace for every $...$, $$...$$, \[...\].)
110
+ // A \def is then valid only within that pair of delimiters.
111
+ this.gullet.beginGroup();
112
+
113
+ if (this.settings.colorIsTextColor) {
114
+ // Use old \color behavior (same as LaTeX's \textcolor) if requested.
115
+ // We do this within the group for the math expression, so it doesn't
116
+ // pollute settings.macros.
117
+ this.gullet.macros.set("\\color", "\\textcolor");
118
+ }
119
+
120
+ // Try to parse the input
121
+ const parse = this.parseExpression(false);
122
+
123
+ // If we succeeded, make sure there's an EOF at the end
124
+ this.expect("EOF");
125
+
126
+ if (this.isPreamble) {
127
+ const macros = Object.create(null)
128
+ Object.entries(this.gullet.macros.current).forEach(([key, value]) => {
129
+ macros[key] = value
130
+ })
131
+ this.gullet.endGroup();
132
+ return macros
133
+ }
134
+
135
+ // The only local macro that we want to save is from \tag.
136
+ const tag = this.gullet.macros.get("\\df@tag")
137
+
138
+ // End the group namespace for the expression
139
+ this.gullet.endGroup();
140
+
141
+ if (tag) { this.gullet.macros.current["\\df@tag"] = tag }
142
+
143
+ return parse;
144
+ }
145
+
146
+ static get endOfExpression() {
147
+ return ["}", "\\endgroup", "\\end", "\\right", "\\endtoggle", "&"];
148
+ }
149
+
150
+ /**
151
+ * Fully parse a separate sequence of tokens as a separate job.
152
+ * Tokens should be specified in reverse order, as in a MacroDefinition.
153
+ */
154
+ subparse(tokens) {
155
+ // Save the next token from the current job.
156
+ const oldToken = this.nextToken;
157
+ this.consume();
158
+
159
+ // Run the new job, terminating it with an excess '}'
160
+ this.gullet.pushToken(new Token("}"));
161
+ this.gullet.pushTokens(tokens);
162
+ const parse = this.parseExpression(false);
163
+ this.expect("}");
164
+
165
+ // Restore the next token from the current job.
166
+ this.nextToken = oldToken;
167
+
168
+ return parse;
169
+ }
170
+
171
+ /**
172
+ * Parses an "expression", which is a list of atoms.
173
+ *
174
+ * `breakOnInfix`: Should the parsing stop when we hit infix nodes? This
175
+ * happens when functions have higher precendence han infix
176
+ * nodes in implicit parses.
177
+ *
178
+ * `breakOnTokenText`: The text of the token that the expression should end
179
+ * with, or `null` if something else should end the
180
+ * expression.
181
+ */
182
+ parseExpression(breakOnInfix, breakOnTokenText) {
183
+ const body = [];
184
+ // Keep adding atoms to the body until we can't parse any more atoms (either
185
+ // we reached the end, a }, or a \right)
186
+ while (true) {
187
+ // Ignore spaces in math mode
188
+ if (this.mode === "math") {
189
+ this.consumeSpaces();
190
+ }
191
+ const lex = this.fetch();
192
+ if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
193
+ break;
194
+ }
195
+ if (breakOnTokenText && lex.text === breakOnTokenText) {
196
+ break;
197
+ }
198
+ if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
199
+ break;
200
+ }
201
+ const atom = this.parseAtom(breakOnTokenText);
202
+ if (!atom) {
203
+ break;
204
+ } else if (atom.type === "internal") {
205
+ continue;
206
+ }
207
+ body.push(atom);
208
+ // Keep a record of the atom type, so that op.js can set correct spacing.
209
+ this.prevAtomType = atom.type === "atom" ? atom.family : atom.type;
210
+ }
211
+ if (this.mode === "text") {
212
+ this.formLigatures(body);
213
+ }
214
+ return this.handleInfixNodes(body);
215
+ }
216
+
217
+ /**
218
+ * Rewrites infix operators such as \over with corresponding commands such
219
+ * as \frac.
220
+ *
221
+ * There can only be one infix operator per group. If there's more than one
222
+ * then the expression is ambiguous. This can be resolved by adding {}.
223
+ */
224
+ handleInfixNodes(body) {
225
+ let overIndex = -1;
226
+ let funcName;
227
+
228
+ for (let i = 0; i < body.length; i++) {
229
+ if (body[i].type === "infix") {
230
+ if (overIndex !== -1) {
231
+ throw new ParseError("only one infix operator per group", body[i].token);
232
+ }
233
+ overIndex = i;
234
+ funcName = body[i].replaceWith;
235
+ }
236
+ }
237
+
238
+ if (overIndex !== -1 && funcName) {
239
+ let numerNode;
240
+ let denomNode;
241
+
242
+ const numerBody = body.slice(0, overIndex);
243
+ const denomBody = body.slice(overIndex + 1);
244
+
245
+ if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
246
+ numerNode = numerBody[0];
247
+ } else {
248
+ numerNode = { type: "ordgroup", mode: this.mode, body: numerBody };
249
+ }
250
+
251
+ if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
252
+ denomNode = denomBody[0];
253
+ } else {
254
+ denomNode = { type: "ordgroup", mode: this.mode, body: denomBody };
255
+ }
256
+
257
+ let node;
258
+ if (funcName === "\\\\abovefrac") {
259
+ node = this.callFunction(funcName, [numerNode, body[overIndex], denomNode], []);
260
+ } else {
261
+ node = this.callFunction(funcName, [numerNode, denomNode], []);
262
+ }
263
+ return [node];
264
+ } else {
265
+ return body;
266
+ }
267
+ }
268
+
269
+ /**
270
+ * Handle a subscript or superscript with nice errors.
271
+ */
272
+ handleSupSubscript(
273
+ name // For error reporting.
274
+ ) {
275
+ const symbolToken = this.fetch();
276
+ const symbol = symbolToken.text;
277
+ this.consume();
278
+ this.consumeSpaces(); // ignore spaces before sup/subscript argument
279
+ const group = this.parseGroup(name);
280
+
281
+ if (!group) {
282
+ throw new ParseError("Expected group after '" + symbol + "'", symbolToken);
283
+ }
284
+
285
+ return group;
286
+ }
287
+
288
+ /**
289
+ * Converts the textual input of an unsupported command into a text node
290
+ * contained within a color node whose color is determined by errorColor
291
+ */
292
+ formatUnsupportedCmd(text) {
293
+ const textordArray = [];
294
+
295
+ for (let i = 0; i < text.length; i++) {
296
+ textordArray.push({ type: "textord", mode: "text", text: text[i] });
297
+ }
298
+
299
+ const textNode = {
300
+ type: "text",
301
+ mode: this.mode,
302
+ body: textordArray
303
+ };
304
+
305
+ const colorNode = {
306
+ type: "color",
307
+ mode: this.mode,
308
+ color: this.settings.errorColor,
309
+ body: [textNode]
310
+ };
311
+
312
+ return colorNode;
313
+ }
314
+
315
+ /**
316
+ * Parses a group with optional super/subscripts.
317
+ */
318
+ parseAtom(breakOnTokenText) {
319
+ // The body of an atom is an implicit group, so that things like
320
+ // \left(x\right)^2 work correctly.
321
+ const base = this.parseGroup("atom", breakOnTokenText);
322
+
323
+ // In text mode, we don't have superscripts or subscripts
324
+ if (this.mode === "text") {
325
+ return base;
326
+ }
327
+
328
+ // Note that base may be empty (i.e. null) at this point.
329
+
330
+ let superscript;
331
+ let subscript;
332
+ while (true) {
333
+ // Guaranteed in math mode, so eat any spaces first.
334
+ this.consumeSpaces();
335
+
336
+ // Lex the first token
337
+ const lex = this.fetch();
338
+
339
+ if (lex.text === "\\limits" || lex.text === "\\nolimits") {
340
+ // We got a limit control
341
+ if (base && base.type === "op") {
342
+ const limits = lex.text === "\\limits";
343
+ base.limits = limits;
344
+ base.alwaysHandleSupSub = true;
345
+ } else if (base && base.type === "operatorname") {
346
+ if (base.alwaysHandleSupSub) {
347
+ base.limits = lex.text === "\\limits"
348
+ }
349
+ } else {
350
+ throw new ParseError("Limit controls must follow a math operator", lex);
351
+ }
352
+ this.consume();
353
+ } else if (lex.text === "^") {
354
+ // We got a superscript start
355
+ if (superscript) {
356
+ throw new ParseError("Double superscript", lex);
357
+ }
358
+ superscript = this.handleSupSubscript("superscript");
359
+ } else if (lex.text === "_") {
360
+ // We got a subscript start
361
+ if (subscript) {
362
+ throw new ParseError("Double subscript", lex);
363
+ }
364
+ subscript = this.handleSupSubscript("subscript");
365
+ } else if (lex.text === "'") {
366
+ // We got a prime
367
+ if (superscript) {
368
+ throw new ParseError("Double superscript", lex);
369
+ }
370
+ const prime = { type: "textord", mode: this.mode, text: "\\prime" };
371
+
372
+ // Many primes can be grouped together, so we handle this here
373
+ const primes = [prime];
374
+ this.consume();
375
+ // Keep lexing tokens until we get something that's not a prime
376
+ while (this.fetch().text === "'") {
377
+ // For each one, add another prime to the list
378
+ primes.push(prime);
379
+ this.consume();
380
+ }
381
+ // If there's a superscript following the primes, combine that
382
+ // superscript in with the primes.
383
+ if (this.fetch().text === "^") {
384
+ primes.push(this.handleSupSubscript("superscript"));
385
+ }
386
+ // Put everything into an ordgroup as the superscript
387
+ superscript = { type: "ordgroup", mode: this.mode, body: primes };
388
+ } else if (uSubsAndSups[lex.text]) {
389
+ // A Unicode subscript or superscript character.
390
+ // We treat these similarly to the unicode-math package.
391
+ // So we render a string of Unicode (sub|super)scripts the
392
+ // same as a (sub|super)script of regular characters.
393
+ const isSub = unicodeSubRegEx.test(lex.text)
394
+ const subsupTokens = [];
395
+ subsupTokens.push(new Token(uSubsAndSups[lex.text]))
396
+ this.consume()
397
+ // Continue fetching tokens to fill out the group.
398
+ while (true) {
399
+ const token = this.fetch().text
400
+ if (!(uSubsAndSups[token])) { break }
401
+ if (unicodeSubRegEx.test(token) !== isSub) { break }
402
+ subsupTokens.unshift(new Token(uSubsAndSups[token]))
403
+ this.consume()
404
+ }
405
+ // Now create a (sub|super)script.
406
+ const body = this.subparse(subsupTokens)
407
+ if (isSub) {
408
+ subscript = { type: "ordgroup", mode: "math", body }
409
+ } else {
410
+ superscript = { type: "ordgroup", mode: "math", body }
411
+ }
412
+ } else {
413
+ // If it wasn't ^, _, a Unicode (sub|super)script, or ', stop parsing super/subscripts
414
+ break;
415
+ }
416
+ }
417
+
418
+ if (superscript || subscript) {
419
+ if (base && base.type === "multiscript" && !base.postscripts) {
420
+ // base is the result of a \prescript function.
421
+ // Write the sub- & superscripts into the multiscript element.
422
+ base.postscripts = { sup: superscript, sub: subscript }
423
+ return base
424
+ } else {
425
+ // We got either a superscript or subscript, create a supsub
426
+ return {
427
+ type: "supsub",
428
+ mode: this.mode,
429
+ base: base,
430
+ sup: superscript,
431
+ sub: subscript
432
+ }
433
+ }
434
+ } else {
435
+ // Otherwise return the original body
436
+ return base;
437
+ }
438
+ }
439
+
440
+ /**
441
+ * Parses an entire function, including its base and all of its arguments.
442
+ */
443
+ parseFunction(
444
+ breakOnTokenText,
445
+ name // For determining its context
446
+ ) {
447
+ const token = this.fetch();
448
+ const func = token.text;
449
+ const funcData = functions[func];
450
+ if (!funcData) {
451
+ return null;
452
+ }
453
+ this.consume(); // consume command token
454
+
455
+ if (name && name !== "atom" && !funcData.allowedInArgument) {
456
+ throw new ParseError(
457
+ "Got function '" + func + "' with no arguments" + (name ? " as " + name : ""),
458
+ token
459
+ );
460
+ } else if (this.mode === "text" && !funcData.allowedInText) {
461
+ throw new ParseError("Can't use function '" + func + "' in text mode", token);
462
+ } else if (this.mode === "math" && funcData.allowedInMath === false) {
463
+ throw new ParseError("Can't use function '" + func + "' in math mode", token);
464
+ }
465
+
466
+ const prevAtomType = this.prevAtomType;
467
+ const { args, optArgs } = this.parseArguments(func, funcData);
468
+ this.prevAtomType = prevAtomType;
469
+ return this.callFunction(func, args, optArgs, token, breakOnTokenText);
470
+ }
471
+
472
+ /**
473
+ * Call a function handler with a suitable context and arguments.
474
+ */
475
+ callFunction(name, args, optArgs, token, breakOnTokenText) {
476
+ const context = {
477
+ funcName: name,
478
+ parser: this,
479
+ token,
480
+ breakOnTokenText
481
+ };
482
+ const func = functions[name];
483
+ if (func && func.handler) {
484
+ return func.handler(context, args, optArgs);
485
+ } else {
486
+ throw new ParseError(`No function handler for ${name}`);
487
+ }
488
+ }
489
+
490
+ /**
491
+ * Parses the arguments of a function or environment
492
+ */
493
+ parseArguments(
494
+ func, // Should look like "\name" or "\begin{name}".
495
+ funcData
496
+ ) {
497
+ const totalArgs = funcData.numArgs + funcData.numOptionalArgs;
498
+ if (totalArgs === 0) {
499
+ return { args: [], optArgs: [] };
500
+ }
501
+
502
+ const args = [];
503
+ const optArgs = [];
504
+
505
+ for (let i = 0; i < totalArgs; i++) {
506
+ let argType = funcData.argTypes && funcData.argTypes[i];
507
+ const isOptional = i < funcData.numOptionalArgs;
508
+
509
+ if (
510
+ (funcData.primitive && argType == null) ||
511
+ // \sqrt expands into primitive if optional argument doesn't exist
512
+ (funcData.type === "sqrt" && i === 1 && optArgs[0] == null)
513
+ ) {
514
+ argType = "primitive";
515
+ }
516
+
517
+ const arg = this.parseGroupOfType(`argument to '${func}'`, argType, isOptional);
518
+ if (isOptional) {
519
+ optArgs.push(arg);
520
+ } else if (arg != null) {
521
+ args.push(arg);
522
+ } else {
523
+ // should be unreachable
524
+ throw new ParseError("Null argument, please report this as a bug");
525
+ }
526
+ }
527
+
528
+ return { args, optArgs };
529
+ }
530
+
531
+ /**
532
+ * Parses a group when the mode is changing.
533
+ */
534
+ parseGroupOfType(name, type, optional) {
535
+ switch (type) {
536
+ case "size":
537
+ return this.parseSizeGroup(optional);
538
+ case "url":
539
+ return this.parseUrlGroup(optional);
540
+ case "math":
541
+ case "text":
542
+ return this.parseArgumentGroup(optional, type);
543
+ case "hbox": {
544
+ // hbox argument type wraps the argument in the equivalent of
545
+ // \hbox, which is like \text but switching to \textstyle size.
546
+ const group = this.parseArgumentGroup(optional, "text");
547
+ return group != null
548
+ ? {
549
+ type: "styling",
550
+ mode: group.mode,
551
+ body: [group],
552
+ scriptLevel: "text" // simulate \textstyle
553
+ }
554
+ : null;
555
+ }
556
+ case "raw": {
557
+ const token = this.parseStringGroup("raw", optional);
558
+ return token != null
559
+ ? {
560
+ type: "raw",
561
+ mode: "text",
562
+ string: token.text
563
+ }
564
+ : null;
565
+ }
566
+ case "primitive": {
567
+ if (optional) {
568
+ throw new ParseError("A primitive argument cannot be optional");
569
+ }
570
+ const group = this.parseGroup(name);
571
+ if (group == null) {
572
+ throw new ParseError("Expected group as " + name, this.fetch());
573
+ }
574
+ return group;
575
+ }
576
+ case "original":
577
+ case null:
578
+ case undefined:
579
+ return this.parseArgumentGroup(optional);
580
+ default:
581
+ throw new ParseError("Unknown group type as " + name, this.fetch());
582
+ }
583
+ }
584
+
585
+ /**
586
+ * Discard any space tokens, fetching the next non-space token.
587
+ */
588
+ consumeSpaces() {
589
+ while (true) {
590
+ const ch = this.fetch().text
591
+ // \ufe0e is the Unicode variation selector to supress emoji. Ignore it.
592
+ if (ch === " " || ch === "\ufe0e") {
593
+ this.consume()
594
+ } else {
595
+ break
596
+ }
597
+ }
598
+ }
599
+
600
+ /**
601
+ * Parses a group, essentially returning the string formed by the
602
+ * brace-enclosed tokens plus some position information.
603
+ */
604
+ parseStringGroup(
605
+ modeName, // Used to describe the mode in error messages.
606
+ optional
607
+ ) {
608
+ const argToken = this.gullet.scanArgument(optional);
609
+ if (argToken == null) {
610
+ return null;
611
+ }
612
+ let str = "";
613
+ let nextToken;
614
+ while ((nextToken = this.fetch()).text !== "EOF") {
615
+ str += nextToken.text;
616
+ this.consume();
617
+ }
618
+ this.consume(); // consume the end of the argument
619
+ argToken.text = str;
620
+ return argToken;
621
+ }
622
+
623
+ /**
624
+ * Parses a regex-delimited group: the largest sequence of tokens
625
+ * whose concatenated strings match `regex`. Returns the string
626
+ * formed by the tokens plus some position information.
627
+ */
628
+ parseRegexGroup(
629
+ regex,
630
+ modeName // Used to describe the mode in error messages.
631
+ ) {
632
+ const firstToken = this.fetch();
633
+ let lastToken = firstToken;
634
+ let str = "";
635
+ let nextToken;
636
+ while ((nextToken = this.fetch()).text !== "EOF" && regex.test(str + nextToken.text)) {
637
+ lastToken = nextToken;
638
+ str += lastToken.text;
639
+ this.consume();
640
+ }
641
+ if (str === "") {
642
+ throw new ParseError("Invalid " + modeName + ": '" + firstToken.text + "'", firstToken);
643
+ }
644
+ return firstToken.range(lastToken, str);
645
+ }
646
+
647
+ /**
648
+ * Parses a size specification, consisting of magnitude and unit.
649
+ */
650
+ parseSizeGroup(optional) {
651
+ let res;
652
+ let isBlank = false;
653
+ // don't expand before parseStringGroup
654
+ this.gullet.consumeSpaces();
655
+ if (!optional && this.gullet.future().text !== "{") {
656
+ res = this.parseRegexGroup(/^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$/, "size");
657
+ } else {
658
+ res = this.parseStringGroup("size", optional);
659
+ }
660
+ if (!res) {
661
+ return null;
662
+ }
663
+ if (!optional && res.text.length === 0) {
664
+ // Because we've tested for what is !optional, this block won't
665
+ // affect \kern, \hspace, etc. It will capture the mandatory arguments
666
+ // to \genfrac and \above.
667
+ res.text = "0pt"; // Enable \above{}
668
+ isBlank = true; // This is here specifically for \genfrac
669
+ }
670
+ const match = /([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})/.exec(res.text);
671
+ if (!match) {
672
+ throw new ParseError("Invalid size: '" + res.text + "'", res);
673
+ }
674
+ const data = {
675
+ number: +(match[1] + match[2]), // sign + magnitude, cast to number
676
+ unit: match[3]
677
+ };
678
+ if (!validUnit(data)) {
679
+ throw new ParseError("Invalid unit: '" + data.unit + "'", res);
680
+ }
681
+ return {
682
+ type: "size",
683
+ mode: this.mode,
684
+ value: data,
685
+ isBlank
686
+ };
687
+ }
688
+
689
+ /**
690
+ * Parses an URL, checking escaped letters and allowed protocols,
691
+ * and setting the catcode of % as an active character (as in \hyperref).
692
+ */
693
+ parseUrlGroup(optional) {
694
+ this.gullet.lexer.setCatcode("%", 13); // active character
695
+ this.gullet.lexer.setCatcode("~", 12); // other character
696
+ const res = this.parseStringGroup("url", optional);
697
+ this.gullet.lexer.setCatcode("%", 14); // comment character
698
+ this.gullet.lexer.setCatcode("~", 13); // active character
699
+ if (res == null) {
700
+ return null;
701
+ }
702
+ // hyperref package allows backslashes alone in href, but doesn't
703
+ // generate valid links in such cases; we interpret this as
704
+ // "undefined" behaviour, and keep them as-is. Some browser will
705
+ // replace backslashes with forward slashes.
706
+ let url = res.text.replace(/\\([#$%&~_^{}])/g, "$1");
707
+ url = res.text.replace(/{\u2044}/g, "/");
708
+ return {
709
+ type: "url",
710
+ mode: this.mode,
711
+ url
712
+ };
713
+ }
714
+
715
+ /**
716
+ * Parses an argument with the mode specified.
717
+ */
718
+ parseArgumentGroup(optional, mode) {
719
+ const argToken = this.gullet.scanArgument(optional);
720
+ if (argToken == null) {
721
+ return null;
722
+ }
723
+ const outerMode = this.mode;
724
+ if (mode) {
725
+ // Switch to specified mode
726
+ this.switchMode(mode);
727
+ }
728
+
729
+ this.gullet.beginGroup();
730
+ const expression = this.parseExpression(false, "EOF");
731
+ // TODO: find an alternative way to denote the end
732
+ this.expect("EOF"); // expect the end of the argument
733
+ this.gullet.endGroup();
734
+ const result = {
735
+ type: "ordgroup",
736
+ mode: this.mode,
737
+ loc: argToken.loc,
738
+ body: expression
739
+ };
740
+
741
+ if (mode) {
742
+ // Switch mode back
743
+ this.switchMode(outerMode);
744
+ }
745
+ return result;
746
+ }
747
+
748
+ /**
749
+ * Parses an ordinary group, which is either a single nucleus (like "x")
750
+ * or an expression in braces (like "{x+y}") or an implicit group, a group
751
+ * that starts at the current position, and ends right before a higher explicit
752
+ * group ends, or at EOF.
753
+ */
754
+ parseGroup(
755
+ name, // For error reporting.
756
+ breakOnTokenText
757
+ ) {
758
+ const firstToken = this.fetch();
759
+ const text = firstToken.text;
760
+
761
+ let result;
762
+ // Try to parse an open brace or \begingroup
763
+ if (text === "{" || text === "\\begingroup" || text === "\\toggle") {
764
+ this.consume();
765
+ const groupEnd = text === "{"
766
+ ? "}"
767
+ : text === "\\begingroup"
768
+ ? "\\endgroup"
769
+ : "\\endtoggle"
770
+
771
+ this.gullet.beginGroup();
772
+ // If we get a brace, parse an expression
773
+ const expression = this.parseExpression(false, groupEnd);
774
+ const lastToken = this.fetch();
775
+ this.expect(groupEnd); // Check that we got a matching closing brace
776
+ this.gullet.endGroup();
777
+ result = {
778
+ type: (lastToken.text === "\\endtoggle" ? "toggle" : "ordgroup"),
779
+ mode: this.mode,
780
+ loc: SourceLocation.range(firstToken, lastToken),
781
+ body: expression,
782
+ // A group formed by \begingroup...\endgroup is a semi-simple group
783
+ // which doesn't affect spacing in math mode, i.e., is transparent.
784
+ // https://tex.stackexchange.com/questions/1930/when-should-one-
785
+ // use-begingroup-instead-of-bgroup
786
+ semisimple: text === "\\begingroup" || undefined
787
+ };
788
+ } else {
789
+ // If there exists a function with this name, parse the function.
790
+ // Otherwise, just return a nucleus
791
+ result = this.parseFunction(breakOnTokenText, name) || this.parseSymbol();
792
+ if (result == null && text[0] === "\\" &&
793
+ !Object.prototype.hasOwnProperty.call(implicitCommands, text )) {
794
+ result = this.formatUnsupportedCmd(text);
795
+ this.consume();
796
+ }
797
+ }
798
+ return result;
799
+ }
800
+
801
+ /**
802
+ * Form ligature-like combinations of characters for text mode.
803
+ * This includes inputs like "--", "---", "``" and "''".
804
+ * The result will simply replace multiple textord nodes with a single
805
+ * character in each value by a single textord node having multiple
806
+ * characters in its value. The representation is still ASCII source.
807
+ * The group will be modified in place.
808
+ */
809
+ formLigatures(group) {
810
+ let n = group.length - 1;
811
+ for (let i = 0; i < n; ++i) {
812
+ const a = group[i];
813
+ const v = a.text;
814
+ if (v === "-" && group[i + 1].text === "-") {
815
+ if (i + 1 < n && group[i + 2].text === "-") {
816
+ group.splice(i, 3, {
817
+ type: "textord",
818
+ mode: "text",
819
+ loc: SourceLocation.range(a, group[i + 2]),
820
+ text: "---"
821
+ });
822
+ n -= 2;
823
+ } else {
824
+ group.splice(i, 2, {
825
+ type: "textord",
826
+ mode: "text",
827
+ loc: SourceLocation.range(a, group[i + 1]),
828
+ text: "--"
829
+ });
830
+ n -= 1;
831
+ }
832
+ }
833
+ if ((v === "'" || v === "`") && group[i + 1].text === v) {
834
+ group.splice(i, 2, {
835
+ type: "textord",
836
+ mode: "text",
837
+ loc: SourceLocation.range(a, group[i + 1]),
838
+ text: v + v
839
+ });
840
+ n -= 1;
841
+ }
842
+ }
843
+ }
844
+
845
+ /**
846
+ * Parse a single symbol out of the string. Here, we handle single character
847
+ * symbols and special functions like \verb.
848
+ */
849
+ parseSymbol() {
850
+ const nucleus = this.fetch();
851
+ let text = nucleus.text;
852
+
853
+ if (/^\\verb[^a-zA-Z]/.test(text)) {
854
+ this.consume();
855
+ let arg = text.slice(5);
856
+ const star = arg.charAt(0) === "*";
857
+ if (star) {
858
+ arg = arg.slice(1);
859
+ }
860
+ // Lexer's tokenRegex is constructed to always have matching
861
+ // first/last characters.
862
+ if (arg.length < 2 || arg.charAt(0) !== arg.slice(-1)) {
863
+ throw new ParseError(`\\verb assertion failed --
864
+ please report what input caused this bug`);
865
+ }
866
+ arg = arg.slice(1, -1); // remove first and last char
867
+ return {
868
+ type: "verb",
869
+ mode: "text",
870
+ body: arg,
871
+ star
872
+ };
873
+ }
874
+ // At this point, we should have a symbol, possibly with accents.
875
+ // First expand any accented base symbol according to unicodeSymbols.
876
+ if (Object.prototype.hasOwnProperty.call(unicodeSymbols, text[0]) &&
877
+ !symbols[this.mode][text[0]]) {
878
+ // This behavior is not strict (XeTeX-compatible) in math mode.
879
+ if (this.settings.strict && this.mode === "math") {
880
+ throw new ParseError(`Accented Unicode text character "${text[0]}" used in ` + `math mode`,
881
+ nucleus
882
+ );
883
+ }
884
+ text = unicodeSymbols[text[0]] + text.slice(1);
885
+ }
886
+ // Strip off any combining characters
887
+ const match = combiningDiacriticalMarksEndRegex.exec(text);
888
+ if (match) {
889
+ text = text.substring(0, match.index);
890
+ if (text === "i") {
891
+ text = "\u0131"; // dotless i, in math and text mode
892
+ } else if (text === "j") {
893
+ text = "\u0237"; // dotless j, in math and text mode
894
+ }
895
+ }
896
+ // Recognize base symbol
897
+ let symbol;
898
+ if (symbols[this.mode][text]) {
899
+ const group = symbols[this.mode][text].group;
900
+ const loc = SourceLocation.range(nucleus);
901
+ let s;
902
+ if (Object.prototype.hasOwnProperty.call(ATOMS, group )) {
903
+ const family = group;
904
+ s = {
905
+ type: "atom",
906
+ mode: this.mode,
907
+ family,
908
+ loc,
909
+ text
910
+ };
911
+ } else {
912
+ s = {
913
+ type: group,
914
+ mode: this.mode,
915
+ loc,
916
+ text
917
+ };
918
+ }
919
+ symbol = s;
920
+ } else if (!this.strict && numberRegEx.test(text)) {
921
+ // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
922
+ this.consume()
923
+ return {
924
+ type: "textord",
925
+ mode: this.mode,
926
+ loc: SourceLocation.range(nucleus),
927
+ text
928
+ }
929
+ } else if (text.charCodeAt(0) >= 0x80) {
930
+ // no symbol for e.g. ^
931
+ if (this.settings.strict) {
932
+ if (!supportedCodepoint(text.charCodeAt(0))) {
933
+ throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
934
+ ` (${text.charCodeAt(0)})`, nucleus);
935
+ } else if (this.mode === "math") {
936
+ throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
937
+ }
938
+ }
939
+ // All nonmathematical Unicode characters are rendered as if they
940
+ // are in text mode (wrapped in \text) because that's what it
941
+ // takes to render them in LaTeX.
942
+ symbol = {
943
+ type: "textord",
944
+ mode: "text",
945
+ loc: SourceLocation.range(nucleus),
946
+ text
947
+ };
948
+ } else {
949
+ return null; // EOF, ^, _, {, }, etc.
950
+ }
951
+ this.consume();
952
+ // Transform combining characters into accents
953
+ if (match) {
954
+ for (let i = 0; i < match[0].length; i++) {
955
+ const accent = match[0][i];
956
+ if (!unicodeAccents[accent]) {
957
+ throw new ParseError(`Unknown accent ' ${accent}'`, nucleus);
958
+ }
959
+ const command = unicodeAccents[accent][this.mode] ||
960
+ unicodeAccents[accent].text;
961
+ if (!command) {
962
+ throw new ParseError(`Accent ${accent} unsupported in ${this.mode} mode`, nucleus);
963
+ }
964
+ symbol = {
965
+ type: "accent",
966
+ mode: this.mode,
967
+ loc: SourceLocation.range(nucleus),
968
+ label: command,
969
+ isStretchy: false,
970
+ isShifty: true,
971
+ base: symbol
972
+ };
973
+ }
974
+ }
975
+ return symbol;
976
+ }
977
+ }