katex 0.10.1 → 0.12.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/CHANGELOG.md +141 -0
  2. package/LICENSE +1 -1
  3. package/README.md +6 -6
  4. package/cli.js +0 -0
  5. package/contrib/auto-render/auto-render.js +12 -3
  6. package/contrib/copy-tex/README.md +3 -5
  7. package/contrib/mathtex-script-type/README.md +12 -14
  8. package/contrib/mhchem/README.md +3 -1
  9. package/contrib/render-a11y-string/render-a11y-string.js +712 -0
  10. package/contrib/render-a11y-string/test/render-a11y-string-spec.js +526 -0
  11. package/dist/README.md +6 -6
  12. package/dist/contrib/auto-render.js +14 -3
  13. package/dist/contrib/auto-render.min.js +1 -1
  14. package/dist/contrib/auto-render.mjs +14 -3
  15. package/dist/contrib/mhchem.min.js +1 -1
  16. package/dist/contrib/render-a11y-string.js +870 -0
  17. package/dist/contrib/render-a11y-string.min.js +1 -0
  18. package/dist/contrib/render-a11y-string.mjs +753 -0
  19. package/dist/fonts/KaTeX_AMS-Regular.ttf +0 -0
  20. package/dist/fonts/KaTeX_AMS-Regular.woff +0 -0
  21. package/dist/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  22. package/dist/fonts/KaTeX_Caligraphic-Bold.ttf +0 -0
  23. package/dist/fonts/KaTeX_Caligraphic-Bold.woff +0 -0
  24. package/dist/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  25. package/dist/fonts/KaTeX_Caligraphic-Regular.ttf +0 -0
  26. package/dist/fonts/KaTeX_Caligraphic-Regular.woff +0 -0
  27. package/dist/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  28. package/dist/fonts/KaTeX_Fraktur-Bold.ttf +0 -0
  29. package/dist/fonts/KaTeX_Fraktur-Bold.woff +0 -0
  30. package/dist/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  31. package/dist/fonts/KaTeX_Fraktur-Regular.ttf +0 -0
  32. package/dist/fonts/KaTeX_Fraktur-Regular.woff +0 -0
  33. package/dist/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  34. package/dist/fonts/KaTeX_Main-Bold.ttf +0 -0
  35. package/dist/fonts/KaTeX_Main-Bold.woff +0 -0
  36. package/dist/fonts/KaTeX_Main-Bold.woff2 +0 -0
  37. package/dist/fonts/KaTeX_Main-BoldItalic.ttf +0 -0
  38. package/dist/fonts/KaTeX_Main-BoldItalic.woff +0 -0
  39. package/dist/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  40. package/dist/fonts/KaTeX_Main-Italic.ttf +0 -0
  41. package/dist/fonts/KaTeX_Main-Italic.woff +0 -0
  42. package/dist/fonts/KaTeX_Main-Italic.woff2 +0 -0
  43. package/dist/fonts/KaTeX_Main-Regular.ttf +0 -0
  44. package/dist/fonts/KaTeX_Main-Regular.woff +0 -0
  45. package/dist/fonts/KaTeX_Main-Regular.woff2 +0 -0
  46. package/dist/fonts/KaTeX_Math-BoldItalic.ttf +0 -0
  47. package/dist/fonts/KaTeX_Math-BoldItalic.woff +0 -0
  48. package/dist/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  49. package/dist/fonts/KaTeX_Math-Italic.ttf +0 -0
  50. package/dist/fonts/KaTeX_Math-Italic.woff +0 -0
  51. package/dist/fonts/KaTeX_Math-Italic.woff2 +0 -0
  52. package/dist/fonts/KaTeX_SansSerif-Bold.ttf +0 -0
  53. package/dist/fonts/KaTeX_SansSerif-Bold.woff +0 -0
  54. package/dist/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  55. package/dist/fonts/KaTeX_SansSerif-Italic.ttf +0 -0
  56. package/dist/fonts/KaTeX_SansSerif-Italic.woff +0 -0
  57. package/dist/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  58. package/dist/fonts/KaTeX_SansSerif-Regular.ttf +0 -0
  59. package/dist/fonts/KaTeX_SansSerif-Regular.woff +0 -0
  60. package/dist/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  61. package/dist/fonts/KaTeX_Script-Regular.ttf +0 -0
  62. package/dist/fonts/KaTeX_Script-Regular.woff +0 -0
  63. package/dist/fonts/KaTeX_Script-Regular.woff2 +0 -0
  64. package/dist/fonts/KaTeX_Size1-Regular.ttf +0 -0
  65. package/dist/fonts/KaTeX_Size1-Regular.woff +0 -0
  66. package/dist/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  67. package/dist/fonts/KaTeX_Size2-Regular.ttf +0 -0
  68. package/dist/fonts/KaTeX_Size2-Regular.woff +0 -0
  69. package/dist/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  70. package/dist/fonts/KaTeX_Size3-Regular.ttf +0 -0
  71. package/dist/fonts/KaTeX_Size3-Regular.woff +0 -0
  72. package/dist/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  73. package/dist/fonts/KaTeX_Size4-Regular.ttf +0 -0
  74. package/dist/fonts/KaTeX_Size4-Regular.woff +0 -0
  75. package/dist/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  76. package/dist/fonts/KaTeX_Typewriter-Regular.ttf +0 -0
  77. package/dist/fonts/KaTeX_Typewriter-Regular.woff +0 -0
  78. package/dist/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  79. package/dist/katex.css +34 -10
  80. package/dist/katex.js +2906 -2115
  81. package/dist/katex.min.css +1 -1
  82. package/dist/katex.min.js +1 -1
  83. package/dist/katex.mjs +2809 -2020
  84. package/package.json +12 -11
  85. package/src/Lexer.js +1 -0
  86. package/src/MacroExpander.js +39 -10
  87. package/src/Options.js +15 -75
  88. package/src/Parser.js +152 -115
  89. package/src/Settings.js +70 -7
  90. package/src/Token.js +2 -0
  91. package/src/buildCommon.js +24 -90
  92. package/src/buildHTML.js +31 -31
  93. package/src/buildMathML.js +52 -9
  94. package/src/buildTree.js +13 -6
  95. package/src/defineFunction.js +7 -22
  96. package/src/delimiter.js +66 -27
  97. package/src/domTree.js +71 -4
  98. package/src/environments/array.js +235 -25
  99. package/src/fontMetrics.js +11 -2
  100. package/src/functions/accent.js +9 -9
  101. package/src/functions/accentunder.js +2 -2
  102. package/src/functions/arrow.js +15 -5
  103. package/src/functions/color.js +9 -38
  104. package/src/functions/def.js +184 -0
  105. package/src/functions/delimsizing.js +32 -8
  106. package/src/functions/enclose.js +33 -6
  107. package/src/functions/font.js +4 -1
  108. package/src/functions/genfrac.js +39 -27
  109. package/src/functions/horizBrace.js +6 -7
  110. package/src/functions/href.js +16 -0
  111. package/src/functions/html.js +102 -0
  112. package/src/functions/includegraphics.js +153 -0
  113. package/src/functions/lap.js +4 -7
  114. package/src/functions/math.js +1 -5
  115. package/src/functions/mclass.js +41 -2
  116. package/src/functions/op.js +27 -111
  117. package/src/functions/operatorname.js +136 -92
  118. package/src/functions/ordgroup.js +1 -1
  119. package/src/functions/overline.js +3 -2
  120. package/src/functions/phantom.js +5 -2
  121. package/src/functions/raisebox.js +4 -16
  122. package/src/functions/rule.js +20 -9
  123. package/src/functions/styling.js +0 -9
  124. package/src/functions/supsub.js +27 -7
  125. package/src/functions/symbolsOp.js +4 -0
  126. package/src/functions/tag.js +20 -4
  127. package/src/functions/text.js +4 -3
  128. package/src/functions/underline.js +3 -2
  129. package/src/functions/utils/assembleSupSub.js +110 -0
  130. package/src/functions.js +3 -0
  131. package/src/katex.less +45 -9
  132. package/src/macros.js +259 -98
  133. package/src/mathMLTree.js +6 -4
  134. package/src/parseNode.js +37 -57
  135. package/src/stretchy.js +3 -1
  136. package/src/svgGeometry.js +136 -44
  137. package/src/symbols.js +52 -69
  138. package/src/tree.js +2 -2
  139. package/src/types.js +2 -1
  140. package/src/unicodeAccents.js +3 -1
  141. package/src/unicodeSymbols.js +30 -321
  142. package/src/utils.js +10 -0
  143. package/src/wide-character.js +2 -2
  144. package/src/unicodeMake.js +0 -70
package/src/Parser.js CHANGED
@@ -5,16 +5,18 @@ import MacroExpander, {implicitCommands} from "./MacroExpander";
5
5
  import symbols, {ATOMS, extraLatin} from "./symbols";
6
6
  import {validUnit} from "./units";
7
7
  import {supportedCodepoint} from "./unicodeScripts";
8
- import unicodeAccents from "./unicodeAccents";
9
- import unicodeSymbols from "./unicodeSymbols";
10
- import utils from "./utils";
11
- import {checkNodeType} from "./parseNode";
12
8
  import ParseError from "./ParseError";
13
9
  import {combiningDiacriticalMarksEndRegex} from "./Lexer";
14
10
  import Settings from "./Settings";
15
11
  import SourceLocation from "./SourceLocation";
16
12
  import {Token} from "./Token";
17
- import type {ParseNode, AnyParseNode, SymbolParseNode} from "./parseNode";
13
+
14
+ // Pre-evaluate both modules as unicodeSymbols require String.normalize()
15
+ import unicodeAccents from /*preval*/ "./unicodeAccents";
16
+ import unicodeSymbols from /*preval*/ "./unicodeSymbols";
17
+
18
+ import type {ParseNode, AnyParseNode, SymbolParseNode, UnsupportedCmdParseNode}
19
+ from "./parseNode";
18
20
  import type {Atom, Group} from "./symbols";
19
21
  import type {Mode, ArgType, BreakToken} from "./types";
20
22
  import type {FunctionContext, FunctionSpec} from "./defineFunction";
@@ -55,7 +57,7 @@ export default class Parser {
55
57
  gullet: MacroExpander;
56
58
  settings: Settings;
57
59
  leftrightDepth: number;
58
- nextToken: Token;
60
+ nextToken: ?Token;
59
61
 
60
62
  constructor(input: string, settings: Settings) {
61
63
  // Start in math mode
@@ -74,10 +76,9 @@ export default class Parser {
74
76
  * appropriate error otherwise.
75
77
  */
76
78
  expect(text: string, consume?: boolean = true) {
77
- if (this.nextToken.text !== text) {
79
+ if (this.fetch().text !== text) {
78
80
  throw new ParseError(
79
- "Expected '" + text + "', got '" + this.nextToken.text + "'",
80
- this.nextToken
81
+ `Expected '${text}', got '${this.fetch().text}'`, this.fetch()
81
82
  );
82
83
  }
83
84
  if (consume) {
@@ -86,11 +87,22 @@ export default class Parser {
86
87
  }
87
88
 
88
89
  /**
89
- * Considers the current look ahead token as consumed,
90
- * and fetches the one after that as the new look ahead.
90
+ * Discards the current lookahead token, considering it consumed.
91
91
  */
92
92
  consume() {
93
- this.nextToken = this.gullet.expandNextToken();
93
+ this.nextToken = null;
94
+ }
95
+
96
+ /**
97
+ * Return the current lookahead token, or if there isn't one (at the
98
+ * beginning, or if the previous lookahead token was consume()d),
99
+ * fetch the next token as the new lookahead token and return it.
100
+ */
101
+ fetch(): Token {
102
+ if (this.nextToken == null) {
103
+ this.nextToken = this.gullet.expandNextToken();
104
+ }
105
+ return this.nextToken;
94
106
  }
95
107
 
96
108
  /**
@@ -105,9 +117,11 @@ export default class Parser {
105
117
  * Main parsing function, which parses an entire input.
106
118
  */
107
119
  parse(): AnyParseNode[] {
108
- // Create a group namespace for the math expression.
109
- // (LaTeX creates a new group for every $...$, $$...$$, \[...\].)
110
- this.gullet.beginGroup();
120
+ if (!this.settings.globalGroup) {
121
+ // Create a group namespace for the math expression.
122
+ // (LaTeX creates a new group for every $...$, $$...$$, \[...\].)
123
+ this.gullet.beginGroup();
124
+ }
111
125
 
112
126
  // Use old \color behavior (same as LaTeX's \textcolor) if requested.
113
127
  // We do this within the group for the math expression, so it doesn't
@@ -117,14 +131,15 @@ export default class Parser {
117
131
  }
118
132
 
119
133
  // Try to parse the input
120
- this.consume();
121
134
  const parse = this.parseExpression(false);
122
135
 
123
136
  // If we succeeded, make sure there's an EOF at the end
124
- this.expect("EOF", false);
137
+ this.expect("EOF");
125
138
 
126
139
  // End the group namespace for the expression
127
- this.gullet.endGroup();
140
+ if (!this.settings.globalGroup) {
141
+ this.gullet.endGroup();
142
+ }
128
143
  return parse;
129
144
  }
130
145
 
@@ -159,7 +174,7 @@ export default class Parser {
159
174
  if (this.mode === "math") {
160
175
  this.consumeSpaces();
161
176
  }
162
- const lex = this.nextToken;
177
+ const lex = this.fetch();
163
178
  if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
164
179
  break;
165
180
  }
@@ -172,6 +187,8 @@ export default class Parser {
172
187
  const atom = this.parseAtom(breakOnTokenText);
173
188
  if (!atom) {
174
189
  break;
190
+ } else if (atom.type === "internal") {
191
+ continue;
175
192
  }
176
193
  body.push(atom);
177
194
  }
@@ -193,15 +210,14 @@ export default class Parser {
193
210
  let funcName;
194
211
 
195
212
  for (let i = 0; i < body.length; i++) {
196
- const node = checkNodeType(body[i], "infix");
197
- if (node) {
213
+ if (body[i].type === "infix") {
198
214
  if (overIndex !== -1) {
199
215
  throw new ParseError(
200
216
  "only one infix operator per group",
201
- node.token);
217
+ body[i].token);
202
218
  }
203
219
  overIndex = i;
204
- funcName = node.replaceWith;
220
+ funcName = body[i].replaceWith;
205
221
  }
206
222
  }
207
223
 
@@ -246,11 +262,12 @@ export default class Parser {
246
262
  handleSupSubscript(
247
263
  name: string, // For error reporting.
248
264
  ): AnyParseNode {
249
- const symbolToken = this.nextToken;
265
+ const symbolToken = this.fetch();
250
266
  const symbol = symbolToken.text;
251
267
  this.consume();
252
- this.consumeSpaces(); // ignore spaces before sup/subscript argument
253
- const group = this.parseGroup(name, false, Parser.SUPSUB_GREEDINESS);
268
+ const group = this.parseGroup(name, false, Parser.SUPSUB_GREEDINESS,
269
+ undefined, undefined, true);
270
+ // ignore spaces before sup/subscript argument
254
271
 
255
272
  if (!group) {
256
273
  throw new ParseError(
@@ -266,8 +283,7 @@ export default class Parser {
266
283
  * Converts the textual input of an unsupported command into a text node
267
284
  * contained within a color node whose color is determined by errorColor
268
285
  */
269
- handleUnsupportedCmd(): AnyParseNode {
270
- const text = this.nextToken.text;
286
+ formatUnsupportedCmd(text: string): UnsupportedCmdParseNode {
271
287
  const textordArray = [];
272
288
 
273
289
  for (let i = 0; i < text.length; i++) {
@@ -287,7 +303,6 @@ export default class Parser {
287
303
  body: [textNode],
288
304
  };
289
305
 
290
- this.consume();
291
306
  return colorNode;
292
307
  }
293
308
 
@@ -313,15 +328,18 @@ export default class Parser {
313
328
  this.consumeSpaces();
314
329
 
315
330
  // Lex the first token
316
- const lex = this.nextToken;
331
+ const lex = this.fetch();
317
332
 
318
333
  if (lex.text === "\\limits" || lex.text === "\\nolimits") {
319
334
  // We got a limit control
320
- const opNode = checkNodeType(base, "op");
321
- if (opNode) {
335
+ if (base && base.type === "op") {
322
336
  const limits = lex.text === "\\limits";
323
- opNode.limits = limits;
324
- opNode.alwaysHandleSupSub = true;
337
+ base.limits = limits;
338
+ base.alwaysHandleSupSub = true;
339
+ } else if (base && base.type === "operatorname"
340
+ && base.alwaysHandleSupSub) {
341
+ const limits = lex.text === "\\limits";
342
+ base.limits = limits;
325
343
  } else {
326
344
  throw new ParseError(
327
345
  "Limit controls must follow a math operator",
@@ -351,14 +369,14 @@ export default class Parser {
351
369
  const primes = [prime];
352
370
  this.consume();
353
371
  // Keep lexing tokens until we get something that's not a prime
354
- while (this.nextToken.text === "'") {
372
+ while (this.fetch().text === "'") {
355
373
  // For each one, add another prime to the list
356
374
  primes.push(prime);
357
375
  this.consume();
358
376
  }
359
377
  // If there's a superscript following the primes, combine that
360
378
  // superscript in with the primes.
361
- if (this.nextToken.text === "^") {
379
+ if (this.fetch().text === "^") {
362
380
  primes.push(this.handleSupSubscript("superscript"));
363
381
  }
364
382
  // Put everything into an ordgroup as the superscript
@@ -394,12 +412,14 @@ export default class Parser {
394
412
  name?: string, // For error reporting.
395
413
  greediness?: ?number,
396
414
  ): ?AnyParseNode {
397
- const token = this.nextToken;
415
+ const token = this.fetch();
398
416
  const func = token.text;
399
417
  const funcData = functions[func];
400
418
  if (!funcData) {
401
419
  return null;
402
420
  }
421
+ this.consume(); // consume command token
422
+
403
423
  if (greediness != null && funcData.greediness <= greediness) {
404
424
  throw new ParseError(
405
425
  "Got function '" + func + "' with no arguments" +
@@ -412,22 +432,6 @@ export default class Parser {
412
432
  "Can't use function '" + func + "' in math mode", token);
413
433
  }
414
434
 
415
- // hyperref package sets the catcode of % as an active character
416
- if (funcData.argTypes && funcData.argTypes[0] === "url") {
417
- this.gullet.lexer.setCatcode("%", 13);
418
- }
419
-
420
- // Consume the command token after possibly switching to the
421
- // mode specified by the function (for instant mode switching),
422
- // and then immediately switch back.
423
- if (funcData.consumeMode) {
424
- const oldMode = this.mode;
425
- this.switchMode(funcData.consumeMode);
426
- this.consume();
427
- this.switchMode(oldMode);
428
- } else {
429
- this.consume();
430
- }
431
435
  const {args, optArgs} = this.parseArguments(func, funcData);
432
436
  return this.callFunction(func, args, optArgs, token, breakOnTokenText);
433
437
  }
@@ -482,28 +486,23 @@ export default class Parser {
482
486
  // "After you have said ‘\def\row#1#2{...}’, you are allowed to
483
487
  // put spaces between the arguments (e.g., ‘\row x n’), because
484
488
  // TeX doesn’t use single spaces as undelimited arguments."
485
- if (i > 0 && !isOptional) {
486
- this.consumeSpaces();
487
- }
489
+ const consumeSpaces = (i > 0 && !isOptional) ||
488
490
  // Also consume leading spaces in math mode, as parseSymbol
489
491
  // won't know what to do with them. This can only happen with
490
492
  // macros, e.g. \frac\foo\foo where \foo expands to a space symbol.
491
- // In LaTeX, the \foo's get treated as (blank) arguments).
493
+ // In LaTeX, the \foo's get treated as (blank) arguments.
492
494
  // In KaTeX, for now, both spaces will get consumed.
493
495
  // TODO(edemaine)
494
- if (i === 0 && !isOptional && this.mode === "math") {
495
- this.consumeSpaces();
496
- }
497
- const nextToken = this.nextToken;
498
- const arg = this.parseGroupOfType("argument to '" + func + "'",
499
- argType, isOptional, baseGreediness);
496
+ (i === 0 && !isOptional && this.mode === "math");
497
+ const arg = this.parseGroupOfType(`argument to '${func}'`,
498
+ argType, isOptional, baseGreediness, consumeSpaces);
500
499
  if (!arg) {
501
500
  if (isOptional) {
502
501
  optArgs.push(null);
503
502
  continue;
504
503
  }
505
504
  throw new ParseError(
506
- "Expected group after '" + func + "'", nextToken);
505
+ `Expected group after '${func}'`, this.fetch());
507
506
  }
508
507
  (isOptional ? optArgs : args).push(arg);
509
508
  }
@@ -519,19 +518,46 @@ export default class Parser {
519
518
  type: ?ArgType,
520
519
  optional: boolean,
521
520
  greediness: ?number,
521
+ consumeSpaces: boolean,
522
522
  ): ?AnyParseNode {
523
523
  switch (type) {
524
524
  case "color":
525
+ if (consumeSpaces) {
526
+ this.consumeSpaces();
527
+ }
525
528
  return this.parseColorGroup(optional);
526
529
  case "size":
530
+ if (consumeSpaces) {
531
+ this.consumeSpaces();
532
+ }
527
533
  return this.parseSizeGroup(optional);
528
534
  case "url":
529
- return this.parseUrlGroup(optional);
535
+ return this.parseUrlGroup(optional, consumeSpaces);
530
536
  case "math":
531
537
  case "text":
532
- return this.parseGroup(name, optional, greediness, undefined, type);
538
+ return this.parseGroup(
539
+ name, optional, greediness, undefined, type, consumeSpaces);
540
+ case "hbox": {
541
+ // hbox argument type wraps the argument in the equivalent of
542
+ // \hbox, which is like \text but switching to \textstyle size.
543
+ const group = this.parseGroup(name, optional, greediness,
544
+ undefined, "text", consumeSpaces);
545
+ if (!group) {
546
+ return group;
547
+ }
548
+ const styledGroup = {
549
+ type: "styling",
550
+ mode: group.mode,
551
+ body: [group],
552
+ style: "text", // simulate \textstyle
553
+ };
554
+ return styledGroup;
555
+ }
533
556
  case "raw": {
534
- if (optional && this.nextToken.text === "{") {
557
+ if (consumeSpaces) {
558
+ this.consumeSpaces();
559
+ }
560
+ if (optional && this.fetch().text === "{") {
535
561
  return null;
536
562
  }
537
563
  const token = this.parseStringGroup("raw", optional, true);
@@ -542,21 +568,25 @@ export default class Parser {
542
568
  string: token.text,
543
569
  };
544
570
  } else {
545
- throw new ParseError("Expected raw group", this.nextToken);
571
+ throw new ParseError("Expected raw group", this.fetch());
546
572
  }
547
573
  }
548
574
  case "original":
549
575
  case null:
550
576
  case undefined:
551
- return this.parseGroup(name, optional, greediness);
577
+ return this.parseGroup(name, optional, greediness,
578
+ undefined, undefined, consumeSpaces);
552
579
  default:
553
580
  throw new ParseError(
554
- "Unknown group type as " + name, this.nextToken);
581
+ "Unknown group type as " + name, this.fetch());
555
582
  }
556
583
  }
557
584
 
585
+ /**
586
+ * Discard any space tokens, fetching the next non-space token.
587
+ */
558
588
  consumeSpaces() {
559
- while (this.nextToken.text === " ") {
589
+ while (this.fetch().text === " ") {
560
590
  this.consume();
561
591
  }
562
592
  }
@@ -572,27 +602,27 @@ export default class Parser {
572
602
  ): ?Token {
573
603
  const groupBegin = optional ? "[" : "{";
574
604
  const groupEnd = optional ? "]" : "}";
575
- const nextToken = this.nextToken;
576
- if (nextToken.text !== groupBegin) {
605
+ const beginToken = this.fetch();
606
+ if (beginToken.text !== groupBegin) {
577
607
  if (optional) {
578
608
  return null;
579
- } else if (raw && nextToken.text !== "EOF" &&
580
- /[^{}[\]]/.test(nextToken.text)) {
581
- // allow a single character in raw string group
582
- this.gullet.lexer.setCatcode("%", 14); // reset the catcode of %
609
+ } else if (raw && beginToken.text !== "EOF" &&
610
+ /[^{}[\]]/.test(beginToken.text)) {
583
611
  this.consume();
584
- return nextToken;
612
+ return beginToken;
585
613
  }
586
614
  }
587
615
  const outerMode = this.mode;
588
616
  this.mode = "text";
589
617
  this.expect(groupBegin);
590
618
  let str = "";
591
- const firstToken = this.nextToken;
619
+ const firstToken = this.fetch();
592
620
  let nested = 0; // allow nested braces in raw string group
593
621
  let lastToken = firstToken;
594
- while ((raw && nested > 0) || this.nextToken.text !== groupEnd) {
595
- switch (this.nextToken.text) {
622
+ let nextToken;
623
+ while ((nextToken = this.fetch()).text !== groupEnd ||
624
+ (raw && nested > 0)) {
625
+ switch (nextToken.text) {
596
626
  case "EOF":
597
627
  throw new ParseError(
598
628
  "Unexpected end of input in " + modeName,
@@ -604,13 +634,12 @@ export default class Parser {
604
634
  nested--;
605
635
  break;
606
636
  }
607
- lastToken = this.nextToken;
637
+ lastToken = nextToken;
608
638
  str += lastToken.text;
609
639
  this.consume();
610
640
  }
611
- this.mode = outerMode;
612
- this.gullet.lexer.setCatcode("%", 14); // reset the catcode of %
613
641
  this.expect(groupEnd);
642
+ this.mode = outerMode;
614
643
  return firstToken.range(lastToken, str);
615
644
  }
616
645
 
@@ -625,12 +654,13 @@ export default class Parser {
625
654
  ): Token {
626
655
  const outerMode = this.mode;
627
656
  this.mode = "text";
628
- const firstToken = this.nextToken;
657
+ const firstToken = this.fetch();
629
658
  let lastToken = firstToken;
630
659
  let str = "";
631
- while (this.nextToken.text !== "EOF" &&
632
- regex.test(str + this.nextToken.text)) {
633
- lastToken = this.nextToken;
660
+ let nextToken;
661
+ while ((nextToken = this.fetch()).text !== "EOF" &&
662
+ regex.test(str + nextToken.text)) {
663
+ lastToken = nextToken;
634
664
  str += lastToken.text;
635
665
  this.consume();
636
666
  }
@@ -675,7 +705,7 @@ export default class Parser {
675
705
  parseSizeGroup(optional: boolean): ?ParseNode<"size"> {
676
706
  let res;
677
707
  let isBlank = false;
678
- if (!optional && this.nextToken.text !== "{") {
708
+ if (!optional && this.fetch().text !== "{") {
679
709
  res = this.parseRegexGroup(
680
710
  /^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$/, "size");
681
711
  } else {
@@ -711,10 +741,13 @@ export default class Parser {
711
741
  }
712
742
 
713
743
  /**
714
- * Parses an URL, checking escaped letters and allowed protocols.
744
+ * Parses an URL, checking escaped letters and allowed protocols,
745
+ * and setting the catcode of % as an active character (as in \hyperref).
715
746
  */
716
- parseUrlGroup(optional: boolean): ?ParseNode<"url"> {
747
+ parseUrlGroup(optional: boolean, consumeSpaces: boolean): ?ParseNode<"url"> {
748
+ this.gullet.lexer.setCatcode("%", 13); // active character
717
749
  const res = this.parseStringGroup("url", optional, true); // get raw string
750
+ this.gullet.lexer.setCatcode("%", 14); // comment character
718
751
  if (!res) {
719
752
  return null;
720
753
  }
@@ -723,14 +756,6 @@ export default class Parser {
723
756
  // "undefined" behaviour, and keep them as-is. Some browser will
724
757
  // replace backslashes with forward slashes.
725
758
  const url = res.text.replace(/\\([#$%&~_^{}])/g, '$1');
726
- let protocol = /^\s*([^\\/#]*?)(?::|&#0*58|&#x0*3a)/i.exec(url);
727
- protocol = (protocol != null ? protocol[1] : "_relative");
728
- const allowed = this.settings.allowedProtocols;
729
- if (!utils.contains(allowed, "*") &&
730
- !utils.contains(allowed, protocol)) {
731
- throw new ParseError(
732
- `Forbidden protocol '${protocol}'`, res);
733
- }
734
759
  return {
735
760
  type: "url",
736
761
  mode: this.mode,
@@ -756,27 +781,35 @@ export default class Parser {
756
781
  greediness?: ?number,
757
782
  breakOnTokenText?: BreakToken,
758
783
  mode?: Mode,
784
+ consumeSpaces?: boolean,
759
785
  ): ?AnyParseNode {
760
- const outerMode = this.mode;
761
- const firstToken = this.nextToken;
762
- const text = firstToken.text;
763
786
  // Switch to specified mode
787
+ const outerMode = this.mode;
764
788
  if (mode) {
765
789
  this.switchMode(mode);
766
790
  }
791
+ // Consume spaces if requested, crucially *after* we switch modes,
792
+ // so that the next non-space token is parsed in the correct mode.
793
+ if (consumeSpaces) {
794
+ this.consumeSpaces();
795
+ }
796
+ // Get first token
797
+ const firstToken = this.fetch();
798
+ const text = firstToken.text;
767
799
 
768
- let groupEnd;
769
800
  let result;
770
801
  // Try to parse an open brace or \begingroup
771
802
  if (optional ? text === "[" : text === "{" || text === "\\begingroup") {
772
- groupEnd = Parser.endOfGroup[text];
803
+ this.consume();
804
+ const groupEnd = Parser.endOfGroup[text];
773
805
  // Start a new group namespace
774
806
  this.gullet.beginGroup();
775
807
  // If we get a brace, parse an expression
776
- this.consume();
777
808
  const expression = this.parseExpression(false, groupEnd);
778
- const lastToken = this.nextToken;
779
- // End group namespace before consuming symbol after close brace
809
+ const lastToken = this.fetch();
810
+ // Check that we got a matching closing brace
811
+ this.expect(groupEnd);
812
+ // End group namespace
780
813
  this.gullet.endGroup();
781
814
  result = {
782
815
  type: "ordgroup",
@@ -803,7 +836,8 @@ export default class Parser {
803
836
  throw new ParseError(
804
837
  "Undefined control sequence: " + text, firstToken);
805
838
  }
806
- result = this.handleUnsupportedCmd();
839
+ result = this.formatUnsupportedCmd(text);
840
+ this.consume();
807
841
  }
808
842
  }
809
843
 
@@ -811,10 +845,6 @@ export default class Parser {
811
845
  if (mode) {
812
846
  this.switchMode(outerMode);
813
847
  }
814
- // Make sure we got a close brace
815
- if (groupEnd) {
816
- this.expect(groupEnd);
817
- }
818
848
  return result;
819
849
  }
820
850
 
@@ -865,10 +895,10 @@ export default class Parser {
865
895
 
866
896
  /**
867
897
  * Parse a single symbol out of the string. Here, we handle single character
868
- * symbols and special functions like verbatim
898
+ * symbols and special functions like \verb.
869
899
  */
870
900
  parseSymbol(): ?AnyParseNode {
871
- const nucleus = this.nextToken;
901
+ const nucleus = this.fetch();
872
902
  let text = nucleus.text;
873
903
 
874
904
  if (/^\\verb[^a-zA-Z]/.test(text)) {
@@ -958,9 +988,16 @@ export default class Parser {
958
988
  nucleus);
959
989
  }
960
990
  }
991
+ // All nonmathematical Unicode characters are rendered as if they
992
+ // are in text mode (wrapped in \text) because that's what it
993
+ // takes to render them in LaTeX. Setting `mode: this.mode` is
994
+ // another natural choice (the user requested math mode), but
995
+ // this makes it more difficult for getCharacterMetrics() to
996
+ // distinguish Unicode characters without metrics and those for
997
+ // which we want to simulate the letter M.
961
998
  symbol = {
962
999
  type: "textord",
963
- mode: this.mode,
1000
+ mode: "text",
964
1001
  loc: SourceLocation.range(nucleus),
965
1002
  text,
966
1003
  };