katex 0.10.0-rc → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/CHANGELOG.md +214 -126
  2. package/README.md +18 -17
  3. package/cli.js +5 -1
  4. package/contrib/auto-render/README.md +1 -1
  5. package/contrib/auto-render/auto-render.js +4 -1
  6. package/contrib/auto-render/test/auto-render-spec.js +17 -0
  7. package/contrib/copy-tex/README.md +8 -2
  8. package/contrib/copy-tex/copy-tex.js +0 -1
  9. package/contrib/copy-tex/copy-tex.webpack.js +6 -0
  10. package/contrib/mathtex-script-type/README.md +10 -6
  11. package/contrib/mhchem/README.md +19 -0
  12. package/contrib/mhchem/mhchem.js +1695 -0
  13. package/contrib/mhchem/mhchem.patch +235 -0
  14. package/dist/README.md +18 -17
  15. package/dist/contrib/auto-render.js +179 -161
  16. package/dist/contrib/auto-render.min.js +1 -1
  17. package/dist/contrib/auto-render.mjs +215 -0
  18. package/dist/contrib/copy-tex.js +84 -62
  19. package/dist/contrib/copy-tex.min.css +1 -1
  20. package/dist/contrib/copy-tex.min.js +1 -1
  21. package/dist/contrib/copy-tex.mjs +85 -0
  22. package/dist/contrib/mathtex-script-type.js +17 -14
  23. package/dist/contrib/mathtex-script-type.mjs +24 -0
  24. package/dist/contrib/mhchem.js +3241 -0
  25. package/dist/contrib/mhchem.min.js +1 -0
  26. package/dist/contrib/mhchem.mjs +3109 -0
  27. package/dist/fonts/KaTeX_AMS-Regular.ttf +0 -0
  28. package/dist/fonts/KaTeX_AMS-Regular.woff +0 -0
  29. package/dist/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  30. package/dist/fonts/KaTeX_Caligraphic-Bold.ttf +0 -0
  31. package/dist/fonts/KaTeX_Caligraphic-Bold.woff +0 -0
  32. package/dist/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  33. package/dist/fonts/KaTeX_Caligraphic-Regular.ttf +0 -0
  34. package/dist/fonts/KaTeX_Caligraphic-Regular.woff +0 -0
  35. package/dist/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  36. package/dist/fonts/KaTeX_Fraktur-Bold.ttf +0 -0
  37. package/dist/fonts/KaTeX_Fraktur-Bold.woff +0 -0
  38. package/dist/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  39. package/dist/fonts/KaTeX_Fraktur-Regular.ttf +0 -0
  40. package/dist/fonts/KaTeX_Fraktur-Regular.woff +0 -0
  41. package/dist/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  42. package/dist/fonts/KaTeX_Main-Bold.ttf +0 -0
  43. package/dist/fonts/KaTeX_Main-Bold.woff +0 -0
  44. package/dist/fonts/KaTeX_Main-Bold.woff2 +0 -0
  45. package/dist/fonts/KaTeX_Main-BoldItalic.ttf +0 -0
  46. package/dist/fonts/KaTeX_Main-BoldItalic.woff +0 -0
  47. package/dist/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  48. package/dist/fonts/KaTeX_Main-Italic.ttf +0 -0
  49. package/dist/fonts/KaTeX_Main-Italic.woff +0 -0
  50. package/dist/fonts/KaTeX_Main-Italic.woff2 +0 -0
  51. package/dist/fonts/KaTeX_Main-Regular.ttf +0 -0
  52. package/dist/fonts/KaTeX_Main-Regular.woff +0 -0
  53. package/dist/fonts/KaTeX_Main-Regular.woff2 +0 -0
  54. package/dist/fonts/KaTeX_Math-BoldItalic.ttf +0 -0
  55. package/dist/fonts/KaTeX_Math-BoldItalic.woff +0 -0
  56. package/dist/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  57. package/dist/fonts/KaTeX_Math-Italic.ttf +0 -0
  58. package/dist/fonts/KaTeX_Math-Italic.woff +0 -0
  59. package/dist/fonts/KaTeX_Math-Italic.woff2 +0 -0
  60. package/dist/fonts/KaTeX_SansSerif-Bold.ttf +0 -0
  61. package/dist/fonts/KaTeX_SansSerif-Bold.woff +0 -0
  62. package/dist/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  63. package/dist/fonts/KaTeX_SansSerif-Italic.ttf +0 -0
  64. package/dist/fonts/KaTeX_SansSerif-Italic.woff +0 -0
  65. package/dist/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  66. package/dist/fonts/KaTeX_SansSerif-Regular.ttf +0 -0
  67. package/dist/fonts/KaTeX_SansSerif-Regular.woff +0 -0
  68. package/dist/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  69. package/dist/fonts/KaTeX_Script-Regular.ttf +0 -0
  70. package/dist/fonts/KaTeX_Script-Regular.woff +0 -0
  71. package/dist/fonts/KaTeX_Script-Regular.woff2 +0 -0
  72. package/dist/fonts/KaTeX_Size1-Regular.ttf +0 -0
  73. package/dist/fonts/KaTeX_Size1-Regular.woff +0 -0
  74. package/dist/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  75. package/dist/fonts/KaTeX_Size2-Regular.ttf +0 -0
  76. package/dist/fonts/KaTeX_Size2-Regular.woff +0 -0
  77. package/dist/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  78. package/dist/fonts/KaTeX_Size3-Regular.ttf +0 -0
  79. package/dist/fonts/KaTeX_Size3-Regular.woff +0 -0
  80. package/dist/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  81. package/dist/fonts/KaTeX_Size4-Regular.ttf +0 -0
  82. package/dist/fonts/KaTeX_Size4-Regular.woff +0 -0
  83. package/dist/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  84. package/dist/fonts/KaTeX_Typewriter-Regular.ttf +0 -0
  85. package/dist/fonts/KaTeX_Typewriter-Regular.woff +0 -0
  86. package/dist/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  87. package/dist/katex.css +24 -9
  88. package/dist/katex.js +13295 -12413
  89. package/dist/katex.min.css +1 -1
  90. package/dist/katex.min.js +1 -1
  91. package/dist/katex.mjs +13388 -11826
  92. package/katex.js +1 -2
  93. package/package.json +60 -48
  94. package/src/Lexer.js +25 -25
  95. package/src/MacroExpander.js +0 -1
  96. package/src/Options.js +11 -75
  97. package/src/Parser.js +231 -313
  98. package/src/Settings.js +6 -0
  99. package/src/buildCommon.js +140 -103
  100. package/src/buildHTML.js +125 -121
  101. package/src/buildMathML.js +14 -4
  102. package/src/buildTree.js +16 -10
  103. package/src/delimiter.js +4 -3
  104. package/src/domTree.js +91 -44
  105. package/src/environments/array.js +120 -7
  106. package/src/fontMetrics.js +3 -2
  107. package/src/functions/arrow.js +21 -7
  108. package/src/functions/color.js +2 -37
  109. package/src/functions/delimsizing.js +18 -11
  110. package/src/functions/enclose.js +19 -4
  111. package/src/functions/environment.js +35 -4
  112. package/src/functions/font.js +1 -2
  113. package/src/functions/genfrac.js +35 -20
  114. package/src/functions/href.js +5 -3
  115. package/src/functions/includegraphics.js +146 -0
  116. package/src/functions/mclass.js +1 -0
  117. package/src/functions/op.js +21 -32
  118. package/src/functions/operatorname.js +1 -2
  119. package/src/functions/ordgroup.js +4 -0
  120. package/src/functions/phantom.js +7 -3
  121. package/src/functions/rule.js +20 -9
  122. package/src/functions/sizing.js +2 -4
  123. package/src/functions/smash.js +5 -2
  124. package/src/functions/sqrt.js +1 -4
  125. package/src/functions/styling.js +0 -1
  126. package/src/functions/supsub.js +6 -2
  127. package/src/functions/symbolsOp.js +4 -0
  128. package/src/functions/symbolsSpacing.js +29 -6
  129. package/src/functions/tag.js +20 -4
  130. package/src/functions/text.js +6 -4
  131. package/src/functions/verb.js +16 -4
  132. package/src/functions.js +2 -0
  133. package/src/katex.less +35 -12
  134. package/src/macros.js +161 -36
  135. package/src/mathMLTree.js +17 -19
  136. package/src/parseNode.js +27 -1
  137. package/src/stretchy.js +3 -1
  138. package/src/svgGeometry.js +1 -1
  139. package/src/symbols.js +39 -17
  140. package/src/tree.js +0 -4
  141. package/src/types.js +4 -3
  142. package/src/unicodeMake.js +1 -1
  143. package/src/utils.js +1 -62
  144. package/src/wide-character.js +2 -2
package/src/Parser.js CHANGED
@@ -1,21 +1,20 @@
1
1
  // @flow
2
2
  /* eslint no-constant-condition:0 */
3
3
  import functions from "./functions";
4
- import environments from "./environments";
5
- import MacroExpander from "./MacroExpander";
4
+ import MacroExpander, {implicitCommands} from "./MacroExpander";
6
5
  import symbols, {ATOMS, extraLatin} from "./symbols";
7
6
  import {validUnit} from "./units";
8
7
  import {supportedCodepoint} from "./unicodeScripts";
9
8
  import unicodeAccents from "./unicodeAccents";
10
9
  import unicodeSymbols from "./unicodeSymbols";
11
10
  import utils from "./utils";
12
- import {assertNodeType, checkNodeType} from "./parseNode";
11
+ import {checkNodeType} from "./parseNode";
13
12
  import ParseError from "./ParseError";
14
- import {combiningDiacriticalMarksEndRegex, urlFunctionRegex} from "./Lexer";
13
+ import {combiningDiacriticalMarksEndRegex} from "./Lexer";
15
14
  import Settings from "./Settings";
16
15
  import SourceLocation from "./SourceLocation";
17
16
  import {Token} from "./Token";
18
- import type {AnyParseNode, SymbolParseNode} from "./parseNode";
17
+ import type {ParseNode, AnyParseNode, SymbolParseNode} from "./parseNode";
19
18
  import type {Atom, Group} from "./symbols";
20
19
  import type {Mode, ArgType, BreakToken} from "./types";
21
20
  import type {FunctionContext, FunctionSpec} from "./defineFunction";
@@ -48,33 +47,9 @@ import type {EnvSpec} from "./defineEnvironment";
48
47
  * There are also extra `.handle...` functions, which pull out some reused
49
48
  * functionality into self-contained functions.
50
49
  *
51
- * The earlier functions return ParseNodes.
52
- * The later functions (which are called deeper in the parse) sometimes return
53
- * ParsedFuncOrArg, which contain a ParseNode as well as some data about
54
- * whether the parsed object is a function which is missing some arguments, or a
55
- * standalone object which can be used as an argument to another function.
50
+ * The functions return ParseNodes.
56
51
  */
57
52
 
58
- type ParsedFunc = {|
59
- type: "fn",
60
- result: string, // Function name defined via defineFunction (e.g. "\\frac").
61
- token: Token,
62
- |};
63
- type ParsedArg = {|
64
- type: "arg",
65
- result: AnyParseNode,
66
- token: Token,
67
- |};
68
- type ParsedFuncOrArg = ParsedFunc | ParsedArg;
69
-
70
- function newArgument(result: AnyParseNode, token: Token): ParsedArg {
71
- return {type: "arg", result, token};
72
- }
73
-
74
- function newFunction(token: Token): ParsedFunc {
75
- return {type: "fn", result: token.text, token};
76
- }
77
-
78
53
  export default class Parser {
79
54
  mode: Mode;
80
55
  gullet: MacroExpander;
@@ -153,7 +128,13 @@ export default class Parser {
153
128
  return parse;
154
129
  }
155
130
 
156
- static endOfExpression = ["}", "\\end", "\\right", "&"];
131
+ static endOfExpression = ["}", "\\endgroup", "\\end", "\\right", "&"];
132
+
133
+ static endOfGroup = {
134
+ "[": "]",
135
+ "{": "}",
136
+ "\\begingroup": "\\endgroup",
137
+ }
157
138
 
158
139
  /**
159
140
  * Parses an "expression", which is a list of atoms.
@@ -190,12 +171,6 @@ export default class Parser {
190
171
  }
191
172
  const atom = this.parseAtom(breakOnTokenText);
192
173
  if (!atom) {
193
- if (!this.settings.throwOnError && lex.text[0] === "\\") {
194
- const errorNode = this.handleUnsupportedCmd();
195
- body.push(errorNode);
196
- continue;
197
- }
198
-
199
174
  break;
200
175
  }
201
176
  body.push(atom);
@@ -275,33 +250,16 @@ export default class Parser {
275
250
  const symbol = symbolToken.text;
276
251
  this.consume();
277
252
  this.consumeSpaces(); // ignore spaces before sup/subscript argument
278
- const group = this.parseGroup();
253
+ const group = this.parseGroup(name, false, Parser.SUPSUB_GREEDINESS);
279
254
 
280
255
  if (!group) {
281
- if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") {
282
- return this.handleUnsupportedCmd();
283
- } else {
284
- throw new ParseError(
285
- "Expected group after '" + symbol + "'",
286
- symbolToken
287
- );
288
- }
256
+ throw new ParseError(
257
+ "Expected group after '" + symbol + "'",
258
+ symbolToken
259
+ );
289
260
  }
290
261
 
291
- if (group.type === "fn") {
292
- // ^ and _ have a greediness, so handle interactions with functions'
293
- // greediness
294
- const funcGreediness = functions[group.result].greediness;
295
- if (funcGreediness > Parser.SUPSUB_GREEDINESS) {
296
- return this.parseGivenFunction(group);
297
- } else {
298
- throw new ParseError(
299
- "Got function '" + group.result + "' with no arguments " +
300
- "as " + name, symbolToken);
301
- }
302
- } else {
303
- return group.result;
304
- }
262
+ return group;
305
263
  }
306
264
 
307
265
  /**
@@ -339,7 +297,7 @@ export default class Parser {
339
297
  parseAtom(breakOnTokenText?: BreakToken): ?AnyParseNode {
340
298
  // The body of an atom is an implicit group, so that things like
341
299
  // \left(x\right)^2 work correctly.
342
- const base = this.parseImplicitGroup(breakOnTokenText);
300
+ const base = this.parseGroup("atom", false, null, breakOnTokenText);
343
301
 
344
302
  // In text mode, we don't have superscripts or subscripts
345
303
  if (this.mode === "text") {
@@ -429,115 +387,49 @@ export default class Parser {
429
387
  }
430
388
 
431
389
  /**
432
- * Parses an implicit group, which is a group that starts at the end of a
433
- * specified, and ends right before a higher explicit group ends, or at EOL. It
434
- * is used for functions that appear to affect the current style, like \Large or
435
- * \textrm, where instead of keeping a style we just pretend that there is an
436
- * implicit grouping after it until the end of the group. E.g.
437
- * small text {\Large large text} small text again
390
+ * Parses an entire function, including its base and all of its arguments.
438
391
  */
439
- parseImplicitGroup(breakOnTokenText?: BreakToken): ?AnyParseNode {
440
- const start = this.parseSymbol();
441
-
442
- if (start == null) {
443
- // If we didn't get anything we handle, fall back to parseFunction
444
- return this.parseFunction();
445
- } else if (start.type === "arg") {
446
- // Defer to parseGivenFunction if it's not a function we handle
447
- return this.parseGivenFunction(start);
392
+ parseFunction(
393
+ breakOnTokenText?: BreakToken,
394
+ name?: string, // For error reporting.
395
+ greediness?: ?number,
396
+ ): ?AnyParseNode {
397
+ const token = this.nextToken;
398
+ const func = token.text;
399
+ const funcData = functions[func];
400
+ if (!funcData) {
401
+ return null;
448
402
  }
449
-
450
- const func = start.result;
451
-
452
- if (func === "\\begin") {
453
- // begin...end is similar to left...right
454
- const begin =
455
- assertNodeType(this.parseGivenFunction(start), "environment");
456
-
457
- const envName = begin.name;
458
- if (!environments.hasOwnProperty(envName)) {
459
- throw new ParseError(
460
- "No such environment: " + envName, begin.nameGroup);
461
- }
462
- // Build the environment object. Arguments and other information will
463
- // be made available to the begin and end methods using properties.
464
- const env = environments[envName];
465
- const {args, optArgs} =
466
- this.parseArguments("\\begin{" + envName + "}", env);
467
- const context = {
468
- mode: this.mode,
469
- envName: envName,
470
- parser: this,
471
- };
472
- const result = env.handler(context, args, optArgs);
473
- this.expect("\\end", false);
474
- const endNameToken = this.nextToken;
475
- let end = this.parseFunction();
476
- if (!end) {
477
- throw new ParseError("failed to parse function after \\end");
478
- }
479
- end = assertNodeType(end, "environment");
480
- if (end.name !== envName) {
481
- throw new ParseError(
482
- `Mismatch: \\begin{${envName}} matched by \\end{${end.name}}`,
483
- endNameToken);
484
- }
485
- return result;
486
- } else {
487
- // Defer to parseGivenFunction if it's not a function we handle
488
- return this.parseGivenFunction(start, breakOnTokenText);
403
+ if (greediness != null && funcData.greediness <= greediness) {
404
+ throw new ParseError(
405
+ "Got function '" + func + "' with no arguments" +
406
+ (name ? " as " + name : ""), token);
407
+ } else if (this.mode === "text" && !funcData.allowedInText) {
408
+ throw new ParseError(
409
+ "Can't use function '" + func + "' in text mode", token);
410
+ } else if (this.mode === "math" && funcData.allowedInMath === false) {
411
+ throw new ParseError(
412
+ "Can't use function '" + func + "' in math mode", token);
489
413
  }
490
- }
491
414
 
492
- /**
493
- * Parses an entire function, including its base and all of its arguments.
494
- * It also handles the case where the parsed node is not a function.
495
- */
496
- parseFunction(): ?AnyParseNode {
497
- const baseGroup = this.parseGroup();
498
- return baseGroup ? this.parseGivenFunction(baseGroup) : null;
499
- }
500
-
501
- /**
502
- * Same as parseFunction(), except that the base is provided, guaranteeing a
503
- * non-nullable result.
504
- */
505
- parseGivenFunction(
506
- baseGroup: ParsedFuncOrArg,
507
- breakOnTokenText?: BreakToken,
508
- ): AnyParseNode {
509
- if (baseGroup.type === "fn") {
510
- const func = baseGroup.result;
511
- const funcData = functions[func];
512
- if (this.mode === "text" && !funcData.allowedInText) {
513
- throw new ParseError(
514
- "Can't use function '" + func + "' in text mode",
515
- baseGroup.token);
516
- } else if (this.mode === "math" &&
517
- funcData.allowedInMath === false) {
518
- throw new ParseError(
519
- "Can't use function '" + func + "' in math mode",
520
- baseGroup.token);
521
- }
415
+ // hyperref package sets the catcode of % as an active character
416
+ if (funcData.argTypes && funcData.argTypes[0] === "url") {
417
+ this.gullet.lexer.setCatcode("%", 13);
418
+ }
522
419
 
523
- // Consume the command token after possibly switching to the
524
- // mode specified by the function (for instant mode switching),
525
- // and then immediately switch back.
526
- if (funcData.consumeMode) {
527
- const oldMode = this.mode;
528
- this.switchMode(funcData.consumeMode);
529
- this.consume();
530
- this.switchMode(oldMode);
531
- } else {
532
- this.consume();
533
- }
534
- const {args, optArgs} = this.parseArguments(func, funcData);
535
- const token = baseGroup.token;
536
- return this.callFunction(
537
- func, args, optArgs, token, breakOnTokenText);
420
+ // Consume the command token after possibly switching to the
421
+ // mode specified by the function (for instant mode switching),
422
+ // and then immediately switch back.
423
+ if (funcData.consumeMode) {
424
+ const oldMode = this.mode;
425
+ this.switchMode(funcData.consumeMode);
426
+ this.consume();
427
+ this.switchMode(oldMode);
538
428
  } else {
539
- return baseGroup.result;
429
+ this.consume();
540
430
  }
431
+ const {args, optArgs} = this.parseArguments(func, funcData);
432
+ return this.callFunction(func, args, optArgs, token, breakOnTokenText);
541
433
  }
542
434
 
543
435
  /**
@@ -603,37 +495,17 @@ export default class Parser {
603
495
  this.consumeSpaces();
604
496
  }
605
497
  const nextToken = this.nextToken;
606
- let arg = argType ?
607
- this.parseGroupOfType(argType, isOptional) :
608
- this.parseGroup(isOptional);
498
+ const arg = this.parseGroupOfType("argument to '" + func + "'",
499
+ argType, isOptional, baseGreediness);
609
500
  if (!arg) {
610
501
  if (isOptional) {
611
502
  optArgs.push(null);
612
503
  continue;
613
504
  }
614
- if (!this.settings.throwOnError &&
615
- this.nextToken.text[0] === "\\") {
616
- arg = newArgument(this.handleUnsupportedCmd(), nextToken);
617
- } else {
618
- throw new ParseError(
619
- "Expected group after '" + func + "'", nextToken);
620
- }
621
- }
622
- let argNode: AnyParseNode;
623
- if (arg.type === "fn") {
624
- const argGreediness =
625
- functions[arg.result].greediness;
626
- if (argGreediness > baseGreediness) {
627
- argNode = this.parseGivenFunction(arg);
628
- } else {
629
- throw new ParseError(
630
- "Got function '" + arg.result + "' as " +
631
- "argument to '" + func + "'", nextToken);
632
- }
633
- } else {
634
- argNode = arg.result;
505
+ throw new ParseError(
506
+ "Expected group after '" + func + "'", nextToken);
635
507
  }
636
- (isOptional ? optArgs : args).push(argNode);
508
+ (isOptional ? optArgs : args).push(arg);
637
509
  }
638
510
 
639
511
  return {args, optArgs};
@@ -643,29 +515,44 @@ export default class Parser {
643
515
  * Parses a group when the mode is changing.
644
516
  */
645
517
  parseGroupOfType(
646
- type: ArgType, // Used to describe the mode in error messages.
518
+ name: string,
519
+ type: ?ArgType,
647
520
  optional: boolean,
648
- ): ?ParsedFuncOrArg {
649
- // Handle `original` argTypes
650
- if (type === "original") {
651
- type = this.mode;
652
- }
653
-
654
- if (type === "color") {
655
- return this.parseColorGroup(optional);
656
- }
657
- if (type === "size") {
658
- return this.parseSizeGroup(optional);
659
- }
660
- if (type === "url") {
661
- throw new ParseError(
662
- "Internal bug: 'url' arguments should be handled by Lexer",
663
- this.nextToken);
521
+ greediness: ?number,
522
+ ): ?AnyParseNode {
523
+ switch (type) {
524
+ case "color":
525
+ return this.parseColorGroup(optional);
526
+ case "size":
527
+ return this.parseSizeGroup(optional);
528
+ case "url":
529
+ return this.parseUrlGroup(optional);
530
+ case "math":
531
+ case "text":
532
+ return this.parseGroup(name, optional, greediness, undefined, type);
533
+ case "raw": {
534
+ if (optional && this.nextToken.text === "{") {
535
+ return null;
536
+ }
537
+ const token = this.parseStringGroup("raw", optional, true);
538
+ if (token) {
539
+ return {
540
+ type: "raw",
541
+ mode: "text",
542
+ string: token.text,
543
+ };
544
+ } else {
545
+ throw new ParseError("Expected raw group", this.nextToken);
546
+ }
547
+ }
548
+ case "original":
549
+ case null:
550
+ case undefined:
551
+ return this.parseGroup(name, optional, greediness);
552
+ default:
553
+ throw new ParseError(
554
+ "Unknown group type as " + name, this.nextToken);
664
555
  }
665
-
666
- // By the time we get here, type is one of "text" or "math".
667
- // Specify this as mode to parseGroup.
668
- return this.parseGroup(optional, type);
669
556
  }
670
557
 
671
558
  consumeSpaces() {
@@ -681,28 +568,49 @@ export default class Parser {
681
568
  parseStringGroup(
682
569
  modeName: ArgType, // Used to describe the mode in error messages.
683
570
  optional: boolean,
571
+ raw?: boolean,
684
572
  ): ?Token {
685
- if (optional && this.nextToken.text !== "[") {
686
- return null;
573
+ const groupBegin = optional ? "[" : "{";
574
+ const groupEnd = optional ? "]" : "}";
575
+ const nextToken = this.nextToken;
576
+ if (nextToken.text !== groupBegin) {
577
+ if (optional) {
578
+ return null;
579
+ } else if (raw && nextToken.text !== "EOF" &&
580
+ /[^{}[\]]/.test(nextToken.text)) {
581
+ // allow a single character in raw string group
582
+ this.gullet.lexer.setCatcode("%", 14); // reset the catcode of %
583
+ this.consume();
584
+ return nextToken;
585
+ }
687
586
  }
688
587
  const outerMode = this.mode;
689
588
  this.mode = "text";
690
- this.expect(optional ? "[" : "{");
589
+ this.expect(groupBegin);
691
590
  let str = "";
692
591
  const firstToken = this.nextToken;
592
+ let nested = 0; // allow nested braces in raw string group
693
593
  let lastToken = firstToken;
694
- while (this.nextToken.text !== (optional ? "]" : "}")) {
695
- if (this.nextToken.text === "EOF") {
696
- throw new ParseError(
697
- "Unexpected end of input in " + modeName,
698
- firstToken.range(this.nextToken, str));
594
+ while ((raw && nested > 0) || this.nextToken.text !== groupEnd) {
595
+ switch (this.nextToken.text) {
596
+ case "EOF":
597
+ throw new ParseError(
598
+ "Unexpected end of input in " + modeName,
599
+ firstToken.range(lastToken, str));
600
+ case groupBegin:
601
+ nested++;
602
+ break;
603
+ case groupEnd:
604
+ nested--;
605
+ break;
699
606
  }
700
607
  lastToken = this.nextToken;
701
608
  str += lastToken.text;
702
609
  this.consume();
703
610
  }
704
611
  this.mode = outerMode;
705
- this.expect(optional ? "]" : "}");
612
+ this.gullet.lexer.setCatcode("%", 14); // reset the catcode of %
613
+ this.expect(groupEnd);
706
614
  return firstToken.range(lastToken, str);
707
615
  }
708
616
 
@@ -720,8 +628,8 @@ export default class Parser {
720
628
  const firstToken = this.nextToken;
721
629
  let lastToken = firstToken;
722
630
  let str = "";
723
- while (this.nextToken.text !== "EOF"
724
- && regex.test(str + this.nextToken.text)) {
631
+ while (this.nextToken.text !== "EOF" &&
632
+ regex.test(str + this.nextToken.text)) {
725
633
  lastToken = this.nextToken;
726
634
  str += lastToken.text;
727
635
  this.consume();
@@ -738,26 +646,33 @@ export default class Parser {
738
646
  /**
739
647
  * Parses a color description.
740
648
  */
741
- parseColorGroup(optional: boolean): ?ParsedArg {
649
+ parseColorGroup(optional: boolean): ?ParseNode<"color-token"> {
742
650
  const res = this.parseStringGroup("color", optional);
743
651
  if (!res) {
744
652
  return null;
745
653
  }
746
- const match = (/^(#[a-f0-9]{3}|#[a-f0-9]{6}|[a-z]+)$/i).exec(res.text);
654
+ const match = (/^(#[a-f0-9]{3}|#?[a-f0-9]{6}|[a-z]+)$/i).exec(res.text);
747
655
  if (!match) {
748
656
  throw new ParseError("Invalid color: '" + res.text + "'", res);
749
657
  }
750
- return newArgument({
658
+ let color = match[0];
659
+ if (/^[0-9a-f]{6}$/i.test(color)) {
660
+ // We allow a 6-digit HTML color spec without a leading "#".
661
+ // This follows the xcolor package's HTML color model.
662
+ // Predefined color names are all missed by this RegEx pattern.
663
+ color = "#" + color;
664
+ }
665
+ return {
751
666
  type: "color-token",
752
667
  mode: this.mode,
753
- color: match[0],
754
- }, res);
668
+ color,
669
+ };
755
670
  }
756
671
 
757
672
  /**
758
673
  * Parses a size specification, consisting of magnitude and unit.
759
674
  */
760
- parseSizeGroup(optional: boolean): ?ParsedArg {
675
+ parseSizeGroup(optional: boolean): ?ParseNode<"size"> {
761
676
  let res;
762
677
  let isBlank = false;
763
678
  if (!optional && this.nextToken.text !== "{") {
@@ -787,64 +702,120 @@ export default class Parser {
787
702
  if (!validUnit(data)) {
788
703
  throw new ParseError("Invalid unit: '" + data.unit + "'", res);
789
704
  }
790
- return newArgument({
705
+ return {
791
706
  type: "size",
792
707
  mode: this.mode,
793
708
  value: data,
794
709
  isBlank,
795
- }, res);
710
+ };
711
+ }
712
+
713
+ /**
714
+ * Parses a URL, checking escaped characters and allowed protocols.
715
+ */
716
+ parseUrlGroup(optional: boolean): ?ParseNode<"url"> {
717
+ const res = this.parseStringGroup("url", optional, true); // get raw string
718
+ if (!res) {
719
+ return null;
720
+ }
721
+ // hyperref package allows backslashes alone in href, but doesn't
722
+ // generate valid links in such cases; we interpret this as
723
+ // "undefined" behaviour, and keep them as-is. Some browser will
724
+ // replace backslashes with forward slashes.
725
+ const url = res.text.replace(/\\([#$%&~_^{}])/g, '$1');
726
+ let protocol = /^\s*([^\\/#]*?)(?::|&#0*58|&#x0*3a)/i.exec(url);
727
+ protocol = (protocol != null ? protocol[1] : "_relative");
728
+ const allowed = this.settings.allowedProtocols;
729
+ if (!utils.contains(allowed, "*") &&
730
+ !utils.contains(allowed, protocol)) {
731
+ throw new ParseError(
732
+ `Forbidden protocol '${protocol}'`, res);
733
+ }
734
+ return {
735
+ type: "url",
736
+ mode: this.mode,
737
+ url,
738
+ };
796
739
  }
797
740
 
798
741
  /**
799
742
  * If `optional` is false or absent, this parses an ordinary group,
800
743
  * which is either a single nucleus (like "x") or an expression
801
- * in braces (like "{x+y}").
744
+ * in braces (like "{x+y}") or an implicit group, a group that starts
745
+ * at the current position, and ends right before a higher explicit
746
+ * group ends, or at EOF.
802
747
  * If `optional` is true, it parses either a bracket-delimited expression
803
748
  * (like "[x+y]") or returns null to indicate the absence of a
804
749
  * bracket-enclosed group.
805
750
  * If `mode` is present, switches to that mode while parsing the group,
806
751
  * and switches back after.
807
752
  */
808
- parseGroup(optional?: boolean, mode?: Mode): ?ParsedFuncOrArg {
753
+ parseGroup(
754
+ name: string, // For error reporting.
755
+ optional?: boolean,
756
+ greediness?: ?number,
757
+ breakOnTokenText?: BreakToken,
758
+ mode?: Mode,
759
+ ): ?AnyParseNode {
809
760
  const outerMode = this.mode;
810
761
  const firstToken = this.nextToken;
811
- // Try to parse an open brace
812
- if (this.nextToken.text === (optional ? "[" : "{")) {
813
- // Switch to specified mode before we expand symbol after brace
814
- if (mode) {
815
- this.switchMode(mode);
816
- }
762
+ const text = firstToken.text;
763
+ // Switch to specified mode
764
+ if (mode) {
765
+ this.switchMode(mode);
766
+ }
767
+
768
+ let groupEnd;
769
+ let result;
770
+ // Try to parse an open brace or \begingroup
771
+ if (optional ? text === "[" : text === "{" || text === "\\begingroup") {
772
+ groupEnd = Parser.endOfGroup[text];
817
773
  // Start a new group namespace
818
774
  this.gullet.beginGroup();
819
775
  // If we get a brace, parse an expression
820
776
  this.consume();
821
- const expression = this.parseExpression(false, optional ? "]" : "}");
777
+ const expression = this.parseExpression(false, groupEnd);
822
778
  const lastToken = this.nextToken;
823
- // Switch mode back before consuming symbol after close brace
824
- if (mode) {
825
- this.switchMode(outerMode);
826
- }
827
779
  // End group namespace before consuming symbol after close brace
828
780
  this.gullet.endGroup();
829
- // Make sure we get a close brace
830
- this.expect(optional ? "]" : "}");
831
- return newArgument({
781
+ result = {
832
782
  type: "ordgroup",
833
783
  mode: this.mode,
834
784
  loc: SourceLocation.range(firstToken, lastToken),
835
785
  body: expression,
836
- }, firstToken.range(lastToken, firstToken.text));
786
+ // A group formed by \begingroup...\endgroup is a semi-simple group
787
+ // which doesn't affect spacing in math mode, i.e., is transparent.
788
+ // https://tex.stackexchange.com/questions/1930/when-should-one-
789
+ // use-begingroup-instead-of-bgroup
790
+ semisimple: text === "\\begingroup" || undefined,
791
+ };
792
+ } else if (optional) {
793
+ // Return nothing for an optional group
794
+ result = null;
837
795
  } else {
838
- // Otherwise, just return a nucleus, or nothing for an optional group
839
- if (mode) {
840
- this.switchMode(mode);
841
- }
842
- const result = optional ? null : this.parseSymbol();
843
- if (mode) {
844
- this.switchMode(outerMode);
796
+ // If there exists a function with this name, parse the function.
797
+ // Otherwise, just return a nucleus
798
+ result = this.parseFunction(breakOnTokenText, name, greediness) ||
799
+ this.parseSymbol();
800
+ if (result == null && text[0] === "\\" &&
801
+ !implicitCommands.hasOwnProperty(text)) {
802
+ if (this.settings.throwOnError) {
803
+ throw new ParseError(
804
+ "Undefined control sequence: " + text, firstToken);
805
+ }
806
+ result = this.handleUnsupportedCmd();
845
807
  }
846
- return result;
847
808
  }
809
+
810
+ // Switch mode back
811
+ if (mode) {
812
+ this.switchMode(outerMode);
813
+ }
814
+ // Make sure we got a close brace
815
+ if (groupEnd) {
816
+ this.expect(groupEnd);
817
+ }
818
+ return result;
848
819
  }
849
820
 
850
821
  /**
@@ -893,67 +864,14 @@ export default class Parser {
893
864
  }
894
865
 
895
866
  /**
896
- * Parse a single symbol out of the string. Here, we handle both the functions
897
- * we have defined, as well as the single character symbols
867
+ * Parse a single symbol out of the string. Here, we handle single character
868
+ * symbols and special functions like verbatim
898
869
  */
899
- parseSymbol(): ?ParsedFuncOrArg {
870
+ parseSymbol(): ?AnyParseNode {
900
871
  const nucleus = this.nextToken;
901
872
  let text = nucleus.text;
902
873
 
903
- if (functions[text]) {
904
- // If there exists a function with this name, we return the
905
- // function and say that it is a function.
906
- // The token will be consumed later in parseGivenFunction
907
- // (after possibly switching modes).
908
- return newFunction(nucleus);
909
- } else if (/^\\(href|url)[^a-zA-Z]/.test(text)) {
910
- const match = text.match(urlFunctionRegex);
911
- if (!match) {
912
- throw new ParseError(
913
- `Internal error: invalid URL token '${text}'`, nucleus);
914
- }
915
- const funcName = match[1];
916
- // match[2] is the only one that can be an empty string,
917
- // so it must be at the end of the following or chain:
918
- const rawUrl = match[4] || match[3] || match[2];
919
- // hyperref package allows backslashes alone in href, but doesn't
920
- // generate valid links in such cases; we interpret this as
921
- // "undefined" behaviour, and keep them as-is. Some browser will
922
- // replace backslashes with forward slashes.
923
- const url = rawUrl.replace(/\\([#$%&~_^{}])/g, '$1');
924
- let protocol = /^\s*([^\\/#]*?)(?::|&#0*58|&#x0*3a)/i.exec(url);
925
- protocol = (protocol != null ? protocol[1] : "_relative");
926
- const allowed = this.settings.allowedProtocols;
927
- if (!utils.contains(allowed, "*") &&
928
- !utils.contains(allowed, protocol)) {
929
- throw new ParseError(
930
- `Forbidden protocol '${protocol}' in ${funcName}`, nucleus);
931
- }
932
- const urlArg = {
933
- type: "url",
934
- mode: this.mode,
935
- url,
936
- };
937
- this.consume();
938
- if (funcName === "\\href") { // two arguments
939
- this.consumeSpaces(); // ignore spaces between arguments
940
- let description = this.parseGroupOfType("original", false);
941
- if (description == null) {
942
- throw new ParseError(`${funcName} missing second argument`,
943
- nucleus);
944
- }
945
- if (description.type === "fn") {
946
- description = this.parseGivenFunction(description);
947
- } else { // arg.type === "arg"
948
- description = description.result;
949
- }
950
- return newArgument(this.callFunction(
951
- funcName, [urlArg, description], []), nucleus);
952
- } else { // one argument (\url)
953
- return newArgument(this.callFunction(
954
- funcName, [urlArg], []), nucleus);
955
- }
956
- } else if (/^\\verb[^a-zA-Z]/.test(text)) {
874
+ if (/^\\verb[^a-zA-Z]/.test(text)) {
957
875
  this.consume();
958
876
  let arg = text.slice(5);
959
877
  const star = (arg.charAt(0) === "*");
@@ -967,12 +885,12 @@ export default class Parser {
967
885
  please report what input caused this bug`);
968
886
  }
969
887
  arg = arg.slice(1, -1); // remove first and last char
970
- return newArgument({
888
+ return {
971
889
  type: "verb",
972
890
  mode: "text",
973
891
  body: arg,
974
892
  star,
975
- }, nucleus);
893
+ };
976
894
  }
977
895
  // At this point, we should have a symbol, possibly with accents.
978
896
  // First expand any accented base symbol according to unicodeSymbols.
@@ -1074,6 +992,6 @@ export default class Parser {
1074
992
  };
1075
993
  }
1076
994
  }
1077
- return newArgument(symbol, nucleus);
995
+ return symbol;
1078
996
  }
1079
997
  }