tex2typst 0.3.22 → 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/generic.ts CHANGED
@@ -2,25 +2,48 @@ interface IEquatable {
2
2
  eq(other: IEquatable): boolean;
3
3
  }
4
4
 
5
/**
 * Element-wise equality of two arrays, delegating to each element's `eq`.
 *
 * @param a - First array; `eq` is called on its elements.
 * @param b - Second array; `b[i]` is the argument passed to `a[i].eq`.
 * @returns `true` when both arrays have the same length and every pair of
 *          elements at the same index compares equal, `false` otherwise.
 */
export function array_equal<T extends IEquatable>(a: T[], b: T[]): boolean {
    if (a.length !== b.length) {
        return false;
    }
    // The arrays are equal exactly when no index holds a mismatching pair.
    const hasMismatch = a.some((left, i) => !left.eq(b[i]));
    return !hasMismatch;
}
5
19
 
6
20
/**
 * Find the first element equal to `item` (according to `eq`), searching
 * from index `start` towards the end of the array.
 *
 * @param array - Array to search.
 * @param item - Value passed to each candidate's `eq` method.
 * @param start - Index at which the search begins (default 0). Negative
 *                values are treated as 0 and fractional values are truncated,
 *                so the result is always a valid index of `array` or -1.
 * @returns Index in `array` of the first match at or after `start`, or -1
 *          when there is none.
 */
export function array_find<T extends IEquatable>(array: T[], item: T, start: number = 0): number {
    // Normalise `start` to a non-negative integer. Slicing with a negative
    // start would take elements from the tail and make the offset arithmetic
    // return a wrong index (even -1 for an element that is present).
    const begin = Math.max(0, Math.trunc(start) || 0);
    // Scan in place instead of copying the tail of the array.
    for (let i = begin; i < array.length; i++) {
        if (array[i].eq(item)) {
            return i;
        }
    }
    return -1;
}
14
32
 
15
33
/**
 * Report whether `array` contains an element equal to `item`.
 *
 * @param array - Array to search.
 * @param item - Value passed to each element's `eq` method.
 * @returns `true` as soon as one element's `eq(item)` is true, `false` when
 *          none is (including for an empty array).
 */
export function array_includes<T extends IEquatable>(array: T[], item: T): boolean {
    const matchesItem = (candidate: T): boolean => candidate.eq(item);
    return array.some(matchesItem);
}
23
44
 
45
+ // e.g. input array=['a', 'b', '+', 'c', '+', 'd', 'e'], sep = '+'
46
+ // return [['a', 'b'], ['c'], ['d', 'e']]
24
47
  export function array_split<T extends IEquatable>(array: T[], sep: T): T[][] {
25
48
  const res: T[][] = [];
26
49
  let current_slice: T[] = [];
@@ -38,7 +61,8 @@ export function array_split<T extends IEquatable>(array: T[], sep: T): T[][] {
38
61
 
39
62
  // e.g. input array=['a', 'b', 'c'], sep = '+'
40
63
  // return ['a','+', 'b', '+','c']
41
- export function array_join<T>(array: T[], sep: T): T[] {
64
+ export function array_intersperse<T>(array: T[], sep: T): T[] {
65
+ /*
42
66
  const res: T[] = [];
43
67
  for (let i = 0; i < array.length; i++) {
44
68
  res.push(array[i]);
@@ -47,4 +71,6 @@ export function array_join<T>(array: T[], sep: T): T[] {
47
71
  }
48
72
  }
49
73
  return res;
74
+ */
75
+ return array.flatMap((x, i) => i !== array.length - 1? [x, sep]: [x]);
50
76
  }
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { parseTex } from "./tex-parser";
2
- import type { Tex2TypstOptions } from "./types";
2
+ import type { Tex2TypstOptions } from "./tex-types";
3
3
  import { TypstWriter, type TypstWriterOptions } from "./typst-writer";
4
4
  import { convert_tex_node_to_typst, convert_typst_node_to_tex } from "./convert";
5
5
  import { symbolMap } from "./map";
package/src/jslex.ts CHANGED
@@ -15,9 +15,8 @@ interface IRule<T> {
15
15
 
16
16
/** One successful match of a lexer rule, as recorded by the scanner. */
interface IMatch<T> {
    /** Ordinal stored with the match; `matchcompare` compares it (ascending) when two matches have the same length. */
    index: number;
    /** Rule that produced the match; its `action` is invoked when the match is processed. */
    rule: IRule<T>;
    /** Raw regex match result; element 0 is the full matched text, later elements are capture groups. */
    reMatchArray: RegExpMatchArray;
}
22
21
 
23
22
 
@@ -31,8 +30,10 @@ const EOF = {};
31
30
  * @return {int} Difference between the matches.
32
31
  */
33
32
function matchcompare<T>(m1: IMatch<T>, m2: IMatch<T>): number {
    // Primary key: length of the matched text (element 0 of the regex match).
    // The sign is arranged so that the longer match compares as smaller.
    const lengthDelta = m2.reMatchArray[0].length - m1.reMatchArray[0].length;
    // Secondary key for equally long matches: the smaller `index` compares as smaller.
    return lengthDelta !== 0 ? lengthDelta : m1.index - m2.index;
}
@@ -59,6 +60,7 @@ export class Scanner<T> {
59
60
 
60
61
  private _text: string | null = null;
61
62
  private _leng: number | null = null;
63
+ private _reMatchArray: RegExpMatchArray | null = null;
62
64
 
63
65
  constructor(input: string, lexer: JSLex<T>) {
64
66
  this._input = input;
@@ -77,6 +79,10 @@ export class Scanner<T> {
77
79
  return this._leng;
78
80
  }
79
81
 
82
+ public reMatchArray(): RegExpMatchArray | null {
83
+ return this._reMatchArray;
84
+ }
85
+
80
86
  /**
81
87
  * Position in stream, line number and column number of match.
82
88
  */
@@ -180,9 +186,8 @@ export class Scanner<T> {
180
186
  if (mt !== null && mt[0].length > 0) {
181
187
  matches.push({
182
188
  index: i,
183
- text: mt[0],
184
- len: mt[0].length,
185
- rule: rule
189
+ rule: rule,
190
+ reMatchArray: mt,
186
191
  });
187
192
  }
188
193
  }
@@ -193,22 +198,24 @@ export class Scanner<T> {
193
198
  this._go = true;
194
199
 
195
200
  let result: T | T[];
196
- let m: IMatch<T>;
201
+ let matched_text: string;
197
202
  for (let j = 0, n = matches.length; j < n && this._go; j++) {
198
203
  this._offset = 0;
199
204
  this._less = null;
200
205
  this._go = false;
201
206
  this._newstate = null;
202
- m = matches[j];
203
- this._text = m.text;
204
- this._leng = m.len;
207
+ const m = matches[j];
208
+ matched_text = m.reMatchArray[0];
209
+ this._text = matched_text;
210
+ this._leng = matched_text.length;
211
+ this._reMatchArray = m.reMatchArray;
205
212
  result = m.rule.action(this);
206
213
  if (this._newstate && this._newstate != this._state) {
207
214
  this._state = this._newstate;
208
215
  break;
209
216
  }
210
217
  }
211
- const text = this._less === null ? m!.text : m!.text.substring(0, this._less);
218
+ const text = this._less === null ? matched_text! : matched_text!.substring(0, this._less);
212
219
  const len = text.length;
213
220
  this._pos += len + this._offset;
214
221
 
package/src/map.ts CHANGED
@@ -1,10 +1,11 @@
1
1
  const symbolMap = new Map<string, string>([
2
2
  ['displaystyle', 'display'],
3
+ ['hspace', '#h'],
3
4
 
4
5
  ['|', 'bar.v.double'],
5
- ['!', '#h(-math.thin.amount)'],
6
6
  [',', 'thin'],
7
7
  [':', 'med'],
8
+ [' ', 'med'],
8
9
  [';', 'thick'],
9
10
 
10
11
  /* textual operators */
@@ -21,6 +22,10 @@ const symbolMap = new Map<string, string>([
21
22
  ['Xi', 'Xi'],
22
23
  ['Upsilon', 'Upsilon'],
23
24
  ['lim', 'lim'],
25
+ ['binom', 'binom'],
26
+ ['tilde', 'tilde'],
27
+ ['hat', 'hat'],
28
+ ['sqrt', 'sqrt'],
24
29
 
25
30
  ['nonumber', ''],
26
31
  ['vec', 'arrow'],
@@ -1066,7 +1071,6 @@ const reverseSymbolMap = new Map<string, string>();
1066
1071
  for(const [key, value] of Array.from(symbolMap.entries()).reverse()) {
1067
1072
  reverseSymbolMap.set(value, key);
1068
1073
  }
1069
- reverseSymbolMap.set('dif', 'mathrm{d}');
1070
1074
  reverseSymbolMap.set('oo', 'infty');
1071
1075
 
1072
1076
  // force override some one-to-multiple mappings
@@ -1078,8 +1082,6 @@ const typst_to_tex_map = new Map<string, string>([
1078
1082
  ['upright', 'mathrm'],
1079
1083
  ['bold', 'boldsymbol'],
1080
1084
  ['infinity', 'infty'],
1081
-
1082
- ['hyph.minus', '\\text{-}'],
1083
1085
  ]);
1084
1086
 
1085
1087
  for(const [key, value] of typst_to_tex_map) {
package/src/tex-parser.ts CHANGED
@@ -1,5 +1,4 @@
1
- import { symbolMap } from "./map";
2
- import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
1
+ import { TexBeginEnd, TexFuncCall, TexLeftRight, TexNode, TexGroup, TexSupSub, TexSupsubData, TexText, TexToken, TexTokenType } from "./tex-types";
3
2
  import { assert } from "./util";
4
3
  import { array_find } from "./generic";
5
4
  import { TEX_BINARY_COMMANDS, TEX_UNARY_COMMANDS, tokenize_tex } from "./tex-tokenizer";
@@ -11,7 +10,7 @@ const IGNORED_COMMANDS = [
11
10
  'Biggl', 'Biggr',
12
11
  ];
13
12
 
14
- const EMPTY_NODE: TexNode = new TexNode('empty', '');
13
+ const EMPTY_NODE: TexNode = TexToken.EMPTY.toNode();
15
14
 
16
15
  function get_command_param_num(command: string): number {
17
16
  if (TEX_UNARY_COMMANDS.includes(command)) {
@@ -120,7 +119,8 @@ export class LatexParser {
120
119
  }
121
120
 
122
121
  parse(tokens: TexToken[]): TexNode {
123
- const idx = array_find(tokens, new TexToken(TexTokenType.COMMAND, '\\displaystyle'));
122
+ const token_displaystyle = new TexToken(TexTokenType.COMMAND, '\\displaystyle');
123
+ const idx = array_find(tokens, token_displaystyle);
124
124
  if (idx === -1) {
125
125
  // no \displaystyle, normal execution path
126
126
  const [tree, _] = this.parseGroup(tokens, 0, tokens.length);
@@ -128,13 +128,13 @@ export class LatexParser {
128
128
  } else if (idx === 0) {
129
129
  // \displaystyle at the beginning. Wrap the whole thing in \displaystyle
130
130
  const [tree, _] = this.parseGroup(tokens, 1, tokens.length);
131
- return new TexNode('unaryFunc', '\\displaystyle', [tree]);
131
+ return new TexFuncCall(token_displaystyle, [tree]);
132
132
  } else {
133
133
  // \displaystyle somewhere in the middle. Split the expression to two parts
134
134
  const [tree1, _1] = this.parseGroup(tokens, 0, idx);
135
135
  const [tree2, _2] = this.parseGroup(tokens, idx + 1, tokens.length);
136
- const display = new TexNode('unaryFunc', '\\displaystyle', [tree2]);
137
- return new TexNode('ordgroup', '', [tree1, display]);
136
+ const display = new TexFuncCall(token_displaystyle, [tree2]);
137
+ return new TexGroup([tree1, display]);
138
138
  }
139
139
  }
140
140
 
@@ -144,15 +144,15 @@ export class LatexParser {
144
144
  while (pos < end) {
145
145
  const [res, newPos] = this.parseNextExpr(tokens, pos);
146
146
  pos = newPos;
147
- if(res.type === 'whitespace') {
148
- if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
147
+ if(res.head.type === TexTokenType.SPACE || res.head.type === TexTokenType.NEWLINE) {
148
+ if (!this.space_sensitive && res.head.value.replace(/ /g, '').length === 0) {
149
149
  continue;
150
150
  }
151
- if (!this.newline_sensitive && res.content === '\n') {
151
+ if (!this.newline_sensitive && res.head.value === '\n') {
152
152
  continue;
153
153
  }
154
154
  }
155
- if (res.type === 'control' && res.content === '&') {
155
+ if (res.head.eq(new TexToken(TexTokenType.CONTROL, '&'))) {
156
156
  throw new LatexParserError('Unexpected & outside of an alignment');
157
157
  }
158
158
  results.push(res);
@@ -162,7 +162,7 @@ export class LatexParser {
162
162
  if (results.length === 1) {
163
163
  node = results[0];
164
164
  } else {
165
- node = new TexNode('ordgroup', '', results);
165
+ node = new TexGroup(results);
166
166
  }
167
167
  return [node, end + 1];
168
168
  }
@@ -199,14 +199,14 @@ export class LatexParser {
199
199
  }
200
200
 
201
201
  if (sub !== null || sup !== null || num_prime > 0) {
202
- const res: TexSupsubData = { base };
202
+ const res: TexSupsubData = { base, sup: null, sub: null };
203
203
  if (sub) {
204
204
  res.sub = sub;
205
205
  }
206
206
  if (num_prime > 0) {
207
- res.sup = new TexNode('ordgroup', '', []);
207
+ res.sup = new TexGroup([]);
208
208
  for (let i = 0; i < num_prime; i++) {
209
- res.sup.args!.push(new TexNode('element', "'"));
209
+ res.sup.args!.push(new TexToken(TexTokenType.ELEMENT, "'").toNode());
210
210
  }
211
211
  if (sup) {
212
212
  res.sup.args!.push(sup);
@@ -217,7 +217,7 @@ export class LatexParser {
217
217
  } else if (sup) {
218
218
  res.sup = sup;
219
219
  }
220
- return [new TexNode('supsub', '', [], res), pos];
220
+ return [new TexSupSub(res), pos];
221
221
  } else {
222
222
  return [base, pos];
223
223
  }
@@ -230,14 +230,11 @@ export class LatexParser {
230
230
  const firstToken = tokens[start];
231
231
  switch (firstToken.type) {
232
232
  case TexTokenType.ELEMENT:
233
- return [new TexNode('element', firstToken.value), start + 1];
234
- case TexTokenType.TEXT:
235
- return [new TexNode('text', firstToken.value), start + 1];
233
+ case TexTokenType.LITERAL:
236
234
  case TexTokenType.COMMENT:
237
- return [new TexNode('comment', firstToken.value), start + 1];
238
235
  case TexTokenType.SPACE:
239
236
  case TexTokenType.NEWLINE:
240
- return [new TexNode('whitespace', firstToken.value), start + 1];
237
+ return [firstToken.toNode(), start + 1];
241
238
  case TexTokenType.COMMAND:
242
239
  const commandName = firstToken.value.slice(1);
243
240
  if (IGNORED_COMMANDS.includes(commandName)) {
@@ -266,14 +263,14 @@ export class LatexParser {
266
263
  case '\\,':
267
264
  case '\\:':
268
265
  case '\\;':
269
- return [new TexNode('control', controlChar), start + 1];
266
+ return [firstToken.toNode(), start + 1];
270
267
  case '\\ ':
271
- return [new TexNode('control', '\\:'), start + 1];
268
+ return [firstToken.toNode(), start + 1];
272
269
  case '_':
273
270
  case '^':
274
271
  return [ EMPTY_NODE, start];
275
272
  case '&':
276
- return [new TexNode('control', '&'), start + 1];
273
+ return [firstToken.toNode(), start + 1];
277
274
  default:
278
275
  throw new LatexParserError('Unknown control sequence');
279
276
  }
@@ -285,7 +282,8 @@ export class LatexParser {
285
282
  parseCommandExpr(tokens: TexToken[], start: number): ParseResult {
286
283
  assert(tokens[start].type === TexTokenType.COMMAND);
287
284
 
288
- const command = tokens[start].value; // command name starts with a \
285
+ const command_token = tokens[start];
286
+ const command = command_token.value; // command name starts with a \
289
287
 
290
288
  let pos = start + 1;
291
289
 
@@ -297,10 +295,7 @@ export class LatexParser {
297
295
  const paramNum = get_command_param_num(command.slice(1));
298
296
  switch (paramNum) {
299
297
  case 0:
300
- if (!symbolMap.has(command.slice(1))) {
301
- return [new TexNode('unknownMacro', command), pos];
302
- }
303
- return [new TexNode('symbol', command), pos];
298
+ return [command_token.toNode(), pos];
304
299
  case 1: {
305
300
  // TODO: JavaScript gives undefined instead of throwing an error when accessing an index out of bounds,
306
301
  // so index checking like this should be everywhere. This is rough.
@@ -315,24 +310,24 @@ export class LatexParser {
315
310
  }
316
311
  const [exponent, _] = this.parseGroup(tokens, posLeftSquareBracket + 1, posRightSquareBracket);
317
312
  const [arg1, newPos] = this.parseNextArg(tokens, posRightSquareBracket + 1);
318
- return [new TexNode('unaryFunc', command, [arg1], exponent), newPos];
313
+ return [new TexFuncCall(command_token, [arg1], exponent), newPos];
319
314
  } else if (command === '\\text') {
320
315
  if (pos + 2 >= tokens.length) {
321
316
  throw new LatexParserError('Expecting content for \\text command');
322
317
  }
323
318
  assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
324
- assert(tokens[pos + 1].type === TexTokenType.TEXT);
319
+ assert(tokens[pos + 1].type === TexTokenType.LITERAL);
325
320
  assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
326
- const text = tokens[pos + 1].value;
327
- return [new TexNode('text', text), pos + 3];
321
+ const literal = tokens[pos + 1];
322
+ return [new TexText(literal), pos + 3];
328
323
  }
329
324
  let [arg1, newPos] = this.parseNextArg(tokens, pos);
330
- return [new TexNode('unaryFunc', command, [arg1]), newPos];
325
+ return [new TexFuncCall(command_token, [arg1]), newPos];
331
326
  }
332
327
  case 2: {
333
328
  const [arg1, pos1] = this.parseNextArg(tokens, pos);
334
329
  const [arg2, pos2] = this.parseNextArg(tokens, pos1);
335
- return [new TexNode('binaryFunc', command, [arg1, arg2]), pos2];
330
+ return [new TexFuncCall(command_token, [arg1, arg2]), pos2];
336
331
  }
337
332
  default:
338
333
  throw new Error( 'Invalid number of parameters');
@@ -342,9 +337,9 @@ export class LatexParser {
342
337
  /*
343
338
  Extract a non-space argument from the token stream.
344
339
  So that `\frac{12} 3` is parsed as
345
- TexCommand{ content: '\frac', args: ['12', '3'] }
340
+ TexFuncCall{ head: '\frac', args: [ELEMENT_12, ELEMENT_3] }
346
341
  rather than
347
- TexCommand{ content: '\frac', args: ['12', ' '] }, TexElement{ content: '3' }
342
+ TexFuncCall{ head: '\frac', args: [ELEMENT_12, SPACE] }, ELEMENT_3
348
343
  */
349
344
  parseNextArg(tokens: TexToken[], start: number): ParseResult {
350
345
  let pos = start;
@@ -352,7 +347,7 @@ export class LatexParser {
352
347
  while (pos < tokens.length) {
353
348
  let node: TexNode;
354
349
  [node, pos] = this.parseNextExprWithoutSupSub(tokens, pos);
355
- if (node.type !== 'whitespace') {
350
+ if (!(node.head.type === TexTokenType.SPACE || node.head.type === TexTokenType.NEWLINE)) {
356
351
  arg = node;
357
352
  break;
358
353
  }
@@ -398,12 +393,10 @@ export class LatexParser {
398
393
  pos++;
399
394
 
400
395
  const [body, _] = this.parseGroup(tokens, exprInsideStart, exprInsideEnd);
401
- const args: TexNode[] = [
402
- new TexNode('element', leftDelimiter.value),
403
- body,
404
- new TexNode('element', rightDelimiter.value)
405
- ]
406
- const res = new TexNode('leftright', '', args);
396
+ const args = [ body ];
397
+ const left = leftDelimiter.value === '.'? null: leftDelimiter;
398
+ const right = rightDelimiter.value === '.'? null: rightDelimiter;
399
+ const res = new TexLeftRight(args, {left: left, right: right});
407
400
  return [res, pos];
408
401
  }
409
402
 
@@ -412,24 +405,23 @@ export class LatexParser {
412
405
 
413
406
  let pos = start + 1;
414
407
  assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
415
- assert(tokens[pos + 1].type === TexTokenType.TEXT);
408
+ assert(tokens[pos + 1].type === TexTokenType.LITERAL);
416
409
  assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
417
410
  const envName = tokens[pos + 1].value;
418
411
  pos += 3;
419
412
 
420
- const args: TexNode[] = [];
421
- while (pos < tokens.length) {
422
- const whitespaceCount = eat_whitespaces(tokens, pos).length;
423
- pos += whitespaceCount;
424
413
 
425
- if (pos >= tokens.length || !tokens[pos].eq(LEFT_CURLY_BRACKET)) {
426
- break;
427
- }
414
+ const args: TexNode[] = [];
415
+ if(['array', 'subarray'].includes(envName)) {
416
+ pos += eat_whitespaces(tokens, pos).length;
428
417
  const [arg, newPos] = this.parseNextArg(tokens, pos);
429
418
  args.push(arg);
430
419
  pos = newPos;
431
420
  }
432
421
 
422
+ pos += eat_whitespaces(tokens, pos).length; // ignore whitespaces and '\n' after \begin{envName}
423
+
424
+
433
425
  const exprInsideStart = pos;
434
426
 
435
427
  const endIdx = find_closing_end_command(tokens, start);
@@ -440,7 +432,7 @@ export class LatexParser {
440
432
  pos = endIdx + 1;
441
433
 
442
434
  assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
443
- assert(tokens[pos + 1].type === TexTokenType.TEXT);
435
+ assert(tokens[pos + 1].type === TexTokenType.LITERAL);
444
436
  assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
445
437
  if (tokens[pos + 1].value !== envName) {
446
438
  throw new LatexParserError('Mismatched \\begin and \\end environments');
@@ -453,7 +445,7 @@ export class LatexParser {
453
445
  exprInside.pop();
454
446
  }
455
447
  const body = this.parseAligned(exprInside);
456
- const res = new TexNode('beginend', envName, args, body);
448
+ const res = new TexBeginEnd(new TexToken(TexTokenType.LITERAL, envName), args, body);
457
449
  return [res, pos];
458
450
  }
459
451
 
@@ -462,29 +454,29 @@ export class LatexParser {
462
454
  const allRows: TexNode[][] = [];
463
455
  let row: TexNode[] = [];
464
456
  allRows.push(row);
465
- let group = new TexNode('ordgroup', '', []);
457
+ let group = new TexGroup([]);
466
458
  row.push(group);
467
459
 
468
460
  while (pos < tokens.length) {
469
461
  const [res, newPos] = this.parseNextExpr(tokens, pos);
470
462
  pos = newPos;
471
463
 
472
- if (res.type === 'whitespace') {
473
- if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
464
+ if (res.head.type === TexTokenType.SPACE || res.head.type === TexTokenType.NEWLINE) {
465
+ if (!this.space_sensitive && res.head.value.replace(/ /g, '').length === 0) {
474
466
  continue;
475
467
  }
476
- if (!this.newline_sensitive && res.content === '\n') {
468
+ if (!this.newline_sensitive && res.head.value === '\n') {
477
469
  continue;
478
470
  }
479
471
  }
480
472
 
481
- if (res.type === 'control' && res.content === '\\\\') {
473
+ if (res.head.eq(new TexToken(TexTokenType.CONTROL, '\\\\'))) {
482
474
  row = [];
483
- group = new TexNode('ordgroup', '', []);
475
+ group = new TexGroup([]);
484
476
  row.push(group);
485
477
  allRows.push(row);
486
- } else if (res.type === 'control' && res.content === '&') {
487
- group = new TexNode('ordgroup', '', []);
478
+ } else if (res.head.eq(new TexToken(TexTokenType.CONTROL, '&'))) {
479
+ group = new TexGroup([]);
488
480
  row.push(group);
489
481
  } else {
490
482
  group.args!.push(res);
@@ -1,5 +1,4 @@
1
- import { TexToken, TexTokenType } from "./types";
2
- import { assert } from "./util";
1
+ import { TexToken, TexTokenType } from "./tex-types";
3
2
  import { JSLex, Scanner } from "./jslex";
4
3
 
5
4
  export const TEX_UNARY_COMMANDS = [
@@ -59,15 +58,28 @@ function unescape(str: string): string {
59
58
  }
60
59
 
61
60
  const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
61
// Environments opened with a second brace argument, e.g. `\begin{array}{cc}`
// or `\begin{subarray}{c}`. The environment name (capture group 1) and the
// content of the second brace pair (capture group 2) are each emitted as a
// single LITERAL token wrapped in `{` / `}` CONTROL tokens.
// The alternation must spell `subarray` correctly: the parser checks for the
// environment names 'array' and 'subarray', and a misspelt alternative here
// would never match `\begin{subarray}{...}`.
[
    String.raw`\\begin{(array|subarray)}{(.+?)}`, (s) => {
        const match = s.reMatchArray()!;
        return [
            new TexToken(TexTokenType.COMMAND, '\\begin'),
            new TexToken(TexTokenType.CONTROL, '{'),
            new TexToken(TexTokenType.LITERAL, match[1]),
            new TexToken(TexTokenType.CONTROL, '}'),
            new TexToken(TexTokenType.CONTROL, '{'),
            new TexToken(TexTokenType.LITERAL, match[2]),
            new TexToken(TexTokenType.CONTROL, '}'),
        ]
    }
],
76
+ [
77
+ String.raw`\\(text|operatorname|begin|end|hspace|array){(.+?)}`, (s) => {
78
+ const match = s.reMatchArray()!;
79
+ return [
80
+ new TexToken(TexTokenType.COMMAND, '\\' + match[1]),
69
81
  new TexToken(TexTokenType.CONTROL, '{'),
70
- new TexToken(TexTokenType.TEXT, unescape(text_inside)),
82
+ new TexToken(TexTokenType.LITERAL, unescape(match[2])),
71
83
  new TexToken(TexTokenType.CONTROL, '}')
72
84
  ]
73
85
  }
@@ -80,14 +92,11 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
80
92
  [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
81
93
  // e.g. match `\frac13`, `\frac1 b`, `\frac a b`
82
94
  [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
83
- const text = s.text()!;
84
- const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
85
- const match = text.match(regex);
86
- assert(match !== null);
95
+ const match = s.reMatchArray()!;
87
96
  const command = match![1];
88
97
  if (TEX_BINARY_COMMANDS.includes(command.substring(1))) {
89
- const arg1 = match![2].trimStart();
90
- const arg2 = match![3];
98
+ const arg1 = match[2].trimStart();
99
+ const arg2 = match[3];
91
100
  return [
92
101
  new TexToken(TexTokenType.COMMAND, command),
93
102
  new TexToken(TexTokenType.ELEMENT, arg1),
@@ -100,13 +109,10 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
100
109
  }],
101
110
  // e.g. match `\sqrt3`, `\sqrt a`
102
111
  [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
103
- const text = s.text()!;
104
- const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
105
- const match = text.match(regex);
106
- assert(match !== null);
107
- const command = match![1];
112
+ const match = s.reMatchArray()!;
113
+ const command = match[1];
108
114
  if (TEX_UNARY_COMMANDS.includes(command.substring(1))) {
109
- const arg1 = match![2].trimStart();
115
+ const arg1 = match[2].trimStart();
110
116
  return [
111
117
  new TexToken(TexTokenType.COMMAND, command),
112
118
  new TexToken(TexTokenType.ELEMENT, arg1),
@@ -116,10 +122,7 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
116
122
  return [];
117
123
  }
118
124
  }],
119
- [String.raw`\\[a-zA-Z]+`, (s) => {
120
- const command = s.text()!;
121
- return [ new TexToken(TexTokenType.COMMAND, command), ];
122
- }],
125
+ [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text()!)],
123
126
  // Numbers like "123", "3.14"
124
127
  [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
125
128
  [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],