tex2typst 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/tex-parser.ts CHANGED
@@ -48,7 +48,12 @@ const BINARY_COMMANDS = [
48
48
  'overset',
49
49
  ]
50
50
 
51
-
51
+ const IGNORED_COMMANDS = [
52
+ 'bigl', 'bigr',
53
+ 'biggl', 'biggr',
54
+ 'Bigl', 'Bigr',
55
+ 'Biggl', 'Biggr',
56
+ ];
52
57
 
53
58
  const EMPTY_NODE: TexNode = new TexNode('empty', '');
54
59
 
@@ -164,7 +169,36 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
164
169
  [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
165
170
  [String.raw`\\[\\,:;]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
166
171
  [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
167
- [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text()!)],
172
+ [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
173
+ const text = s.text()!;
174
+ const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
175
+ const match = text.match(regex);
176
+ assert(match !== null);
177
+ const command = match![1];
178
+ const arg1 = match![2].trimStart();
179
+ const arg2 = match![3];
180
+ return [
181
+ new TexToken(TexTokenType.COMMAND, command),
182
+ new TexToken(TexTokenType.ELEMENT, arg1),
183
+ new TexToken(TexTokenType.ELEMENT, arg2),
184
+ ];
185
+ }],
186
+ [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
187
+ const text = s.text()!;
188
+ const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
189
+ const match = text.match(regex);
190
+ assert(match !== null);
191
+ const command = match![1];
192
+ const arg1 = match![2].trimStart();
193
+ return [
194
+ new TexToken(TexTokenType.COMMAND, command),
195
+ new TexToken(TexTokenType.ELEMENT, arg1),
196
+ ];
197
+ }],
198
+ [String.raw`\\[a-zA-Z]+`, (s) => {
199
+ const command = s.text()!;
200
+ return [ new TexToken(TexTokenType.COMMAND, command), ];
201
+ }],
168
202
  [String.raw`[0-9]+`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
169
203
  [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
170
204
  [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
@@ -296,6 +330,9 @@ export class LatexParser {
296
330
  }
297
331
 
298
332
  parseNextExprWithoutSupSub(tokens: TexToken[], start: number): ParseResult {
333
+ if (start >= tokens.length) {
334
+ return [EMPTY_NODE, start];
335
+ }
299
336
  const firstToken = tokens[start];
300
337
  switch (firstToken.type) {
301
338
  case TexTokenType.ELEMENT:
@@ -308,6 +345,10 @@ export class LatexParser {
308
345
  case TexTokenType.NEWLINE:
309
346
  return [new TexNode('whitespace', firstToken.value), start + 1];
310
347
  case TexTokenType.COMMAND:
348
+ const commandName = firstToken.value.slice(1);
349
+ if (IGNORED_COMMANDS.includes(commandName)) {
350
+ return this.parseNextExprWithoutSupSub(tokens, start + 1);
351
+ }
311
352
  if (firstToken.eq(BEGIN_COMMAND)) {
312
353
  return this.parseBeginEndExpr(tokens, start);
313
354
  } else if (firstToken.eq(LEFT_COMMAND)) {
@@ -376,7 +417,7 @@ export class LatexParser {
376
417
  throw new LatexParserError('No matching right square bracket for [');
377
418
  }
378
419
  const [exponent, _] = this.parseGroup(tokens, posLeftSquareBracket + 1, posRightSquareBracket);
379
- const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
420
+ const [arg1, newPos] = this.parseNextArg(tokens, posRightSquareBracket + 1);
380
421
  return [new TexNode('unaryFunc', command, [arg1], exponent), newPos];
381
422
  } else if (command === '\\text') {
382
423
  if (pos + 2 >= tokens.length) {
@@ -388,12 +429,12 @@ export class LatexParser {
388
429
  const text = tokens[pos + 1].value;
389
430
  return [new TexNode('text', text), pos + 3];
390
431
  }
391
- let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
432
+ let [arg1, newPos] = this.parseNextArg(tokens, pos);
392
433
  return [new TexNode('unaryFunc', command, [arg1]), newPos];
393
434
  }
394
435
  case 2: {
395
- const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
396
- const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
436
+ const [arg1, pos1] = this.parseNextArg(tokens, pos);
437
+ const [arg2, pos2] = this.parseNextArg(tokens, pos1);
397
438
  return [new TexNode('binaryFunc', command, [arg1, arg2]), pos2];
398
439
  }
399
440
  default:
@@ -401,6 +442,30 @@ export class LatexParser {
401
442
  }
402
443
  }
403
444
 
445
+ /*
446
+ Extract the next non-whitespace argument from the token stream,
447
+ so that `\frac{12} 3` is parsed as
448
+ TexCommand{ content: '\frac', args: ['12', '3'] }
449
+ rather than
450
+ TexCommand{ content: '\frac', args: ['12', ' '] }, TexElement{ content: '3' }
451
+ */
452
+ parseNextArg(tokens: TexToken[], start: number): ParseResult {
453
+ let pos = start;
454
+ let arg: TexNode | null = null;
455
+ while (pos < tokens.length) {
456
+ let node: TexNode;
457
+ [node, pos] = this.parseNextExprWithoutSupSub(tokens, pos);
458
+ if (node.type !== 'whitespace') {
459
+ arg = node;
460
+ break;
461
+ }
462
+ }
463
+ if (arg === null) {
464
+ throw new LatexParserError('Expecting argument but token stream ended');
465
+ }
466
+ return [arg, pos];
467
+ }
468
+
404
469
  parseLeftRightExpr(tokens: TexToken[], start: number): ParseResult {
405
470
  assert(tokens[start].eq(LEFT_COMMAND));
406
471
 
@@ -84,8 +84,6 @@ export class TypstWriter {
84
84
  no_need_space ||= /^[_^,;!]$/.test(str);
85
85
  // putting a prime
86
86
  no_need_space ||= str === "'";
87
- // continue a number
88
- no_need_space ||= /[0-9]$/.test(this.buffer) && /^[0-9]/.test(str);
89
87
  // leading sign. e.g. produce "+1" instead of " +1"
90
88
  no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(this.buffer) || this.buffer === "-" || this.buffer === "+";
91
89
  // new line
@@ -26,5 +26,10 @@ if __name__ == '__main__':
26
26
  # We only keep the first one
27
27
  if latex not in symbol_map:
28
28
  symbol_map[latex] = typst
29
- # print(f" ['{latex[1:]}', '{typst}'],")
30
- print(f'{latex[1:]} = "{typst}"')
29
+
30
+ # sort the pairs in case-insensitive alphabetical order of the latex key
31
+ sorted_keys = sorted(list(symbol_map.keys()), key=str.lower)
32
+ sorted_symbol_map = [(key, symbol_map[key]) for key in sorted_keys]
33
+ for latex, typst in sorted_symbol_map:
34
+ print(f" ['{latex[1:]}', '{typst}'],")
35
+ # print(f'{latex[1:]} = "{typst}"')