tex2typst 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/tex-parser.ts CHANGED
@@ -48,7 +48,12 @@ const BINARY_COMMANDS = [
48
48
  'overset',
49
49
  ]
50
50
 
51
-
51
/**
 * Delimiter-sizing commands, written without the leading backslash.
 *
 * When the parser meets one of these commands it produces no node for it
 * and simply continues with the token that follows.
 */
const IGNORED_COMMANDS = [
    'bigl',
    'bigr',
    'biggl',
    'biggr',
    'Bigl',
    'Bigr',
    'Biggl',
    'Biggr',
];
52
57
 
53
58
  const EMPTY_NODE: TexNode = new TexNode('empty', '');
54
59
 
@@ -164,7 +169,46 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
164
169
  [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
165
170
  [String.raw`\\[\\,:;]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
166
171
  [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
167
- [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text()!)],
172
+ [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
173
+ const text = s.text()!;
174
+ const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
175
+ const match = text.match(regex);
176
+ assert(match !== null);
177
+ const command = match![1];
178
+ if (BINARY_COMMANDS.includes(command.substring(1))) {
179
+ const arg1 = match![2].trimStart();
180
+ const arg2 = match![3];
181
+ return [
182
+ new TexToken(TexTokenType.COMMAND, command),
183
+ new TexToken(TexTokenType.ELEMENT, arg1),
184
+ new TexToken(TexTokenType.ELEMENT, arg2),
185
+ ];
186
+ } else {
187
+ s.reject();
188
+ return [];
189
+ }
190
+ }],
191
+ [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
192
+ const text = s.text()!;
193
+ const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
194
+ const match = text.match(regex);
195
+ assert(match !== null);
196
+ const command = match![1];
197
+ if (UNARY_COMMANDS.includes(command.substring(1))) {
198
+ const arg1 = match![2].trimStart();
199
+ return [
200
+ new TexToken(TexTokenType.COMMAND, command),
201
+ new TexToken(TexTokenType.ELEMENT, arg1),
202
+ ];
203
+ } else {
204
+ s.reject();
205
+ return [];
206
+ }
207
+ }],
208
+ [String.raw`\\[a-zA-Z]+`, (s) => {
209
+ const command = s.text()!;
210
+ return [ new TexToken(TexTokenType.COMMAND, command), ];
211
+ }],
168
212
  [String.raw`[0-9]+`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
169
213
  [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
170
214
  [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
@@ -296,6 +340,9 @@ export class LatexParser {
296
340
  }
297
341
 
298
342
  parseNextExprWithoutSupSub(tokens: TexToken[], start: number): ParseResult {
343
+ if (start >= tokens.length) {
344
+ return [EMPTY_NODE, start];
345
+ }
299
346
  const firstToken = tokens[start];
300
347
  switch (firstToken.type) {
301
348
  case TexTokenType.ELEMENT:
@@ -308,6 +355,10 @@ export class LatexParser {
308
355
  case TexTokenType.NEWLINE:
309
356
  return [new TexNode('whitespace', firstToken.value), start + 1];
310
357
  case TexTokenType.COMMAND:
358
+ const commandName = firstToken.value.slice(1);
359
+ if (IGNORED_COMMANDS.includes(commandName)) {
360
+ return this.parseNextExprWithoutSupSub(tokens, start + 1);
361
+ }
311
362
  if (firstToken.eq(BEGIN_COMMAND)) {
312
363
  return this.parseBeginEndExpr(tokens, start);
313
364
  } else if (firstToken.eq(LEFT_COMMAND)) {
@@ -376,7 +427,7 @@ export class LatexParser {
376
427
  throw new LatexParserError('No matching right square bracket for [');
377
428
  }
378
429
  const [exponent, _] = this.parseGroup(tokens, posLeftSquareBracket + 1, posRightSquareBracket);
379
- const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
430
+ const [arg1, newPos] = this.parseNextArg(tokens, posRightSquareBracket + 1);
380
431
  return [new TexNode('unaryFunc', command, [arg1], exponent), newPos];
381
432
  } else if (command === '\\text') {
382
433
  if (pos + 2 >= tokens.length) {
@@ -388,12 +439,12 @@ export class LatexParser {
388
439
  const text = tokens[pos + 1].value;
389
440
  return [new TexNode('text', text), pos + 3];
390
441
  }
391
- let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
442
+ let [arg1, newPos] = this.parseNextArg(tokens, pos);
392
443
  return [new TexNode('unaryFunc', command, [arg1]), newPos];
393
444
  }
394
445
  case 2: {
395
- const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
396
- const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
446
+ const [arg1, pos1] = this.parseNextArg(tokens, pos);
447
+ const [arg2, pos2] = this.parseNextArg(tokens, pos1);
397
448
  return [new TexNode('binaryFunc', command, [arg1, arg2]), pos2];
398
449
  }
399
450
  default:
@@ -401,6 +452,30 @@ export class LatexParser {
401
452
  }
402
453
  }
403
454
 
455
/**
 * Read the next command argument, skipping whitespace that precedes it.
 *
 * Without the skipping, `\frac{12} 3` would take the space as the second
 * argument of `\frac` and leave `3` behind as a separate element; with it,
 * the arguments are `12` and `3`.
 *
 * @param tokens the token stream being parsed
 * @param start  index at which the search for the argument begins
 * @returns the first node whose type is not 'whitespace', together with
 *          the index of the token that follows it
 * @throws LatexParserError when the stream ends before such a node is found
 */
parseNextArg(tokens: TexToken[], start: number): ParseResult {
    let cursor = start;
    while (cursor < tokens.length) {
        const [candidate, next] = this.parseNextExprWithoutSupSub(tokens, cursor);
        if (candidate.type !== 'whitespace') {
            return [candidate, next];
        }
        // Whitespace is not an argument: step over it and keep looking.
        cursor = next;
    }
    throw new LatexParserError('Expecting argument but token stream ended');
}
478
+
404
479
  parseLeftRightExpr(tokens: TexToken[], start: number): ParseResult {
405
480
  assert(tokens[start].eq(LEFT_COMMAND));
406
481
 
package/src/types.ts CHANGED
@@ -348,6 +348,10 @@ export interface TypstSupsubData {
348
348
  }
349
349
 
350
350
  export type TypstArrayData = TypstNode[][];
351
/**
 * Delimiter information carried in the `data` slot of an `lr(...)`
 * function-call node.
 *
 * Both fields are null when the `lr` arguments were parsed as an ordinary
 * argument list, i.e. no explicit delimiter pair was recognised.
 */
export interface TypstLrData {
    /** Text of the token that opens the delimited content, or null. */
    leftDelim: string | null;

    /** Text of the token that closes the delimited content, or null. */
    rightDelim: string | null;
}
351
355
 
352
356
  type TypstNodeType = 'atom' | 'symbol' | 'text' | 'control' | 'comment' | 'whitespace'
353
357
  | 'empty' | 'group' | 'supsub' | 'funcCall' | 'fraction' | 'align' | 'matrix' | 'cases' | 'unknown';
@@ -364,12 +368,12 @@ export class TypstNode {
364
368
  type: TypstNodeType;
365
369
  content: string;
366
370
  args?: TypstNode[];
367
- data?: TypstSupsubData | TypstArrayData;
371
+ data?: TypstSupsubData | TypstArrayData | TypstLrData;
368
372
  // Some Typst functions accept additional options. e.g. mat() has option "delim", op() has option "limits"
369
373
  options?: TypstNamedParams;
370
374
 
371
375
  constructor(type: TypstNodeType, content: string, args?: TypstNode[],
372
- data?: TypstSupsubData | TypstArrayData) {
376
+ data?: TypstSupsubData | TypstArrayData| TypstLrData) {
373
377
  this.type = type;
374
378
  this.content = content;
375
379
  this.args = args;
@@ -1,6 +1,6 @@
1
1
 
2
2
  import { array_find } from "./generic";
3
- import { TYPST_NONE, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
3
+ import { TYPST_NONE, TypstLrData, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
4
4
  import { assert, isalpha } from "./util";
5
5
  import { reverseShorthandMap } from "./typst-shorthands";
6
6
  import { JSLex, Scanner } from "./jslex";
@@ -91,8 +91,9 @@ export function tokenize_typst(input: string): TypstToken[] {
91
91
  }
92
92
 
93
93
 
94
- function find_closing_match(tokens: TypstToken[], start: number): number {
95
- assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET]));
94
+ function _find_closing_match(tokens: TypstToken[], start: number,
95
+ leftBrackets: TypstToken[], rightBrackets: TypstToken[]): number {
96
+ assert(tokens[start].isOneOf(leftBrackets));
96
97
  let count = 1;
97
98
  let pos = start + 1;
98
99
 
@@ -100,10 +101,10 @@ function find_closing_match(tokens: TypstToken[], start: number): number {
100
101
  if (pos >= tokens.length) {
101
102
  throw new Error('Unmatched brackets');
102
103
  }
103
- if (tokens[pos].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET])) {
104
- count += 1;
105
- } else if (tokens[pos].isOneOf([RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET])) {
104
+ if (tokens[pos].isOneOf(rightBrackets)) {
106
105
  count -= 1;
106
+ }else if (tokens[pos].isOneOf(leftBrackets)) {
107
+ count += 1;
107
108
  }
108
109
  pos += 1;
109
110
  }
@@ -111,6 +112,25 @@ function find_closing_match(tokens: TypstToken[], start: number): number {
111
112
  return pos - 1;
112
113
  }
113
114
 
115
/**
 * Find the bracket that closes the one at `start`, using `(`, `[`, `{` as
 * openers and `)`, `]`, `}` as closers.
 *
 * @param tokens the token stream
 * @param start  index of the opening bracket token
 * @returns the value produced by `_find_closing_match`; callers use it as
 *          the index of the closing token
 */
function find_closing_match(tokens: TypstToken[], start: number): number {
    // Built on each call rather than hoisted: the token constants are
    // declared further down in this module.
    const openers = [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET];
    const closers = [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET];
    return _find_closing_match(tokens, start, openers, closers);
}
123
+
124
/**
 * Like `find_closing_match`, but `|` is also accepted as a delimiter.
 *
 * `|` is listed both as an opener and as a closer. `_find_closing_match`
 * tests the closer list first, so every `|` that follows `start` is
 * counted as closing, never as opening.
 *
 * @param tokens the token stream
 * @param start  index of the opening delimiter token
 * @returns the value produced by `_find_closing_match`; callers use it as
 *          the index of the closing delimiter token
 */
function find_closing_delim(tokens: TypstToken[], start: number): number {
    // Built on each call rather than hoisted: the token constants are
    // declared further down in this module.
    const openers = [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET, VERTICAL_BAR];
    const closers = [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET, VERTICAL_BAR];
    return _find_closing_match(tokens, start, openers, closers);
}
132
+
133
+
114
134
 
115
135
  function find_closing_parenthesis(nodes: TypstNode[], start: number): number {
116
136
  const left_parenthesis = new TypstNode('atom', '(');
@@ -261,6 +281,7 @@ const LEFT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '[');
261
281
  const RIGHT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ']');
262
282
  const LEFT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '{');
263
283
  const RIGHT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '}');
284
+ const VERTICAL_BAR = new TypstToken(TypstTokenType.ELEMENT, '|');
264
285
  const COMMA = new TypstToken(TypstTokenType.ELEMENT, ',');
265
286
  const SEMICOLON = new TypstToken(TypstTokenType.ELEMENT, ';');
266
287
  const SINGLE_SPACE = new TypstToken(TypstTokenType.SPACE, ' ');
@@ -389,9 +410,13 @@ export class TypstParser {
389
410
  casesNode.setOptions(named_params);
390
411
  return [casesNode, newPos];
391
412
  }
413
+ if (firstToken.value === 'lr') {
414
+ const [args, newPos, lrData] = this.parseLrArguments(tokens, start + 1);
415
+ const func_call = new TypstNode('funcCall', firstToken.value, args, lrData);
416
+ return [func_call, newPos];
417
+ }
392
418
  const [args, newPos] = this.parseArguments(tokens, start + 1);
393
- const func_call = new TypstNode('funcCall', firstToken.value);
394
- func_call.args = args;
419
+ const func_call = new TypstNode('funcCall', firstToken.value, args);
395
420
  return [func_call, newPos];
396
421
  }
397
422
  }
@@ -405,6 +430,28 @@ export class TypstParser {
405
430
  return [this.parseCommaSeparatedArguments(tokens, start + 1, end), end + 1];
406
431
  }
407
432
 
433
/**
 * Parse the argument list of an `lr(...)` call.
 *
 * If the content between the call's parentheses is wrapped in exactly one
 * delimiter pair (opened by `(`, `[`, `{` or `|`), the content inside that
 * pair is parsed as the arguments and the two delimiter texts are reported.
 * In every other case the whole parenthesised content is parsed as ordinary
 * arguments and both delimiters are reported as null.
 *
 * @param tokens the token stream
 * @param start  the position of the left parenthesis that opens the call
 * @returns the parsed arguments, the position just after the call's closing
 *          parenthesis, and the delimiter data
 */
parseLrArguments(tokens: TypstToken[], start: number): [TypstNode[], number, TypstLrData] {
    const inner_start = start + 1;
    const opening_delims = [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET, VERTICAL_BAR];

    // Guard the lookahead: a truncated stream must not dereference undefined.
    if (inner_start < tokens.length && tokens[inner_start].isOneOf(opening_delims)) {
        const end = find_closing_match(tokens, start);
        const inner_end = find_closing_delim(tokens, inner_start);

        // Only treat the pair as the lr delimiters when it wraps the whole
        // content. Otherwise (e.g. `lr((a) + (b))` or `lr(| a)`) tokens after
        // the inner closer would be dropped, or the call's own `)` would be
        // taken as the right delimiter.
        if (inner_end === end - 1) {
            const inner_args = this.parseCommaSeparatedArguments(tokens, inner_start + 1, inner_end);
            const delims: TypstLrData = {
                leftDelim: tokens[inner_start].value,
                rightDelim: tokens[inner_end].value,
            };
            return [inner_args, end + 1, delims];
        }
    }

    // No single wrapping delimiter pair: parse as a plain argument list.
    const [args, newPos] = this.parseArguments(tokens, start);
    const no_delims: TypstLrData = { leftDelim: null, rightDelim: null };
    return [args, newPos, no_delims];
}
454
+
408
455
  // start: the position of the left parenthesis
409
456
  parseGroupsOfArguments(tokens: TypstToken[], start: number, newline_token = SEMICOLON): [TypstNode[][], TypstNamedParams, number] {
410
457
  const end = find_closing_match(tokens, start);
@@ -472,7 +519,7 @@ export class TypstParser {
472
519
  pos = next_stop + 1;
473
520
  }
474
521
  }
475
-
522
+
476
523
  return [matrix, named_params, end + 1];
477
524
  }
478
525
 
@@ -481,8 +528,7 @@ export class TypstParser {
481
528
  const args: TypstNode[] = [];
482
529
  let pos = start;
483
530
  while (pos < end) {
484
- let arg = new TypstNode('group', '', []);
485
-
531
+ let nodes: TypstNode[] = [];
486
532
  while(pos < end) {
487
533
  if(tokens[pos].eq(COMMA)) {
488
534
  pos += 1;
@@ -493,14 +539,18 @@ export class TypstParser {
493
539
  }
494
540
  const [argItem, newPos] = this.parseNextExpr(tokens, pos);
495
541
  pos = newPos;
496
- arg.args!.push(argItem);
542
+ nodes.push(argItem);
497
543
  }
498
544
 
499
- if(arg.args!.length === 0) {
545
+ let arg: TypstNode;
546
+ if (nodes.length === 0) {
500
547
  arg = TYPST_EMPTY_NODE;
501
- } else if (arg.args!.length === 1) {
502
- arg = arg.args![0];
548
+ } else if (nodes.length === 1) {
549
+ arg = nodes[0];
550
+ } else {
551
+ arg = process_operators(nodes);
503
552
  }
553
+
504
554
  args.push(arg);
505
555
  }
506
556
  return args;
@@ -84,8 +84,6 @@ export class TypstWriter {
84
84
  no_need_space ||= /^[_^,;!]$/.test(str);
85
85
  // putting a prime
86
86
  no_need_space ||= str === "'";
87
- // continue a number
88
- no_need_space ||= /[0-9]$/.test(this.buffer) && /^[0-9]/.test(str);
89
87
  // leading sign. e.g. produce "+1" instead of " +1"
90
88
  no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(this.buffer) || this.buffer === "-" || this.buffer === "+";
91
89
  // new line
@@ -26,5 +26,10 @@ if __name__ == '__main__':
26
26
  # We only keep the first one
27
27
  if latex not in symbol_map:
28
28
  symbol_map[latex] = typst
29
- # print(f" ['{latex[1:]}', '{typst}'],")
30
- print(f'{latex[1:]} = "{typst}"')
29
+
30
+ # sort the pairs with alphabetical order of latex
31
+ sorted_keys = sorted(list(symbol_map.keys()), key=str.lower)
32
+ sorted_symbol_map = [(key, symbol_map[key]) for key in sorted_keys]
33
+ for latex, typst in sorted_symbol_map:
34
+ print(f" ['{latex[1:]}', '{typst}'],")
35
+ # print(f'{latex[1:]} = "{typst}"')