tex2typst 0.2.16 → 0.3.0-beta-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,507 @@
1
+
2
+ import { array_find } from "./generic";
3
+ import { TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
4
+ import { assert, isalpha, isdigit } from "./util";
5
+
6
// TODO: In Typst, y' ' is not the same as y''.
// The parser should be able to parse the former correctly.
/**
 * Counts the run of consecutive prime tokens (`'`) that begins at `start`.
 *
 * @param tokens - The token stream being parsed.
 * @param start - Index of the first token to inspect.
 * @returns The number of adjacent prime tokens found (0 when there are none).
 */
function eat_primes(tokens: TypstToken[], start: number): number {
    const prime = new TypstToken(TypstTokenType.ELEMENT, "'");
    let count = 0;
    while (start + count < tokens.length && tokens[start + count].eq(prime)) {
        count += 1;
    }
    return count;
}
15
+
16
+
17
/**
 * Reads an identifier made of ASCII letters, optionally joined by dots
 * (for example `arrow.r`), starting at `start`.
 *
 * A dot is only consumed when a letter follows it. A trailing dot (`x.`) or a
 * run of dots (`a...b`) is therefore left in the input for the tokenizer to
 * emit as separate tokens instead of being glued onto the name.
 *
 * @param typst - The Typst source text.
 * @param start - Index at which the identifier begins.
 * @returns The identifier text; empty when no identifier starts at `start`.
 */
function eat_identifier_name(typst: string, start: number): string {
    let pos = start;
    while (pos < typst.length) {
        const ch = typst[pos];
        if (isalpha(ch)) {
            pos += 1;
            continue;
        }
        // The dot is part of the name only if another letter comes right after it.
        const dot_joins_name = ch === '.' && pos + 1 < typst.length && isalpha(typst[pos + 1]);
        if (!dot_joins_name) {
            break;
        }
        pos += 1;
    }
    return typst.substring(start, pos);
}
24
+
25
+
26
/**
 * Shared placeholder node of type 'empty', returned wherever an expression,
 * group or argument turns out to have no content.
 */
const TYPST_EMPTY_NODE: TypstNode = new TypstNode('empty', '');
27
+
28
+
29
/**
 * Splits Typst math source into a flat list of tokens.
 *
 * Tokenization rules:
 * - `_`, `^` and `&` become CONTROL tokens.
 * - `\n`, `\r\n` and a bare `\r` all become a NEWLINE token with value `'\n'`.
 * - A run of spaces becomes a single SPACE token holding the whole run.
 * - `//` starts a COMMENT token that runs up to (not including) the line end;
 *   a single `/` is an ELEMENT.
 * - `\$`, `\&`, `\#` and `\_` become two-character ELEMENT tokens; a backslash
 *   directly before `\n` becomes CONTROL `\`; any other backslash yields an
 *   empty CONTROL token and the following character is tokenized normally.
 * - `"..."` becomes a TEXT token with `\"` and `\\` unescaped.
 * - A run of digits is one ELEMENT; a one-letter identifier is an ELEMENT and
 *   a longer one is a SYMBOL; any other character is a one-code-point ELEMENT.
 *
 * @param typst - The Typst source text.
 * @returns The tokens in source order.
 * @throws Error when the input ends with a lone backslash.
 */
export function tokenize_typst(typst: string): TypstToken[] {
    const tokens: TypstToken[] = [];

    let pos = 0;

    while (pos < typst.length) {
        const firstChar = typst[pos];
        let token: TypstToken;
        switch (firstChar) {
            case '_':
            case '^':
            case '&':
                token = new TypstToken(TypstTokenType.CONTROL, firstChar);
                pos++;
                break;
            case '\n':
                token = new TypstToken(TypstTokenType.NEWLINE, firstChar);
                pos++;
                break;
            case '\r':
                // Both "\r\n" and a bare "\r" are normalized to a single '\n' newline.
                token = new TypstToken(TypstTokenType.NEWLINE, '\n');
                pos += typst[pos + 1] === '\n' ? 2 : 1;
                break;
            case ' ': {
                let newPos = pos;
                while (newPos < typst.length && typst[newPos] === ' ') {
                    newPos++;
                }
                token = new TypstToken(TypstTokenType.SPACE, typst.substring(pos, newPos));
                pos = newPos;
                break;
            }
            case '/': {
                if (pos + 1 < typst.length && typst[pos + 1] === '/') {
                    let newPos = pos + 2;
                    // Stop at either line-ending character so a CRLF file does not
                    // leave a stray '\r' at the end of the comment text.
                    while (newPos < typst.length && typst[newPos] !== '\n' && typst[newPos] !== '\r') {
                        newPos++;
                    }
                    token = new TypstToken(TypstTokenType.COMMENT, typst.slice(pos + 2, newPos));
                    pos = newPos;
                } else {
                    token = new TypstToken(TypstTokenType.ELEMENT, '/');
                    pos++;
                }
                break;
            }
            case '\\': {
                if (pos + 1 >= typst.length) {
                    throw new Error('Expecting a character after \\');
                }
                const firstTwoChars = typst.substring(pos, pos + 2);
                if (['\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
                    token = new TypstToken(TypstTokenType.ELEMENT, firstTwoChars);
                    pos += 2;
                } else if (firstTwoChars === '\\\n') {
                    // Only the backslash is consumed; the newline is tokenized on the next pass.
                    token = new TypstToken(TypstTokenType.CONTROL, '\\');
                    pos += 1;
                } else {
                    // this backslash is dummy and will be ignored in later stages
                    token = new TypstToken(TypstTokenType.CONTROL, '');
                    pos++;
                }
                break;
            }
            case '"': {
                let newPos = pos + 1;
                while (newPos < typst.length && typst[newPos] !== '"') {
                    // Step over a backslash together with the character it escapes, so
                    // `\"` does not end the string and `\\` cannot hide the closing quote.
                    newPos += typst[newPos] === '\\' ? 2 : 1;
                }
                // An unterminated string (or one ending in a backslash) runs to the end of input.
                newPos = Math.min(newPos, typst.length);
                // Replace the escape sequences \" and \\ with the characters they stand for,
                // in a single left-to-right pass.
                const text = typst.substring(pos + 1, newPos).replace(/\\(["\\])/g, '$1');
                token = new TypstToken(TypstTokenType.TEXT, text);
                pos = newPos + 1;
                break;
            }
            default: {
                if (isdigit(firstChar)) {
                    let newPos = pos;
                    while (newPos < typst.length && isdigit(typst[newPos])) {
                        newPos += 1;
                    }
                    token = new TypstToken(TypstTokenType.ELEMENT, typst.slice(pos, newPos));
                } else if (isalpha(firstChar)) {
                    const identifier = eat_identifier_name(typst, pos);
                    const _type = identifier.length === 1 ? TypstTokenType.ELEMENT : TypstTokenType.SYMBOL;
                    token = new TypstToken(_type, identifier);
                } else {
                    // Take a whole code point so characters outside the BMP are not
                    // split into two lone-surrogate tokens.
                    const codePoint = typst.codePointAt(pos);
                    const element = codePoint === undefined ? firstChar : String.fromCodePoint(codePoint);
                    token = new TypstToken(TypstTokenType.ELEMENT, element);
                }
                pos += token.value.length;
            }
        }
        tokens.push(token);
    }

    return tokens;
}
142
+
143
/**
 * Finds the token that closes the bracket opened at `start`.
 *
 * Parentheses, square brackets and curly brackets all share one nesting
 * counter, so any closing bracket balances any opening bracket.
 *
 * @param tokens - The token stream.
 * @param start - Index of an opening bracket token.
 * @returns The index of the matching closing bracket token.
 * @throws Error when `tokens[start]` is not an opening bracket, or when the
 * stream ends before the bracket is closed.
 */
function find_closing_match(tokens: TypstToken[], start: number): number {
    const openers = [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET];
    const closers = [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET];
    assert(tokens[start].isOneOf(openers));

    let depth = 1;
    for (let i = start + 1; i < tokens.length; i++) {
        const token = tokens[i];
        if (token.isOneOf(openers)) {
            depth += 1;
        } else if (token.isOneOf(closers)) {
            depth -= 1;
            if (depth === 0) {
                return i;
            }
        }
    }
    throw new Error('Unmatched brackets');
}
162
+
163
+
164
/**
 * Node-level counterpart of the bracket matcher: finds the `)` atom that
 * closes the `(` atom located at `start`, honouring nested parentheses.
 *
 * @param nodes - The node list to search.
 * @param start - Index of a `(` atom.
 * @returns The index of the matching `)` atom.
 * @throws Error when `nodes[start]` is not a `(` atom, or when no matching
 * `)` exists.
 */
function find_closing_parenthesis(nodes: TypstNode[], start: number): number {
    const open_paren = new TypstNode('atom', '(');
    const close_paren = new TypstNode('atom', ')');

    assert(nodes[start].eq(open_paren));

    let depth = 1;
    for (let i = start + 1; i < nodes.length; i++) {
        const node = nodes[i];
        if (node.eq(open_paren)) {
            depth += 1;
        } else if (node.eq(close_paren)) {
            depth -= 1;
            if (depth === 0) {
                return i;
            }
        }
    }
    throw new Error('Unmatched brackets');
}
187
+
188
/**
 * Builds a list of `num` separate prime (`'`) atom nodes.
 *
 * @param num - How many prime nodes to create.
 * @returns A fresh array of prime atoms; empty when `num` is not positive.
 */
function primes(num: number): TypstNode[] {
    const marks: TypstNode[] = [];
    let remaining = num;
    while (remaining > 0) {
        marks.push(new TypstNode('atom', "'"));
        remaining -= 1;
    }
    return marks;
}
195
+
196
/** The `/` atom; operator processing compares nodes against it to detect divisions. */
const DIV: TypstNode = new TypstNode('atom', '/');
197
+
198
+
199
+
200
/**
 * Returns the first node at or after `start` whose type is not 'whitespace'.
 *
 * @param nodes - The node list to scan.
 * @param start - Index at which scanning begins.
 * @returns The node found, or the shared empty node when only whitespace
 * remains up to the end of the list.
 */
function next_non_whitespace(nodes: TypstNode[], start: number): TypstNode {
    let idx = start;
    for (; idx < nodes.length; idx++) {
        if (nodes[idx].type !== 'whitespace') {
            break;
        }
    }
    return idx === nodes.length ? TYPST_EMPTY_NODE : nodes[idx];
}
207
+
208
/**
 * Removes whitespace nodes that sit directly next to a `/` operator, so that
 * `a / b` is handled exactly like `a/b`.
 *
 * A whitespace node is dropped when the previously kept node was `/`, or when
 * the next non-whitespace node is `/`. All other nodes are kept in order.
 *
 * @param nodes - The node list to filter; it is not modified.
 * @returns A new list without the whitespace surrounding `/`.
 */
function trim_whitespace_around_operators(nodes: TypstNode[]): TypstNode[] {
    const kept: TypstNode[] = [];
    let prev_was_div = false;
    for (const [idx, node] of nodes.entries()) {
        const droppable =
            node.type === 'whitespace' &&
            (prev_was_div || next_non_whitespace(nodes, idx + 1).eq(DIV));
        if (droppable) {
            // Skipped whitespace does not reset the "just saw '/'" state.
            continue;
        }
        prev_was_div = node.eq(DIV);
        kept.push(node);
    }
    return kept;
}
230
+
231
/**
 * Folds `/` operators in a flat node list into 'fraction' nodes.
 *
 * Whitespace around `/` is trimmed first. Each operand is either a single
 * node or a parenthesised run of nodes, which is processed recursively into a
 * 'parenthesis' group. An operand that follows a pending `/` is combined with
 * the previous operand into a fraction; parenthesis groups used directly as
 * numerator or denominator have their 'parenthesis' marker cleared.
 *
 * @param nodes - The nodes to process.
 * @param parenthesis - When true, the result is always wrapped in a group
 * whose content is 'parenthesis'.
 * @returns With `parenthesis` set, the 'parenthesis' group. Otherwise the
 * empty node for no operands, the operand itself when there is exactly one,
 * or a plain group of all operands.
 * @throws TypstParserError on a stray `)` or on a `/` with no numerator.
 */
function process_operators(nodes: TypstNode[], parenthesis = false): TypstNode {
    const items = trim_whitespace_around_operators(nodes);

    const open_paren = new TypstNode('atom', '(');
    const close_paren = new TypstNode('atom', ')');

    const operands: TypstNode[] = [];
    // Number of '/' operators seen that have not yet been paired with a denominator.
    let pending_divs = 0;

    let i = 0;
    while (i < items.length) {
        const item = items[i];
        if (item.eq(close_paren)) {
            throw new TypstParserError("Unexpected ')'");
        }
        if (item.eq(DIV)) {
            pending_divs += 1;
            i += 1;
            continue;
        }

        let operand: TypstNode;
        if (item.eq(open_paren)) {
            // the operand is a group wrapped in parenthesis
            const close_idx = find_closing_parenthesis(items, i);
            operand = process_operators(items.slice(i + 1, close_idx), true);
            i = close_idx + 1;
        } else {
            // the operand is just a single item
            operand = item;
            i += 1;
        }

        if (pending_divs === 0) {
            operands.push(operand);
            continue;
        }

        if (operands.length === 0) {
            throw new TypstParserError("Unexpected '/' operator, no numerator before it");
        }
        const numerator = operands.pop()!;

        // Parentheses that merely delimit a numerator/denominator are not kept.
        for (const side of [operand, numerator]) {
            if (side.type === 'group' && side.content === 'parenthesis') {
                side.content = '';
            }
        }

        operands.push(new TypstNode('fraction', '', [numerator, operand]));
        pending_divs -= 1; // this '/' operator has been consumed
    }

    if (parenthesis) {
        return new TypstNode('group', 'parenthesis', operands);
    }
    switch (operands.length) {
        case 0:
            return TYPST_EMPTY_NODE;
        case 1:
            return operands[0];
        default:
            return new TypstNode('group', '', operands);
    }
}
294
+
295
/**
 * Error raised when the parser meets input it cannot interpret, such as a
 * stray `)` or a `/` without a numerator.
 */
export class TypstParserError extends Error {
    /**
     * @param message - Human-readable description of the parse failure.
     */
    constructor(message: string) {
        super(message);
        // When compiled to ES5, `super(...)` on a built-in returns a plain Error;
        // restoring the prototype keeps `instanceof TypstParserError` working.
        Object.setPrototypeOf(this, new.target.prototype);
        this.name = 'TypstParserError';
    }
}
301
+
302
+
303
/** A parsed node paired with the index of the first token after it. */
type TypstParseResult = [TypstNode, number];

// Script markers.
const SUB_SYMBOL = new TypstToken(TypstTokenType.CONTROL, '_');
const SUP_SYMBOL = new TypstToken(TypstTokenType.CONTROL, '^');

// Bracket tokens, in opening/closing pairs.
const LEFT_PARENTHESES = new TypstToken(TypstTokenType.ELEMENT, '(');
const RIGHT_PARENTHESES = new TypstToken(TypstTokenType.ELEMENT, ')');
const LEFT_BRACKET = new TypstToken(TypstTokenType.ELEMENT, '[');
const RIGHT_BRACKET = new TypstToken(TypstTokenType.ELEMENT, ']');
const LEFT_CURLY_BRACKET = new TypstToken(TypstTokenType.ELEMENT, '{');
const RIGHT_CURLY_BRACKET = new TypstToken(TypstTokenType.ELEMENT, '}');

// Separators used inside argument lists.
const COMMA = new TypstToken(TypstTokenType.ELEMENT, ',');
const SEMICOLON = new TypstToken(TypstTokenType.ELEMENT, ';');
const SINGLE_SPACE = new TypstToken(TypstTokenType.SPACE, ' ');
316
+
317
/**
 * Parser that turns a token list into a tree of TypstNode objects.
 */
export class TypstParser {
    /** When false, whitespace nodes made up solely of spaces are dropped from groups. */
    space_sensitive: boolean;
    /** When false, whitespace nodes whose content is exactly '\n' are dropped from groups. */
    newline_sensitive: boolean;

    /**
     * @param space_sensitive - Keep space-only whitespace nodes (default true).
     * @param newline_sensitive - Keep newline whitespace nodes (default true).
     */
    constructor(space_sensitive: boolean = true, newline_sensitive: boolean = true) {
        this.space_sensitive = space_sensitive;
        this.newline_sensitive = newline_sensitive;
    }

    /**
     * Parses a complete token list.
     *
     * @param tokens - The tokens to parse.
     * @returns The root node of the parsed tree.
     */
    parse(tokens: TypstToken[]): TypstNode {
        const [tree] = this.parseGroup(tokens, 0, tokens.length);
        return tree;
    }

    /**
     * Parses the tokens in `[start, end)` as a sequence of expressions and
     * combines them with operator processing.
     *
     * @param tokens - The token stream.
     * @param start - Index of the first token of the group.
     * @param end - Index one past the last token of the group (for a
     * parenthesised group, the index of the closing bracket).
     * @param parentheses - When true, the result is a 'parenthesis' group.
     * @returns The resulting node and `end + 1`.
     */
    parseGroup(tokens: TypstToken[], start: number, end: number, parentheses = false): TypstParseResult {
        const results: TypstNode[] = [];
        let pos = start;

        while (pos < end) {
            const [res, newPos] = this.parseNextExpr(tokens, pos);
            pos = newPos;
            if (res.type === 'whitespace') {
                if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
                    continue;
                }
                if (!this.newline_sensitive && res.content === '\n') {
                    continue;
                }
            }
            results.push(res);
        }

        let node: TypstNode;
        if (parentheses) {
            node = process_operators(results, true);
        } else if (results.length === 0) {
            node = TYPST_EMPTY_NODE;
        } else if (results.length === 1) {
            node = results[0];
        } else {
            node = process_operators(results);
        }
        return [node, end + 1];
    }

    /**
     * Parses one expression together with any primes, subscript and
     * superscript attached to it. The scripts may appear in either order.
     *
     * @param tokens - The token stream.
     * @param start - Index of the first token of the expression.
     * @returns The parsed node (a 'supsub' node when a script is present) and
     * the index of the first unconsumed token.
     * @throws TypstParserError when `_` or `^` is the last token.
     */
    parseNextExpr(tokens: TypstToken[], start: number): TypstParseResult {
        let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
        let sub: TypstNode | null = null;
        let sup: TypstNode | null = null;

        const num_base_prime = eat_primes(tokens, pos);
        if (num_base_prime > 0) {
            // Primes bind to the base before any script is attached.
            base = new TypstNode('group', '', [base].concat(primes(num_base_prime)));
            pos += num_base_prime;
        }
        if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
            [sub, pos] = this.parseSupOrSub(tokens, pos + 1);
            if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) {
                [sup, pos] = this.parseSupOrSub(tokens, pos + 1);
            }
        } else if (pos < tokens.length && tokens[pos].eq(SUP_SYMBOL)) {
            [sup, pos] = this.parseSupOrSub(tokens, pos + 1);
            if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
                [sub, pos] = this.parseSupOrSub(tokens, pos + 1);
            }
        }

        if (sub !== null || sup !== null) {
            const res: TypstSupsubData = { base };
            if (sub) {
                res.sub = sub;
            }
            if (sup) {
                res.sup = sup;
            }
            return [new TypstNode('supsub', '', [], res), pos];
        } else {
            return [base, pos];
        }
    }

    /**
     * Parses the operand of a `_` or `^`, including primes that follow it.
     *
     * A parenthesised operand is parsed as a plain group, not as a
     * 'parenthesis' group.
     *
     * @param tokens - The token stream.
     * @param start - Index of the first token after the `_` or `^`.
     * @returns The operand node and the index of the first unconsumed token.
     * @throws TypstParserError when there is no token at `start`.
     */
    parseSupOrSub(tokens: TypstToken[], start: number): TypstParseResult {
        if (start >= tokens.length) {
            // Without this guard a trailing '_' or '^' fails with an opaque TypeError.
            throw new TypstParserError("Expecting an expression after '_' or '^'");
        }
        let node: TypstNode;
        let end: number;
        if (tokens[start].eq(LEFT_PARENTHESES)) {
            const pos_closing = find_closing_match(tokens, start);
            [node, end] = this.parseGroup(tokens, start + 1, pos_closing);
        } else {
            [node, end] = this.parseNextExprWithoutSupSub(tokens, start);
        }
        const num_prime = eat_primes(tokens, end);
        if (num_prime > 0) {
            node = new TypstNode('group', '', [node].concat(primes(num_prime)));
            end += num_prime;
        }
        return [node, end];
    }

    /**
     * Parses one expression without looking at primes or scripts: a
     * parenthesised group, a `mat(...)` matrix, a function call, or a single
     * token.
     *
     * @param tokens - The token stream.
     * @param start - Index of the first token of the expression.
     * @returns The parsed node and the index of the first unconsumed token.
     */
    parseNextExprWithoutSupSub(tokens: TypstToken[], start: number): TypstParseResult {
        const firstToken = tokens[start];
        const node = firstToken.toNode();
        if (firstToken.eq(LEFT_PARENTHESES)) {
            const pos_closing = find_closing_match(tokens, start);
            return this.parseGroup(tokens, start + 1, pos_closing, true);
        }
        // Numbers and punctuation can never start a function call.
        if (firstToken.type === TypstTokenType.ELEMENT && !isalpha(firstToken.value[0])) {
            return [node, start + 1];
        }
        if ([TypstTokenType.ELEMENT, TypstTokenType.SYMBOL].includes(firstToken.type)) {
            // A name immediately followed by '(' is a call.
            if (start + 1 < tokens.length && tokens[start + 1].eq(LEFT_PARENTHESES)) {
                if (firstToken.value === 'mat') {
                    const [matrix, newPos] = this.parseGroupsOfArguments(tokens, start + 1);
                    const mat = new TypstNode('matrix', '', [], matrix);
                    return [mat, newPos];
                }
                const [args, newPos] = this.parseArguments(tokens, start + 1);
                const func_call = new TypstNode('funcCall', firstToken.value);
                func_call.args = args;
                return [func_call, newPos];
            }
        }

        return [node, start + 1];
    }

    /**
     * Parses a parenthesised, comma-separated argument list.
     *
     * @param tokens - The token stream.
     * @param start - The position of the left parentheses.
     * @returns The argument nodes and the index just past the closing bracket.
     */
    parseArguments(tokens: TypstToken[], start: number): [TypstNode[], number] {
        const end = find_closing_match(tokens, start);

        return [this.parseCommaSeparatedArguments(tokens, start + 1, end), end + 1];
    }

    /**
     * Parses a parenthesised list of `;`-separated rows, each row being a
     * comma-separated argument list (the form used by `mat(...)`).
     *
     * @param tokens - The token stream.
     * @param start - The position of the left parentheses.
     * @returns The rows and the index just past the closing bracket.
     */
    parseGroupsOfArguments(tokens: TypstToken[], start: number): [TypstNode[][], number] {
        const end = find_closing_match(tokens, start);

        const matrix: TypstNode[][] = [];
        let pos = start + 1;
        while (pos < end) {
            let next_stop = array_find(tokens, SEMICOLON, pos);
            // A semicolon located after the closing bracket belongs to later input,
            // so the current row must stop at the closing bracket instead.
            if (next_stop === -1 || next_stop > end) {
                next_stop = end;
            }
            const row = this.parseCommaSeparatedArguments(tokens, pos, next_stop);
            matrix.push(row);
            pos = next_stop + 1;
        }

        return [matrix, end + 1];
    }

    /**
     * Parses the tokens in `[start, end)` as arguments separated by commas.
     *
     * Single-space tokens between items are skipped. An argument with no
     * items becomes the shared empty node, an argument with one item becomes
     * that item, and anything longer becomes a group of its items.
     *
     * @param tokens - The token stream.
     * @param start - The position of the first token of arguments.
     * @param end - Index one past the last token of the arguments.
     * @returns One node per argument.
     */
    parseCommaSeparatedArguments(tokens: TypstToken[], start: number, end: number): TypstNode[] {
        const args: TypstNode[] = [];
        let pos = start;
        while (pos < end) {
            let arg = new TypstNode('group', '', []);

            while (pos < end) {
                if (tokens[pos].eq(COMMA)) {
                    pos += 1;
                    break;
                } else if (tokens[pos].eq(SINGLE_SPACE)) {
                    pos += 1;
                    continue;
                }
                const [argItem, newPos] = this.parseNextExpr(tokens, pos);
                pos = newPos;
                arg.args!.push(argItem);
            }

            if (arg.args!.length === 0) {
                arg = TYPST_EMPTY_NODE;
            } else if (arg.args!.length === 1) {
                arg = arg.args![0];
            }
            args.push(arg);
        }
        return args;
    }
}
502
+
503
/**
 * Convenience entry point: tokenizes Typst source and parses it with a
 * parser created with its default settings.
 *
 * @param typst - The Typst source text.
 * @returns The root node of the parsed tree.
 */
export function parseTypst(typst: string): TypstNode {
    return new TypstParser().parse(tokenize_typst(typst));
}
package/src/util.ts ADDED
@@ -0,0 +1,14 @@
1
+
2
// Matches exactly one ASCII letter.
const ASCII_LETTER = /^[A-Za-z]$/;

/**
 * Tells whether `char` is a single ASCII letter (a-z or A-Z).
 *
 * The empty string and strings longer than one character are rejected.
 *
 * @param char - The character to test.
 * @returns True only for exactly one ASCII letter.
 */
export function isalpha(char: string): boolean {
    return ASCII_LETTER.test(char);
}
5
+
6
// Matches exactly one ASCII decimal digit.
const ASCII_DIGIT = /^[0-9]$/;

/**
 * Tells whether `char` is a single ASCII decimal digit (0-9).
 *
 * The empty string and strings longer than one character are rejected.
 *
 * @param char - The character to test.
 * @returns True only for exactly one ASCII digit.
 */
export function isdigit(char: string): boolean {
    return ASCII_DIGIT.test(char);
}
9
+
10
/**
 * Throws when `condition` is false; does nothing otherwise.
 *
 * @param condition - The condition that is expected to hold.
 * @param message - Message for the thrown error (defaults to an empty string).
 * @throws Error carrying `message` when `condition` is false.
 */
export function assert(condition: boolean, message: string = ''): void {
    if (condition) {
        return;
    }
    throw new Error(message);
}
14
+ }