tex2typst 0.3.23 → 0.3.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
1
 
2
2
  import { array_find } from "./generic";
3
- import { TYPST_NONE, TypstLrData, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
3
+ import { TypstFraction, TypstFuncCall, TypstGroup, TypstLeftright, TypstLeftRightData, TypstMarkupFunc, TypstMatrixLike, TypstNode, TypstSupsub, TypstTerminal } from "./typst-types";
4
+ import { TypstNamedParams } from "./typst-types";
5
+ import { TypstSupsubData } from "./typst-types";
6
+ import { TypstToken } from "./typst-types";
7
+ import { TypstTokenType } from "./typst-types";
4
8
  import { tokenize_typst } from "./typst-tokenizer";
5
9
  import { assert, isalpha } from "./util";
6
10
 
@@ -15,7 +19,6 @@ function eat_primes(tokens: TypstToken[], start: number): number {
15
19
  return pos - start;
16
20
  }
17
21
 
18
-
19
22
  function _find_closing_match(tokens: TypstToken[], start: number,
20
23
  leftBrackets: TypstToken[], rightBrackets: TypstToken[]): number {
21
24
  assert(tokens[start].isOneOf(leftBrackets));
@@ -24,7 +27,7 @@ function _find_closing_match(tokens: TypstToken[], start: number,
24
27
 
25
28
  while (count > 0) {
26
29
  if (pos >= tokens.length) {
27
- throw new Error('Unmatched brackets');
30
+ throw new Error('Unmatched brackets or parentheses');
28
31
  }
29
32
  if (tokens[pos].isOneOf(rightBrackets)) {
30
33
  count -= 1;
@@ -58,8 +61,10 @@ function find_closing_delim(tokens: TypstToken[], start: number): number {
58
61
 
59
62
 
60
63
  function find_closing_parenthesis(nodes: TypstNode[], start: number): number {
61
- const left_parenthesis = new TypstNode('atom', '(');
62
- const right_parenthesis = new TypstNode('atom', ')');
64
+ const left_parenthesis = new TypstToken(TypstTokenType.ELEMENT, '(').toNode();
65
+ const right_parenthesis = new TypstToken(TypstTokenType.ELEMENT, ')').toNode();
66
+
67
+
63
68
 
64
69
  assert(nodes[start].eq(left_parenthesis));
65
70
 
@@ -68,7 +73,7 @@ function find_closing_parenthesis(nodes: TypstNode[], start: number): number {
68
73
 
69
74
  while (count > 0) {
70
75
  if (pos >= nodes.length) {
71
- throw new Error('Unmatched brackets');
76
+ throw new Error("Unmatched '('");
72
77
  }
73
78
  if (nodes[pos].eq(left_parenthesis)) {
74
79
  count += 1;
@@ -84,18 +89,18 @@ function find_closing_parenthesis(nodes: TypstNode[], start: number): number {
84
89
  function primes(num: number): TypstNode[] {
85
90
  const res: TypstNode[] = [];
86
91
  for (let i = 0; i < num; i++) {
87
- res.push(new TypstNode('atom', "'"));
92
+ res.push(new TypstToken(TypstTokenType.ELEMENT, "'").toNode());
88
93
  }
89
94
  return res;
90
95
  }
91
96
 
92
- const DIV = new TypstNode('atom', '/');
97
+ const DIV = new TypstToken(TypstTokenType.ELEMENT, '/').toNode();
93
98
 
94
99
 
95
100
 
96
101
  function next_non_whitespace(nodes: TypstNode[], start: number): TypstNode | null {
97
102
  let pos = start;
98
- while (pos < nodes.length && nodes[pos].type === 'whitespace') {
103
+ while (pos < nodes.length && (nodes[pos].head.type === TypstTokenType.SPACE || nodes[pos].head.type === TypstTokenType.NEWLINE)) {
99
104
  pos++;
100
105
  }
101
106
  return pos === nodes.length ? null : nodes[pos];
@@ -106,7 +111,7 @@ function trim_whitespace_around_operators(nodes: TypstNode[]): TypstNode[] {
106
111
  const res: TypstNode[] = [];
107
112
  for (let i = 0; i < nodes.length; i++) {
108
113
  const current = nodes[i];
109
- if (current.type === 'whitespace') {
114
+ if (current.head.type === TypstTokenType.SPACE || current.head.type === TypstTokenType.NEWLINE) {
110
115
  if(after_operator) {
111
116
  continue;
112
117
  }
@@ -127,8 +132,8 @@ function trim_whitespace_around_operators(nodes: TypstNode[]): TypstNode[] {
127
132
  function process_operators(nodes: TypstNode[], parenthesis = false): TypstNode {
128
133
  nodes = trim_whitespace_around_operators(nodes);
129
134
 
130
- const opening_bracket = new TypstNode('atom', '(');
131
- const closing_bracket = new TypstNode('atom', ')');
135
+ const opening_bracket = LEFT_PARENTHESES.toNode();
136
+ const closing_bracket = RIGHT_PARENTHESES.toNode();
132
137
 
133
138
  const stack: TypstNode[] = [];
134
139
 
@@ -155,35 +160,44 @@ function process_operators(nodes: TypstNode[], parenthesis = false): TypstNode {
155
160
  }
156
161
 
157
162
  if(stack.length > 0 && stack[stack.length-1].eq(DIV)) {
158
- const denominator = current_tree;
163
+ let denominator = current_tree;
159
164
  if(args.length === 0) {
160
165
  throw new TypstParserError("Unexpected '/' operator, no numerator before it");
161
166
  }
162
- const numerator = args.pop()!;
167
+ let numerator = args.pop()!;
163
168
 
164
- if(denominator.type === 'group' && denominator.content === 'parenthesis') {
165
- denominator.content = '';
169
+ if(denominator.type === 'leftright') {
170
+ denominator = (denominator as TypstLeftright).body;
166
171
  }
167
- if(numerator.type === 'group' && numerator.content === 'parenthesis') {
168
- numerator.content = '';
172
+ if(numerator.type === 'leftright') {
173
+ numerator = (numerator as TypstLeftright).body;
169
174
  }
170
175
 
171
- args.push(new TypstNode('fraction', '', [numerator, denominator]));
176
+ args.push(new TypstFraction([numerator, denominator]));
172
177
  stack.pop(); // drop the '/' operator
173
178
  } else {
174
179
  args.push(current_tree);
175
180
  }
176
181
  }
177
182
  }
183
+ const body = args.length === 1? args[0]: new TypstGroup(args);
178
184
  if(parenthesis) {
179
- return new TypstNode('group', 'parenthesis', args);
185
+ return new TypstLeftright(null, { body: body, left: LEFT_PARENTHESES, right: RIGHT_PARENTHESES } as TypstLeftRightData);
180
186
  } else {
181
- if(args.length === 1) {
182
- return args[0];
183
- } else {
184
- return new TypstNode('group', '', args);
185
- }
187
+ return body;
188
+ }
189
+ }
190
+
191
+ function parse_named_params(groups: TypstGroup[]): TypstNamedParams {
192
+ const COLON = new TypstToken(TypstTokenType.ELEMENT, ':').toNode();
193
+
194
+ const np: TypstNamedParams = {};
195
+ for (const group of groups) {
196
+ assert(group.items.length == 3);
197
+ assert(group.items[1].eq(COLON));
198
+ np[group.items[0].toString()] = new TypstTerminal(new TypstToken(TypstTokenType.LITERAL, group.items[2].toString()));
186
199
  }
200
+ return np;
187
201
  }
188
202
 
189
203
  export class TypstParserError extends Error {
@@ -208,6 +222,7 @@ const VERTICAL_BAR = new TypstToken(TypstTokenType.ELEMENT, '|');
208
222
  const COMMA = new TypstToken(TypstTokenType.ELEMENT, ',');
209
223
  const SEMICOLON = new TypstToken(TypstTokenType.ELEMENT, ';');
210
224
  const SINGLE_SPACE = new TypstToken(TypstTokenType.SPACE, ' ');
225
+ const CONTROL_AND = new TypstToken(TypstTokenType.CONTROL, '&');
211
226
 
212
227
  export class TypstParser {
213
228
  space_sensitive: boolean;
@@ -230,11 +245,11 @@ export class TypstParser {
230
245
  while (pos < end) {
231
246
  const [res, newPos] = this.parseNextExpr(tokens, pos);
232
247
  pos = newPos;
233
- if (res.type === 'whitespace') {
234
- if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
248
+ if (res.head.type === TypstTokenType.SPACE || res.head.type === TypstTokenType.NEWLINE) {
249
+ if (!this.space_sensitive && res.head.value.replace(/ /g, '').length === 0) {
235
250
  continue;
236
251
  }
237
- if (!this.newline_sensitive && res.content === '\n') {
252
+ if (!this.newline_sensitive && res.head.value === '\n') {
238
253
  continue;
239
254
  }
240
255
  }
@@ -261,7 +276,7 @@ export class TypstParser {
261
276
 
262
277
  const num_base_prime = eat_primes(tokens, pos);
263
278
  if (num_base_prime > 0) {
264
- base = new TypstNode('group', '', [base].concat(primes(num_base_prime)));
279
+ base = new TypstGroup([base].concat(primes(num_base_prime)));
265
280
  pos += num_base_prime;
266
281
  }
267
282
  if (pos < tokens.length && tokens[pos].eq(SUB_SYMBOL)) {
@@ -277,14 +292,8 @@ export class TypstParser {
277
292
  }
278
293
 
279
294
  if (sub !== null || sup !== null) {
280
- const res: TypstSupsubData = { base };
281
- if (sub) {
282
- res.sub = sub;
283
- }
284
- if (sup) {
285
- res.sup = sup;
286
- }
287
- return [new TypstNode('supsub', '', [], res), pos];
295
+ const res: TypstSupsubData = { base, sup, sub };
296
+ return [new TypstSupsub(res), pos];
288
297
  } else {
289
298
  return [base, pos];
290
299
  }
@@ -301,7 +310,7 @@ export class TypstParser {
301
310
  }
302
311
  const num_prime = eat_primes(tokens, end);
303
312
  if (num_prime > 0) {
304
- node = new TypstNode('group', '', [node].concat(primes(num_prime)));
313
+ node = new TypstGroup([node].concat(primes(num_prime)));
305
314
  end += num_prime;
306
315
  }
307
316
  return [node, end];
@@ -320,24 +329,34 @@ export class TypstParser {
320
329
  if ([TypstTokenType.ELEMENT, TypstTokenType.SYMBOL].includes(firstToken.type)) {
321
330
  if (start + 1 < tokens.length && tokens[start + 1].eq(LEFT_PARENTHESES)) {
322
331
  if(firstToken.value === 'mat') {
323
- const [matrix, named_params, newPos] = this.parseGroupsOfArguments(tokens, start + 1);
324
- const mat = new TypstNode('matrix', '', [], matrix);
332
+ const [matrix, named_params, newPos] = this.parseMatrix(tokens, start + 1, SEMICOLON, COMMA);
333
+ const mat = new TypstMatrixLike(firstToken, matrix);
325
334
  mat.setOptions(named_params);
326
335
  return [mat, newPos];
327
336
  }
328
337
  if(firstToken.value === 'cases') {
329
- const [cases, named_params, newPos] = this.parseGroupsOfArguments(tokens, start + 1, COMMA);
330
- const casesNode = new TypstNode('cases', '', [], cases);
338
+ const [cases, named_params, newPos] = this.parseMatrix(tokens, start + 1, COMMA, CONTROL_AND);
339
+ const casesNode = new TypstMatrixLike(firstToken, cases);
331
340
  casesNode.setOptions(named_params);
332
341
  return [casesNode, newPos];
333
342
  }
334
343
  if (firstToken.value === 'lr') {
335
- const [args, newPos, lrData] = this.parseLrArguments(tokens, start + 1);
336
- const func_call = new TypstNode('funcCall', firstToken.value, args, lrData);
337
- return [func_call, newPos];
344
+ return this.parseLrArguments(tokens, start + 1);
345
+ }
346
+ if (['#heading', '#text'].includes(firstToken.value)) {
347
+ const [args, newPos] = this.parseArguments(tokens, start + 1);
348
+ const named_params = parse_named_params(args as TypstGroup[]);
349
+ assert(tokens[newPos].eq(LEFT_BRACKET));
350
+ const DOLLAR = new TypstToken(TypstTokenType.ELEMENT, '$');
351
+ const end = _find_closing_match(tokens, newPos + 1, [DOLLAR], [DOLLAR]);
352
+ const [group, _] = this.parseGroup(tokens, newPos + 2, end);
353
+ assert(tokens[end + 1].eq(RIGHT_BRACKET));
354
+ const markup_func = new TypstMarkupFunc(firstToken, [group]);
355
+ markup_func.setOptions(named_params);
356
+ return [markup_func, end + 2];
338
357
  }
339
358
  const [args, newPos] = this.parseArguments(tokens, start + 1);
340
- const func_call = new TypstNode('funcCall', firstToken.value, args);
359
+ const func_call = new TypstFuncCall(firstToken, args);
341
360
  return [func_call, newPos];
342
361
  }
343
362
  }
@@ -348,33 +367,32 @@ export class TypstParser {
348
367
  // start: the position of the left parentheses
349
368
  parseArguments(tokens: TypstToken[], start: number): [TypstNode[], number] {
350
369
  const end = find_closing_match(tokens, start);
351
- return [this.parseCommaSeparatedArguments(tokens, start + 1, end), end + 1];
370
+ return [this.parseArgumentsWithSeparator(tokens, start + 1, end, COMMA), end + 1];
352
371
  }
353
372
 
354
373
  // start: the position of the left parentheses
355
- parseLrArguments(tokens: TypstToken[], start: number): [TypstNode[], number, TypstLrData] {
374
+ parseLrArguments(tokens: TypstToken[], start: number): [TypstNode, number] {
375
+ const lr_token = tokens[start];
356
376
  if (tokens[start + 1].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET, VERTICAL_BAR])) {
357
377
  const end = find_closing_match(tokens, start);
358
378
  const inner_start = start + 1;
359
379
  const inner_end = find_closing_delim(tokens, inner_start);
360
- const inner_args= this.parseCommaSeparatedArguments(tokens, inner_start + 1, inner_end);
380
+ const inner_args= this.parseArgumentsWithSeparator(tokens, inner_start + 1, inner_end, COMMA);
361
381
  return [
362
- inner_args,
382
+ new TypstLeftright(lr_token, { body: new TypstGroup(inner_args), left: tokens[inner_start], right: tokens[inner_end]}),
363
383
  end + 1,
364
- {leftDelim: tokens[inner_start].value, rightDelim: tokens[inner_end].value} as TypstLrData
365
384
  ];
366
385
  } else {
367
386
  const [args, end] = this.parseArguments(tokens, start);
368
387
  return [
369
- args,
388
+ new TypstLeftright(lr_token, { body: new TypstGroup(args), left: null, right: null }),
370
389
  end,
371
- {leftDelim: null, rightDelim: null} as TypstLrData,
372
390
  ];
373
391
  }
374
392
  }
375
393
 
376
394
  // start: the position of the left parentheses
377
- parseGroupsOfArguments(tokens: TypstToken[], start: number, newline_token = SEMICOLON): [TypstNode[][], TypstNamedParams, number] {
395
+ parseMatrix(tokens: TypstToken[], start: number, rowSepToken: TypstToken, cellSepToken: TypstToken): [TypstNode[][], TypstNamedParams, number] {
378
396
  const end = find_closing_match(tokens, start);
379
397
  tokens = tokens.slice(0, end);
380
398
 
@@ -384,16 +402,16 @@ export class TypstParser {
384
402
  let pos = start + 1;
385
403
  while (pos < end) {
386
404
  while(pos < end) {
387
- let next_stop = array_find(tokens, newline_token, pos);
405
+ let next_stop = array_find(tokens, rowSepToken, pos);
388
406
  if (next_stop === -1) {
389
407
  next_stop = end;
390
408
  }
391
409
 
392
- let row = this.parseCommaSeparatedArguments(tokens, pos, next_stop);
410
+ let row = this.parseArgumentsWithSeparator(tokens, pos, next_stop, cellSepToken);
393
411
  let np: TypstNamedParams = {};
394
412
 
395
413
  function extract_named_params(arr: TypstNode[]): [TypstNode[], TypstNamedParams] {
396
- const COLON = new TypstNode('atom', ':');
414
+ const COLON = new TypstToken(TypstTokenType.ELEMENT, ':').toNode();
397
415
  const np: TypstNamedParams = {};
398
416
 
399
417
  const to_delete: number[] = [];
@@ -402,18 +420,18 @@ export class TypstParser {
402
420
  continue;
403
421
  }
404
422
 
405
- const g = arr[i];
406
- const pos_colon = array_find(g.args!, COLON);
423
+ const g = arr[i] as TypstGroup;
424
+ const pos_colon = array_find(g.items, COLON);
407
425
  if(pos_colon === -1 || pos_colon === 0) {
408
426
  continue;
409
427
  }
410
428
  to_delete.push(i);
411
- const param_name = g.args![pos_colon - 1];
412
- if(param_name.eq(new TypstNode('symbol', 'delim'))) {
413
- if(g.args!.length !== 3) {
429
+ const param_name = g.items[pos_colon - 1];
430
+ if(param_name.eq(new TypstToken(TypstTokenType.SYMBOL, 'delim').toNode())) {
431
+ if(g.items.length !== 3) {
414
432
  throw new TypstParserError('Invalid number of arguments for delim');
415
433
  }
416
- np['delim'] = g.args![pos_colon + 1];
434
+ np['delim'] = g.items[pos_colon + 1];
417
435
  } else {
418
436
  throw new TypstParserError('Not implemented for other named parameters');
419
437
  }
@@ -435,13 +453,13 @@ export class TypstParser {
435
453
  }
436
454
 
437
455
  // start: the position of the first token of arguments
438
- parseCommaSeparatedArguments(tokens: TypstToken[], start: number, end: number): TypstNode[] {
456
+ parseArgumentsWithSeparator(tokens: TypstToken[], start: number, end: number, sepToken: TypstToken): TypstNode[] {
439
457
  const args: TypstNode[] = [];
440
458
  let pos = start;
441
459
  while (pos < end) {
442
460
  let nodes: TypstNode[] = [];
443
461
  while(pos < end) {
444
- if(tokens[pos].eq(COMMA)) {
462
+ if(tokens[pos].eq(sepToken)) {
445
463
  pos += 1;
446
464
  break;
447
465
  } else if(tokens[pos].eq(SINGLE_SPACE)) {
@@ -1,85 +1,86 @@
1
- import { TypstToken, TypstTokenType } from "./types";
2
- import { reverseShorthandMap } from "./typst-shorthands";
3
- import { JSLex, Scanner } from "./jslex";
4
-
5
- const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
6
-
7
-
8
- function generate_regex_for_shorthands(): string {
9
- const regex_list = TYPST_SHORTHANDS.map((s) => {
10
- s = s.replaceAll('|', '\\|');
11
- s = s.replaceAll('.', '\\.');
12
- s = s.replaceAll('[', '\\[');
13
- s = s.replaceAll(']', '\\]');
14
- return s;
15
- });
16
- return `(${regex_list.join('|')})`;
17
- }
18
-
19
-
20
- const REGEX_SHORTHANDS = generate_regex_for_shorthands();
21
-
22
- const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
23
- [String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
24
- [String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
25
- [String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
26
- [String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
27
- [String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
28
- [String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
29
- [String.raw`\\\n`, (s) => {
30
- return [
31
- new TypstToken(TypstTokenType.CONTROL, "\\"),
32
- new TypstToken(TypstTokenType.NEWLINE, "\n"),
33
- ]
34
- }],
35
- [String.raw`\\\s`, (s) => {
36
- return [
37
- new TypstToken(TypstTokenType.CONTROL, "\\"),
38
- new TypstToken(TypstTokenType.SPACE, " "),
39
- ]
40
- }],
41
- // this backslash is dummy and will be ignored in later stages
42
- [String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
43
- [
44
- String.raw`"([^"]|(\\"))*"`,
45
- (s) => {
46
- const text = s.text()!.substring(1, s.text()!.length - 1);
47
- // replace all escape characters with their actual characters
48
- text.replaceAll('\\"', '"');
49
- return new TypstToken(TypstTokenType.TEXT, text);
50
- }
51
- ],
52
- [
53
- REGEX_SHORTHANDS,
54
- (s) => {
55
- const shorthand = s.text()!;
56
- const symbol = reverseShorthandMap.get(shorthand)!;
57
- return new TypstToken(TypstTokenType.SYMBOL, symbol);
58
- }
59
- ],
60
- [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
61
- [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
62
- [String.raw`#h\((.+?)\)`, (s) => {
63
- const match = s.reMatchArray()!;
64
- return [
65
- new TypstToken(TypstTokenType.SYMBOL, "#h"),
66
- new TypstToken(TypstTokenType.ELEMENT, "("),
67
- new TypstToken(TypstTokenType.LITERAL, match[1]),
68
- new TypstToken(TypstTokenType.ELEMENT, ")"),
69
- ];
70
- }],
71
- [String.raw`[a-zA-Z\.]+`, (s) => {
72
- return new TypstToken(s.text()!.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text()!);
73
- }],
74
- [String.raw`#none`, (s) => new TypstToken(TypstTokenType.NONE, s.text()!)],
75
- [String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
76
- ]);
77
-
78
- const spec = {
79
- "start": rules_map
80
- };
81
-
82
- export function tokenize_typst(input: string): TypstToken[] {
83
- const lexer = new JSLex<TypstToken>(spec);
84
- return lexer.collect(input);
85
- }
1
+ import { TypstToken } from "./typst-types";
2
+ import { TypstTokenType } from "./typst-types";
3
+ import { reverseShorthandMap } from "./typst-shorthands";
4
+ import { JSLex, Scanner } from "./jslex";
5
+
6
+ const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
7
+
8
+
9
+ function generate_regex_for_shorthands(): string {
10
+ const regex_list = TYPST_SHORTHANDS.map((s) => {
11
+ s = s.replaceAll('|', '\\|');
12
+ s = s.replaceAll('.', '\\.');
13
+ s = s.replaceAll('[', '\\[');
14
+ s = s.replaceAll(']', '\\]');
15
+ return s;
16
+ });
17
+ return `(${regex_list.join('|')})`;
18
+ }
19
+
20
+
21
+ const REGEX_SHORTHANDS = generate_regex_for_shorthands();
22
+
23
+ const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
24
+ [String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
25
+ [String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
26
+ [String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
27
+ [String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
28
+ [String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
29
+ [String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
30
+ [String.raw`\\\n`, (s) => {
31
+ return [
32
+ new TypstToken(TypstTokenType.CONTROL, "\\"),
33
+ new TypstToken(TypstTokenType.NEWLINE, "\n"),
34
+ ]
35
+ }],
36
+ [String.raw`\\\s`, (s) => {
37
+ return [
38
+ new TypstToken(TypstTokenType.CONTROL, "\\"),
39
+ new TypstToken(TypstTokenType.SPACE, " "),
40
+ ]
41
+ }],
42
+ // this backslash is dummy and will be ignored in later stages
43
+ [String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
44
+ [
45
+ String.raw`"([^"]|(\\"))*"`,
46
+ (s) => {
47
+ const text = s.text()!.substring(1, s.text()!.length - 1);
48
+ // replace all escape characters with their actual characters
49
+ text.replaceAll('\\"', '"');
50
+ return new TypstToken(TypstTokenType.TEXT, text);
51
+ }
52
+ ],
53
+ [
54
+ REGEX_SHORTHANDS,
55
+ (s) => {
56
+ const shorthand = s.text()!;
57
+ const symbol = reverseShorthandMap.get(shorthand)!;
58
+ return new TypstToken(TypstTokenType.SYMBOL, symbol);
59
+ }
60
+ ],
61
+ [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
62
+ [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
63
+ [String.raw`#h\((.+?)\)`, (s) => {
64
+ const match = s.reMatchArray()!;
65
+ return [
66
+ new TypstToken(TypstTokenType.SYMBOL, "#h"),
67
+ new TypstToken(TypstTokenType.ELEMENT, "("),
68
+ new TypstToken(TypstTokenType.LITERAL, match[1]),
69
+ new TypstToken(TypstTokenType.ELEMENT, ")"),
70
+ ];
71
+ }],
72
+ [String.raw`#none`, (s) => new TypstToken(TypstTokenType.NONE, s.text()!)],
73
+ [String.raw`#?[a-zA-Z\.]+`, (s) => {
74
+ return new TypstToken(s.text()!.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text()!);
75
+ }],
76
+ [String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
77
+ ]);
78
+
79
+ const spec = {
80
+ "start": rules_map
81
+ };
82
+
83
+ export function tokenize_typst(input: string): TypstToken[] {
84
+ const lexer = new JSLex<TypstToken>(spec);
85
+ return lexer.collect(input);
86
+ }