tex2typst 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -13,38 +13,34 @@ function get_command_param_num(command) {
13
13
  return 0;
14
14
  }
15
15
  }
16
- function find_closing_curly_bracket(latex, start) {
17
- assert(latex[start] === "{");
16
+ function find_closing_curly_bracket(tokens, start) {
17
+ assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
18
18
  let count = 1;
19
19
  let pos = start + 1;
20
20
  while (count > 0) {
21
- if (pos >= latex.length) {
21
+ if (pos >= tokens.length) {
22
22
  throw new LatexParserError("Unmatched curly brackets");
23
23
  }
24
- if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
25
- pos += 2;
26
- continue;
27
- }
28
- if (latex[pos] === "{") {
24
+ if (token_eq(tokens[pos], LEFT_CURLY_BRACKET)) {
29
25
  count += 1;
30
- } else if (latex[pos] === "}") {
26
+ } else if (token_eq(tokens[pos], RIGHT_CURLY_BRACKET)) {
31
27
  count -= 1;
32
28
  }
33
29
  pos += 1;
34
30
  }
35
31
  return pos - 1;
36
32
  }
37
- function find_closing_square_bracket(latex, start) {
38
- assert(latex[start] === "[");
33
+ function find_closing_square_bracket(tokens, start) {
34
+ assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
39
35
  let count = 1;
40
36
  let pos = start + 1;
41
37
  while (count > 0) {
42
- if (pos >= latex.length) {
38
+ if (pos >= tokens.length) {
43
39
  throw new LatexParserError("Unmatched square brackets");
44
40
  }
45
- if (latex[pos] === "[") {
41
+ if (token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
46
42
  count += 1;
47
- } else if (latex[pos] === "]") {
43
+ } else if (token_eq(tokens[pos], RIGHT_SQUARE_BRACKET)) {
48
44
  count -= 1;
49
45
  }
50
46
  pos += 1;
@@ -57,235 +53,208 @@ function isalpha(char) {
57
53
  function isdigit(char) {
58
54
  return "0123456789".includes(char);
59
55
  }
60
- function find_command(latex, start, command_name) {
61
- const len_slash_command = 1 + command_name.length;
56
+ function eat_whitespaces(tokens, start) {
62
57
  let pos = start;
63
- while (pos < latex.length) {
64
- pos = latex.indexOf("\\" + command_name, pos);
65
- if (pos === -1) {
66
- return -1;
67
- }
68
- if (pos + len_slash_command >= latex.length || !isalpha(latex[pos + len_slash_command])) {
69
- return pos;
70
- } else {
71
- pos += len_slash_command;
72
- }
58
+ while (pos < tokens.length && ["whitespace", "newline"].includes(tokens[pos].type)) {
59
+ pos++;
60
+ }
61
+ return tokens.slice(start, pos);
62
+ }
63
+ function eat_parenthesis(tokens, start) {
64
+ const firstToken = tokens[start];
65
+ if (firstToken.type === "element" && ["(", ")", "[", "]", "|", "\\{", "\\}"].includes(firstToken.value)) {
66
+ return firstToken;
67
+ } else if (firstToken.type === "command" && ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle"].includes(firstToken.value.slice(1))) {
68
+ return firstToken;
69
+ } else {
70
+ return null;
71
+ }
72
+ }
73
+ function eat_primes(tokens, start) {
74
+ let pos = start;
75
+ while (pos < tokens.length && token_eq(tokens[pos], { type: "element", value: "'" })) {
76
+ pos += 1;
77
+ }
78
+ return pos - start;
79
+ }
80
+ function eat_command_name(latex, start) {
81
+ let pos = start;
82
+ while (pos < latex.length && isalpha(latex[pos])) {
83
+ pos += 1;
73
84
  }
74
- return -1;
85
+ return latex.substring(start, pos);
75
86
  }
76
- function find_closing_right_command(latex, start) {
87
+ function find_closing_right_command(tokens, start) {
77
88
  let count = 1;
78
89
  let pos = start;
79
90
  while (count > 0) {
80
- if (pos >= latex.length) {
81
- return -1;
82
- }
83
- const left_idx = find_command(latex, pos, "left");
84
- const right_idx = find_command(latex, pos, "right");
85
- if (right_idx === -1) {
91
+ if (pos >= tokens.length) {
86
92
  return -1;
87
93
  }
88
- if (left_idx === -1 || left_idx > right_idx) {
89
- count -= 1;
90
- pos = right_idx + "\\right".length;
91
- } else {
94
+ if (token_eq(tokens[pos], LEFT_COMMAND)) {
92
95
  count += 1;
93
- pos = left_idx + "\\left".length;
96
+ } else if (token_eq(tokens[pos], RIGHT_COMMAND)) {
97
+ count -= 1;
94
98
  }
99
+ pos += 1;
95
100
  }
96
- return pos - "\\right".length;
101
+ return pos - 1;
97
102
  }
98
- function find_closing_end_command(latex, start) {
103
+ function find_closing_end_command(tokens, start) {
99
104
  let count = 1;
100
105
  let pos = start;
101
106
  while (count > 0) {
102
- if (pos >= latex.length) {
107
+ if (pos >= tokens.length) {
103
108
  return -1;
104
109
  }
105
- const begin_idx = find_command(latex, pos, "begin");
106
- const end_idx = find_command(latex, pos, "end");
107
- if (end_idx === -1) {
108
- return -1;
109
- }
110
- if (begin_idx === -1 || begin_idx > end_idx) {
111
- count -= 1;
112
- pos = end_idx + "\\end".length;
113
- } else {
110
+ if (token_eq(tokens[pos], BEGIN_COMMAND)) {
114
111
  count += 1;
115
- pos = begin_idx + "\\begin".length;
112
+ } else if (token_eq(tokens[pos], END_COMMAND)) {
113
+ count -= 1;
116
114
  }
117
- }
118
- return pos - "\\end".length;
119
- }
120
- function eat_whitespaces(latex, start) {
121
- let pos = start;
122
- while (pos < latex.length && [" ", "\t", "\n"].includes(latex[pos])) {
123
- pos += 1;
124
- }
125
- return latex.substring(start, pos);
126
- }
127
- function eat_command_name(latex, start) {
128
- let pos = start;
129
- while (pos < latex.length && isalpha(latex[pos])) {
130
115
  pos += 1;
131
116
  }
132
- return latex.substring(start, pos);
133
- }
134
- function eat_parenthesis(latex, start) {
135
- if ("()[]|".includes(latex[start])) {
136
- return latex[start];
137
- } else if (start + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(start, start + 2))) {
138
- return latex.substring(start, start + 2);
139
- } else if (start + 6 < latex.length && ["\\lfloor", "\\rfloor"].includes(latex.substring(start, start + 7))) {
140
- return latex.substring(start, start + 7);
141
- } else if (start + 5 < latex.length && ["\\lceil", "\\rceil"].includes(latex.substring(start, start + 6))) {
142
- return latex.substring(start, start + 6);
143
- } else if (start + 6 < latex.length && ["\\langle", "\\rangle"].includes(latex.substring(start, start + 7))) {
144
- return latex.substring(start, start + 7);
145
- } else {
146
- return null;
147
- }
117
+ return pos - 1;
148
118
  }
149
- function eat_primes(latex, start) {
150
- let pos = start;
151
- while (pos < latex.length && latex[pos] === "'") {
119
+ function find_closing_curly_bracket_char(latex, start) {
120
+ assert(latex[start] === "{");
121
+ let count = 1;
122
+ let pos = start + 1;
123
+ while (count > 0) {
124
+ if (pos >= latex.length) {
125
+ throw new LatexParserError("Unmatched curly brackets");
126
+ }
127
+ if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
128
+ pos += 2;
129
+ continue;
130
+ }
131
+ if (latex[pos] === "{") {
132
+ count += 1;
133
+ } else if (latex[pos] === "}") {
134
+ count -= 1;
135
+ }
152
136
  pos += 1;
153
137
  }
154
- return pos - start;
138
+ return pos - 1;
155
139
  }
156
- function latexNodeToTexNode(node) {
157
- try {
158
- let res = {};
159
- switch (node.type) {
160
- case "ordgroup":
161
- res.type = "ordgroup";
162
- res.args = node.args.map((n) => latexNodeToTexNode(n));
163
- if (res.args.length === 1) {
164
- res = res.args[0];
140
+ function tokenize(latex) {
141
+ const tokens = [];
142
+ let pos = 0;
143
+ while (pos < latex.length) {
144
+ const firstChar = latex[pos];
145
+ let token;
146
+ switch (firstChar) {
147
+ case "%": {
148
+ let newPos = pos + 1;
149
+ while (newPos < latex.length && latex[newPos] !== "\n") {
150
+ newPos += 1;
165
151
  }
152
+ token = { type: "comment", value: latex.slice(pos + 1, newPos) };
153
+ pos = newPos;
166
154
  break;
167
- case "empty":
168
- res.type = "empty";
169
- res.content = "";
170
- break;
171
- case "atom":
172
- res.type = "atom";
173
- res.content = node.content;
155
+ }
156
+ case "{":
157
+ case "}":
158
+ case "_":
159
+ case "^":
160
+ case "&":
161
+ token = { type: "control", value: firstChar };
162
+ pos++;
174
163
  break;
175
- case "token":
176
- case "token-letter-var":
177
- case "token-number":
178
- case "token-operator":
179
- case "token-parenthesis":
180
- res.type = "symbol";
181
- res.content = node.content;
164
+ case "\n":
165
+ token = { type: "newline", value: firstChar };
166
+ pos++;
182
167
  break;
183
- case "supsub":
184
- res.type = "supsub";
185
- res.irregularData = {};
186
- if (node["base"]) {
187
- res.irregularData.base = latexNodeToTexNode(node["base"]);
188
- }
189
- if (node["sup"]) {
190
- res.irregularData.sup = latexNodeToTexNode(node["sup"]);
191
- }
192
- if (node["sub"]) {
193
- res.irregularData.sub = latexNodeToTexNode(node["sub"]);
168
+ case "\r": {
169
+ if (pos + 1 < latex.length && latex[pos + 1] === "\n") {
170
+ token = { type: "newline", value: "\n" };
171
+ pos += 2;
172
+ } else {
173
+ token = { type: "newline", value: "\n" };
174
+ pos++;
194
175
  }
195
176
  break;
196
- case "leftright":
197
- res.type = "leftright";
198
- const body = latexNodeToTexNode(node.body);
199
- let left = node["left"];
200
- if (left === "\\{") {
201
- left = "{";
202
- }
203
- let right = node["right"];
204
- if (right === "\\}") {
205
- right = "}";
177
+ }
178
+ case " ": {
179
+ let newPos = pos;
180
+ while (newPos < latex.length && latex[newPos] === " ") {
181
+ newPos += 1;
206
182
  }
207
- const is_atom = (str) => ["(", ")", "[", "]", "{", "}"].includes(str);
208
- res.args = [
209
- { type: is_atom(left) ? "atom" : "symbol", content: left },
210
- body,
211
- { type: is_atom(right) ? "atom" : "symbol", content: right }
212
- ];
183
+ token = { type: "whitespace", value: latex.slice(pos, newPos) };
184
+ pos = newPos;
213
185
  break;
214
- case "beginend":
215
- if (node.content?.startsWith("align")) {
216
- res.type = "align";
217
- } else {
218
- res.type = "matrix";
186
+ }
187
+ case "\\": {
188
+ if (pos + 1 >= latex.length) {
189
+ throw new LatexParserError("Expecting command name after \\");
219
190
  }
220
- res.content = node.content;
221
- res.irregularData = node.body.map((row) => {
222
- return row.map((n) => latexNodeToTexNode(n));
223
- });
224
- break;
225
- case "command":
226
- const num_args = get_command_param_num(node.content);
227
- res.content = "\\" + node.content;
228
- if (num_args === 0) {
229
- res.type = "symbol";
230
- } else if (num_args === 1) {
231
- res.type = "unaryFunc";
232
- res.args = [
233
- latexNodeToTexNode(node.arg1)
234
- ];
235
- if (node.content === "sqrt") {
236
- if (node.exponent) {
237
- res.irregularData = latexNodeToTexNode(node.exponent);
238
- }
239
- }
240
- } else if (num_args === 2) {
241
- res.type = "binaryFunc";
242
- res.args = [
243
- latexNodeToTexNode(node.arg1),
244
- latexNodeToTexNode(node.arg2)
245
- ];
191
+ const firstTwoChars = latex.slice(pos, pos + 2);
192
+ if (firstTwoChars === "\\\\") {
193
+ token = { type: "control", value: "\\\\" };
194
+ pos += 2;
195
+ } else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
196
+ token = { type: "element", value: firstTwoChars };
197
+ pos += 2;
246
198
  } else {
247
- throw new LatexNodeToTexNodeError("Invalid number of arguments", node);
199
+ const command = eat_command_name(latex, pos + 1);
200
+ token = { type: "command", value: "\\" + command };
201
+ pos += 1 + command.length;
248
202
  }
249
203
  break;
250
- case "text":
251
- res.type = "text";
252
- res.content = node.content;
253
- break;
254
- case "comment":
255
- res.type = "comment";
256
- res.content = node.content;
257
- break;
258
- case "whitespace":
259
- res.type = "empty";
260
- break;
261
- case "newline":
262
- res.type = "newline";
263
- res.content = "\n";
264
- break;
265
- case "control":
266
- if (node.content === "\\\\") {
267
- res.type = "symbol";
268
- res.content = node.content;
269
- break;
204
+ }
205
+ default: {
206
+ if (isdigit(firstChar)) {
207
+ let newPos = pos;
208
+ while (newPos < latex.length && isdigit(latex[newPos])) {
209
+ newPos += 1;
210
+ }
211
+ token = { type: "element", value: latex.slice(pos, newPos) };
212
+ } else if (isalpha(firstChar)) {
213
+ token = { type: "element", value: firstChar };
214
+ } else if ("+-*/=\'<>!.,;?()[]|".includes(firstChar)) {
215
+ token = { type: "element", value: firstChar };
270
216
  } else {
271
- throw new LatexNodeToTexNodeError(`Unknown control sequence: ${node.content}`, node);
217
+ token = { type: "unknown", value: firstChar };
272
218
  }
273
- break;
274
- default:
275
- throw new LatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
219
+ pos += token.value.length;
220
+ }
221
+ }
222
+ tokens.push(token);
223
+ if (token.type === "command" && ["\\text", "\\begin", "\\end"].includes(token.value)) {
224
+ if (pos >= latex.length || latex[pos] !== "{") {
225
+ throw new LatexParserError(`No content for ${token.value} command`);
226
+ }
227
+ tokens.push({ type: "control", value: "{" });
228
+ const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
229
+ pos++;
230
+ let textInside = latex.slice(pos, posClosingBracket);
231
+ const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
232
+ for (const char of chars) {
233
+ textInside = textInside.replaceAll("\\" + char, char);
234
+ }
235
+ tokens.push({ type: "text", value: textInside });
236
+ tokens.push({ type: "control", value: "}" });
237
+ pos = posClosingBracket + 1;
276
238
  }
277
- return res;
278
- } catch (e) {
279
- throw e;
280
239
  }
240
+ return tokens;
241
+ }
242
+ function token_eq(token1, token2) {
243
+ return token1.type == token2.type && token1.value == token2.value;
281
244
  }
282
245
  function parseTex(tex, customTexMacros) {
283
246
  const parser = new LatexParser;
284
- for (const [macro, replacement] of Object.entries(customTexMacros)) {
285
- tex = tex.replaceAll(macro, replacement);
247
+ const original_tokens = tokenize(tex);
248
+ let processed_tokens = [];
249
+ for (const token of original_tokens) {
250
+ if (token.type === "command" && customTexMacros[token.value]) {
251
+ const expanded_tokens = tokenize(customTexMacros[token.value]);
252
+ processed_tokens = processed_tokens.concat(expanded_tokens);
253
+ } else {
254
+ processed_tokens.push(token);
255
+ }
286
256
  }
287
- const node = parser.parse(tex);
288
- return latexNodeToTexNode(node);
257
+ return parser.parse(processed_tokens);
289
258
  }
290
259
  var UNARY_COMMANDS = [
291
260
  "sqrt",
@@ -337,6 +306,14 @@ var BINARY_COMMANDS = [
337
306
  "tbinom"
338
307
  ];
339
308
  var EMPTY_NODE = { type: "empty", content: "" };
309
+ var LEFT_CURLY_BRACKET = { type: "control", value: "{" };
310
+ var RIGHT_CURLY_BRACKET = { type: "control", value: "}" };
311
+ var LEFT_SQUARE_BRACKET = { type: "element", value: "[" };
312
+ var RIGHT_SQUARE_BRACKET = { type: "element", value: "]" };
313
+ var LEFT_COMMAND = { type: "command", value: "\\left" };
314
+ var RIGHT_COMMAND = { type: "command", value: "\\right" };
315
+ var BEGIN_COMMAND = { type: "command", value: "\\begin" };
316
+ var END_COMMAND = { type: "command", value: "\\end" };
340
317
 
341
318
  class LatexParserError extends Error {
342
319
  constructor(message) {
@@ -344,6 +321,8 @@ class LatexParserError extends Error {
344
321
  this.name = "LatexParserError";
345
322
  }
346
323
  }
324
+ var SUB_SYMBOL = { type: "control", value: "_" };
325
+ var SUP_SYMBOL = { type: "control", value: "^" };
347
326
 
348
327
  class LatexParser {
349
328
  space_sensitive;
@@ -352,69 +331,80 @@ class LatexParser {
352
331
  this.space_sensitive = space_sensitive;
353
332
  this.newline_sensitive = newline_sensitive;
354
333
  }
355
- parse(latex) {
334
+ parse(tokens) {
356
335
  const results = [];
357
336
  let pos = 0;
358
- while (pos < latex.length) {
359
- const [res, newPos] = this.parseNextExpr(latex, pos);
360
- pos = newPos;
361
- if (!this.space_sensitive && res.type === "whitespace") {
362
- continue;
363
- }
364
- if (!this.newline_sensitive && res.type === "newline") {
365
- continue;
337
+ while (pos < tokens.length) {
338
+ const results2 = [];
339
+ let pos2 = 0;
340
+ while (pos2 < tokens.length) {
341
+ const [res, newPos] = this.parseNextExpr(tokens, pos2);
342
+ pos2 = newPos;
343
+ if (!this.space_sensitive && res.type === "whitespace") {
344
+ continue;
345
+ }
346
+ if (!this.newline_sensitive && res.type === "newline") {
347
+ continue;
348
+ }
349
+ if (res.type === "control" && res.content === "&") {
350
+ throw new LatexParserError("Unexpected & outside of an alignment");
351
+ }
352
+ results2.push(res);
366
353
  }
367
- if (res.type === "control" && res.content === "&") {
368
- throw new LatexParserError("Unexpected & outside of an alignment");
354
+ if (results2.length === 0) {
355
+ return EMPTY_NODE;
356
+ } else if (results2.length === 1) {
357
+ return results2[0];
358
+ } else {
359
+ return { type: "ordgroup", content: "", args: results2 };
369
360
  }
370
- results.push(res);
371
361
  }
372
362
  if (results.length === 0) {
373
363
  return EMPTY_NODE;
374
364
  } else if (results.length === 1) {
375
365
  return results[0];
376
366
  } else {
377
- return { type: "ordgroup", args: results };
367
+ return { type: "ordgroup", content: "", args: results };
378
368
  }
379
369
  }
380
- parseNextExpr(latex, start) {
381
- let [base, pos] = this.parseNextExprWithoutSupSub(latex, start);
370
+ parseNextExpr(tokens, start) {
371
+ let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
382
372
  let sub = null;
383
373
  let sup = null;
384
374
  let num_prime = 0;
385
- num_prime += eat_primes(latex, pos);
375
+ num_prime += eat_primes(tokens, pos);
386
376
  pos += num_prime;
387
- if (pos < latex.length && latex[pos] === "_") {
388
- [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
389
- num_prime += eat_primes(latex, pos);
377
+ if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
378
+ [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
379
+ num_prime += eat_primes(tokens, pos);
390
380
  pos += num_prime;
391
- if (pos < latex.length && latex[pos] === "^") {
392
- [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
393
- if (eat_primes(latex, pos) > 0) {
381
+ if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
382
+ [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
383
+ if (eat_primes(tokens, pos) > 0) {
394
384
  throw new LatexParserError("Double superscript");
395
385
  }
396
386
  }
397
- } else if (pos < latex.length && latex[pos] === "^") {
398
- [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
399
- if (eat_primes(latex, pos) > 0) {
387
+ } else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
388
+ [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
389
+ if (eat_primes(tokens, pos) > 0) {
400
390
  throw new LatexParserError("Double superscript");
401
391
  }
402
- if (pos < latex.length && latex[pos] === "_") {
403
- [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
404
- if (eat_primes(latex, pos) > 0) {
392
+ if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
393
+ [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
394
+ if (eat_primes(tokens, pos) > 0) {
405
395
  throw new LatexParserError("Double superscript");
406
396
  }
407
397
  }
408
398
  }
409
399
  if (sub !== null || sup !== null || num_prime > 0) {
410
- const res = { type: "supsub", base };
400
+ const res = { base };
411
401
  if (sub) {
412
402
  res.sub = sub;
413
403
  }
414
404
  if (num_prime > 0) {
415
- res.sup = { type: "ordgroup", args: [] };
405
+ res.sup = { type: "ordgroup", content: "", args: [] };
416
406
  for (let i = 0;i < num_prime; i++) {
417
- res.sup.args.push({ type: "command", content: "prime" });
407
+ res.sup.args.push({ type: "symbol", content: "\\prime" });
418
408
  }
419
409
  if (sup) {
420
410
  res.sup.args.push(sup);
@@ -425,194 +415,183 @@ class LatexParser {
425
415
  } else if (sup) {
426
416
  res.sup = sup;
427
417
  }
428
- return [res, pos];
418
+ return [{ type: "supsub", content: "", data: res }, pos];
429
419
  } else {
430
420
  return [base, pos];
431
421
  }
432
422
  }
433
- parseNextExprWithoutSupSub(latex, start) {
434
- const firstChar = latex[start];
435
- if (firstChar === "{") {
436
- const posClosingBracket = find_closing_curly_bracket(latex, start);
437
- const exprInside = latex.slice(start + 1, posClosingBracket);
438
- return [this.parse(exprInside), posClosingBracket + 1];
439
- } else if (firstChar === "\\") {
440
- if (start + 1 >= latex.length) {
441
- throw new LatexParserError("Expecting command name after \\");
442
- }
443
- const firstTwoChars = latex.slice(start, start + 2);
444
- if (firstTwoChars === "\\\\") {
445
- return [{ type: "control", content: "\\\\" }, start + 2];
446
- } else if (firstTwoChars === "\\{" || firstTwoChars === "\\}") {
447
- return [{ type: "token-parenthesis", content: firstTwoChars }, start + 2];
448
- } else if (["\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
449
- return [{ type: "token", content: firstTwoChars }, start + 2];
450
- } else if (latex.slice(start).startsWith("\\begin{")) {
451
- return this.parseBeginEndExpr(latex, start);
452
- } else if (latex.slice(start).startsWith("\\left") && (start + 5 >= latex.length || !isalpha(latex[start + 5]))) {
453
- return this.parseLeftRightExpr(latex, start);
454
- } else {
455
- return this.parseCommandExpr(latex, start);
456
- }
457
- } else if (firstChar === "%") {
458
- let pos = start + 1;
459
- while (pos < latex.length && latex[pos] !== "\n") {
460
- pos += 1;
461
- }
462
- return [{ type: "comment", content: latex.slice(start + 1, pos) }, pos];
463
- } else if (isdigit(firstChar)) {
464
- let pos = start;
465
- while (pos < latex.length && isdigit(latex[pos])) {
466
- pos += 1;
467
- }
468
- return [{ type: "token-number", content: latex.slice(start, pos) }, pos];
469
- } else if (isalpha(firstChar)) {
470
- return [{ type: "token-letter-var", content: firstChar }, start + 1];
471
- } else if ("+-*/=<>!".includes(firstChar)) {
472
- return [{ type: "token-operator", content: firstChar }, start + 1];
473
- } else if (".,;?".includes(firstChar)) {
474
- return [{ type: "atom", content: firstChar }, start + 1];
475
- } else if ("()[]".includes(firstChar)) {
476
- return [{ type: "token-parenthesis", content: firstChar }, start + 1];
477
- } else if (firstChar === "_") {
478
- let [sub, pos] = this.parseNextExpr(latex, start + 1);
479
- let sup = undefined;
480
- if (pos < latex.length && latex[pos] === "^") {
481
- [sup, pos] = this.parseNextExpr(latex, pos + 1);
482
- }
483
- return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
484
- } else if (firstChar === "^") {
485
- let [sup, pos] = this.parseNextExpr(latex, start + 1);
486
- let sub = undefined;
487
- if (pos < latex.length && latex[pos] === "_") {
488
- [sub, pos] = this.parseNextExpr(latex, pos + 1);
489
- }
490
- return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
491
- } else if (firstChar === " ") {
492
- let pos = start;
493
- while (pos < latex.length && latex[pos] === " ") {
494
- pos += 1;
495
- }
496
- return [{ type: "whitespace", content: latex.slice(start, pos) }, pos];
497
- } else if (firstChar === "\n") {
498
- return [{ type: "newline", content: "\n" }, start + 1];
499
- } else if (firstChar === "\r") {
500
- if (start + 1 < latex.length && latex[start + 1] === "\n") {
501
- return [{ type: "newline", content: "\n" }, start + 2];
502
- } else {
503
- return [{ type: "newline", content: "\n" }, start + 1];
504
- }
505
- } else if (firstChar === "&") {
506
- return [{ type: "control", content: "&" }, start + 1];
507
- } else {
508
- return [{ type: "unknown", content: firstChar }, start + 1];
423
+ parseNextExprWithoutSupSub(tokens, start) {
424
+ const firstToken = tokens[start];
425
+ const tokenType = firstToken.type;
426
+ switch (tokenType) {
427
+ case "element":
428
+ case "text":
429
+ case "comment":
430
+ case "whitespace":
431
+ case "newline":
432
+ return [{ type: tokenType, content: firstToken.value }, start + 1];
433
+ case "command":
434
+ if (token_eq(firstToken, BEGIN_COMMAND)) {
435
+ return this.parseBeginEndExpr(tokens, start);
436
+ } else if (token_eq(firstToken, LEFT_COMMAND)) {
437
+ return this.parseLeftRightExpr(tokens, start);
438
+ } else {
439
+ return this.parseCommandExpr(tokens, start);
440
+ }
441
+ case "control":
442
+ const controlChar = firstToken.value;
443
+ switch (controlChar) {
444
+ case "{":
445
+ const posClosingBracket = find_closing_curly_bracket(tokens, start);
446
+ const exprInside = tokens.slice(start + 1, posClosingBracket);
447
+ return [this.parse(exprInside), posClosingBracket + 1];
448
+ case "}":
449
+ throw new LatexParserError("Unmatched '}'");
450
+ case "\\\\":
451
+ return [{ type: "control", content: "\\\\" }, start + 1];
452
+ case "_": {
453
+ let [sub, pos] = this.parseNextExpr(tokens, start + 1);
454
+ let sup = undefined;
455
+ if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
456
+ [sup, pos] = this.parseNextExpr(tokens, pos + 1);
457
+ }
458
+ const subData = { base: EMPTY_NODE, sub, sup };
459
+ return [{ type: "supsub", content: "", data: subData }, pos];
460
+ }
461
+ case "^": {
462
+ let [sup, pos] = this.parseNextExpr(tokens, start + 1);
463
+ let sub = undefined;
464
+ if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
465
+ [sub, pos] = this.parseNextExpr(tokens, pos + 1);
466
+ }
467
+ const supData = { base: EMPTY_NODE, sub, sup };
468
+ return [{ type: "supsub", content: "", data: supData }, pos];
469
+ }
470
+ case "&":
471
+ return [{ type: "control", content: "&" }, start + 1];
472
+ default:
473
+ throw new LatexParserError("Unknown control sequence");
474
+ }
475
+ default:
476
+ throw new LatexParserError("Unknown token type");
509
477
  }
510
478
  }
511
- parseCommandExpr(latex, start) {
512
- assert(latex[start] === "\\");
479
+ parseCommandExpr(tokens, start) {
480
+ assert(tokens[start].type === "command");
481
+ const command = tokens[start].value;
513
482
  let pos = start + 1;
514
- const command = eat_command_name(latex, pos);
515
- pos += command.length;
516
- const paramNum = get_command_param_num(command);
483
+ if (["left", "right", "begin", "end"].includes(command.slice(1))) {
484
+ throw new LatexParserError("Unexpected command: " + command);
485
+ }
486
+ const paramNum = get_command_param_num(command.slice(1));
517
487
  if (paramNum === 0) {
518
- return [{ type: "command", content: command }, pos];
488
+ return [{ type: "symbol", content: command }, pos];
519
489
  } else if (paramNum === 1) {
520
- if (command === "sqrt" && pos < latex.length && latex[pos] === "[") {
490
+ if (command === "\\sqrt" && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
521
491
  const posLeftSquareBracket = pos;
522
- const posRightSquareBracket = find_closing_square_bracket(latex, pos);
523
- const exprInside = latex.slice(posLeftSquareBracket + 1, posRightSquareBracket);
492
+ const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
493
+ const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
524
494
  const exponent = this.parse(exprInside);
525
- const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, posRightSquareBracket + 1);
526
- return [{ type: "command", content: command, arg1, exponent }, newPos];
527
- } else if (command === "text") {
528
- assert(latex[pos] === "{");
529
- const posClosingBracket = find_closing_curly_bracket(latex, pos);
530
- const text = latex.slice(pos + 1, posClosingBracket);
531
- return [{ type: "text", content: text }, posClosingBracket + 1];
532
- } else {
533
- let [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, pos);
534
- return [{ type: "command", content: command, arg1 }, newPos];
495
+ const [arg12, newPos2] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
496
+ return [{ type: "unaryFunc", content: command, args: [arg12], data: exponent }, newPos2];
497
+ } else if (command === "\\text") {
498
+ if (pos + 2 >= tokens.length) {
499
+ throw new LatexParserError("Expecting content for \\text command");
500
+ }
501
+ assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
502
+ assert(tokens[pos + 1].type === "text");
503
+ assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
504
+ const text = tokens[pos + 1].value;
505
+ return [{ type: "text", content: text }, pos + 3];
535
506
  }
507
+ let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
508
+ return [{ type: "unaryFunc", content: command, args: [arg1] }, newPos];
536
509
  } else if (paramNum === 2) {
537
- const [arg1, pos1] = this.parseNextExprWithoutSupSub(latex, pos);
538
- const [arg2, pos2] = this.parseNextExprWithoutSupSub(latex, pos1);
539
- return [{ type: "command", content: command, arg1, arg2 }, pos2];
510
+ const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
511
+ const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
512
+ return [{ type: "binaryFunc", content: command, args: [arg1, arg2] }, pos2];
540
513
  } else {
541
514
  throw new Error("Invalid number of parameters");
542
515
  }
543
516
  }
544
- parseLeftRightExpr(latex, start) {
545
- assert(latex.slice(start, start + 5) === "\\left");
546
- let pos = start + "\\left".length;
547
- pos += eat_whitespaces(latex, pos).length;
548
- if (pos >= latex.length) {
517
+ parseLeftRightExpr(tokens, start) {
518
+ assert(token_eq(tokens[start], LEFT_COMMAND));
519
+ let pos = start + 1;
520
+ pos += eat_whitespaces(tokens, pos).length;
521
+ if (pos >= tokens.length) {
549
522
  throw new LatexParserError("Expecting delimiter after \\left");
550
523
  }
551
- const leftDelimiter = eat_parenthesis(latex, pos);
524
+ const leftDelimiter = eat_parenthesis(tokens, pos);
552
525
  if (leftDelimiter === null) {
553
526
  throw new LatexParserError("Invalid delimiter after \\left");
554
527
  }
555
- pos += leftDelimiter.length;
528
+ pos++;
556
529
  const exprInsideStart = pos;
557
- const idx = find_closing_right_command(latex, pos);
530
+ const idx = find_closing_right_command(tokens, pos);
558
531
  if (idx === -1) {
559
532
  throw new LatexParserError("No matching \\right");
560
533
  }
561
534
  const exprInsideEnd = idx;
562
- pos = idx + "\\right".length;
563
- pos += eat_whitespaces(latex, pos).length;
564
- if (pos >= latex.length) {
565
- throw new LatexParserError("Expecting delimiter after \\right");
535
+ pos = idx + 1;
536
+ pos += eat_whitespaces(tokens, pos).length;
537
+ if (pos >= tokens.length) {
538
+ throw new LatexParserError("Expecting \\right after \\left");
566
539
  }
567
- const rightDelimiter = eat_parenthesis(latex, pos);
540
+ const rightDelimiter = eat_parenthesis(tokens, pos);
568
541
  if (rightDelimiter === null) {
569
542
  throw new LatexParserError("Invalid delimiter after \\right");
570
543
  }
571
- pos += rightDelimiter.length;
572
- const exprInside = latex.slice(exprInsideStart, exprInsideEnd);
544
+ pos++;
545
+ const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
573
546
  const body = this.parse(exprInside);
574
- const res = { type: "leftright", left: leftDelimiter, right: rightDelimiter, body };
547
+ const args = [
548
+ { type: "element", content: leftDelimiter.value },
549
+ body,
550
+ { type: "element", content: rightDelimiter.value }
551
+ ];
552
+ const res = { type: "leftright", content: "", args };
575
553
  return [res, pos];
576
554
  }
577
- parseBeginEndExpr(latex, start) {
578
- assert(latex.slice(start, start + 7) === "\\begin{");
579
- let pos = start + "\\begin".length;
580
- const idx = find_closing_curly_bracket(latex, pos);
581
- if (idx === -1) {
582
- throw new LatexParserError("No matching } after \\begin{");
583
- }
584
- const envName = latex.slice(pos + 1, idx);
585
- pos = idx + 1;
586
- pos += eat_whitespaces(latex, pos).length;
555
+ parseBeginEndExpr(tokens, start) {
556
+ assert(token_eq(tokens[start], BEGIN_COMMAND));
557
+ let pos = start + 1;
558
+ assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
559
+ assert(tokens[pos + 1].type === "text");
560
+ assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
561
+ const envName = tokens[pos + 1].value;
562
+ pos += 3;
563
+ pos += eat_whitespaces(tokens, pos).length;
587
564
  const exprInsideStart = pos;
588
- const endIdx = find_closing_end_command(latex, pos);
565
+ const endIdx = find_closing_end_command(tokens, pos);
589
566
  if (endIdx === -1) {
590
567
  throw new LatexParserError("No matching \\end");
591
568
  }
592
569
  const exprInsideEnd = endIdx;
593
- pos = endIdx + "\\end".length;
594
- const closingIdx = find_closing_curly_bracket(latex, pos);
595
- if (closingIdx === -1) {
596
- throw new LatexParserError("No matching } after \\end{");
597
- }
598
- if (latex.slice(pos + 1, closingIdx) !== envName) {
570
+ pos = endIdx + 1;
571
+ assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
572
+ assert(tokens[pos + 1].type === "text");
573
+ assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
574
+ if (tokens[pos + 1].value !== envName) {
599
575
  throw new LatexParserError("Mismatched \\begin and \\end environments");
600
576
  }
601
- let exprInside = latex.slice(exprInsideStart, exprInsideEnd);
602
- exprInside = exprInside.trimEnd();
577
+ pos += 3;
578
+ const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
579
+ while (exprInside.length > 0 && ["whitespace", "newline"].includes(exprInside[exprInside.length - 1].type)) {
580
+ exprInside.pop();
581
+ }
603
582
  const body = this.parseAligned(exprInside);
604
- const res = { type: "beginend", content: envName, body };
605
- return [res, closingIdx + 1];
583
+ const res = { type: "beginend", content: envName, data: body };
584
+ return [res, pos];
606
585
  }
607
- parseAligned(latex) {
586
+ parseAligned(tokens) {
608
587
  let pos = 0;
609
588
  const allRows = [];
610
589
  let row = [];
611
590
  allRows.push(row);
612
- let group = { type: "ordgroup", args: [] };
591
+ let group = { type: "ordgroup", content: "", args: [] };
613
592
  row.push(group);
614
- while (pos < latex.length) {
615
- const [res, newPos] = this.parseNextExpr(latex, pos);
593
+ while (pos < tokens.length) {
594
+ const [res, newPos] = this.parseNextExpr(tokens, pos);
616
595
  pos = newPos;
617
596
  if (res.type === "whitespace") {
618
597
  continue;
@@ -620,11 +599,11 @@ class LatexParser {
620
599
  continue;
621
600
  } else if (res.type === "control" && res.content === "\\\\") {
622
601
  row = [];
623
- group = { type: "ordgroup", args: [] };
602
+ group = { type: "ordgroup", content: "", args: [] };
624
603
  row.push(group);
625
604
  allRows.push(row);
626
605
  } else if (res.type === "control" && res.content === "&") {
627
- group = { type: "ordgroup", args: [] };
606
+ group = { type: "ordgroup", content: "", args: [] };
628
607
  row.push(group);
629
608
  } else {
630
609
  group.args.push(res);
@@ -634,15 +613,6 @@ class LatexParser {
634
613
  }
635
614
  }
636
615
 
637
- class LatexNodeToTexNodeError extends Error {
638
- node;
639
- constructor(message, node) {
640
- super(message);
641
- this.name = "LatexNodeToTexNodeError";
642
- this.node = node;
643
- }
644
- }
645
-
646
616
  // src/map.ts
647
617
  var symbolMap = new Map([
648
618
  ["gets", "arrow.l"],
@@ -988,22 +958,22 @@ class TypstWriter {
988
958
  this.buffer += str;
989
959
  }
990
960
  append(node) {
991
- if (node.type === "empty") {
961
+ if (node.type === "empty" || node.type === "whitespace") {
992
962
  return;
993
963
  } else if (node.type === "ordgroup") {
994
964
  node.args.forEach((arg) => this.append(arg));
995
- } else if (node.type === "atom") {
965
+ } else if (node.type === "element") {
996
966
  let content = node.content;
997
967
  if (node.content === "," && this.insideFunctionDepth > 0) {
998
968
  content = "comma";
999
969
  }
1000
- this.queue.push({ type: "atom", content });
970
+ this.queue.push({ type: "symbol", content });
1001
971
  } else if (node.type === "symbol") {
1002
972
  this.queue.push({ type: "symbol", content: node.content });
1003
973
  } else if (node.type === "text") {
1004
974
  this.queue.push(node);
1005
975
  } else if (node.type === "supsub") {
1006
- let { base, sup, sub } = node.irregularData;
976
+ let { base, sup, sub } = node.data;
1007
977
  if (base && base.type === "unaryFunc" && base.content === "\\overbrace" && sup) {
1008
978
  this.append({ type: "binaryFunc", content: "\\overbrace", args: [base.args[0], sup] });
1009
979
  return;
@@ -1035,7 +1005,7 @@ class TypstWriter {
1035
1005
  }
1036
1006
  } else if (node.type === "leftright") {
1037
1007
  const [left, body, right] = node.args;
1038
- if (["[]", "()", "{}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
1008
+ if (["[]", "()", "\\{\\}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
1039
1009
  this.append(left);
1040
1010
  this.append(body);
1041
1011
  this.append(right);
@@ -1064,12 +1034,12 @@ class TypstWriter {
1064
1034
  } else if (node.type === "unaryFunc") {
1065
1035
  const func_symbol = { type: "symbol", content: node.content };
1066
1036
  const arg0 = node.args[0];
1067
- if (node.content === "\\sqrt" && node.irregularData) {
1037
+ if (node.content === "\\sqrt" && node.data) {
1068
1038
  func_symbol.content = "root";
1069
1039
  this.queue.push(func_symbol);
1070
1040
  this.insideFunctionDepth++;
1071
1041
  this.queue.push({ type: "atom", content: "(" });
1072
- this.append(node.irregularData);
1042
+ this.append(node.data);
1073
1043
  this.queue.push({ type: "atom", content: "," });
1074
1044
  this.append(arg0);
1075
1045
  this.queue.push({ type: "atom", content: ")" });
@@ -1090,7 +1060,7 @@ class TypstWriter {
1090
1060
  return;
1091
1061
  } else if (node.content === "\\mathbb") {
1092
1062
  const body = node.args[0];
1093
- if (body.type === "symbol" && /^[A-Z]$/.test(body.content)) {
1063
+ if (body.type === "element" && /^[A-Z]$/.test(body.content)) {
1094
1064
  this.queue.push({ type: "symbol", content: body.content + body.content });
1095
1065
  return;
1096
1066
  }
@@ -1122,49 +1092,58 @@ class TypstWriter {
1122
1092
  } else if (node.type === "newline") {
1123
1093
  this.queue.push({ type: "newline", content: "\n" });
1124
1094
  return;
1125
- } else if (node.type === "align") {
1126
- const matrix = node.irregularData;
1127
- matrix.forEach((row, i) => {
1128
- row.forEach((cell, j) => {
1129
- if (j > 0) {
1130
- this.queue.push({ type: "atom", content: "&" });
1095
+ } else if (node.type === "beginend") {
1096
+ if (node.content.startsWith("align")) {
1097
+ const matrix = node.data;
1098
+ matrix.forEach((row, i) => {
1099
+ row.forEach((cell, j) => {
1100
+ if (j > 0) {
1101
+ this.queue.push({ type: "atom", content: "&" });
1102
+ }
1103
+ this.append(cell);
1104
+ });
1105
+ if (i < matrix.length - 1) {
1106
+ this.queue.push({ type: "symbol", content: "\\\\" });
1131
1107
  }
1132
- this.append(cell);
1133
1108
  });
1134
- if (i < matrix.length - 1) {
1135
- this.queue.push({ type: "symbol", content: "\\\\" });
1136
- }
1137
- });
1138
- } else if (node.type === "matrix") {
1139
- const matrix = node.irregularData;
1140
- this.queue.push({ type: "symbol", content: "mat" });
1141
- this.insideFunctionDepth++;
1142
- this.queue.push({ type: "atom", content: "(" });
1143
- this.queue.push({ type: "symbol", content: "delim: #none, " });
1144
- matrix.forEach((row, i) => {
1145
- row.forEach((cell, j) => {
1146
- if (cell.type === "ordgroup" && cell.args.length === 0) {
1147
- this.queue.push({ type: "atom", content: "," });
1148
- return;
1149
- }
1150
- this.append(cell);
1151
- if (j < row.length - 1) {
1152
- this.queue.push({ type: "atom", content: "," });
1153
- } else {
1154
- if (i < matrix.length - 1) {
1155
- this.queue.push({ type: "atom", content: ";" });
1109
+ } else {
1110
+ const matrix = node.data;
1111
+ this.queue.push({ type: "symbol", content: "mat" });
1112
+ this.insideFunctionDepth++;
1113
+ this.queue.push({ type: "atom", content: "(" });
1114
+ this.queue.push({ type: "symbol", content: "delim: #none, " });
1115
+ matrix.forEach((row, i) => {
1116
+ row.forEach((cell, j) => {
1117
+ if (cell.type === "ordgroup" && cell.args.length === 0) {
1118
+ this.queue.push({ type: "atom", content: "," });
1119
+ return;
1156
1120
  }
1157
- }
1121
+ this.append(cell);
1122
+ if (j < row.length - 1) {
1123
+ this.queue.push({ type: "atom", content: "," });
1124
+ } else {
1125
+ if (i < matrix.length - 1) {
1126
+ this.queue.push({ type: "atom", content: ";" });
1127
+ }
1128
+ }
1129
+ });
1158
1130
  });
1159
- });
1160
- this.queue.push({ type: "atom", content: ")" });
1161
- this.insideFunctionDepth--;
1131
+ this.queue.push({ type: "atom", content: ")" });
1132
+ this.insideFunctionDepth--;
1133
+ }
1134
+ } else if (node.type === "matrix") {
1162
1135
  } else if (node.type === "unknownMacro") {
1163
1136
  if (this.nonStrict) {
1164
1137
  this.queue.push({ type: "symbol", content: node.content });
1165
1138
  } else {
1166
1139
  throw new TypstWriterError(`Unknown macro: ${node.content}`, node);
1167
1140
  }
1141
+ } else if (node.type === "control") {
1142
+ if (node.content === "\\\\") {
1143
+ this.queue.push({ type: "symbol", content: node.content });
1144
+ } else {
1145
+ throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
1146
+ }
1168
1147
  } else if (node.type === "comment") {
1169
1148
  this.queue.push({ type: "comment", content: node.content });
1170
1149
  } else {
@@ -1204,9 +1183,7 @@ class TypstWriter {
1204
1183
  this.queue = [];
1205
1184
  }
1206
1185
  appendWithBracketsIfNeeded(node) {
1207
- const is_single_atom = node.type === "atom";
1208
- const is_single_function = node.type === "unaryFunc" || node.type === "binaryFunc" || node.type === "leftright";
1209
- const is_single = ["atom", "symbol", "unaryFunc", "binaryFunc", "leftright"].includes(node.type);
1186
+ const is_single = ["symbol", "element", "unaryFunc", "binaryFunc", "leftright"].includes(node.type);
1210
1187
  if (is_single) {
1211
1188
  this.append(node);
1212
1189
  } else {