tex2typst 0.1.20 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,73 +1,201 @@
1
1
  // src/parser.ts
2
- import katex from "katex";
3
- function katexNodeToTexNode(node) {
4
- try {
5
- if (node.loc) {
6
- delete node.loc;
2
/**
 * Parser-internal invariant check.
 *
 * @param {*} condition - Value that must be truthy.
 * @param {string} [message] - Text for the thrown error. When omitted, a
 *   generic message is used so a failed check never surfaces as an error
 *   with an empty message.
 * @throws {LatexParserError} When `condition` is falsy.
 */
function assert(condition, message = "Assertion failed") {
  if (condition) {
    return;
  }
  throw new LatexParserError(message);
}
7
/**
 * Looks up how many arguments a command takes.
 *
 * @param {string} command - Command name without the leading backslash.
 * @returns {number} 1 if listed in UNARY_COMMANDS, 2 if listed in
 *   BINARY_COMMANDS, otherwise 0.
 */
function get_command_param_num(command) {
  if (UNARY_COMMANDS.includes(command)) {
    return 1;
  }
  return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
16
/**
 * Finds the `}` that closes the `{` at index `start`.
 *
 * A backslash together with the character after it is skipped as one unit.
 * This keeps `\{` and `\}` from counting as group delimiters, and it also
 * keeps a brace that follows an escaped backslash (`\\}`) counting as a real
 * delimiter.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`.
 * @throws {LatexParserError} If the group is never closed.
 */
function find_closing_curly_bracket(latex, start) {
  assert(latex[start] === "{");
  let depth = 1;
  let pos = start + 1;
  while (depth > 0) {
    if (pos >= latex.length) {
      throw new LatexParserError("Unmatched curly brackets");
    }
    const ch = latex[pos];
    if (ch === "\\" && pos + 1 < latex.length) {
      // Escape pair: the next character is never a group delimiter.
      pos += 2;
      continue;
    }
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
    }
    pos += 1;
  }
  // pos is one past the closing brace.
  return pos - 1;
}
37
/**
 * Finds the `]` that balances the `[` at index `start`, counting nested
 * `[` / `]` pairs.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `[`.
 * @returns {number} Index of the matching `]`.
 * @throws {LatexParserError} If no balancing `]` exists.
 */
function find_closing_square_bracket(latex, start) {
  assert(latex[start] === "[");
  let depth = 1;
  for (let i = start + 1; i < latex.length; i++) {
    const ch = latex[i];
    if (ch === "[") {
      depth += 1;
    } else if (ch === "]") {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
  }
  throw new LatexParserError("Unmatched square brackets");
}
54
/**
 * Tests whether `char` is exactly one ASCII letter (a-z or A-Z).
 *
 * The empty string, multi-character strings and non-strings all yield false.
 *
 * @param {*} char - Candidate character.
 * @returns {boolean} True only for a single ASCII letter.
 */
function isalpha(char) {
  return typeof char === "string" && /^[a-zA-Z]$/.test(char);
}
57
/**
 * Tests whether `char` is exactly one ASCII digit (0-9).
 *
 * The empty string, multi-character strings and non-strings all yield false.
 *
 * @param {*} char - Candidate character.
 * @returns {boolean} True only for a single ASCII digit.
 */
function isdigit(char) {
  return typeof char === "string" && /^[0-9]$/.test(char);
}
60
/**
 * Finds the next occurrence of the command `\<command_name>` at or after
 * `start`.
 *
 * A textual match is rejected when
 *  - it is followed by a letter (it is then a prefix of a longer command
 *    name, e.g. `\left` inside `\leftarrow`), or
 *  - its backslash is itself escaped, i.e. preceded by an odd number of
 *    backslashes (e.g. `\\left` is `\\` followed by the letters "left").
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to start searching from.
 * @param {string} command_name - Command name without the backslash.
 * @returns {number} Index of the command's backslash, or -1 if not found.
 */
function find_command(latex, start, command_name) {
  const needle = "\\" + command_name;
  let pos = start;
  while (pos < latex.length) {
    pos = latex.indexOf(needle, pos);
    if (pos === -1) {
      return -1;
    }
    // Count the run of backslashes directly in front of the match.
    let preceding = 0;
    while (pos - 1 - preceding >= 0 && latex[pos - 1 - preceding] === "\\") {
      preceding += 1;
    }
    const isEscaped = preceding % 2 === 1;
    const after = pos + needle.length;
    const nameEndsHere = after >= latex.length || !isalpha(latex[after]);
    if (!isEscaped && nameEndsHere) {
      return pos;
    }
    pos = after;
  }
  return -1;
}
76
/**
 * Finds the `\right` that balances an already-opened `\left`.
 *
 * Scanning starts at `start` with a nesting depth of 1; every further
 * `\left` raises the depth and every `\right` lowers it.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index just inside the opened `\left` group.
 * @returns {number} Index of the balancing `\right`, or -1 if there is none.
 */
function find_closing_right_command(latex, start) {
  const LEFT = "\\left";
  const RIGHT = "\\right";
  let depth = 1;
  let cursor = start;
  while (cursor < latex.length) {
    const nextLeft = find_command(latex, cursor, "left");
    const nextRight = find_command(latex, cursor, "right");
    if (nextRight === -1) {
      return -1;
    }
    const leftComesFirst = nextLeft !== -1 && nextLeft <= nextRight;
    if (leftComesFirst) {
      depth += 1;
      cursor = nextLeft + LEFT.length;
    } else {
      depth -= 1;
      if (depth === 0) {
        return nextRight;
      }
      cursor = nextRight + RIGHT.length;
    }
  }
  return -1;
}
98
/**
 * Finds the `\end` that balances an already-opened `\begin`.
 *
 * Scanning starts at `start` with a nesting depth of 1; every further
 * `\begin` raises the depth and every `\end` lowers it.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index just inside the opened environment.
 * @returns {number} Index of the balancing `\end`, or -1 if there is none.
 */
function find_closing_end_command(latex, start) {
  const BEGIN = "\\begin";
  const END = "\\end";
  let depth = 1;
  let cursor = start;
  while (cursor < latex.length) {
    const nextBegin = find_command(latex, cursor, "begin");
    const nextEnd = find_command(latex, cursor, "end");
    if (nextEnd === -1) {
      return -1;
    }
    const beginComesFirst = nextBegin !== -1 && nextBegin <= nextEnd;
    if (beginComesFirst) {
      depth += 1;
      cursor = nextBegin + BEGIN.length;
    } else {
      depth -= 1;
      if (depth === 0) {
        return nextEnd;
      }
      cursor = nextEnd + END.length;
    }
  }
  return -1;
}
120
/**
 * Returns the run of whitespace that begins at `start`.
 *
 * Space, tab, line feed and carriage return all count, so CRLF input is
 * consumed the same way as LF input.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to start reading from.
 * @returns {string} The whitespace run (possibly empty); callers advance by
 *   its length.
 */
function eat_whitespaces(latex, start) {
  let pos = start;
  while (pos < latex.length && " \t\n\r".includes(latex[pos])) {
    pos += 1;
  }
  return latex.substring(start, pos);
}
127
/**
 * Reads the run of ASCII letters that begins at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the first character after the backslash.
 * @returns {string} The letters read (empty if the first character is not a
 *   letter).
 */
function eat_command_name(latex, start) {
  let end = start;
  for (; end < latex.length; end++) {
    if (!isalpha(latex[end])) {
      break;
    }
  }
  return latex.substring(start, end);
}
134
/**
 * Reads one delimiter token at `start`.
 *
 * Recognized delimiters: `(`, `)`, `[`, `]`, `|`, `\{`, `\}`, `\lfloor`,
 * `\rfloor`, `\lceil`, `\rceil`, `\langle`, `\rangle`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index where the delimiter is expected.
 * @returns {string|null} The delimiter text, or null if none is recognized.
 */
function eat_parenthesis(latex, start) {
  const first = latex[start];
  if (first !== undefined && "()[]|".includes(first)) {
    return first;
  }
  const escapedDelimiters = [
    "\\{",
    "\\}",
    "\\lfloor",
    "\\rfloor",
    "\\lceil",
    "\\rceil",
    "\\langle",
    "\\rangle",
  ];
  for (const delimiter of escapedDelimiters) {
    if (latex.startsWith(delimiter, start)) {
      return delimiter;
    }
  }
  return null;
}
149
/**
 * Counts the consecutive `'` characters that begin at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to start counting from.
 * @returns {number} Number of prime marks found (0 if none).
 */
function eat_primes(latex, start) {
  let count = 0;
  // Indexing past the end yields undefined, which ends the run.
  while (latex[start + count] === "'") {
    count += 1;
  }
  return count;
}
156
+ function latexNodeToTexNode(node) {
157
+ try {
8
158
  let res = {};
9
159
  switch (node.type) {
160
+ case "ordgroup":
161
+ res.type = "ordgroup";
162
+ res.args = node.args.map((n) => latexNodeToTexNode(n));
163
+ if (res.args.length === 1) {
164
+ res = res.args[0];
165
+ }
166
+ break;
167
+ case "empty":
168
+ res.type = "empty";
169
+ res.content = "";
170
+ break;
10
171
  case "atom":
11
172
  res.type = "atom";
12
- res.content = node.text;
13
- if (node.text === "\\{" || node.text === "\\}") {
14
- res.content = node.text.substring(1);
15
- } else if (node.text.startsWith("\\")) {
16
- res.type = "symbol";
17
- }
173
+ res.content = node.content;
18
174
  break;
19
- case "mathord":
20
- case "textord":
21
- case "op":
22
- case "cr":
175
+ case "token":
176
+ case "token-letter-var":
177
+ case "token-number":
178
+ case "token-operator":
179
+ case "token-parenthesis":
23
180
  res.type = "symbol";
24
- res.content = node.text;
25
- if (node.type === "op") {
26
- res.content = node["name"];
27
- } else if (node.type === "cr") {
28
- res.content = "\\\\";
29
- }
30
- break;
31
- case "genfrac":
32
- res.type = "binaryFunc";
33
- if (node["leftDelim"] === "(" && node["rightDelim"] === ")") {
34
- res.content = "\\binom";
35
- } else {
36
- res.content = "\\frac";
37
- }
38
- res.args = [
39
- katexNodeToTexNode(node["numer"]),
40
- katexNodeToTexNode(node["denom"])
41
- ];
181
+ res.content = node.content;
42
182
  break;
43
183
  case "supsub":
44
184
  res.type = "supsub";
45
185
  res.irregularData = {};
46
186
  if (node["base"]) {
47
- res.irregularData.base = katexNodeToTexNode(node["base"]);
187
+ res.irregularData.base = latexNodeToTexNode(node["base"]);
48
188
  }
49
189
  if (node["sup"]) {
50
- res.irregularData.sup = katexNodeToTexNode(node["sup"]);
190
+ res.irregularData.sup = latexNodeToTexNode(node["sup"]);
51
191
  }
52
192
  if (node["sub"]) {
53
- res.irregularData.sub = katexNodeToTexNode(node["sub"]);
193
+ res.irregularData.sub = latexNodeToTexNode(node["sub"]);
54
194
  }
55
195
  break;
56
- case "mclass":
57
- case "ordgroup":
58
- res.type = "ordgroup";
59
- res.args = node.body.map((n) => katexNodeToTexNode(n));
60
- if (res.args.length === 1) {
61
- res = res.args[0];
62
- }
63
- break;
64
- case "leftright": {
65
- const body = katexNodeToTexNode({
66
- type: "ordgroup",
67
- mode: "math",
68
- body: node.body
69
- });
196
+ case "leftright":
70
197
  res.type = "leftright";
198
+ const body = latexNodeToTexNode(node.body);
71
199
  let left = node["left"];
72
200
  if (left === "\\{") {
73
201
  left = "{";
@@ -83,217 +211,434 @@ function katexNodeToTexNode(node) {
83
211
  { type: is_atom(right) ? "atom" : "symbol", content: right }
84
212
  ];
85
213
  break;
86
- }
87
- case "underline":
88
- case "overline":
89
- res.type = "unaryFunc";
90
- res.content = "\\" + node.type;
91
- res.args = [
92
- katexNodeToTexNode(node["body"])
93
- ];
94
- break;
95
- case "accent": {
96
- res.type = "unaryFunc";
97
- res.content = node["label"];
98
- res.args = [
99
- katexNodeToTexNode(node["base"])
100
- ];
101
- break;
102
- }
103
- case "sqrt":
104
- if (node["index"]) {
105
- res.irregularData = katexNodeToTexNode(node["index"]);
106
- }
107
- case "font":
108
- case "operatorname":
109
- res.type = "unaryFunc";
110
- res.content = "\\" + node.type;
111
- if (node.type === "font") {
112
- res.content = "\\" + node["font"];
214
+ case "beginend":
215
+ if (node.content?.startsWith("align")) {
216
+ res.type = "align";
217
+ } else {
218
+ res.type = "matrix";
113
219
  }
114
- if (Array.isArray(node.body)) {
115
- const obj = {
116
- type: "ordgroup",
117
- mode: "math",
118
- body: node.body
119
- };
220
+ res.content = node.content;
221
+ res.irregularData = node.body.map((row) => {
222
+ return row.map((n) => latexNodeToTexNode(n));
223
+ });
224
+ break;
225
+ case "command":
226
+ const num_args = get_command_param_num(node.content);
227
+ res.content = "\\" + node.content;
228
+ if (num_args === 0) {
229
+ res.type = "symbol";
230
+ } else if (num_args === 1) {
231
+ res.type = "unaryFunc";
120
232
  res.args = [
121
- katexNodeToTexNode(obj)
233
+ latexNodeToTexNode(node.arg1)
122
234
  ];
123
- } else {
235
+ if (node.content === "sqrt") {
236
+ if (node.exponent) {
237
+ res.irregularData = latexNodeToTexNode(node.exponent);
238
+ }
239
+ }
240
+ } else if (num_args === 2) {
241
+ res.type = "binaryFunc";
124
242
  res.args = [
125
- katexNodeToTexNode(node.body)
243
+ latexNodeToTexNode(node.arg1),
244
+ latexNodeToTexNode(node.arg2)
126
245
  ];
127
- }
128
- break;
129
- case "horizBrace":
130
- res.type = "unaryFunc";
131
- res.content = node["label"];
132
- res.args = [
133
- katexNodeToTexNode(node["base"])
134
- ];
135
- break;
136
- case "array":
137
- if (node["colSeparationType"] === "align") {
138
- res.type = "align";
139
246
  } else {
140
- res.type = "matrix";
247
+ throw new LatexNodeToTexNodeError("Invalid number of arguments", node);
141
248
  }
142
- res.irregularData = node.body.map((row) => {
143
- return row.map((cell) => {
144
- if (cell.type !== "styling" || cell.body.length !== 1) {
145
- throw new KatexNodeToTexNodeError("Expecting cell.type==='\\styling' and cell.body.length===1", cell);
146
- }
147
- return katexNodeToTexNode(cell.body[0]);
148
- });
149
- });
150
249
  break;
151
- case "text": {
250
+ case "text":
152
251
  res.type = "text";
153
- let str = "";
154
- node.body.forEach((n) => {
155
- if (n.mode !== "text") {
156
- throw new KatexNodeToTexNodeError("Expecting node.mode==='text'", node);
157
- }
158
- str += n.text;
159
- });
160
- res.content = str;
252
+ res.content = node.content;
161
253
  break;
162
- }
163
- case "spacing":
164
- case "kern":
254
+ case "comment":
255
+ res.type = "comment";
256
+ res.content = node.content;
257
+ break;
258
+ case "whitespace":
165
259
  res.type = "empty";
166
- res.content = " ";
167
260
  break;
168
- case "htmlmathml": {
169
- const element = node["mathml"][0]["body"][0];
170
- if (element && element.type === "textord" && element.text === "\u2260") {
261
+ case "newline":
262
+ res.type = "newline";
263
+ res.content = "\n";
264
+ break;
265
+ case "control":
266
+ if (node.content === "\\\\") {
171
267
  res.type = "symbol";
172
- res.content = "\\neq";
268
+ res.content = node.content;
173
269
  break;
174
270
  } else {
271
+ throw new LatexNodeToTexNodeError(`Unknown control sequence: ${node.content}`, node);
175
272
  }
176
- }
177
- case "color":
178
- if (Array.isArray(node.body) && node.body.length === 1) {
179
- const sub_body = node.body[0];
180
- if (sub_body.type === "text") {
181
- res.type = "unknownMacro";
182
- const joined = sub_body.body.map((n) => n.text).join("");
183
- if (/^\\[a-zA-Z]+$/.test(joined)) {
184
- res.content = joined.substring(1);
185
- break;
186
- }
187
- }
188
- }
189
- throw new KatexNodeToTexNodeError(`Unknown error type in parsed result:`, node);
190
- case "comment":
191
- res.type = "comment";
192
- res.content = node.text;
193
273
  break;
194
274
  default:
195
- throw new KatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
196
- break;
275
+ throw new LatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
197
276
  }
198
277
  return res;
199
278
  } catch (e) {
200
279
  throw e;
201
280
  }
202
281
  }
203
- function splitTex(tex) {
204
- const lines = tex.split("\n");
205
- const out_tex_list = [];
206
- let current_tex = "";
207
- for (let i = 0;i < lines.length; i++) {
208
- const line = lines[i];
209
- let index = -1;
210
- while (index + 1 < line.length) {
211
- index = line.indexOf("%", index + 1);
212
- if (index === -1) {
213
- break;
282
/**
 * Parses a LaTeX string into the TexNode tree produced by
 * latexNodeToTexNode.
 *
 * Each key of `customTexMacros` is replaced by its value with a plain
 * textual `replaceAll` before parsing, in the object's own key order.
 * NOTE(review): the replacement is not token-aware, so a macro such as `\R`
 * would also rewrite the start of `\Rightarrow` — confirm whether callers
 * rely on that.
 *
 * @param {string} tex - LaTeX source.
 * @param {Object<string, string>} [customTexMacros] - Macro-to-replacement
 *   map; may be omitted, null or undefined.
 * @returns {object} The converted TexNode tree.
 */
function parseTex(tex, customTexMacros) {
  const parser = new LatexParser();
  let expanded = tex;
  // `?? {}` keeps a call without macros working; Object.entries throws on
  // null and undefined.
  for (const [macro, replacement] of Object.entries(customTexMacros ?? {})) {
    expanded = expanded.replaceAll(macro, replacement);
  }
  return latexNodeToTexNode(parser.parse(expanded));
}
290
// Command names (without the backslash) for which get_command_param_num
// reports one argument.
var UNARY_COMMANDS = [
  "sqrt", "text",
  "arccos", "arcsin", "arctan", "arg",
  "bar", "bold", "boldsymbol",
  "ddot", "det", "dim", "dot",
  "exp", "gcd", "hat", "ker",
  "mathbb", "mathbf", "mathcal", "mathscr", "mathsf", "mathtt", "mathrm",
  "max", "min", "mod",
  "operatorname", "overbrace", "overline",
  "pmb", "sup", "rm", "tilde",
  "underbrace", "underline",
  "vec", "widehat", "widetilde"
];

// Command names for which get_command_param_num reports two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];

// Shared node for an empty expression.
var EMPTY_NODE = { type: "empty", content: "" };
340
+
341
/**
 * Error thrown by the parser for malformed LaTeX input.
 */
class LatexParserError extends Error {
  // Own property on every instance, so `err.name` identifies the error type.
  name = "LatexParserError";

  /**
   * @param {string} message - Description of the parse failure.
   */
  constructor(message) {
    super(message);
  }
}
347
+
348
/**
 * Recursive-descent parser that turns a LaTeX math string into a tree of
 * plain-object nodes (`{ type, content?, args?, ... }`).
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - When false, `parse` drops
   *   "whitespace" nodes.
   * @param {boolean} [newline_sensitive=true] - When false, `parse` and
   *   `parseAligned` drop "newline" nodes.
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parses a whole string.
   *
   * @param {string} latex - LaTeX source.
   * @returns {object} EMPTY_NODE for no nodes, the node itself for exactly
   *   one, otherwise an "ordgroup" node holding all of them in `args`.
   * @throws {LatexParserError} On a bare `&` or any malformed construct
   *   reported by the helper methods.
   */
  parse(latex) {
    const results = [];
    let pos = 0;
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    }
    if (results.length === 1) {
      return results[0];
    }
    return { type: "ordgroup", args: results };
  }

  /**
   * Parses one expression together with any primes, subscript and
   * superscript attached to it.
   *
   * @param {string} latex - LaTeX source.
   * @param {number} start - Index of the expression's first character.
   * @returns {[object, number]} The node (a "supsub" node when anything is
   *   attached, otherwise the bare base) and the index just past it.
   * @throws {LatexParserError} "Double superscript" when primes follow an
   *   explicit `^` group.
   */
  parseNextExpr(latex, start) {
    let [base, pos] = this.parseNextExprWithoutSupSub(latex, start);
    let sub = null;
    let sup = null;
    // Primes directly after the base.
    const leadingPrimes = eat_primes(latex, pos);
    let num_prime = leadingPrimes;
    pos += leadingPrimes;
    if (pos < latex.length && latex[pos] === "_") {
      [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      // Advance only by the primes found here; the leading ones were already
      // consumed above and must not be added to `pos` a second time.
      const trailingPrimes = eat_primes(latex, pos);
      num_prime += trailingPrimes;
      pos += trailingPrimes;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < latex.length && latex[pos] === "^") {
      [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      if (eat_primes(latex, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }
    if (sub === null && sup === null && num_prime === 0) {
      return [base, pos];
    }
    const res = { type: "supsub", base };
    if (sub) {
      res.sub = sub;
    }
    if (num_prime > 0) {
      // Primes become `prime` commands placed ahead of any explicit
      // superscript.
      res.sup = { type: "ordgroup", args: [] };
      for (let i = 0; i < num_prime; i++) {
        res.sup.args.push({ type: "command", content: "prime" });
      }
      if (sup) {
        res.sup.args.push(sup);
      }
      if (res.sup.args.length === 1) {
        res.sup = res.sup.args[0];
      }
    } else if (sup) {
      res.sup = sup;
    }
    return [res, pos];
  }

  /**
   * Parses one expression without looking for attached primes or scripts.
   * The branch taken depends on the character at `start`.
   *
   * @param {string} latex - LaTeX source.
   * @param {number} start - Index of the expression's first character.
   * @returns {[object, number]} The node and the index just past it. A
   *   character matched by no branch yields an "unknown" node.
   * @throws {LatexParserError} On a trailing lone backslash or any error
   *   reported by the helpers it calls.
   */
  parseNextExprWithoutSupSub(latex, start) {
    const firstChar = latex[start];
    if (firstChar === "{") {
      const posClosingBracket = find_closing_curly_bracket(latex, start);
      const exprInside = latex.slice(start + 1, posClosingBracket);
      return [this.parse(exprInside), posClosingBracket + 1];
    } else if (firstChar === "\\") {
      if (start + 1 >= latex.length) {
        throw new LatexParserError("Expecting command name after \\");
      }
      const firstTwoChars = latex.slice(start, start + 2);
      if (firstTwoChars === "\\\\") {
        return [{ type: "control", content: "\\\\" }, start + 2];
      } else if (firstTwoChars === "\\{" || firstTwoChars === "\\}") {
        return [{ type: "token-parenthesis", content: firstTwoChars }, start + 2];
      } else if (["\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
        return [{ type: "token", content: firstTwoChars }, start + 2];
      } else if (latex.slice(start).startsWith("\\begin{")) {
        return this.parseBeginEndExpr(latex, start);
      } else if (latex.slice(start).startsWith("\\left") && (start + 5 >= latex.length || !isalpha(latex[start + 5]))) {
        // Only `\left` itself, not a longer name that starts with it.
        return this.parseLeftRightExpr(latex, start);
      } else {
        return this.parseCommandExpr(latex, start);
      }
    } else if (firstChar === "%") {
      // Comment: everything up to, but not including, the next line feed.
      let pos = start + 1;
      while (pos < latex.length && latex[pos] !== "\n") {
        pos += 1;
      }
      return [{ type: "comment", content: latex.slice(start + 1, pos) }, pos];
    } else if (isdigit(firstChar)) {
      let pos = start;
      while (pos < latex.length && isdigit(latex[pos])) {
        pos += 1;
      }
      return [{ type: "token-number", content: latex.slice(start, pos) }, pos];
    } else if (isalpha(firstChar)) {
      return [{ type: "token-letter-var", content: firstChar }, start + 1];
    } else if ("+-*/=<>!".includes(firstChar)) {
      return [{ type: "token-operator", content: firstChar }, start + 1];
    } else if (".,;?".includes(firstChar)) {
      return [{ type: "atom", content: firstChar }, start + 1];
    } else if ("()[]".includes(firstChar)) {
      return [{ type: "token-parenthesis", content: firstChar }, start + 1];
    } else if (firstChar === "_") {
      // A script with nothing in front of it gets an empty base.
      let [sub, pos] = this.parseNextExpr(latex, start + 1);
      let sup = undefined;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === "^") {
      let [sup, pos] = this.parseNextExpr(latex, start + 1);
      let sub = undefined;
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === " ") {
      let pos = start;
      while (pos < latex.length && latex[pos] === " ") {
        pos += 1;
      }
      return [{ type: "whitespace", content: latex.slice(start, pos) }, pos];
    } else if (firstChar === "\n") {
      return [{ type: "newline", content: "\n" }, start + 1];
    } else if (firstChar === "\r") {
      // CRLF and a lone CR both produce a single "\n" newline node.
      if (start + 1 < latex.length && latex[start + 1] === "\n") {
        return [{ type: "newline", content: "\n" }, start + 2];
      } else {
        return [{ type: "newline", content: "\n" }, start + 1];
      }
    } else if (firstChar === "&") {
      return [{ type: "control", content: "&" }, start + 1];
    } else {
      return [{ type: "unknown", content: firstChar }, start + 1];
    }
  }

  /**
   * Parses `\name` and the arguments get_command_param_num reports for it.
   * `\sqrt[...]` additionally records the bracketed part as `exponent`, and
   * `\text{...}` keeps its braces' content as raw text.
   *
   * @param {string} latex - LaTeX source.
   * @param {number} start - Index of the backslash.
   * @returns {[object, number]} A "command" (or "text") node and the index
   *   just past it.
   */
  parseCommandExpr(latex, start) {
    assert(latex[start] === "\\");
    let pos = start + 1;
    const command = eat_command_name(latex, pos);
    pos += command.length;
    const paramNum = get_command_param_num(command);
    if (paramNum === 0) {
      return [{ type: "command", content: command }, pos];
    } else if (paramNum === 1) {
      if (command === "sqrt" && pos < latex.length && latex[pos] === "[") {
        const posLeftSquareBracket = pos;
        const posRightSquareBracket = find_closing_square_bracket(latex, pos);
        const exprInside = latex.slice(posLeftSquareBracket + 1, posRightSquareBracket);
        const exponent = this.parse(exprInside);
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, posRightSquareBracket + 1);
        return [{ type: "command", content: command, arg1, exponent }, newPos];
      } else if (command === "text") {
        assert(latex[pos] === "{");
        const posClosingBracket = find_closing_curly_bracket(latex, pos);
        const text = latex.slice(pos + 1, posClosingBracket);
        return [{ type: "text", content: text }, posClosingBracket + 1];
      } else {
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, pos);
        return [{ type: "command", content: command, arg1 }, newPos];
      }
    } else if (paramNum === 2) {
      const [arg1, pos1] = this.parseNextExprWithoutSupSub(latex, pos);
      const [arg2, pos2] = this.parseNextExprWithoutSupSub(latex, pos1);
      return [{ type: "command", content: command, arg1, arg2 }, pos2];
    } else {
      throw new Error("Invalid number of parameters");
    }
  }

  /**
   * Parses `\left<delim> ... \right<delim>`.
   *
   * @param {string} latex - LaTeX source.
   * @param {number} start - Index of the `\left` backslash.
   * @returns {[object, number]} A "leftright" node (`left`, `right`, `body`)
   *   and the index just past the right delimiter.
   * @throws {LatexParserError} On a missing or unrecognized delimiter, or
   *   when no matching `\right` exists.
   */
  parseLeftRightExpr(latex, start) {
    assert(latex.slice(start, start + 5) === "\\left");
    let pos = start + "\\left".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(latex, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos += leftDelimiter.length;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(latex, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + "\\right".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\right");
    }
    const rightDelimiter = eat_parenthesis(latex, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos += rightDelimiter.length;
    const exprInside = latex.slice(exprInsideStart, exprInsideEnd);
    const body = this.parse(exprInside);
    const res = { type: "leftright", left: leftDelimiter, right: rightDelimiter, body };
    return [res, pos];
  }

  /**
   * Parses `\begin{env} ... \end{env}`.
   *
   * @param {string} latex - LaTeX source.
   * @param {number} start - Index of the `\begin` backslash.
   * @returns {[object, number]} A "beginend" node (`content` is the
   *   environment name, `body` the rows from parseAligned) and the index
   *   just past the closing `}` of `\end{env}`.
   * @throws {LatexParserError} When `\end` is missing or names a different
   *   environment.
   */
  parseBeginEndExpr(latex, start) {
    assert(latex.slice(start, start + 7) === "\\begin{");
    let pos = start + "\\begin".length;
    const idx = find_closing_curly_bracket(latex, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching } after \\begin{");
    }
    const envName = latex.slice(pos + 1, idx);
    pos = idx + 1;
    pos += eat_whitespaces(latex, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(latex, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + "\\end".length;
    const closingIdx = find_closing_curly_bracket(latex, pos);
    if (closingIdx === -1) {
      throw new LatexParserError("No matching } after \\end{");
    }
    if (latex.slice(pos + 1, closingIdx) !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    let exprInside = latex.slice(exprInsideStart, exprInsideEnd);
    exprInside = exprInside.trimEnd();
    const body = this.parseAligned(exprInside);
    const res = { type: "beginend", content: envName, body };
    return [res, closingIdx + 1];
  }

  /**
   * Parses the inside of an environment into rows and cells: `\\` starts a
   * new row and `&` starts a new cell.
   *
   * @param {string} latex - Text between `\begin{env}` and `\end{env}`.
   * @returns {object[][]} Rows of cells; each cell is an "ordgroup" node
   *   whose `args` hold the cell's nodes. Whitespace nodes are always
   *   dropped here.
   */
  parseAligned(latex) {
    let pos = 0;
    const allRows = [];
    let row = [];
    allRows.push(row);
    let group = { type: "ordgroup", args: [] };
    row.push(group);
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        row = [];
        group = { type: "ordgroup", args: [] };
        row.push(group);
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = { type: "ordgroup", args: [] };
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
290
- var generateParseTree = katex.__parse;
291
636
 
292
- class KatexNodeToTexNodeError extends Error {
637
/**
 * Error raised while converting a parsed LaTeX node into a TexNode; it
 * carries the offending node.
 */
class LatexNodeToTexNodeError extends Error {
  // The node that could not be converted.
  node;
  name = "LatexNodeToTexNodeError";

  /**
   * @param {string} message - Description of the failure.
   * @param {object} node - The node being converted when the error occurred.
   */
  constructor(message, node) {
    super(message);
    this.node = node;
  }
}
@@ -320,6 +665,10 @@ var symbolMap = new Map([
320
665
  ["overline", "overline"],
321
666
  ["underline", "underline"],
322
667
  ["bar", "macron"],
668
+ ["dbinom", "binom"],
669
+ ["tbinom", "binom"],
670
+ ["dfrac", "frac"],
671
+ ["tfrac", "frac"],
323
672
  ["boldsymbol", "bold"],
324
673
  ["mathbf", "bold"],
325
674
  ["mathbb", "bb"],
@@ -662,7 +1011,7 @@ class TypstWriter {
662
1011
  this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
663
1012
  return;
664
1013
  }
665
- if (!base) {
1014
+ if (base.type === "empty") {
666
1015
  this.queue.push({ type: "text", content: "" });
667
1016
  } else {
668
1017
  this.appendWithBracketsIfNeeded(base);
@@ -756,13 +1105,6 @@ class TypstWriter {
756
1105
  }, "");
757
1106
  if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
758
1107
  this.queue.push({ type: "symbol", content: text });
759
- } else if (text.startsWith("SyMb01-")) {
760
- const special_symbol = text.substring(7);
761
- if (special_symbol === "newline") {
762
- this.queue.push({ type: "newline", content: "\n" });
763
- return;
764
- }
765
- this.queue.push({ type: "symbol", content: "\\" + special_symbol });
766
1108
  } else {
767
1109
  this.queue.push({ type: "symbol", content: "op" });
768
1110
  this.queue.push({ type: "atom", content: "(" });
@@ -777,6 +1119,9 @@ class TypstWriter {
777
1119
  this.append(arg0);
778
1120
  this.queue.push({ type: "atom", content: ")" });
779
1121
  this.insideFunctionDepth--;
1122
+ } else if (node.type === "newline") {
1123
+ this.queue.push({ type: "newline", content: "\n" });
1124
+ return;
780
1125
  } else if (node.type === "align") {
781
1126
  const matrix = node.irregularData;
782
1127
  matrix.forEach((row, i) => {