tex2typst 0.0.19 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,73 +1,201 @@
1
1
  // src/parser.ts
2
- import katex from "katex";
3
- function katexNodeToTexNode(node) {
4
- try {
5
- if (node.loc) {
6
- delete node.loc;
2
/**
 * Parser-internal invariant check.
 *
 * @param {*} condition - Value expected to be truthy.
 * @param {string} [message=""] - Message attached to the raised error.
 * @throws {LatexParserError} When `condition` is falsy.
 */
function assert(condition, message = "") {
  if (condition) {
    return;
  }
  throw new LatexParserError(message);
}
7
/**
 * Number of brace arguments a command consumes, based on the
 * UNARY_COMMANDS / BINARY_COMMANDS tables.
 *
 * @param {string} command - Command name without the leading backslash.
 * @returns {number} 1 for unary commands, 2 for binary commands, otherwise 0.
 */
function get_command_param_num(command) {
  if (UNARY_COMMANDS.includes(command)) {
    return 1;
  }
  return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
16
/**
 * Locate the `}` that closes the `{` found at `latex[start]`.
 *
 * Nested groups are counted. The two-character sequences `\{`, `\}` and `\\`
 * are skipped as a unit: the first two are literal braces, and skipping `\\`
 * keeps a line break followed by a real brace (`\\{`) from being misread as
 * an escaped brace.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`.
 * @throws {LatexParserError} If `latex[start]` is not `{` or no match exists.
 */
function find_closing_curly_bracket(latex, start) {
  assert(latex[start] === "{");
  let depth = 1;
  let pos = start + 1;
  while (depth > 0) {
    if (pos >= latex.length) {
      throw new LatexParserError("Unmatched curly brackets");
    }
    const ch = latex[pos];
    if (ch === "\\" && pos + 1 < latex.length && ["\\", "{", "}"].includes(latex[pos + 1])) {
      // Escaped pair: neither character takes part in brace matching.
      pos += 2;
      continue;
    }
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
    }
    pos += 1;
  }
  // pos sits one past the closing brace once depth reaches zero.
  return pos - 1;
}
37
/**
 * Locate the `]` that closes the `[` found at `latex[start]`, counting
 * nested square brackets.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `[`.
 * @returns {number} Index of the matching `]`.
 * @throws {LatexParserError} If `latex[start]` is not `[` or no match exists.
 */
function find_closing_square_bracket(latex, start) {
  assert(latex[start] === "[");
  let depth = 1;
  for (let i = start + 1; i < latex.length; i += 1) {
    const ch = latex[i];
    if (ch === "[") {
      depth += 1;
    } else if (ch === "]") {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
  }
  throw new LatexParserError("Unmatched square brackets");
}
54
/**
 * Whether `char` is exactly one ASCII letter (a-z or A-Z).
 *
 * The previous substring test (`alphabet.includes(char)`) also answered true
 * for the empty string and for multi-letter runs such as "ab"; this version
 * accepts single characters only. Non-string input (e.g. `undefined` from
 * indexing past the end of a string) yields false.
 *
 * @param {string} char - Candidate character.
 * @returns {boolean}
 */
function isalpha(char) {
  return typeof char === "string" && char.length === 1 && /[a-zA-Z]/.test(char);
}
57
/**
 * Whether `char` is exactly one ASCII digit (0-9).
 *
 * The previous substring test (`"0123456789".includes(char)`) also answered
 * true for the empty string and for runs such as "12"; this version accepts
 * single characters only. Non-string input (e.g. `undefined` from indexing
 * past the end of a string) yields false.
 *
 * @param {string} char - Candidate character.
 * @returns {boolean}
 */
function isdigit(char) {
  return typeof char === "string" && char.length === 1 && char >= "0" && char <= "9";
}
60
/**
 * Find the next occurrence of `\<command_name>` at or after `start` that is
 * a complete command, i.e. not immediately followed by another letter
 * (so searching for "left" does not stop at `\leftarrow`).
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to begin searching from.
 * @param {string} command_name - Command name without the backslash.
 * @returns {number} Index of the backslash, or -1 when not found.
 */
function find_command(latex, start, command_name) {
  const needle = "\\" + command_name;
  let cursor = start;
  while (cursor < latex.length) {
    const hit = latex.indexOf(needle, cursor);
    if (hit === -1) {
      return -1;
    }
    const after = hit + needle.length;
    const at_end = after >= latex.length;
    if (at_end || !isalpha(latex[after])) {
      return hit;
    }
    // Prefix of a longer command name; keep looking past it.
    cursor = after;
  }
  return -1;
}
76
/**
 * Starting just after an already-consumed `\left`, find the `\right` that
 * balances it, counting nested `\left` ... `\right` pairs along the way.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to begin scanning from.
 * @returns {number} Index of the balancing `\right`, or -1 if there is none.
 */
function find_closing_right_command(latex, start) {
  const LEFT = "\\left";
  const RIGHT = "\\right";
  let depth = 1;
  let cursor = start;
  do {
    if (cursor >= latex.length) {
      return -1;
    }
    const next_right = find_command(latex, cursor, "right");
    if (next_right === -1) {
      return -1;
    }
    const next_left = find_command(latex, cursor, "left");
    const opens_first = next_left !== -1 && next_left <= next_right;
    if (opens_first) {
      depth += 1;
      cursor = next_left + LEFT.length;
    } else {
      depth -= 1;
      cursor = next_right + RIGHT.length;
    }
  } while (depth > 0);
  return cursor - RIGHT.length;
}
98
/**
 * Starting just after an already-consumed `\begin{...}`, find the `\end`
 * that balances it, counting nested `\begin` ... `\end` pairs along the way.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to begin scanning from.
 * @returns {number} Index of the balancing `\end`, or -1 if there is none.
 */
function find_closing_end_command(latex, start) {
  const BEGIN = "\\begin";
  const END = "\\end";
  let depth = 1;
  let cursor = start;
  do {
    if (cursor >= latex.length) {
      return -1;
    }
    const next_end = find_command(latex, cursor, "end");
    if (next_end === -1) {
      return -1;
    }
    const next_begin = find_command(latex, cursor, "begin");
    const opens_first = next_begin !== -1 && next_begin <= next_end;
    if (opens_first) {
      depth += 1;
      cursor = next_begin + BEGIN.length;
    } else {
      depth -= 1;
      cursor = next_end + END.length;
    }
  } while (depth > 0);
  return cursor - END.length;
}
120
/**
 * Return the run of spaces, tabs and "\n" characters beginning at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to begin reading from.
 * @returns {string} The whitespace run (possibly empty).
 */
function eat_whitespaces(latex, start) {
  let end = start;
  for (; end < latex.length; end += 1) {
    const ch = latex[end];
    if (ch !== " " && ch !== "\t" && ch !== "\n") {
      break;
    }
  }
  return latex.substring(start, end);
}
127
/**
 * Return the run of ASCII letters beginning at `start` (the name part of a
 * command, read after its backslash).
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the first candidate letter.
 * @returns {string} The letter run (possibly empty).
 */
function eat_command_name(latex, start) {
  let end = start;
  for (; end < latex.length; end += 1) {
    if (!isalpha(latex[end])) {
      break;
    }
  }
  return latex.substring(start, end);
}
134
/**
 * Read one delimiter token at `start`.
 *
 * Recognized tokens: the single characters `( ) [ ] |`, and the sequences
 * `\{ \} \lfloor \rfloor \lceil \rceil \langle \rangle`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index where the delimiter is expected.
 * @returns {string|null} The delimiter text, or null when none is recognized.
 */
function eat_parenthesis(latex, start) {
  if ("()[]|".includes(latex[start])) {
    return latex[start];
  }
  const multi_char_delimiters = [
    "\\{", "\\}",
    "\\lfloor", "\\rfloor",
    "\\lceil", "\\rceil",
    "\\langle", "\\rangle"
  ];
  for (const delimiter of multi_char_delimiters) {
    const end = start + delimiter.length;
    // The whole candidate must fit inside the string before comparing.
    if (end - 1 < latex.length && latex.substring(start, end) === delimiter) {
      return delimiter;
    }
  }
  return null;
}
149
/**
 * Count the consecutive `'` characters beginning at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index to begin counting from.
 * @returns {number} Number of prime marks found (0 when there are none).
 */
function eat_primes(latex, start) {
  let count = 0;
  while (start + count < latex.length && latex[start + count] === "'") {
    count += 1;
  }
  return count;
}
156
+ function latexNodeToTexNode(node) {
157
+ try {
8
158
  let res = {};
9
159
  switch (node.type) {
160
+ case "ordgroup":
161
+ res.type = "ordgroup";
162
+ res.args = node.args.map((n) => latexNodeToTexNode(n));
163
+ if (res.args.length === 1) {
164
+ res = res.args[0];
165
+ }
166
+ break;
167
+ case "empty":
168
+ res.type = "empty";
169
+ res.content = "";
170
+ break;
10
171
  case "atom":
11
172
  res.type = "atom";
12
- res.content = node.text;
13
- if (node.text === "\\{" || node.text === "\\}") {
14
- res.content = node.text.substring(1);
15
- } else if (node.text.startsWith("\\")) {
16
- res.type = "symbol";
17
- }
173
+ res.content = node.content;
18
174
  break;
19
- case "mathord":
20
- case "textord":
21
- case "op":
22
- case "cr":
175
+ case "token":
176
+ case "token-letter-var":
177
+ case "token-number":
178
+ case "token-operator":
179
+ case "token-parenthesis":
23
180
  res.type = "symbol";
24
- res.content = node.text;
25
- if (node.type === "op") {
26
- res.content = node["name"];
27
- } else if (node.type === "cr") {
28
- res.content = "\\\\";
29
- }
30
- break;
31
- case "genfrac":
32
- res.type = "binaryFunc";
33
- if (node["leftDelim"] === "(" && node["rightDelim"] === ")") {
34
- res.content = "\\binom";
35
- } else {
36
- res.content = "\\frac";
37
- }
38
- res.args = [
39
- katexNodeToTexNode(node["numer"]),
40
- katexNodeToTexNode(node["denom"])
41
- ];
181
+ res.content = node.content;
42
182
  break;
43
183
  case "supsub":
44
184
  res.type = "supsub";
45
185
  res.irregularData = {};
46
186
  if (node["base"]) {
47
- res.irregularData.base = katexNodeToTexNode(node["base"]);
187
+ res.irregularData.base = latexNodeToTexNode(node["base"]);
48
188
  }
49
189
  if (node["sup"]) {
50
- res.irregularData.sup = katexNodeToTexNode(node["sup"]);
190
+ res.irregularData.sup = latexNodeToTexNode(node["sup"]);
51
191
  }
52
192
  if (node["sub"]) {
53
- res.irregularData.sub = katexNodeToTexNode(node["sub"]);
193
+ res.irregularData.sub = latexNodeToTexNode(node["sub"]);
54
194
  }
55
195
  break;
56
- case "mclass":
57
- case "ordgroup":
58
- res.type = "ordgroup";
59
- res.args = node.body.map((n) => katexNodeToTexNode(n));
60
- if (res.args.length === 1) {
61
- res = res.args[0];
62
- }
63
- break;
64
- case "leftright": {
65
- const body = katexNodeToTexNode({
66
- type: "ordgroup",
67
- mode: "math",
68
- body: node.body
69
- });
196
+ case "leftright":
70
197
  res.type = "leftright";
198
+ const body = latexNodeToTexNode(node.body);
71
199
  let left = node["left"];
72
200
  if (left === "\\{") {
73
201
  left = "{";
@@ -83,113 +211,68 @@ function katexNodeToTexNode(node) {
83
211
  { type: is_atom(right) ? "atom" : "symbol", content: right }
84
212
  ];
85
213
  break;
86
- }
87
- case "underline":
88
- case "overline":
89
- res.type = "unaryFunc";
90
- res.content = "\\" + node.type;
91
- res.args = [
92
- katexNodeToTexNode(node["body"])
93
- ];
94
- break;
95
- case "accent": {
96
- res.type = "unaryFunc";
97
- res.content = node["label"];
98
- res.args = [
99
- katexNodeToTexNode(node["base"])
100
- ];
101
- break;
102
- }
103
- case "sqrt":
104
- if (node["index"]) {
105
- res.irregularData = katexNodeToTexNode(node["index"]);
106
- }
107
- case "font":
108
- case "operatorname":
109
- res.type = "unaryFunc";
110
- res.content = "\\" + node.type;
111
- if (node.type === "font") {
112
- res.content = "\\" + node["font"];
214
+ case "beginend":
215
+ if (node.content?.startsWith("align")) {
216
+ res.type = "align";
217
+ } else {
218
+ res.type = "matrix";
113
219
  }
114
- if (Array.isArray(node.body)) {
115
- const obj = {
116
- type: "ordgroup",
117
- mode: "math",
118
- body: node.body
119
- };
220
+ res.content = node.content;
221
+ res.irregularData = node.body.map((row) => {
222
+ return row.map((n) => latexNodeToTexNode(n));
223
+ });
224
+ break;
225
+ case "command":
226
+ const num_args = get_command_param_num(node.content);
227
+ res.content = "\\" + node.content;
228
+ if (num_args === 0) {
229
+ res.type = "symbol";
230
+ } else if (num_args === 1) {
231
+ res.type = "unaryFunc";
120
232
  res.args = [
121
- katexNodeToTexNode(obj)
233
+ latexNodeToTexNode(node.arg1)
122
234
  ];
123
- } else {
235
+ if (node.content === "sqrt") {
236
+ if (node.exponent) {
237
+ res.irregularData = latexNodeToTexNode(node.exponent);
238
+ }
239
+ }
240
+ } else if (num_args === 2) {
241
+ res.type = "binaryFunc";
124
242
  res.args = [
125
- katexNodeToTexNode(node.body)
243
+ latexNodeToTexNode(node.arg1),
244
+ latexNodeToTexNode(node.arg2)
126
245
  ];
127
- }
128
- break;
129
- case "horizBrace":
130
- res.type = "unaryFunc";
131
- res.content = node["label"];
132
- res.args = [
133
- katexNodeToTexNode(node["base"])
134
- ];
135
- break;
136
- case "array":
137
- if (node["colSeparationType"] === "align") {
138
- res.type = "align";
139
246
  } else {
140
- res.type = "matrix";
247
+ throw new LatexNodeToTexNodeError("Invalid number of arguments", node);
141
248
  }
142
- res.irregularData = node.body.map((row) => {
143
- return row.map((cell) => {
144
- if (cell.type !== "styling" || cell.body.length !== 1) {
145
- throw new KatexNodeToTexNodeError("Expecting cell.type==='\\styling' and cell.body.length===1", cell);
146
- }
147
- return katexNodeToTexNode(cell.body[0]);
148
- });
149
- });
150
249
  break;
151
- case "text": {
250
+ case "text":
152
251
  res.type = "text";
153
- let str = "";
154
- node.body.forEach((n) => {
155
- if (n.mode !== "text") {
156
- throw new KatexNodeToTexNodeError("Expecting node.mode==='text'", node);
157
- }
158
- str += n.text;
159
- });
160
- res.content = str;
252
+ res.content = node.content;
161
253
  break;
162
- }
163
- case "spacing":
164
- case "kern":
254
+ case "comment":
255
+ res.type = "comment";
256
+ res.content = node.content;
257
+ break;
258
+ case "whitespace":
165
259
  res.type = "empty";
166
- res.content = " ";
167
260
  break;
168
- case "htmlmathml": {
169
- const element = node["mathml"][0]["body"][0];
170
- if (element && element.type === "textord" && element.text === "\u2260") {
261
+ case "newline":
262
+ res.type = "newline";
263
+ res.content = "\n";
264
+ break;
265
+ case "control":
266
+ if (node.content === "\\\\") {
171
267
  res.type = "symbol";
172
- res.content = "\\neq";
268
+ res.content = node.content;
173
269
  break;
174
270
  } else {
271
+ throw new LatexNodeToTexNodeError(`Unknown control sequence: ${node.content}`, node);
175
272
  }
176
- }
177
- case "color":
178
- if (Array.isArray(node.body) && node.body.length === 1) {
179
- const sub_body = node.body[0];
180
- if (sub_body.type === "text") {
181
- res.type = "unknownMacro";
182
- const joined = sub_body.body.map((n) => n.text).join("");
183
- if (/^\\[a-zA-Z]+$/.test(joined)) {
184
- res.content = joined.substring(1);
185
- break;
186
- }
187
- }
188
- }
189
- throw new KatexNodeToTexNodeError(`Unknown error type in parsed result:`, node);
190
- default:
191
- throw new KatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
192
273
  break;
274
+ default:
275
+ throw new LatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
193
276
  }
194
277
  return res;
195
278
  } catch (e) {
@@ -197,46 +280,365 @@ function katexNodeToTexNode(node) {
197
280
  }
198
281
  }
199
282
  function parseTex(tex, customTexMacros) {
200
- const macros = {
201
- "\\mod": "\\operatorname{SyMb01-mod}",
202
- "\\liminf": "\\operatorname{SyMb01-liminf}",
203
- "\\limsup": "\\operatorname{SyMb01-limsup}",
204
- "\\qquad": "\\operatorname{SyMb01-qquad}",
205
- "\\quad": "\\operatorname{SyMb01-quad}",
206
- "\\cdots": "\\operatorname{SyMb01-cdots}",
207
- "\\colon": "\\operatorname{SyMb01-colon}",
208
- "\\imath": "\\operatorname{SyMb01-imath}",
209
- "\\iiiint": "\\operatorname{SyMb01-iiiint}",
210
- "\\jmath": "\\operatorname{SyMb01-jmath}",
211
- "\\vdots": "\\operatorname{SyMb01-vdots}",
212
- "\\notin": "\\operatorname{SyMb01-notin}",
213
- "\\slash": "\\operatorname{SyMb01-slash}",
214
- "\\LaTeX": "\\operatorname{SyMb01-LaTeX}",
215
- "\\TeX": "\\operatorname{SyMb01-TeX}",
216
- ...customTexMacros
217
- };
218
- const options = {
219
- macros,
220
- displayMode: true,
221
- strict: "ignore",
222
- throwOnError: false
223
- };
224
- let treeArray = generateParseTree(tex, options);
225
- let t = {
226
- type: "ordgroup",
227
- mode: "math",
228
- body: treeArray,
229
- loc: {}
230
- };
231
- return katexNodeToTexNode(t);
283
+ const parser = new LatexParser;
284
+ for (const [macro, replacement] of Object.entries(customTexMacros)) {
285
+ tex = tex.replaceAll(macro, replacement);
286
+ }
287
+ const node = parser.parse(tex);
288
+ return latexNodeToTexNode(node);
232
289
  }
233
- var generateParseTree = katex.__parse;
290
// Command names (without backslash) for which get_command_param_num reports
// one argument.
var UNARY_COMMANDS = [
  "sqrt", "text",
  "arccos", "arcsin", "arctan", "arg",
  "bar", "bold", "boldsymbol",
  "ddot", "det", "dim", "dot",
  "exp", "gcd", "hat", "ker",
  "mathbb", "mathbf", "mathcal", "mathscr", "mathsf", "mathtt", "mathrm",
  "max", "min", "mod",
  "operatorname", "overbrace", "overline",
  "pmb", "sup", "rm", "tilde",
  "underbrace", "underline",
  "vec", "widehat", "widetilde"
];
// Command names (without backslash) for which get_command_param_num reports
// two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];
// Shared node standing for "nothing here" (empty input, missing base).
var EMPTY_NODE = { type: "empty", content: "" };
234
340
 
235
- class KatexNodeToTexNodeError extends Error {
341
/**
 * Error raised for malformed LaTeX input encountered while parsing.
 */
class LatexParserError extends Error {
  name = "LatexParserError";

  constructor(message) {
    super(message);
  }
}
347
+
348
/**
 * Hand-written parser that turns a LaTeX math string into a tree of plain
 * node objects (`{ type, content, ... }`).
 *
 * Node types produced here: "ordgroup", "supsub", "command", "text",
 * "leftright", "beginend", "comment", "whitespace", "newline", "control",
 * "atom", "unknown" and the "token*" family; empty input yields EMPTY_NODE.
 *
 * Every parse* helper that takes `(latex, start)` returns a pair
 * `[node, nextPos]`, where `nextPos` is the index just past the consumed text.
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - Keep "whitespace" nodes in
   *   the output of parse() instead of dropping them.
   * @param {boolean} [newline_sensitive=true] - Keep "newline" nodes instead
   *   of dropping them.
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parse a complete expression.
   *
   * @param {string} latex - Source text.
   * @returns {object} EMPTY_NODE for no nodes, the node itself for exactly
   *   one node, otherwise an "ordgroup" node holding all nodes in `args`.
   * @throws {LatexParserError} On malformed input, including a bare `&`
   *   outside an environment.
   */
  parse(latex) {
    const results = [];
    let pos = 0;
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    }
    if (results.length === 1) {
      return results[0];
    }
    return { type: "ordgroup", args: results };
  }

  /**
   * Parse one expression together with any primes, subscript and
   * superscript attached to it.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the expression's first character.
   * @returns {[object, number]} The base node (or a "supsub" node wrapping
   *   it) and the next position.
   * @throws {LatexParserError} On "Double superscript" or malformed input.
   */
  parseNextExpr(latex, start) {
    let [base, pos] = this.parseNextExprWithoutSupSub(latex, start);
    let sub = null;
    let sup = null;
    let num_prime = eat_primes(latex, pos);
    pos += num_prime;
    if (pos < latex.length && latex[pos] === "_") {
      [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      // Advance only past the primes found here; primes seen before the
      // subscript were already consumed above.
      const trailing_primes = eat_primes(latex, pos);
      num_prime += trailing_primes;
      pos += trailing_primes;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < latex.length && latex[pos] === "^") {
      [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      if (eat_primes(latex, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }
    if (sub === null && sup === null && num_prime === 0) {
      return [base, pos];
    }
    const res = { type: "supsub", base };
    if (sub) {
      res.sub = sub;
    }
    if (num_prime > 0) {
      // Primes become \prime commands placed ahead of any explicit
      // superscript inside one group.
      res.sup = { type: "ordgroup", args: [] };
      for (let i = 0; i < num_prime; i++) {
        res.sup.args.push({ type: "command", content: "prime" });
      }
      if (sup) {
        res.sup.args.push(sup);
      }
      if (res.sup.args.length === 1) {
        res.sup = res.sup.args[0];
      }
    } else if (sup) {
      res.sup = sup;
    }
    return [res, pos];
  }

  /**
   * Parse one expression without looking for attached scripts.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the expression's first character.
   * @returns {[object, number]} Parsed node and next position.
   * @throws {LatexParserError} On malformed input, or when `start` is at or
   *   past the end of `latex` (e.g. a trailing `^` with nothing after it).
   */
  parseNextExprWithoutSupSub(latex, start) {
    if (start >= latex.length) {
      throw new LatexParserError("Unexpected end of input");
    }
    const firstChar = latex[start];
    if (firstChar === "{") {
      const posClosingBracket = find_closing_curly_bracket(latex, start);
      const exprInside = latex.slice(start + 1, posClosingBracket);
      return [this.parse(exprInside), posClosingBracket + 1];
    } else if (firstChar === "\\") {
      if (start + 1 >= latex.length) {
        throw new LatexParserError("Expecting command name after \\");
      }
      const firstTwoChars = latex.slice(start, start + 2);
      if (firstTwoChars === "\\\\") {
        return [{ type: "control", content: "\\\\" }, start + 2];
      } else if (firstTwoChars === "\\{" || firstTwoChars === "\\}") {
        return [{ type: "token-parenthesis", content: firstTwoChars }, start + 2];
      } else if (["\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
        return [{ type: "token", content: firstTwoChars }, start + 2];
      } else if (latex.startsWith("\\begin{", start)) {
        return this.parseBeginEndExpr(latex, start);
      } else if (latex.startsWith("\\left", start) && (start + 5 >= latex.length || !isalpha(latex[start + 5]))) {
        // `\left` only when it is the whole command name (not `\leftarrow`).
        return this.parseLeftRightExpr(latex, start);
      } else {
        return this.parseCommandExpr(latex, start);
      }
    } else if (firstChar === "%") {
      // Comment runs to the end of the line; the newline is not consumed.
      let pos = start + 1;
      while (pos < latex.length && latex[pos] !== "\n") {
        pos += 1;
      }
      return [{ type: "comment", content: latex.slice(start + 1, pos) }, pos];
    } else if (isdigit(firstChar)) {
      let pos = start;
      while (pos < latex.length && isdigit(latex[pos])) {
        pos += 1;
      }
      return [{ type: "token-number", content: latex.slice(start, pos) }, pos];
    } else if (isalpha(firstChar)) {
      return [{ type: "token-letter-var", content: firstChar }, start + 1];
    } else if ("+-*/=<>!".includes(firstChar)) {
      return [{ type: "token-operator", content: firstChar }, start + 1];
    } else if (".,;?".includes(firstChar)) {
      return [{ type: "atom", content: firstChar }, start + 1];
    } else if ("()[]".includes(firstChar)) {
      return [{ type: "token-parenthesis", content: firstChar }, start + 1];
    } else if (firstChar === "_") {
      // Script with no base: attach it to EMPTY_NODE.
      let [sub, pos] = this.parseNextExpr(latex, start + 1);
      let sup = undefined;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === "^") {
      let [sup, pos] = this.parseNextExpr(latex, start + 1);
      let sub = undefined;
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === " ") {
      let pos = start;
      while (pos < latex.length && latex[pos] === " ") {
        pos += 1;
      }
      return [{ type: "whitespace", content: latex.slice(start, pos) }, pos];
    } else if (firstChar === "\n") {
      return [{ type: "newline", content: "\n" }, start + 1];
    } else if (firstChar === "\r") {
      // "\r\n" and a lone "\r" both become a single "\n" newline node.
      if (start + 1 < latex.length && latex[start + 1] === "\n") {
        return [{ type: "newline", content: "\n" }, start + 2];
      } else {
        return [{ type: "newline", content: "\n" }, start + 1];
      }
    } else if (firstChar === "&") {
      return [{ type: "control", content: "&" }, start + 1];
    } else {
      return [{ type: "unknown", content: firstChar }, start + 1];
    }
  }

  /**
   * Parse a backslash command and the arguments it takes according to
   * get_command_param_num. `\sqrt[n]{x}` stores the bracketed part as
   * `exponent`; `\text{...}` yields a "text" node with the raw brace content.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash.
   * @returns {[object, number]} Parsed node and next position.
   * @throws {LatexParserError} On malformed input.
   */
  parseCommandExpr(latex, start) {
    assert(latex[start] === "\\");
    let pos = start + 1;
    const command = eat_command_name(latex, pos);
    pos += command.length;
    const paramNum = get_command_param_num(command);
    if (paramNum === 0) {
      return [{ type: "command", content: command }, pos];
    } else if (paramNum === 1) {
      if (command === "sqrt" && pos < latex.length && latex[pos] === "[") {
        const posLeftSquareBracket = pos;
        const posRightSquareBracket = find_closing_square_bracket(latex, pos);
        const exprInside = latex.slice(posLeftSquareBracket + 1, posRightSquareBracket);
        const exponent = this.parse(exprInside);
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, posRightSquareBracket + 1);
        return [{ type: "command", content: command, arg1, exponent }, newPos];
      } else if (command === "text") {
        assert(latex[pos] === "{", "Expecting { after \\text");
        const posClosingBracket = find_closing_curly_bracket(latex, pos);
        const text = latex.slice(pos + 1, posClosingBracket);
        return [{ type: "text", content: text }, posClosingBracket + 1];
      } else {
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, pos);
        return [{ type: "command", content: command, arg1 }, newPos];
      }
    } else if (paramNum === 2) {
      const [arg1, pos1] = this.parseNextExprWithoutSupSub(latex, pos);
      const [arg2, pos2] = this.parseNextExprWithoutSupSub(latex, pos1);
      return [{ type: "command", content: command, arg1, arg2 }, pos2];
    } else {
      throw new LatexParserError("Invalid number of parameters");
    }
  }

  /**
   * Parse `\left<delim> ... \right<delim>`.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash of `\left`.
   * @returns {[object, number]} A "leftright" node with `left`, `right` and
   *   parsed `body`, and the next position.
   * @throws {LatexParserError} On a missing/invalid delimiter or when no
   *   matching `\right` exists.
   */
  parseLeftRightExpr(latex, start) {
    assert(latex.slice(start, start + 5) === "\\left");
    let pos = start + "\\left".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(latex, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos += leftDelimiter.length;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(latex, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + "\\right".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\right");
    }
    const rightDelimiter = eat_parenthesis(latex, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos += rightDelimiter.length;
    const exprInside = latex.slice(exprInsideStart, exprInsideEnd);
    const body = this.parse(exprInside);
    const res = { type: "leftright", left: leftDelimiter, right: rightDelimiter, body };
    return [res, pos];
  }

  /**
   * Parse `\begin{env} ... \end{env}`.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash of `\begin`.
   * @returns {[object, number]} A "beginend" node whose `content` is the
   *   environment name and whose `body` is the row/cell grid from
   *   parseAligned, and the next position.
   * @throws {LatexParserError} On unmatched braces, a missing `\end`, or
   *   mismatched environment names.
   */
  parseBeginEndExpr(latex, start) {
    assert(latex.slice(start, start + 7) === "\\begin{");
    let pos = start + "\\begin".length;
    // find_closing_curly_bracket throws on an unmatched brace; it does not
    // return -1.
    const idx = find_closing_curly_bracket(latex, pos);
    const envName = latex.slice(pos + 1, idx);
    pos = idx + 1;
    pos += eat_whitespaces(latex, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(latex, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + "\\end".length;
    if (latex[pos] !== "{") {
      throw new LatexParserError("Expecting { after \\end");
    }
    const closingIdx = find_closing_curly_bracket(latex, pos);
    if (latex.slice(pos + 1, closingIdx) !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    const exprInside = latex.slice(exprInsideStart, exprInsideEnd).trimEnd();
    const body = this.parseAligned(exprInside);
    const res = { type: "beginend", content: envName, body };
    return [res, closingIdx + 1];
  }

  /**
   * Parse the inside of an environment into rows and cells: `\\` starts a
   * new row, `&` starts a new cell, and every cell is an "ordgroup" node.
   * Whitespace nodes are always dropped; newline nodes are dropped when the
   * parser is not newline-sensitive.
   *
   * @param {string} latex - Environment body.
   * @returns {object[][]} Rows of cell nodes.
   */
  parseAligned(latex) {
    let pos = 0;
    const allRows = [];
    let row = [];
    allRows.push(row);
    let group = { type: "ordgroup", args: [] };
    row.push(group);
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        row = [];
        group = { type: "ordgroup", args: [] };
        row.push(group);
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = { type: "ordgroup", args: [] };
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
636
+
637
/**
 * Error raised while converting a parsed LaTeX node into a TexNode; the
 * offending node is kept on the `node` property.
 */
class LatexNodeToTexNodeError extends Error {
  node;

  constructor(message, node) {
    super(message);
    Object.assign(this, { name: "LatexNodeToTexNodeError", node });
  }
}
@@ -263,6 +665,10 @@ var symbolMap = new Map([
263
665
  ["overline", "overline"],
264
666
  ["underline", "underline"],
265
667
  ["bar", "macron"],
668
+ ["dbinom", "binom"],
669
+ ["tbinom", "binom"],
670
+ ["dfrac", "frac"],
671
+ ["tfrac", "frac"],
266
672
  ["boldsymbol", "bold"],
267
673
  ["mathbf", "bold"],
268
674
  ["mathbb", "bb"],
@@ -514,7 +920,9 @@ function convertToken(token) {
514
920
  if (/^[a-zA-Z0-9]$/.test(token)) {
515
921
  return token;
516
922
  } else if (token === "\\\\") {
517
- return "\\\n";
923
+ return "\\";
924
+ } else if (token == "/") {
925
+ return "\\/";
518
926
  } else if (["\\$", "\\#", "\\&", "\\_"].includes(token)) {
519
927
  return token;
520
928
  } else if (token.startsWith("\\")) {
@@ -567,6 +975,7 @@ class TypstWriter {
567
975
  no_need_space ||= str === "'";
568
976
  no_need_space ||= /[0-9]$/.test(this.buffer) && /^[0-9]/.test(str);
569
977
  no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(this.buffer) || this.buffer === "-" || this.buffer === "+";
978
+ no_need_space ||= str.startsWith("\n");
570
979
  no_need_space ||= this.buffer === "";
571
980
  no_need_space ||= /[\s"_^{\(]$/.test(this.buffer);
572
981
  if (!no_need_space) {
@@ -602,7 +1011,7 @@ class TypstWriter {
602
1011
  this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
603
1012
  return;
604
1013
  }
605
- if (!base) {
1014
+ if (base.type === "empty") {
606
1015
  this.queue.push({ type: "text", content: "" });
607
1016
  } else {
608
1017
  this.appendWithBracketsIfNeeded(base);
@@ -696,8 +1105,6 @@ class TypstWriter {
696
1105
  }, "");
697
1106
  if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
698
1107
  this.queue.push({ type: "symbol", content: text });
699
- } else if (text.startsWith("SyMb01-")) {
700
- this.queue.push({ type: "symbol", content: "\\" + text.substring(7) });
701
1108
  } else {
702
1109
  this.queue.push({ type: "symbol", content: "op" });
703
1110
  this.queue.push({ type: "atom", content: "(" });
@@ -712,6 +1119,9 @@ class TypstWriter {
712
1119
  this.append(arg0);
713
1120
  this.queue.push({ type: "atom", content: ")" });
714
1121
  this.insideFunctionDepth--;
1122
+ } else if (node.type === "newline") {
1123
+ this.queue.push({ type: "newline", content: "\n" });
1124
+ return;
715
1125
  } else if (node.type === "align") {
716
1126
  const matrix = node.irregularData;
717
1127
  matrix.forEach((row, i) => {
@@ -734,6 +1144,7 @@ class TypstWriter {
734
1144
  matrix.forEach((row, i) => {
735
1145
  row.forEach((cell, j) => {
736
1146
  if (cell.type === "ordgroup" && cell.args.length === 0) {
1147
+ this.queue.push({ type: "atom", content: "," });
737
1148
  return;
738
1149
  }
739
1150
  this.append(cell);
@@ -754,6 +1165,8 @@ class TypstWriter {
754
1165
  } else {
755
1166
  throw new TypstWriterError(`Unknown macro: ${node.content}`, node);
756
1167
  }
1168
+ } else if (node.type === "comment") {
1169
+ this.queue.push({ type: "comment", content: node.content });
757
1170
  } else {
758
1171
  throw new TypstWriterError(`Unimplemented node type to append: ${node.type}`, node);
759
1172
  }
@@ -775,6 +1188,12 @@ class TypstWriter {
775
1188
  this.needSpaceAfterSingleItemScript = true;
776
1189
  str = "";
777
1190
  break;
1191
+ case "comment":
1192
+ str = `//${node.content}`;
1193
+ break;
1194
+ case "newline":
1195
+ str = "\n";
1196
+ break;
778
1197
  default:
779
1198
  throw new TypstWriterError(`Unexpected node type to stringify: ${node.type}`, node);
780
1199
  }