tex2typst 0.1.20 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,300 +1,615 @@
1
1
  // src/parser.ts
2
- import katex from "katex";
3
- function katexNodeToTexNode(node) {
4
- try {
5
- if (node.loc) {
6
- delete node.loc;
7
- }
8
- let res = {};
9
- switch (node.type) {
10
- case "atom":
11
- res.type = "atom";
12
- res.content = node.text;
13
- if (node.text === "\\{" || node.text === "\\}") {
14
- res.content = node.text.substring(1);
15
- } else if (node.text.startsWith("\\")) {
16
- res.type = "symbol";
2
+ function assert(condition, message = "") {
3
+ if (!condition) {
4
+ throw new LatexParserError(message);
5
+ }
6
+ }
7
+ function get_command_param_num(command) {
8
+ if (UNARY_COMMANDS.includes(command)) {
9
+ return 1;
10
+ } else if (BINARY_COMMANDS.includes(command)) {
11
+ return 2;
12
+ } else {
13
+ return 0;
14
+ }
15
+ }
16
+ function find_closing_curly_bracket(tokens, start) {
17
+ assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
18
+ let count = 1;
19
+ let pos = start + 1;
20
+ while (count > 0) {
21
+ if (pos >= tokens.length) {
22
+ throw new LatexParserError("Unmatched curly brackets");
23
+ }
24
+ if (token_eq(tokens[pos], LEFT_CURLY_BRACKET)) {
25
+ count += 1;
26
+ } else if (token_eq(tokens[pos], RIGHT_CURLY_BRACKET)) {
27
+ count -= 1;
28
+ }
29
+ pos += 1;
30
+ }
31
+ return pos - 1;
32
+ }
33
+ function find_closing_square_bracket(tokens, start) {
34
+ assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
35
+ let count = 1;
36
+ let pos = start + 1;
37
+ while (count > 0) {
38
+ if (pos >= tokens.length) {
39
+ throw new LatexParserError("Unmatched square brackets");
40
+ }
41
+ if (token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
42
+ count += 1;
43
+ } else if (token_eq(tokens[pos], RIGHT_SQUARE_BRACKET)) {
44
+ count -= 1;
45
+ }
46
+ pos += 1;
47
+ }
48
+ return pos - 1;
49
+ }
50
+ function isalpha(char) {
51
+ return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".includes(char);
52
+ }
53
+ function isdigit(char) {
54
+ return "0123456789".includes(char);
55
+ }
56
+ function eat_whitespaces(tokens, start) {
57
+ let pos = start;
58
+ while (pos < tokens.length && ["whitespace", "newline"].includes(tokens[pos].type)) {
59
+ pos++;
60
+ }
61
+ return tokens.slice(start, pos);
62
+ }
63
+ function eat_parenthesis(tokens, start) {
64
+ const firstToken = tokens[start];
65
+ if (firstToken.type === "element" && ["(", ")", "[", "]", "|", "\\{", "\\}"].includes(firstToken.value)) {
66
+ return firstToken;
67
+ } else if (firstToken.type === "command" && ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle"].includes(firstToken.value.slice(1))) {
68
+ return firstToken;
69
+ } else {
70
+ return null;
71
+ }
72
+ }
73
+ function eat_primes(tokens, start) {
74
+ let pos = start;
75
+ while (pos < tokens.length && token_eq(tokens[pos], { type: "element", value: "'" })) {
76
+ pos += 1;
77
+ }
78
+ return pos - start;
79
+ }
80
+ function eat_command_name(latex, start) {
81
+ let pos = start;
82
+ while (pos < latex.length && isalpha(latex[pos])) {
83
+ pos += 1;
84
+ }
85
+ return latex.substring(start, pos);
86
+ }
87
+ function find_closing_right_command(tokens, start) {
88
+ let count = 1;
89
+ let pos = start;
90
+ while (count > 0) {
91
+ if (pos >= tokens.length) {
92
+ return -1;
93
+ }
94
+ if (token_eq(tokens[pos], LEFT_COMMAND)) {
95
+ count += 1;
96
+ } else if (token_eq(tokens[pos], RIGHT_COMMAND)) {
97
+ count -= 1;
98
+ }
99
+ pos += 1;
100
+ }
101
+ return pos - 1;
102
+ }
103
+ function find_closing_end_command(tokens, start) {
104
+ let count = 1;
105
+ let pos = start;
106
+ while (count > 0) {
107
+ if (pos >= tokens.length) {
108
+ return -1;
109
+ }
110
+ if (token_eq(tokens[pos], BEGIN_COMMAND)) {
111
+ count += 1;
112
+ } else if (token_eq(tokens[pos], END_COMMAND)) {
113
+ count -= 1;
114
+ }
115
+ pos += 1;
116
+ }
117
+ return pos - 1;
118
+ }
119
+ function find_closing_curly_bracket_char(latex, start) {
120
+ assert(latex[start] === "{");
121
+ let count = 1;
122
+ let pos = start + 1;
123
+ while (count > 0) {
124
+ if (pos >= latex.length) {
125
+ throw new LatexParserError("Unmatched curly brackets");
126
+ }
127
+ if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
128
+ pos += 2;
129
+ continue;
130
+ }
131
+ if (latex[pos] === "{") {
132
+ count += 1;
133
+ } else if (latex[pos] === "}") {
134
+ count -= 1;
135
+ }
136
+ pos += 1;
137
+ }
138
+ return pos - 1;
139
+ }
140
+ function tokenize(latex) {
141
+ const tokens = [];
142
+ let pos = 0;
143
+ while (pos < latex.length) {
144
+ const firstChar = latex[pos];
145
+ let token;
146
+ switch (firstChar) {
147
+ case "%": {
148
+ let newPos = pos + 1;
149
+ while (newPos < latex.length && latex[newPos] !== "\n") {
150
+ newPos += 1;
17
151
  }
152
+ token = { type: "comment", value: latex.slice(pos + 1, newPos) };
153
+ pos = newPos;
18
154
  break;
19
- case "mathord":
20
- case "textord":
21
- case "op":
22
- case "cr":
23
- res.type = "symbol";
24
- res.content = node.text;
25
- if (node.type === "op") {
26
- res.content = node["name"];
27
- } else if (node.type === "cr") {
28
- res.content = "\\\\";
29
- }
155
+ }
156
+ case "{":
157
+ case "}":
158
+ case "_":
159
+ case "^":
160
+ case "&":
161
+ token = { type: "control", value: firstChar };
162
+ pos++;
163
+ break;
164
+ case "\n":
165
+ token = { type: "newline", value: firstChar };
166
+ pos++;
30
167
  break;
31
- case "genfrac":
32
- res.type = "binaryFunc";
33
- if (node["leftDelim"] === "(" && node["rightDelim"] === ")") {
34
- res.content = "\\binom";
168
+ case "\r": {
169
+ if (pos + 1 < latex.length && latex[pos + 1] === "\n") {
170
+ token = { type: "newline", value: "\n" };
171
+ pos += 2;
35
172
  } else {
36
- res.content = "\\frac";
173
+ token = { type: "newline", value: "\n" };
174
+ pos++;
37
175
  }
38
- res.args = [
39
- katexNodeToTexNode(node["numer"]),
40
- katexNodeToTexNode(node["denom"])
41
- ];
42
176
  break;
43
- case "supsub":
44
- res.type = "supsub";
45
- res.irregularData = {};
46
- if (node["base"]) {
47
- res.irregularData.base = katexNodeToTexNode(node["base"]);
177
+ }
178
+ case " ": {
179
+ let newPos = pos;
180
+ while (newPos < latex.length && latex[newPos] === " ") {
181
+ newPos += 1;
48
182
  }
49
- if (node["sup"]) {
50
- res.irregularData.sup = katexNodeToTexNode(node["sup"]);
183
+ token = { type: "whitespace", value: latex.slice(pos, newPos) };
184
+ pos = newPos;
185
+ break;
186
+ }
187
+ case "\\": {
188
+ if (pos + 1 >= latex.length) {
189
+ throw new LatexParserError("Expecting command name after \\");
51
190
  }
52
- if (node["sub"]) {
53
- res.irregularData.sub = katexNodeToTexNode(node["sub"]);
191
+ const firstTwoChars = latex.slice(pos, pos + 2);
192
+ if (firstTwoChars === "\\\\") {
193
+ token = { type: "control", value: "\\\\" };
194
+ pos += 2;
195
+ } else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
196
+ token = { type: "element", value: firstTwoChars };
197
+ pos += 2;
198
+ } else {
199
+ const command = eat_command_name(latex, pos + 1);
200
+ token = { type: "command", value: "\\" + command };
201
+ pos += 1 + command.length;
54
202
  }
55
203
  break;
56
- case "mclass":
57
- case "ordgroup":
58
- res.type = "ordgroup";
59
- res.args = node.body.map((n) => katexNodeToTexNode(n));
60
- if (res.args.length === 1) {
61
- res = res.args[0];
204
+ }
205
+ default: {
206
+ if (isdigit(firstChar)) {
207
+ let newPos = pos;
208
+ while (newPos < latex.length && isdigit(latex[newPos])) {
209
+ newPos += 1;
210
+ }
211
+ token = { type: "element", value: latex.slice(pos, newPos) };
212
+ } else if (isalpha(firstChar)) {
213
+ token = { type: "element", value: firstChar };
214
+ } else if ("+-*/=\'<>!.,;?()[]|".includes(firstChar)) {
215
+ token = { type: "element", value: firstChar };
216
+ } else {
217
+ token = { type: "unknown", value: firstChar };
62
218
  }
63
- break;
64
- case "leftright": {
65
- const body = katexNodeToTexNode({
66
- type: "ordgroup",
67
- mode: "math",
68
- body: node.body
69
- });
70
- res.type = "leftright";
71
- let left = node["left"];
72
- if (left === "\\{") {
73
- left = "{";
219
+ pos += token.value.length;
220
+ }
221
+ }
222
+ tokens.push(token);
223
+ if (token.type === "command" && ["\\text", "\\begin", "\\end"].includes(token.value)) {
224
+ if (pos >= latex.length || latex[pos] !== "{") {
225
+ throw new LatexParserError(`No content for ${token.value} command`);
226
+ }
227
+ tokens.push({ type: "control", value: "{" });
228
+ const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
229
+ pos++;
230
+ let textInside = latex.slice(pos, posClosingBracket);
231
+ const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
232
+ for (const char of chars) {
233
+ textInside = textInside.replaceAll("\\" + char, char);
234
+ }
235
+ tokens.push({ type: "text", value: textInside });
236
+ tokens.push({ type: "control", value: "}" });
237
+ pos = posClosingBracket + 1;
238
+ }
239
+ }
240
+ return tokens;
241
+ }
242
+ function token_eq(token1, token2) {
243
+ return token1.type == token2.type && token1.value == token2.value;
244
+ }
245
+ function parseTex(tex, customTexMacros) {
246
+ const parser = new LatexParser;
247
+ const original_tokens = tokenize(tex);
248
+ let processed_tokens = [];
249
+ for (const token of original_tokens) {
250
+ if (token.type === "command" && customTexMacros[token.value]) {
251
+ const expanded_tokens = tokenize(customTexMacros[token.value]);
252
+ processed_tokens = processed_tokens.concat(expanded_tokens);
253
+ } else {
254
+ processed_tokens.push(token);
255
+ }
256
+ }
257
+ return parser.parse(processed_tokens);
258
+ }
259
+ var UNARY_COMMANDS = [
260
+ "sqrt",
261
+ "text",
262
+ "arccos",
263
+ "arcsin",
264
+ "arctan",
265
+ "arg",
266
+ "bar",
267
+ "bold",
268
+ "boldsymbol",
269
+ "ddot",
270
+ "det",
271
+ "dim",
272
+ "dot",
273
+ "exp",
274
+ "gcd",
275
+ "hat",
276
+ "ker",
277
+ "mathbb",
278
+ "mathbf",
279
+ "mathcal",
280
+ "mathscr",
281
+ "mathsf",
282
+ "mathtt",
283
+ "mathrm",
284
+ "max",
285
+ "min",
286
+ "mod",
287
+ "operatorname",
288
+ "overbrace",
289
+ "overline",
290
+ "pmb",
291
+ "sup",
292
+ "rm",
293
+ "tilde",
294
+ "underbrace",
295
+ "underline",
296
+ "vec",
297
+ "widehat",
298
+ "widetilde"
299
+ ];
300
+ var BINARY_COMMANDS = [
301
+ "frac",
302
+ "tfrac",
303
+ "binom",
304
+ "dbinom",
305
+ "dfrac",
306
+ "tbinom"
307
+ ];
308
+ var EMPTY_NODE = { type: "empty", content: "" };
309
+ var LEFT_CURLY_BRACKET = { type: "control", value: "{" };
310
+ var RIGHT_CURLY_BRACKET = { type: "control", value: "}" };
311
+ var LEFT_SQUARE_BRACKET = { type: "element", value: "[" };
312
+ var RIGHT_SQUARE_BRACKET = { type: "element", value: "]" };
313
+ var LEFT_COMMAND = { type: "command", value: "\\left" };
314
+ var RIGHT_COMMAND = { type: "command", value: "\\right" };
315
+ var BEGIN_COMMAND = { type: "command", value: "\\begin" };
316
+ var END_COMMAND = { type: "command", value: "\\end" };
317
+
318
+ class LatexParserError extends Error {
319
+ constructor(message) {
320
+ super(message);
321
+ this.name = "LatexParserError";
322
+ }
323
+ }
324
+ var SUB_SYMBOL = { type: "control", value: "_" };
325
+ var SUP_SYMBOL = { type: "control", value: "^" };
326
+
327
+ class LatexParser {
328
+ space_sensitive;
329
+ newline_sensitive;
330
+ constructor(space_sensitive = false, newline_sensitive = true) {
331
+ this.space_sensitive = space_sensitive;
332
+ this.newline_sensitive = newline_sensitive;
333
+ }
334
+ parse(tokens) {
335
+ const results = [];
336
+ let pos = 0;
337
+ while (pos < tokens.length) {
338
+ const results2 = [];
339
+ let pos2 = 0;
340
+ while (pos2 < tokens.length) {
341
+ const [res, newPos] = this.parseNextExpr(tokens, pos2);
342
+ pos2 = newPos;
343
+ if (!this.space_sensitive && res.type === "whitespace") {
344
+ continue;
74
345
  }
75
- let right = node["right"];
76
- if (right === "\\}") {
77
- right = "}";
346
+ if (!this.newline_sensitive && res.type === "newline") {
347
+ continue;
78
348
  }
79
- const is_atom = (str) => ["(", ")", "[", "]", "{", "}"].includes(str);
80
- res.args = [
81
- { type: is_atom(left) ? "atom" : "symbol", content: left },
82
- body,
83
- { type: is_atom(right) ? "atom" : "symbol", content: right }
84
- ];
85
- break;
349
+ if (res.type === "control" && res.content === "&") {
350
+ throw new LatexParserError("Unexpected & outside of an alignment");
351
+ }
352
+ results2.push(res);
86
353
  }
87
- case "underline":
88
- case "overline":
89
- res.type = "unaryFunc";
90
- res.content = "\\" + node.type;
91
- res.args = [
92
- katexNodeToTexNode(node["body"])
93
- ];
94
- break;
95
- case "accent": {
96
- res.type = "unaryFunc";
97
- res.content = node["label"];
98
- res.args = [
99
- katexNodeToTexNode(node["base"])
100
- ];
101
- break;
354
+ if (results2.length === 0) {
355
+ return EMPTY_NODE;
356
+ } else if (results2.length === 1) {
357
+ return results2[0];
358
+ } else {
359
+ return { type: "ordgroup", content: "", args: results2 };
102
360
  }
103
- case "sqrt":
104
- if (node["index"]) {
105
- res.irregularData = katexNodeToTexNode(node["index"]);
361
+ }
362
+ if (results.length === 0) {
363
+ return EMPTY_NODE;
364
+ } else if (results.length === 1) {
365
+ return results[0];
366
+ } else {
367
+ return { type: "ordgroup", content: "", args: results };
368
+ }
369
+ }
370
+ parseNextExpr(tokens, start) {
371
+ let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
372
+ let sub = null;
373
+ let sup = null;
374
+ let num_prime = 0;
375
+ num_prime += eat_primes(tokens, pos);
376
+ pos += num_prime;
377
+ if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
378
+ [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
379
+ num_prime += eat_primes(tokens, pos);
380
+ pos += num_prime;
381
+ if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
382
+ [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
383
+ if (eat_primes(tokens, pos) > 0) {
384
+ throw new LatexParserError("Double superscript");
106
385
  }
107
- case "font":
108
- case "operatorname":
109
- res.type = "unaryFunc";
110
- res.content = "\\" + node.type;
111
- if (node.type === "font") {
112
- res.content = "\\" + node["font"];
386
+ }
387
+ } else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
388
+ [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
389
+ if (eat_primes(tokens, pos) > 0) {
390
+ throw new LatexParserError("Double superscript");
391
+ }
392
+ if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
393
+ [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
394
+ if (eat_primes(tokens, pos) > 0) {
395
+ throw new LatexParserError("Double superscript");
113
396
  }
114
- if (Array.isArray(node.body)) {
115
- const obj = {
116
- type: "ordgroup",
117
- mode: "math",
118
- body: node.body
119
- };
120
- res.args = [
121
- katexNodeToTexNode(obj)
122
- ];
123
- } else {
124
- res.args = [
125
- katexNodeToTexNode(node.body)
126
- ];
397
+ }
398
+ }
399
+ if (sub !== null || sup !== null || num_prime > 0) {
400
+ const res = { base };
401
+ if (sub) {
402
+ res.sub = sub;
403
+ }
404
+ if (num_prime > 0) {
405
+ res.sup = { type: "ordgroup", content: "", args: [] };
406
+ for (let i = 0;i < num_prime; i++) {
407
+ res.sup.args.push({ type: "symbol", content: "\\prime" });
127
408
  }
128
- break;
129
- case "horizBrace":
130
- res.type = "unaryFunc";
131
- res.content = node["label"];
132
- res.args = [
133
- katexNodeToTexNode(node["base"])
134
- ];
135
- break;
136
- case "array":
137
- if (node["colSeparationType"] === "align") {
138
- res.type = "align";
139
- } else {
140
- res.type = "matrix";
409
+ if (sup) {
410
+ res.sup.args.push(sup);
141
411
  }
142
- res.irregularData = node.body.map((row) => {
143
- return row.map((cell) => {
144
- if (cell.type !== "styling" || cell.body.length !== 1) {
145
- throw new KatexNodeToTexNodeError("Expecting cell.type==='\\styling' and cell.body.length===1", cell);
146
- }
147
- return katexNodeToTexNode(cell.body[0]);
148
- });
149
- });
150
- break;
151
- case "text": {
152
- res.type = "text";
153
- let str = "";
154
- node.body.forEach((n) => {
155
- if (n.mode !== "text") {
156
- throw new KatexNodeToTexNodeError("Expecting node.mode==='text'", node);
157
- }
158
- str += n.text;
159
- });
160
- res.content = str;
161
- break;
412
+ if (res.sup.args.length === 1) {
413
+ res.sup = res.sup.args[0];
414
+ }
415
+ } else if (sup) {
416
+ res.sup = sup;
162
417
  }
163
- case "spacing":
164
- case "kern":
165
- res.type = "empty";
166
- res.content = " ";
167
- break;
168
- case "htmlmathml": {
169
- const element = node["mathml"][0]["body"][0];
170
- if (element && element.type === "textord" && element.text === "\u2260") {
171
- res.type = "symbol";
172
- res.content = "\\neq";
173
- break;
418
+ return [{ type: "supsub", content: "", data: res }, pos];
419
+ } else {
420
+ return [base, pos];
421
+ }
422
+ }
423
+ parseNextExprWithoutSupSub(tokens, start) {
424
+ const firstToken = tokens[start];
425
+ const tokenType = firstToken.type;
426
+ switch (tokenType) {
427
+ case "element":
428
+ case "text":
429
+ case "comment":
430
+ case "whitespace":
431
+ case "newline":
432
+ return [{ type: tokenType, content: firstToken.value }, start + 1];
433
+ case "command":
434
+ if (token_eq(firstToken, BEGIN_COMMAND)) {
435
+ return this.parseBeginEndExpr(tokens, start);
436
+ } else if (token_eq(firstToken, LEFT_COMMAND)) {
437
+ return this.parseLeftRightExpr(tokens, start);
174
438
  } else {
439
+ return this.parseCommandExpr(tokens, start);
175
440
  }
176
- }
177
- case "color":
178
- if (Array.isArray(node.body) && node.body.length === 1) {
179
- const sub_body = node.body[0];
180
- if (sub_body.type === "text") {
181
- res.type = "unknownMacro";
182
- const joined = sub_body.body.map((n) => n.text).join("");
183
- if (/^\\[a-zA-Z]+$/.test(joined)) {
184
- res.content = joined.substring(1);
185
- break;
441
+ case "control":
442
+ const controlChar = firstToken.value;
443
+ switch (controlChar) {
444
+ case "{":
445
+ const posClosingBracket = find_closing_curly_bracket(tokens, start);
446
+ const exprInside = tokens.slice(start + 1, posClosingBracket);
447
+ return [this.parse(exprInside), posClosingBracket + 1];
448
+ case "}":
449
+ throw new LatexParserError("Unmatched '}'");
450
+ case "\\\\":
451
+ return [{ type: "control", content: "\\\\" }, start + 1];
452
+ case "_": {
453
+ let [sub, pos] = this.parseNextExpr(tokens, start + 1);
454
+ let sup = undefined;
455
+ if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
456
+ [sup, pos] = this.parseNextExpr(tokens, pos + 1);
186
457
  }
458
+ const subData = { base: EMPTY_NODE, sub, sup };
459
+ return [{ type: "supsub", content: "", data: subData }, pos];
187
460
  }
461
+ case "^": {
462
+ let [sup, pos] = this.parseNextExpr(tokens, start + 1);
463
+ let sub = undefined;
464
+ if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
465
+ [sub, pos] = this.parseNextExpr(tokens, pos + 1);
466
+ }
467
+ const supData = { base: EMPTY_NODE, sub, sup };
468
+ return [{ type: "supsub", content: "", data: supData }, pos];
469
+ }
470
+ case "&":
471
+ return [{ type: "control", content: "&" }, start + 1];
472
+ default:
473
+ throw new LatexParserError("Unknown control sequence");
188
474
  }
189
- throw new KatexNodeToTexNodeError(`Unknown error type in parsed result:`, node);
190
- case "comment":
191
- res.type = "comment";
192
- res.content = node.text;
193
- break;
194
475
  default:
195
- throw new KatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
196
- break;
476
+ throw new LatexParserError("Unknown token type");
197
477
  }
198
- return res;
199
- } catch (e) {
200
- throw e;
201
478
  }
202
- }
203
- function splitTex(tex) {
204
- const lines = tex.split("\n");
205
- const out_tex_list = [];
206
- let current_tex = "";
207
- for (let i = 0;i < lines.length; i++) {
208
- const line = lines[i];
209
- let index = -1;
210
- while (index + 1 < line.length) {
211
- index = line.indexOf("%", index + 1);
212
- if (index === -1) {
213
- break;
214
- }
215
- if (index === 0 || line[index - 1] !== "\\") {
216
- break;
217
- }
479
+ parseCommandExpr(tokens, start) {
480
+ assert(tokens[start].type === "command");
481
+ const command = tokens[start].value;
482
+ let pos = start + 1;
483
+ if (["left", "right", "begin", "end"].includes(command.slice(1))) {
484
+ throw new LatexParserError("Unexpected command: " + command);
218
485
  }
219
- if (index !== -1) {
220
- current_tex += line.substring(0, index);
221
- const comment = line.substring(index);
222
- out_tex_list.push(current_tex);
223
- current_tex = "";
224
- out_tex_list.push(comment);
225
- } else {
226
- current_tex += line;
227
- }
228
- if (i < lines.length - 1) {
229
- const has_begin_command = line.includes("\\begin{");
230
- const followed_by_end_command = lines[i + 1].includes("\\end{");
231
- if (!has_begin_command && !followed_by_end_command) {
232
- current_tex += "\\SyMbOlNeWlInE ";
486
+ const paramNum = get_command_param_num(command.slice(1));
487
+ if (paramNum === 0) {
488
+ return [{ type: "symbol", content: command }, pos];
489
+ } else if (paramNum === 1) {
490
+ if (command === "\\sqrt" && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
491
+ const posLeftSquareBracket = pos;
492
+ const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
493
+ const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
494
+ const exponent = this.parse(exprInside);
495
+ const [arg12, newPos2] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
496
+ return [{ type: "unaryFunc", content: command, args: [arg12], data: exponent }, newPos2];
497
+ } else if (command === "\\text") {
498
+ if (pos + 2 >= tokens.length) {
499
+ throw new LatexParserError("Expecting content for \\text command");
500
+ }
501
+ assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
502
+ assert(tokens[pos + 1].type === "text");
503
+ assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
504
+ const text = tokens[pos + 1].value;
505
+ return [{ type: "text", content: text }, pos + 3];
233
506
  }
507
+ let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
508
+ return [{ type: "unaryFunc", content: command, args: [arg1] }, newPos];
509
+ } else if (paramNum === 2) {
510
+ const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
511
+ const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
512
+ return [{ type: "binaryFunc", content: command, args: [arg1, arg2] }, pos2];
513
+ } else {
514
+ throw new Error("Invalid number of parameters");
234
515
  }
235
516
  }
236
- if (current_tex.length > 0) {
237
- out_tex_list.push(current_tex);
517
+ parseLeftRightExpr(tokens, start) {
518
+ assert(token_eq(tokens[start], LEFT_COMMAND));
519
+ let pos = start + 1;
520
+ pos += eat_whitespaces(tokens, pos).length;
521
+ if (pos >= tokens.length) {
522
+ throw new LatexParserError("Expecting delimiter after \\left");
523
+ }
524
+ const leftDelimiter = eat_parenthesis(tokens, pos);
525
+ if (leftDelimiter === null) {
526
+ throw new LatexParserError("Invalid delimiter after \\left");
527
+ }
528
+ pos++;
529
+ const exprInsideStart = pos;
530
+ const idx = find_closing_right_command(tokens, pos);
531
+ if (idx === -1) {
532
+ throw new LatexParserError("No matching \\right");
533
+ }
534
+ const exprInsideEnd = idx;
535
+ pos = idx + 1;
536
+ pos += eat_whitespaces(tokens, pos).length;
537
+ if (pos >= tokens.length) {
538
+ throw new LatexParserError("Expecting \\right after \\left");
539
+ }
540
+ const rightDelimiter = eat_parenthesis(tokens, pos);
541
+ if (rightDelimiter === null) {
542
+ throw new LatexParserError("Invalid delimiter after \\right");
543
+ }
544
+ pos++;
545
+ const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
546
+ const body = this.parse(exprInside);
547
+ const args = [
548
+ { type: "element", content: leftDelimiter.value },
549
+ body,
550
+ { type: "element", content: rightDelimiter.value }
551
+ ];
552
+ const res = { type: "leftright", content: "", args };
553
+ return [res, pos];
238
554
  }
239
- return out_tex_list;
240
- }
241
- function parseTex(tex, customTexMacros) {
242
- const macros = {
243
- "\\mod": "\\operatorname{SyMb01-mod}",
244
- "\\liminf": "\\operatorname{SyMb01-liminf}",
245
- "\\limsup": "\\operatorname{SyMb01-limsup}",
246
- "\\qquad": "\\operatorname{SyMb01-qquad}",
247
- "\\quad": "\\operatorname{SyMb01-quad}",
248
- "\\cdots": "\\operatorname{SyMb01-cdots}",
249
- "\\colon": "\\operatorname{SyMb01-colon}",
250
- "\\imath": "\\operatorname{SyMb01-imath}",
251
- "\\iiiint": "\\operatorname{SyMb01-iiiint}",
252
- "\\jmath": "\\operatorname{SyMb01-jmath}",
253
- "\\vdots": "\\operatorname{SyMb01-vdots}",
254
- "\\notin": "\\operatorname{SyMb01-notin}",
255
- "\\slash": "\\operatorname{SyMb01-slash}",
256
- "\\LaTeX": "\\operatorname{SyMb01-LaTeX}",
257
- "\\TeX": "\\operatorname{SyMb01-TeX}",
258
- "\\SyMbOlNeWlInE": "\\operatorname{SyMb01-newline}",
259
- ...customTexMacros
260
- };
261
- const options = {
262
- macros,
263
- displayMode: true,
264
- strict: "ignore",
265
- throwOnError: false
266
- };
267
- const tex_list = splitTex(tex);
268
- let treeArray = [];
269
- for (const tex_item of tex_list) {
270
- if (tex_item.startsWith("%")) {
271
- const tex_node = {
272
- type: "comment",
273
- mode: "math",
274
- text: tex_item.substring(1)
275
- };
276
- treeArray.push(tex_node);
277
- continue;
555
+ parseBeginEndExpr(tokens, start) {
556
+ assert(token_eq(tokens[start], BEGIN_COMMAND));
557
+ let pos = start + 1;
558
+ assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
559
+ assert(tokens[pos + 1].type === "text");
560
+ assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
561
+ const envName = tokens[pos + 1].value;
562
+ pos += 3;
563
+ pos += eat_whitespaces(tokens, pos).length;
564
+ const exprInsideStart = pos;
565
+ const endIdx = find_closing_end_command(tokens, pos);
566
+ if (endIdx === -1) {
567
+ throw new LatexParserError("No matching \\end");
278
568
  }
279
- const trees = generateParseTree(tex_item, options);
280
- treeArray = treeArray.concat(trees);
569
+ const exprInsideEnd = endIdx;
570
+ pos = endIdx + 1;
571
+ assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
572
+ assert(tokens[pos + 1].type === "text");
573
+ assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
574
+ if (tokens[pos + 1].value !== envName) {
575
+ throw new LatexParserError("Mismatched \\begin and \\end environments");
576
+ }
577
+ pos += 3;
578
+ const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
579
+ while (exprInside.length > 0 && ["whitespace", "newline"].includes(exprInside[exprInside.length - 1].type)) {
580
+ exprInside.pop();
581
+ }
582
+ const body = this.parseAligned(exprInside);
583
+ const res = { type: "beginend", content: envName, data: body };
584
+ return [res, pos];
281
585
  }
282
- let t = {
283
- type: "ordgroup",
284
- mode: "math",
285
- body: treeArray,
286
- loc: {}
287
- };
288
- return katexNodeToTexNode(t);
289
- }
290
- var generateParseTree = katex.__parse;
291
-
292
- class KatexNodeToTexNodeError extends Error {
293
- node;
294
- constructor(message, node) {
295
- super(message);
296
- this.name = "KatexNodeToTexNodeError";
297
- this.node = node;
586
+ parseAligned(tokens) {
587
+ let pos = 0;
588
+ const allRows = [];
589
+ let row = [];
590
+ allRows.push(row);
591
+ let group = { type: "ordgroup", content: "", args: [] };
592
+ row.push(group);
593
+ while (pos < tokens.length) {
594
+ const [res, newPos] = this.parseNextExpr(tokens, pos);
595
+ pos = newPos;
596
+ if (res.type === "whitespace") {
597
+ continue;
598
+ } else if (res.type === "newline" && !this.newline_sensitive) {
599
+ continue;
600
+ } else if (res.type === "control" && res.content === "\\\\") {
601
+ row = [];
602
+ group = { type: "ordgroup", content: "", args: [] };
603
+ row.push(group);
604
+ allRows.push(row);
605
+ } else if (res.type === "control" && res.content === "&") {
606
+ group = { type: "ordgroup", content: "", args: [] };
607
+ row.push(group);
608
+ } else {
609
+ group.args.push(res);
610
+ }
611
+ }
612
+ return allRows;
298
613
  }
299
614
  }
300
615
 
@@ -320,6 +635,10 @@ var symbolMap = new Map([
320
635
  ["overline", "overline"],
321
636
  ["underline", "underline"],
322
637
  ["bar", "macron"],
638
+ ["dbinom", "binom"],
639
+ ["tbinom", "binom"],
640
+ ["dfrac", "frac"],
641
+ ["tfrac", "frac"],
323
642
  ["boldsymbol", "bold"],
324
643
  ["mathbf", "bold"],
325
644
  ["mathbb", "bb"],
@@ -639,22 +958,22 @@ class TypstWriter {
639
958
  this.buffer += str;
640
959
  }
641
960
  append(node) {
642
- if (node.type === "empty") {
961
+ if (node.type === "empty" || node.type === "whitespace") {
643
962
  return;
644
963
  } else if (node.type === "ordgroup") {
645
964
  node.args.forEach((arg) => this.append(arg));
646
- } else if (node.type === "atom") {
965
+ } else if (node.type === "element") {
647
966
  let content = node.content;
648
967
  if (node.content === "," && this.insideFunctionDepth > 0) {
649
968
  content = "comma";
650
969
  }
651
- this.queue.push({ type: "atom", content });
970
+ this.queue.push({ type: "symbol", content });
652
971
  } else if (node.type === "symbol") {
653
972
  this.queue.push({ type: "symbol", content: node.content });
654
973
  } else if (node.type === "text") {
655
974
  this.queue.push(node);
656
975
  } else if (node.type === "supsub") {
657
- let { base, sup, sub } = node.irregularData;
976
+ let { base, sup, sub } = node.data;
658
977
  if (base && base.type === "unaryFunc" && base.content === "\\overbrace" && sup) {
659
978
  this.append({ type: "binaryFunc", content: "\\overbrace", args: [base.args[0], sup] });
660
979
  return;
@@ -662,7 +981,7 @@ class TypstWriter {
662
981
  this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
663
982
  return;
664
983
  }
665
- if (!base) {
984
+ if (base.type === "empty") {
666
985
  this.queue.push({ type: "text", content: "" });
667
986
  } else {
668
987
  this.appendWithBracketsIfNeeded(base);
@@ -686,7 +1005,7 @@ class TypstWriter {
686
1005
  }
687
1006
  } else if (node.type === "leftright") {
688
1007
  const [left, body, right] = node.args;
689
- if (["[]", "()", "{}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
1008
+ if (["[]", "()", "\\{\\}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
690
1009
  this.append(left);
691
1010
  this.append(body);
692
1011
  this.append(right);
@@ -715,12 +1034,12 @@ class TypstWriter {
715
1034
  } else if (node.type === "unaryFunc") {
716
1035
  const func_symbol = { type: "symbol", content: node.content };
717
1036
  const arg0 = node.args[0];
718
- if (node.content === "\\sqrt" && node.irregularData) {
1037
+ if (node.content === "\\sqrt" && node.data) {
719
1038
  func_symbol.content = "root";
720
1039
  this.queue.push(func_symbol);
721
1040
  this.insideFunctionDepth++;
722
1041
  this.queue.push({ type: "atom", content: "(" });
723
- this.append(node.irregularData);
1042
+ this.append(node.data);
724
1043
  this.queue.push({ type: "atom", content: "," });
725
1044
  this.append(arg0);
726
1045
  this.queue.push({ type: "atom", content: ")" });
@@ -741,7 +1060,7 @@ class TypstWriter {
741
1060
  return;
742
1061
  } else if (node.content === "\\mathbb") {
743
1062
  const body = node.args[0];
744
- if (body.type === "symbol" && /^[A-Z]$/.test(body.content)) {
1063
+ if (body.type === "element" && /^[A-Z]$/.test(body.content)) {
745
1064
  this.queue.push({ type: "symbol", content: body.content + body.content });
746
1065
  return;
747
1066
  }
@@ -756,13 +1075,6 @@ class TypstWriter {
756
1075
  }, "");
757
1076
  if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
758
1077
  this.queue.push({ type: "symbol", content: text });
759
- } else if (text.startsWith("SyMb01-")) {
760
- const special_symbol = text.substring(7);
761
- if (special_symbol === "newline") {
762
- this.queue.push({ type: "newline", content: "\n" });
763
- return;
764
- }
765
- this.queue.push({ type: "symbol", content: "\\" + special_symbol });
766
1078
  } else {
767
1079
  this.queue.push({ type: "symbol", content: "op" });
768
1080
  this.queue.push({ type: "atom", content: "(" });
@@ -777,49 +1089,61 @@ class TypstWriter {
777
1089
  this.append(arg0);
778
1090
  this.queue.push({ type: "atom", content: ")" });
779
1091
  this.insideFunctionDepth--;
780
- } else if (node.type === "align") {
781
- const matrix = node.irregularData;
782
- matrix.forEach((row, i) => {
783
- row.forEach((cell, j) => {
784
- if (j > 0) {
785
- this.queue.push({ type: "atom", content: "&" });
1092
+ } else if (node.type === "newline") {
1093
+ this.queue.push({ type: "newline", content: "\n" });
1094
+ return;
1095
+ } else if (node.type === "beginend") {
1096
+ if (node.content.startsWith("align")) {
1097
+ const matrix = node.data;
1098
+ matrix.forEach((row, i) => {
1099
+ row.forEach((cell, j) => {
1100
+ if (j > 0) {
1101
+ this.queue.push({ type: "atom", content: "&" });
1102
+ }
1103
+ this.append(cell);
1104
+ });
1105
+ if (i < matrix.length - 1) {
1106
+ this.queue.push({ type: "symbol", content: "\\\\" });
786
1107
  }
787
- this.append(cell);
788
1108
  });
789
- if (i < matrix.length - 1) {
790
- this.queue.push({ type: "symbol", content: "\\\\" });
791
- }
792
- });
793
- } else if (node.type === "matrix") {
794
- const matrix = node.irregularData;
795
- this.queue.push({ type: "symbol", content: "mat" });
796
- this.insideFunctionDepth++;
797
- this.queue.push({ type: "atom", content: "(" });
798
- this.queue.push({ type: "symbol", content: "delim: #none, " });
799
- matrix.forEach((row, i) => {
800
- row.forEach((cell, j) => {
801
- if (cell.type === "ordgroup" && cell.args.length === 0) {
802
- this.queue.push({ type: "atom", content: "," });
803
- return;
804
- }
805
- this.append(cell);
806
- if (j < row.length - 1) {
807
- this.queue.push({ type: "atom", content: "," });
808
- } else {
809
- if (i < matrix.length - 1) {
810
- this.queue.push({ type: "atom", content: ";" });
1109
+ } else {
1110
+ const matrix = node.data;
1111
+ this.queue.push({ type: "symbol", content: "mat" });
1112
+ this.insideFunctionDepth++;
1113
+ this.queue.push({ type: "atom", content: "(" });
1114
+ this.queue.push({ type: "symbol", content: "delim: #none, " });
1115
+ matrix.forEach((row, i) => {
1116
+ row.forEach((cell, j) => {
1117
+ if (cell.type === "ordgroup" && cell.args.length === 0) {
1118
+ this.queue.push({ type: "atom", content: "," });
1119
+ return;
811
1120
  }
812
- }
1121
+ this.append(cell);
1122
+ if (j < row.length - 1) {
1123
+ this.queue.push({ type: "atom", content: "," });
1124
+ } else {
1125
+ if (i < matrix.length - 1) {
1126
+ this.queue.push({ type: "atom", content: ";" });
1127
+ }
1128
+ }
1129
+ });
813
1130
  });
814
- });
815
- this.queue.push({ type: "atom", content: ")" });
816
- this.insideFunctionDepth--;
1131
+ this.queue.push({ type: "atom", content: ")" });
1132
+ this.insideFunctionDepth--;
1133
+ }
1134
+ } else if (node.type === "matrix") {
817
1135
  } else if (node.type === "unknownMacro") {
818
1136
  if (this.nonStrict) {
819
1137
  this.queue.push({ type: "symbol", content: node.content });
820
1138
  } else {
821
1139
  throw new TypstWriterError(`Unknown macro: ${node.content}`, node);
822
1140
  }
1141
+ } else if (node.type === "control") {
1142
+ if (node.content === "\\\\") {
1143
+ this.queue.push({ type: "symbol", content: node.content });
1144
+ } else {
1145
+ throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
1146
+ }
823
1147
  } else if (node.type === "comment") {
824
1148
  this.queue.push({ type: "comment", content: node.content });
825
1149
  } else {
@@ -859,9 +1183,7 @@ class TypstWriter {
859
1183
  this.queue = [];
860
1184
  }
861
1185
  appendWithBracketsIfNeeded(node) {
862
- const is_single_atom = node.type === "atom";
863
- const is_single_function = node.type === "unaryFunc" || node.type === "binaryFunc" || node.type === "leftright";
864
- const is_single = ["atom", "symbol", "unaryFunc", "binaryFunc", "leftright"].includes(node.type);
1186
+ const is_single = ["symbol", "element", "unaryFunc", "binaryFunc", "leftright"].includes(node.type);
865
1187
  if (is_single) {
866
1188
  this.append(node);
867
1189
  } else {