tex2typst 0.1.20 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/index.js +574 -229
- package/dist/parser.d.ts +18 -5
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +10 -5
- package/package.json +1 -1
- package/src/map.ts +4 -0
- package/src/parser.ts +689 -272
- package/src/types.ts +11 -5
- package/src/writer.ts +4 -9
- package/tsconfig.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,73 +1,201 @@
|
|
|
1
1
|
// src/parser.ts
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
/**
 * Parser-internal invariant check.
 *
 * @param {*} condition - Value that must be truthy for parsing to continue.
 * @param {string} [message=""] - Text attached to the error raised on failure.
 * @throws {LatexParserError} When `condition` is falsy.
 */
function assert(condition, message = "") {
  if (condition) {
    return;
  }
  throw new LatexParserError(message);
}
|
|
7
|
+
/**
 * Look up how many arguments a command consumes.
 *
 * @param {string} command - Command name without the leading backslash.
 * @returns {number} 1 if listed in UNARY_COMMANDS, 2 if listed in
 *   BINARY_COMMANDS, otherwise 0.
 */
function get_command_param_num(command) {
  if (UNARY_COMMANDS.includes(command)) {
    return 1;
  }
  return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
|
|
16
|
+
/**
 * Find the `}` that closes the `{` located at `start`.
 *
 * Nested groups are tracked by depth. Any backslash consumes the character
 * that follows it, so `\{` and `\}` never affect the depth and `\\{` is read
 * as a line break (`\\`) followed by a real group opener rather than as a
 * backslash followed by an escaped brace.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`.
 * @throws {LatexParserError} When no matching `}` exists.
 */
function find_closing_curly_bracket(latex, start) {
  assert(latex[start] === "{");
  let depth = 1;
  let pos = start + 1;
  while (pos < latex.length) {
    const ch = latex[pos];
    if (ch === "\\") {
      // Skip the escape as a unit: covers \{, \} and \\ alike.
      pos += 2;
      continue;
    }
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) {
        return pos;
      }
    }
    pos += 1;
  }
  throw new LatexParserError("Unmatched curly brackets");
}
|
|
37
|
+
/**
 * Find the `]` that closes the `[` located at `start`, honouring nesting.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index of the opening `[`.
 * @returns {number} Index of the matching `]`.
 * @throws {LatexParserError} When the text ends before the bracket closes.
 */
function find_closing_square_bracket(latex, start) {
  assert(latex[start] === "[");
  let depth = 1;
  for (let i = start + 1; i < latex.length; i++) {
    const ch = latex[i];
    if (ch === "[") {
      depth++;
    } else if (ch === "]") {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  throw new LatexParserError("Unmatched square brackets");
}
|
|
54
|
+
/**
 * Test whether `char` is exactly one ASCII letter (a-z or A-Z).
 *
 * A substring lookup in the alphabet would also accept the empty string and
 * multi-letter runs such as "ab", so the check is anchored to one character.
 *
 * @param {*} char - Candidate value; non-strings are never letters.
 * @returns {boolean} True only for a single ASCII letter.
 */
function isalpha(char) {
  return typeof char === "string" && /^[A-Za-z]$/.test(char);
}
|
|
57
|
+
/**
 * Test whether `char` is exactly one ASCII digit (0-9).
 *
 * A substring lookup in "0123456789" would also accept the empty string and
 * runs such as "12", so the check is anchored to one character.
 *
 * @param {*} char - Candidate value; non-strings are never digits.
 * @returns {boolean} True only for a single ASCII digit.
 */
function isdigit(char) {
  return typeof char === "string" && /^[0-9]$/.test(char);
}
|
|
60
|
+
/**
 * Locate the next occurrence of `\command_name` at or after `start` that is
 * not merely the prefix of a longer command (i.e. is not directly followed
 * by a letter).
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index at which the search begins.
 * @param {string} command_name - Command name without the backslash.
 * @returns {number} Index of the backslash of the match, or -1 if none.
 */
function find_command(latex, start, command_name) {
  const needle = "\\" + command_name;
  let cursor = start;
  while (cursor < latex.length) {
    const hit = latex.indexOf(needle, cursor);
    if (hit === -1) {
      break;
    }
    const after = hit + needle.length;
    const followedByLetter = after < latex.length && isalpha(latex[after]);
    if (!followedByLetter) {
      return hit;
    }
    // Matched only the head of a longer command name; keep looking.
    cursor = after;
  }
  return -1;
}
|
|
76
|
+
/**
 * Starting inside an already opened `\left`, find the `\right` that closes
 * it, skipping over nested `\left ... \right` pairs.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index just after the opening delimiter.
 * @returns {number} Index of the backslash of the matching `\right`,
 *   or -1 when there is none.
 */
function find_closing_right_command(latex, start) {
  const leftLen = "\\left".length;
  const rightLen = "\\right".length;
  let depth = 1;
  let cursor = start;
  for (;;) {
    if (cursor >= latex.length) {
      return -1;
    }
    const nextLeft = find_command(latex, cursor, "left");
    const nextRight = find_command(latex, cursor, "right");
    if (nextRight === -1) {
      return -1;
    }
    const opensFirst = nextLeft !== -1 && nextLeft <= nextRight;
    if (opensFirst) {
      depth += 1;
      cursor = nextLeft + leftLen;
      continue;
    }
    depth -= 1;
    if (depth === 0) {
      return nextRight;
    }
    cursor = nextRight + rightLen;
  }
}
|
|
98
|
+
/**
 * Starting inside an already opened `\begin`, find the `\end` that closes
 * it, skipping over nested `\begin ... \end` pairs.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index at which the environment body begins.
 * @returns {number} Index of the backslash of the matching `\end`,
 *   or -1 when there is none.
 */
function find_closing_end_command(latex, start) {
  const beginLen = "\\begin".length;
  const endLen = "\\end".length;
  let depth = 1;
  let cursor = start;
  for (;;) {
    if (cursor >= latex.length) {
      return -1;
    }
    const nextBegin = find_command(latex, cursor, "begin");
    const nextEnd = find_command(latex, cursor, "end");
    if (nextEnd === -1) {
      return -1;
    }
    const opensFirst = nextBegin !== -1 && nextBegin <= nextEnd;
    if (opensFirst) {
      depth += 1;
      cursor = nextBegin + beginLen;
      continue;
    }
    depth -= 1;
    if (depth === 0) {
      return nextEnd;
    }
    cursor = nextEnd + endLen;
  }
}
|
|
120
|
+
/**
 * Collect the run of whitespace beginning at `start`.
 *
 * Space, tab, line feed and carriage return are all consumed, so text with
 * CRLF line endings is skipped the same way as text with LF line endings.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index at which to begin.
 * @returns {string} The whitespace found (possibly empty); callers advance
 *   by its length.
 */
function eat_whitespaces(latex, start) {
  let end = start;
  while (end < latex.length && " \t\n\r".includes(latex[end])) {
    end += 1;
  }
  return latex.substring(start, end);
}
|
|
127
|
+
/**
 * Read the run of letters beginning at `start` (the name of a command whose
 * backslash has already been consumed by the caller).
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index of the first candidate letter.
 * @returns {string} The letters found; empty if `start` is not a letter.
 */
function eat_command_name(latex, start) {
  let end = start;
  for (; end < latex.length; end++) {
    if (!isalpha(latex[end])) {
      break;
    }
  }
  return latex.substring(start, end);
}
|
|
134
|
+
/**
 * Recognise a delimiter at `start`.
 *
 * Accepted forms are the single characters ( ) [ ] | and the commands
 * \{ \} \lfloor \rfloor \lceil \rceil \langle \rangle.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index at which a delimiter is expected.
 * @returns {string|null} The delimiter text, or null if none is present.
 */
function eat_parenthesis(latex, start) {
  const first = latex[start];
  if ("()[]|".includes(first)) {
    return first;
  }
  const multiCharDelimiters = [
    "\\{",
    "\\}",
    "\\lfloor",
    "\\rfloor",
    "\\lceil",
    "\\rceil",
    "\\langle",
    "\\rangle",
  ];
  for (const delimiter of multiCharDelimiters) {
    const stop = start + delimiter.length;
    if (stop <= latex.length && latex.substring(start, stop) === delimiter) {
      return delimiter;
    }
  }
  return null;
}
|
|
149
|
+
/**
 * Count the consecutive prime marks (') beginning at `start`.
 *
 * @param {string} latex - Source text being scanned.
 * @param {number} start - Index at which to begin counting.
 * @returns {number} Number of prime marks found (0 if none).
 */
function eat_primes(latex, start) {
  let count = 0;
  while (start + count < latex.length && latex[start + count] === "'") {
    count++;
  }
  return count;
}
|
|
156
|
+
function latexNodeToTexNode(node) {
|
|
157
|
+
try {
|
|
8
158
|
let res = {};
|
|
9
159
|
switch (node.type) {
|
|
160
|
+
case "ordgroup":
|
|
161
|
+
res.type = "ordgroup";
|
|
162
|
+
res.args = node.args.map((n) => latexNodeToTexNode(n));
|
|
163
|
+
if (res.args.length === 1) {
|
|
164
|
+
res = res.args[0];
|
|
165
|
+
}
|
|
166
|
+
break;
|
|
167
|
+
case "empty":
|
|
168
|
+
res.type = "empty";
|
|
169
|
+
res.content = "";
|
|
170
|
+
break;
|
|
10
171
|
case "atom":
|
|
11
172
|
res.type = "atom";
|
|
12
|
-
res.content = node.
|
|
13
|
-
if (node.text === "\\{" || node.text === "\\}") {
|
|
14
|
-
res.content = node.text.substring(1);
|
|
15
|
-
} else if (node.text.startsWith("\\")) {
|
|
16
|
-
res.type = "symbol";
|
|
17
|
-
}
|
|
173
|
+
res.content = node.content;
|
|
18
174
|
break;
|
|
19
|
-
case "
|
|
20
|
-
case "
|
|
21
|
-
case "
|
|
22
|
-
case "
|
|
175
|
+
case "token":
|
|
176
|
+
case "token-letter-var":
|
|
177
|
+
case "token-number":
|
|
178
|
+
case "token-operator":
|
|
179
|
+
case "token-parenthesis":
|
|
23
180
|
res.type = "symbol";
|
|
24
|
-
res.content = node.
|
|
25
|
-
if (node.type === "op") {
|
|
26
|
-
res.content = node["name"];
|
|
27
|
-
} else if (node.type === "cr") {
|
|
28
|
-
res.content = "\\\\";
|
|
29
|
-
}
|
|
30
|
-
break;
|
|
31
|
-
case "genfrac":
|
|
32
|
-
res.type = "binaryFunc";
|
|
33
|
-
if (node["leftDelim"] === "(" && node["rightDelim"] === ")") {
|
|
34
|
-
res.content = "\\binom";
|
|
35
|
-
} else {
|
|
36
|
-
res.content = "\\frac";
|
|
37
|
-
}
|
|
38
|
-
res.args = [
|
|
39
|
-
katexNodeToTexNode(node["numer"]),
|
|
40
|
-
katexNodeToTexNode(node["denom"])
|
|
41
|
-
];
|
|
181
|
+
res.content = node.content;
|
|
42
182
|
break;
|
|
43
183
|
case "supsub":
|
|
44
184
|
res.type = "supsub";
|
|
45
185
|
res.irregularData = {};
|
|
46
186
|
if (node["base"]) {
|
|
47
|
-
res.irregularData.base =
|
|
187
|
+
res.irregularData.base = latexNodeToTexNode(node["base"]);
|
|
48
188
|
}
|
|
49
189
|
if (node["sup"]) {
|
|
50
|
-
res.irregularData.sup =
|
|
190
|
+
res.irregularData.sup = latexNodeToTexNode(node["sup"]);
|
|
51
191
|
}
|
|
52
192
|
if (node["sub"]) {
|
|
53
|
-
res.irregularData.sub =
|
|
193
|
+
res.irregularData.sub = latexNodeToTexNode(node["sub"]);
|
|
54
194
|
}
|
|
55
195
|
break;
|
|
56
|
-
case "
|
|
57
|
-
case "ordgroup":
|
|
58
|
-
res.type = "ordgroup";
|
|
59
|
-
res.args = node.body.map((n) => katexNodeToTexNode(n));
|
|
60
|
-
if (res.args.length === 1) {
|
|
61
|
-
res = res.args[0];
|
|
62
|
-
}
|
|
63
|
-
break;
|
|
64
|
-
case "leftright": {
|
|
65
|
-
const body = katexNodeToTexNode({
|
|
66
|
-
type: "ordgroup",
|
|
67
|
-
mode: "math",
|
|
68
|
-
body: node.body
|
|
69
|
-
});
|
|
196
|
+
case "leftright":
|
|
70
197
|
res.type = "leftright";
|
|
198
|
+
const body = latexNodeToTexNode(node.body);
|
|
71
199
|
let left = node["left"];
|
|
72
200
|
if (left === "\\{") {
|
|
73
201
|
left = "{";
|
|
@@ -83,217 +211,434 @@ function katexNodeToTexNode(node) {
|
|
|
83
211
|
{ type: is_atom(right) ? "atom" : "symbol", content: right }
|
|
84
212
|
];
|
|
85
213
|
break;
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
res.args = [
|
|
92
|
-
katexNodeToTexNode(node["body"])
|
|
93
|
-
];
|
|
94
|
-
break;
|
|
95
|
-
case "accent": {
|
|
96
|
-
res.type = "unaryFunc";
|
|
97
|
-
res.content = node["label"];
|
|
98
|
-
res.args = [
|
|
99
|
-
katexNodeToTexNode(node["base"])
|
|
100
|
-
];
|
|
101
|
-
break;
|
|
102
|
-
}
|
|
103
|
-
case "sqrt":
|
|
104
|
-
if (node["index"]) {
|
|
105
|
-
res.irregularData = katexNodeToTexNode(node["index"]);
|
|
106
|
-
}
|
|
107
|
-
case "font":
|
|
108
|
-
case "operatorname":
|
|
109
|
-
res.type = "unaryFunc";
|
|
110
|
-
res.content = "\\" + node.type;
|
|
111
|
-
if (node.type === "font") {
|
|
112
|
-
res.content = "\\" + node["font"];
|
|
214
|
+
case "beginend":
|
|
215
|
+
if (node.content?.startsWith("align")) {
|
|
216
|
+
res.type = "align";
|
|
217
|
+
} else {
|
|
218
|
+
res.type = "matrix";
|
|
113
219
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
220
|
+
res.content = node.content;
|
|
221
|
+
res.irregularData = node.body.map((row) => {
|
|
222
|
+
return row.map((n) => latexNodeToTexNode(n));
|
|
223
|
+
});
|
|
224
|
+
break;
|
|
225
|
+
case "command":
|
|
226
|
+
const num_args = get_command_param_num(node.content);
|
|
227
|
+
res.content = "\\" + node.content;
|
|
228
|
+
if (num_args === 0) {
|
|
229
|
+
res.type = "symbol";
|
|
230
|
+
} else if (num_args === 1) {
|
|
231
|
+
res.type = "unaryFunc";
|
|
120
232
|
res.args = [
|
|
121
|
-
|
|
233
|
+
latexNodeToTexNode(node.arg1)
|
|
122
234
|
];
|
|
123
|
-
|
|
235
|
+
if (node.content === "sqrt") {
|
|
236
|
+
if (node.exponent) {
|
|
237
|
+
res.irregularData = latexNodeToTexNode(node.exponent);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
} else if (num_args === 2) {
|
|
241
|
+
res.type = "binaryFunc";
|
|
124
242
|
res.args = [
|
|
125
|
-
|
|
243
|
+
latexNodeToTexNode(node.arg1),
|
|
244
|
+
latexNodeToTexNode(node.arg2)
|
|
126
245
|
];
|
|
127
|
-
}
|
|
128
|
-
break;
|
|
129
|
-
case "horizBrace":
|
|
130
|
-
res.type = "unaryFunc";
|
|
131
|
-
res.content = node["label"];
|
|
132
|
-
res.args = [
|
|
133
|
-
katexNodeToTexNode(node["base"])
|
|
134
|
-
];
|
|
135
|
-
break;
|
|
136
|
-
case "array":
|
|
137
|
-
if (node["colSeparationType"] === "align") {
|
|
138
|
-
res.type = "align";
|
|
139
246
|
} else {
|
|
140
|
-
|
|
247
|
+
throw new LatexNodeToTexNodeError("Invalid number of arguments", node);
|
|
141
248
|
}
|
|
142
|
-
res.irregularData = node.body.map((row) => {
|
|
143
|
-
return row.map((cell) => {
|
|
144
|
-
if (cell.type !== "styling" || cell.body.length !== 1) {
|
|
145
|
-
throw new KatexNodeToTexNodeError("Expecting cell.type==='\\styling' and cell.body.length===1", cell);
|
|
146
|
-
}
|
|
147
|
-
return katexNodeToTexNode(cell.body[0]);
|
|
148
|
-
});
|
|
149
|
-
});
|
|
150
249
|
break;
|
|
151
|
-
case "text":
|
|
250
|
+
case "text":
|
|
152
251
|
res.type = "text";
|
|
153
|
-
|
|
154
|
-
node.body.forEach((n) => {
|
|
155
|
-
if (n.mode !== "text") {
|
|
156
|
-
throw new KatexNodeToTexNodeError("Expecting node.mode==='text'", node);
|
|
157
|
-
}
|
|
158
|
-
str += n.text;
|
|
159
|
-
});
|
|
160
|
-
res.content = str;
|
|
252
|
+
res.content = node.content;
|
|
161
253
|
break;
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
254
|
+
case "comment":
|
|
255
|
+
res.type = "comment";
|
|
256
|
+
res.content = node.content;
|
|
257
|
+
break;
|
|
258
|
+
case "whitespace":
|
|
165
259
|
res.type = "empty";
|
|
166
|
-
res.content = " ";
|
|
167
260
|
break;
|
|
168
|
-
case "
|
|
169
|
-
|
|
170
|
-
|
|
261
|
+
case "newline":
|
|
262
|
+
res.type = "newline";
|
|
263
|
+
res.content = "\n";
|
|
264
|
+
break;
|
|
265
|
+
case "control":
|
|
266
|
+
if (node.content === "\\\\") {
|
|
171
267
|
res.type = "symbol";
|
|
172
|
-
res.content =
|
|
268
|
+
res.content = node.content;
|
|
173
269
|
break;
|
|
174
270
|
} else {
|
|
271
|
+
throw new LatexNodeToTexNodeError(`Unknown control sequence: ${node.content}`, node);
|
|
175
272
|
}
|
|
176
|
-
}
|
|
177
|
-
case "color":
|
|
178
|
-
if (Array.isArray(node.body) && node.body.length === 1) {
|
|
179
|
-
const sub_body = node.body[0];
|
|
180
|
-
if (sub_body.type === "text") {
|
|
181
|
-
res.type = "unknownMacro";
|
|
182
|
-
const joined = sub_body.body.map((n) => n.text).join("");
|
|
183
|
-
if (/^\\[a-zA-Z]+$/.test(joined)) {
|
|
184
|
-
res.content = joined.substring(1);
|
|
185
|
-
break;
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
throw new KatexNodeToTexNodeError(`Unknown error type in parsed result:`, node);
|
|
190
|
-
case "comment":
|
|
191
|
-
res.type = "comment";
|
|
192
|
-
res.content = node.text;
|
|
193
273
|
break;
|
|
194
274
|
default:
|
|
195
|
-
throw new
|
|
196
|
-
break;
|
|
275
|
+
throw new LatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
|
|
197
276
|
}
|
|
198
277
|
return res;
|
|
199
278
|
} catch (e) {
|
|
200
279
|
throw e;
|
|
201
280
|
}
|
|
202
281
|
}
|
|
203
|
-
function
|
|
204
|
-
const
|
|
205
|
-
const
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
282
|
+
/**
 * Parse a TeX math string into the writer's node tree.
 *
 * Custom macros are expanded textually before parsing:
 *  - a key that is a control word (backslash + letters, e.g. `\R`) is only
 *    replaced where it is not followed by another letter, so it does not
 *    corrupt longer commands that share its prefix (e.g. `\Rightarrow`);
 *  - replacement text is inserted literally, so `$` sequences in it are not
 *    interpreted as replacement patterns.
 *
 * @param {string} tex - TeX source to parse.
 * @param {Object<string, string>} [customTexMacros={}] - Map from macro text
 *   to its expansion; may be omitted.
 * @returns {object} Node tree produced by latexNodeToTexNode.
 */
function parseTex(tex, customTexMacros = {}) {
  let expanded = tex;
  for (const [macro, replacement] of Object.entries(customTexMacros ?? {})) {
    const escaped = macro.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const isControlWord = /^\\[a-zA-Z]+$/.test(macro);
    const pattern = new RegExp(isControlWord ? `${escaped}(?![a-zA-Z])` : escaped, "g");
    // A function replacer keeps `$&`, `$1`, ... in the expansion literal.
    expanded = expanded.replace(pattern, () => replacement);
  }
  const parser = new LatexParser();
  return latexNodeToTexNode(parser.parse(expanded));
}
|
|
290
|
+
// Commands the parser treats as taking exactly one argument
// (names are stored without the leading backslash).
var UNARY_COMMANDS = [
  "sqrt", "text",
  "arccos", "arcsin", "arctan", "arg",
  "bar", "bold", "boldsymbol",
  "ddot", "det", "dim", "dot",
  "exp", "gcd", "hat", "ker",
  "mathbb", "mathbf", "mathcal", "mathscr", "mathsf", "mathtt", "mathrm",
  "max", "min", "mod",
  "operatorname", "overbrace", "overline",
  "pmb", "sup", "rm", "tilde",
  "underbrace", "underline",
  "vec", "widehat", "widetilde"
];

// Commands the parser treats as taking exactly two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];

// Node returned for empty input and used as the base of a bare sub/superscript.
var EMPTY_NODE = { type: "empty", content: "" };
|
|
340
|
+
|
|
341
|
+
/**
 * Error raised when the LaTeX source cannot be parsed.
 */
class LatexParserError extends Error {
  // Own property so the class name shows up in error output.
  name = "LatexParserError";

  /**
   * @param {string} message - Description of the parse failure.
   */
  constructor(message) {
    super(message);
  }
}
|
|
347
|
+
|
|
348
|
+
/**
 * Hand-written recursive-descent parser that turns a LaTeX math string into
 * a tree of plain-object nodes (`{ type, content, ... }`).
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - Keep whitespace nodes in the
   *   output of `parse` instead of dropping them.
   * @param {boolean} [newline_sensitive=true] - Keep newline nodes instead of
   *   dropping them.
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parse a complete expression.
   *
   * @param {string} latex - Source text.
   * @returns {object} EMPTY_NODE for no content, the single node when there is
   *   exactly one, otherwise an `ordgroup` node wrapping all of them.
   * @throws {LatexParserError} On `&` outside an alignment or malformed input.
   */
  parse(latex) {
    const results = [];
    let pos = 0;
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    } else if (results.length === 1) {
      return results[0];
    } else {
      return { type: "ordgroup", args: results };
    }
  }

  /**
   * Parse one expression together with any primes, subscript and superscript
   * attached to it.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index at which the expression begins.
   * @returns {[object, number]} The node and the index just past it.
   * @throws {LatexParserError} On a prime following an explicit superscript.
   */
  parseNextExpr(latex, start) {
    let [base, pos] = this.parseNextExprWithoutSupSub(latex, start);
    let sub = null;
    let sup = null;
    let num_prime = eat_primes(latex, pos);
    pos += num_prime;
    if (pos < latex.length && latex[pos] === "_") {
      [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      // Advance only by the primes found here; adding the running total
      // would skip past characters when primes also preceded the subscript.
      const trailing_primes = eat_primes(latex, pos);
      num_prime += trailing_primes;
      pos += trailing_primes;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < latex.length && latex[pos] === "^") {
      [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      if (eat_primes(latex, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }
    if (sub !== null || sup !== null || num_prime > 0) {
      const res = { type: "supsub", base };
      if (sub) {
        res.sub = sub;
      }
      if (num_prime > 0) {
        // Primes become \prime commands placed ahead of any explicit superscript.
        res.sup = { type: "ordgroup", args: [] };
        for (let i = 0; i < num_prime; i++) {
          res.sup.args.push({ type: "command", content: "prime" });
        }
        if (sup) {
          res.sup.args.push(sup);
        }
        if (res.sup.args.length === 1) {
          res.sup = res.sup.args[0];
        }
      } else if (sup) {
        res.sup = sup;
      }
      return [res, pos];
    } else {
      return [base, pos];
    }
  }

  /**
   * Parse one expression without looking for trailing sub/superscripts.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index at which the expression begins.
   * @returns {[object, number]} The node and the index just past it.
   * @throws {LatexParserError} On a trailing backslash or malformed group.
   */
  parseNextExprWithoutSupSub(latex, start) {
    const firstChar = latex[start];
    if (firstChar === "{") {
      const posClosingBracket = find_closing_curly_bracket(latex, start);
      const exprInside = latex.slice(start + 1, posClosingBracket);
      return [this.parse(exprInside), posClosingBracket + 1];
    } else if (firstChar === "\\") {
      if (start + 1 >= latex.length) {
        throw new LatexParserError("Expecting command name after \\");
      }
      const firstTwoChars = latex.slice(start, start + 2);
      if (firstTwoChars === "\\\\") {
        return [{ type: "control", content: "\\\\" }, start + 2];
      } else if (firstTwoChars === "\\{" || firstTwoChars === "\\}") {
        return [{ type: "token-parenthesis", content: firstTwoChars }, start + 2];
      } else if (["\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
        return [{ type: "token", content: firstTwoChars }, start + 2];
      } else if (latex.startsWith("\\begin{", start)) {
        return this.parseBeginEndExpr(latex, start);
      } else if (latex.startsWith("\\left", start) && (start + 5 >= latex.length || !isalpha(latex[start + 5]))) {
        // The letter check keeps commands such as \leftarrow out of this branch.
        return this.parseLeftRightExpr(latex, start);
      } else {
        return this.parseCommandExpr(latex, start);
      }
    } else if (firstChar === "%") {
      let pos = start + 1;
      while (pos < latex.length && latex[pos] !== "\n") {
        pos += 1;
      }
      return [{ type: "comment", content: latex.slice(start + 1, pos) }, pos];
    } else if (isdigit(firstChar)) {
      let pos = start;
      while (pos < latex.length && isdigit(latex[pos])) {
        pos += 1;
      }
      return [{ type: "token-number", content: latex.slice(start, pos) }, pos];
    } else if (isalpha(firstChar)) {
      return [{ type: "token-letter-var", content: firstChar }, start + 1];
    } else if ("+-*/=<>!".includes(firstChar)) {
      return [{ type: "token-operator", content: firstChar }, start + 1];
    } else if (".,;?".includes(firstChar)) {
      return [{ type: "atom", content: firstChar }, start + 1];
    } else if ("()[]".includes(firstChar)) {
      return [{ type: "token-parenthesis", content: firstChar }, start + 1];
    } else if (firstChar === "_") {
      let [sub, pos] = this.parseNextExpr(latex, start + 1);
      let sup = undefined;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === "^") {
      let [sup, pos] = this.parseNextExpr(latex, start + 1);
      let sub = undefined;
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === " " || firstChar === "\t") {
      // Tabs are grouped with spaces so they do not surface as "unknown" nodes.
      let pos = start;
      while (pos < latex.length && (latex[pos] === " " || latex[pos] === "\t")) {
        pos += 1;
      }
      return [{ type: "whitespace", content: latex.slice(start, pos) }, pos];
    } else if (firstChar === "\n") {
      return [{ type: "newline", content: "\n" }, start + 1];
    } else if (firstChar === "\r") {
      // Both CRLF and a lone CR are reported as one "\n" newline node.
      if (start + 1 < latex.length && latex[start + 1] === "\n") {
        return [{ type: "newline", content: "\n" }, start + 2];
      } else {
        return [{ type: "newline", content: "\n" }, start + 1];
      }
    } else if (firstChar === "&") {
      return [{ type: "control", content: "&" }, start + 1];
    } else {
      return [{ type: "unknown", content: firstChar }, start + 1];
    }
  }

  /**
   * Parse a backslash command and the arguments its arity calls for.
   *
   * `\sqrt[n]{x}` stores the bracketed part as `exponent`; `\text{...}`
   * keeps its argument as raw text in a `text` node.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the command's backslash.
   * @returns {[object, number]} The node and the index just past it.
   */
  parseCommandExpr(latex, start) {
    assert(latex[start] === "\\");
    let pos = start + 1;
    const command = eat_command_name(latex, pos);
    pos += command.length;
    const paramNum = get_command_param_num(command);
    if (paramNum === 0) {
      return [{ type: "command", content: command }, pos];
    } else if (paramNum === 1) {
      if (command === "sqrt" && pos < latex.length && latex[pos] === "[") {
        const posLeftSquareBracket = pos;
        const posRightSquareBracket = find_closing_square_bracket(latex, pos);
        const exprInside = latex.slice(posLeftSquareBracket + 1, posRightSquareBracket);
        const exponent = this.parse(exprInside);
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, posRightSquareBracket + 1);
        return [{ type: "command", content: command, arg1, exponent }, newPos];
      } else if (command === "text") {
        assert(latex[pos] === "{");
        const posClosingBracket = find_closing_curly_bracket(latex, pos);
        const text = latex.slice(pos + 1, posClosingBracket);
        return [{ type: "text", content: text }, posClosingBracket + 1];
      } else {
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, pos);
        return [{ type: "command", content: command, arg1 }, newPos];
      }
    } else if (paramNum === 2) {
      const [arg1, pos1] = this.parseNextExprWithoutSupSub(latex, pos);
      const [arg2, pos2] = this.parseNextExprWithoutSupSub(latex, pos1);
      return [{ type: "command", content: command, arg1, arg2 }, pos2];
    } else {
      throw new LatexParserError("Invalid number of parameters");
    }
  }

  /**
   * Parse `\left<delim> ... \right<delim>`.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash of `\left`.
   * @returns {[object, number]} A `leftright` node and the index just past it.
   * @throws {LatexParserError} On a missing/invalid delimiter or no `\right`.
   */
  parseLeftRightExpr(latex, start) {
    assert(latex.slice(start, start + 5) === "\\left");
    let pos = start + "\\left".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(latex, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos += leftDelimiter.length;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(latex, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + "\\right".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\right");
    }
    const rightDelimiter = eat_parenthesis(latex, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos += rightDelimiter.length;
    const exprInside = latex.slice(exprInsideStart, exprInsideEnd);
    const body = this.parse(exprInside);
    const res = { type: "leftright", left: leftDelimiter, right: rightDelimiter, body };
    return [res, pos];
  }

  /**
   * Parse `\begin{env} ... \end{env}`.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash of `\begin`.
   * @returns {[object, number]} A `beginend` node (`content` is the
   *   environment name, `body` the rows from `parseAligned`) and the index
   *   just past the closing `}`.
   * @throws {LatexParserError} On a missing `\end` or mismatched names.
   */
  parseBeginEndExpr(latex, start) {
    assert(latex.slice(start, start + 7) === "\\begin{");
    let pos = start + "\\begin".length;
    const idx = find_closing_curly_bracket(latex, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching } after \\begin{");
    }
    const envName = latex.slice(pos + 1, idx);
    pos = idx + 1;
    pos += eat_whitespaces(latex, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(latex, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + "\\end".length;
    const closingIdx = find_closing_curly_bracket(latex, pos);
    if (closingIdx === -1) {
      throw new LatexParserError("No matching } after \\end{");
    }
    if (latex.slice(pos + 1, closingIdx) !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    const exprInside = latex.slice(exprInsideStart, exprInsideEnd).trimEnd();
    const body = this.parseAligned(exprInside);
    const res = { type: "beginend", content: envName, body };
    return [res, closingIdx + 1];
  }

  /**
   * Split an environment body into rows (separated by `\\`) and cells
   * (separated by `&`).
   *
   * @param {string} latex - Text between `\begin{...}` and `\end{...}`.
   * @returns {object[][]} Rows of cells; every cell is an `ordgroup` node.
   */
  parseAligned(latex) {
    let pos = 0;
    const allRows = [];
    let row = [];
    allRows.push(row);
    let group = { type: "ordgroup", args: [] };
    row.push(group);
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        row = [];
        group = { type: "ordgroup", args: [] };
        row.push(group);
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = { type: "ordgroup", args: [] };
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
|
|
290
|
-
var generateParseTree = katex.__parse;
|
|
291
636
|
|
|
292
|
-
class
|
|
637
|
+
/**
 * Error raised when a parsed LaTeX node cannot be converted; the offending
 * node is kept on the error for inspection.
 */
class LatexNodeToTexNodeError extends Error {
  node;
  name = "LatexNodeToTexNodeError";

  /**
   * @param {string} message - Description of the conversion failure.
   * @param {object} node - The node that could not be converted.
   */
  constructor(message, node) {
    super(message);
    this.node = node;
  }
}
|
|
@@ -320,6 +665,10 @@ var symbolMap = new Map([
|
|
|
320
665
|
["overline", "overline"],
|
|
321
666
|
["underline", "underline"],
|
|
322
667
|
["bar", "macron"],
|
|
668
|
+
["dbinom", "binom"],
|
|
669
|
+
["tbinom", "binom"],
|
|
670
|
+
["dfrac", "frac"],
|
|
671
|
+
["tfrac", "frac"],
|
|
323
672
|
["boldsymbol", "bold"],
|
|
324
673
|
["mathbf", "bold"],
|
|
325
674
|
["mathbb", "bb"],
|
|
@@ -662,7 +1011,7 @@ class TypstWriter {
|
|
|
662
1011
|
this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
|
|
663
1012
|
return;
|
|
664
1013
|
}
|
|
665
|
-
if (
|
|
1014
|
+
if (base.type === "empty") {
|
|
666
1015
|
this.queue.push({ type: "text", content: "" });
|
|
667
1016
|
} else {
|
|
668
1017
|
this.appendWithBracketsIfNeeded(base);
|
|
@@ -756,13 +1105,6 @@ class TypstWriter {
|
|
|
756
1105
|
}, "");
|
|
757
1106
|
if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
|
|
758
1107
|
this.queue.push({ type: "symbol", content: text });
|
|
759
|
-
} else if (text.startsWith("SyMb01-")) {
|
|
760
|
-
const special_symbol = text.substring(7);
|
|
761
|
-
if (special_symbol === "newline") {
|
|
762
|
-
this.queue.push({ type: "newline", content: "\n" });
|
|
763
|
-
return;
|
|
764
|
-
}
|
|
765
|
-
this.queue.push({ type: "symbol", content: "\\" + special_symbol });
|
|
766
1108
|
} else {
|
|
767
1109
|
this.queue.push({ type: "symbol", content: "op" });
|
|
768
1110
|
this.queue.push({ type: "atom", content: "(" });
|
|
@@ -777,6 +1119,9 @@ class TypstWriter {
|
|
|
777
1119
|
this.append(arg0);
|
|
778
1120
|
this.queue.push({ type: "atom", content: ")" });
|
|
779
1121
|
this.insideFunctionDepth--;
|
|
1122
|
+
} else if (node.type === "newline") {
|
|
1123
|
+
this.queue.push({ type: "newline", content: "\n" });
|
|
1124
|
+
return;
|
|
780
1125
|
} else if (node.type === "align") {
|
|
781
1126
|
const matrix = node.irregularData;
|
|
782
1127
|
matrix.forEach((row, i) => {
|