tex2typst 0.0.19 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/index.js +598 -179
- package/dist/parser.d.ts +18 -5
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +11 -6
- package/package.json +1 -1
- package/src/map.ts +4 -0
- package/src/parser.ts +702 -207
- package/src/types.ts +12 -6
- package/src/writer.ts +21 -5
- package/tsconfig.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,73 +1,201 @@
|
|
|
1
1
|
// src/parser.ts
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
/**
 * Parser-internal invariant check.
 *
 * @param {*} condition - Value that is expected to be truthy.
 * @param {string} [message=""] - Message attached to the thrown error.
 * @throws {LatexParserError} When `condition` is falsy.
 */
function assert(condition, message = "") {
  if (condition) {
    return;
  }
  throw new LatexParserError(message);
}
|
|
7
|
+
/**
 * Look up how many arguments a LaTeX command consumes.
 *
 * @param {string} command - Command name without the leading backslash.
 * @returns {number} 1 if listed in UNARY_COMMANDS, 2 if listed in
 *   BINARY_COMMANDS, otherwise 0.
 */
function get_command_param_num(command) {
  if (UNARY_COMMANDS.includes(command)) {
    return 1;
  }
  return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
|
|
16
|
+
/**
 * Find the `}` that closes the `{` located at `start`.
 *
 * Nested groups are balanced. A backslash always escapes exactly the next
 * character, so `\{` and `\}` are not counted as group delimiters, and the
 * second backslash of `\\` can no longer be mistaken for the start of an
 * escaped brace (e.g. in `{a \\}`).
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`.
 * @throws {LatexParserError} If `latex[start]` is not `{` or no matching
 *   brace exists.
 */
function find_closing_curly_bracket(latex, start) {
  assert(latex[start] === "{");
  let depth = 1;
  let pos = start + 1;
  while (depth > 0) {
    if (pos >= latex.length) {
      throw new LatexParserError("Unmatched curly brackets");
    }
    const ch = latex[pos];
    if (ch === "\\") {
      // Skip the backslash together with the character it escapes.
      pos += 2;
      continue;
    }
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
    }
    pos += 1;
  }
  // `pos` was advanced past the closing brace before the loop ended.
  return pos - 1;
}
|
|
37
|
+
/**
 * Find the `]` that closes the `[` located at `start`, balancing nested
 * square brackets.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `[`.
 * @returns {number} Index of the matching `]`.
 * @throws {LatexParserError} If `latex[start]` is not `[` or the bracket is
 *   never closed.
 */
function find_closing_square_bracket(latex, start) {
  assert(latex[start] === "[");
  let depth = 1;
  for (let i = start + 1; i < latex.length; i += 1) {
    const ch = latex[i];
    if (ch === "[") {
      depth += 1;
    } else if (ch === "]") {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
  }
  throw new LatexParserError("Unmatched square brackets");
}
|
|
54
|
+
/**
 * Test whether `char` is exactly one ASCII letter (a-z or A-Z).
 *
 * The previous substring-based check also accepted the empty string and
 * multi-character runs such as "ab"; the anchored pattern accepts a single
 * letter only. Non-string values are coerced to a string by `test`, so
 * `undefined` (reading past the end of the input) yields false.
 *
 * @param {string} char - Character to classify.
 * @returns {boolean} True only for a single ASCII letter.
 */
function isalpha(char) {
  return /^[A-Za-z]$/.test(char);
}
|
|
57
|
+
/**
 * Test whether `char` is exactly one ASCII digit (0-9).
 *
 * The previous substring-based check also accepted the empty string and
 * multi-character runs such as "12"; the anchored pattern accepts a single
 * digit only. Non-string values are coerced to a string by `test`, so
 * `undefined` (reading past the end of the input) yields false.
 *
 * @param {string} char - Character to classify.
 * @returns {boolean} True only for a single ASCII digit.
 */
function isdigit(char) {
  return /^[0-9]$/.test(char);
}
|
|
60
|
+
/**
 * Locate the next occurrence of `\command_name` at or after `start` that is
 * a complete command, i.e. not immediately followed by another letter
 * (so searching for "left" does not match `\leftarrow`).
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index at which the search begins.
 * @param {string} command_name - Command name without the backslash.
 * @returns {number} Index of the backslash of the match, or -1 if none.
 */
function find_command(latex, start, command_name) {
  const needle = "\\" + command_name;
  let from = start;
  while (from < latex.length) {
    const hit = latex.indexOf(needle, from);
    if (hit === -1) {
      return -1;
    }
    const after = hit + needle.length;
    const isPrefixOfLongerName = after < latex.length && isalpha(latex[after]);
    if (!isPrefixOfLongerName) {
      return hit;
    }
    // Matched only the prefix of a longer command; keep searching after it.
    from = after;
  }
  return -1;
}
|
|
76
|
+
/**
 * Starting just inside an already-open `\left`, find the `\right` that
 * closes it, balancing any nested `\left` ... `\right` pairs.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index just after the opening `\left` delimiter.
 * @returns {number} Index of the backslash of the matching `\right`, or -1
 *   if there is none.
 */
function find_closing_right_command(latex, start) {
  const leftLen = "\\left".length;
  const rightLen = "\\right".length;
  let depth = 1;
  let cursor = start;
  for (;;) {
    if (cursor >= latex.length) {
      return -1;
    }
    const nextLeft = find_command(latex, cursor, "left");
    const nextRight = find_command(latex, cursor, "right");
    if (nextRight === -1) {
      return -1;
    }
    const opensFirst = nextLeft !== -1 && nextLeft <= nextRight;
    if (opensFirst) {
      depth += 1;
      cursor = nextLeft + leftLen;
      continue;
    }
    depth -= 1;
    if (depth === 0) {
      return nextRight;
    }
    cursor = nextRight + rightLen;
  }
}
|
|
98
|
+
/**
 * Starting just inside an already-open `\begin{...}`, find the `\end` that
 * closes it, balancing any nested `\begin` ... `\end` pairs.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index inside the environment body.
 * @returns {number} Index of the backslash of the matching `\end`, or -1 if
 *   there is none.
 */
function find_closing_end_command(latex, start) {
  const beginLen = "\\begin".length;
  const endLen = "\\end".length;
  let depth = 1;
  let cursor = start;
  for (;;) {
    if (cursor >= latex.length) {
      return -1;
    }
    const nextBegin = find_command(latex, cursor, "begin");
    const nextEnd = find_command(latex, cursor, "end");
    if (nextEnd === -1) {
      return -1;
    }
    const opensFirst = nextBegin !== -1 && nextBegin <= nextEnd;
    if (opensFirst) {
      depth += 1;
      cursor = nextBegin + beginLen;
      continue;
    }
    depth -= 1;
    if (depth === 0) {
      return nextEnd;
    }
    cursor = nextEnd + endLen;
  }
}
|
|
120
|
+
/**
 * Collect the run of spaces, tabs and line feeds beginning at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index at which to begin.
 * @returns {string} The consumed whitespace (possibly empty).
 */
function eat_whitespaces(latex, start) {
  let end = start;
  while (end < latex.length) {
    const ch = latex[end];
    if (ch !== " " && ch !== "\t" && ch !== "\n") {
      break;
    }
    end += 1;
  }
  return latex.substring(start, end);
}
|
|
127
|
+
/**
 * Collect the run of ASCII letters beginning at `start` (the name part of a
 * `\command`).
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the first character after the backslash.
 * @returns {string} The letters consumed (possibly empty).
 */
function eat_command_name(latex, start) {
  let end = start;
  for (; end < latex.length; end += 1) {
    if (!isalpha(latex[end])) {
      break;
    }
  }
  return latex.substring(start, end);
}
|
|
134
|
+
/**
 * Recognise a delimiter token at `start`.
 *
 * Accepted delimiters: `(`, `)`, `[`, `]`, `|`, `\{`, `\}`, `\lfloor`,
 * `\rfloor`, `\lceil`, `\rceil`, `\langle`, `\rangle`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index at which the delimiter should begin.
 * @returns {string|null} The delimiter text, or null if none matches.
 */
function eat_parenthesis(latex, start) {
  if ("()[]|".includes(latex[start])) {
    return latex[start];
  }
  const multiCharDelimiters = [
    "\\{", "\\}",
    "\\lfloor", "\\rfloor",
    "\\lceil", "\\rceil",
    "\\langle", "\\rangle"
  ];
  for (const delimiter of multiCharDelimiters) {
    const end = start + delimiter.length;
    if (end - 1 < latex.length && latex.substring(start, end) === delimiter) {
      return delimiter;
    }
  }
  return null;
}
|
|
149
|
+
/**
 * Count the consecutive apostrophes (prime marks) beginning at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index at which to begin counting.
 * @returns {number} Number of `'` characters found.
 */
function eat_primes(latex, start) {
  let count = 0;
  while (start + count < latex.length && latex[start + count] === "'") {
    count += 1;
  }
  return count;
}
|
|
156
|
+
function latexNodeToTexNode(node) {
|
|
157
|
+
try {
|
|
8
158
|
let res = {};
|
|
9
159
|
switch (node.type) {
|
|
160
|
+
case "ordgroup":
|
|
161
|
+
res.type = "ordgroup";
|
|
162
|
+
res.args = node.args.map((n) => latexNodeToTexNode(n));
|
|
163
|
+
if (res.args.length === 1) {
|
|
164
|
+
res = res.args[0];
|
|
165
|
+
}
|
|
166
|
+
break;
|
|
167
|
+
case "empty":
|
|
168
|
+
res.type = "empty";
|
|
169
|
+
res.content = "";
|
|
170
|
+
break;
|
|
10
171
|
case "atom":
|
|
11
172
|
res.type = "atom";
|
|
12
|
-
res.content = node.
|
|
13
|
-
if (node.text === "\\{" || node.text === "\\}") {
|
|
14
|
-
res.content = node.text.substring(1);
|
|
15
|
-
} else if (node.text.startsWith("\\")) {
|
|
16
|
-
res.type = "symbol";
|
|
17
|
-
}
|
|
173
|
+
res.content = node.content;
|
|
18
174
|
break;
|
|
19
|
-
case "
|
|
20
|
-
case "
|
|
21
|
-
case "
|
|
22
|
-
case "
|
|
175
|
+
case "token":
|
|
176
|
+
case "token-letter-var":
|
|
177
|
+
case "token-number":
|
|
178
|
+
case "token-operator":
|
|
179
|
+
case "token-parenthesis":
|
|
23
180
|
res.type = "symbol";
|
|
24
|
-
res.content = node.
|
|
25
|
-
if (node.type === "op") {
|
|
26
|
-
res.content = node["name"];
|
|
27
|
-
} else if (node.type === "cr") {
|
|
28
|
-
res.content = "\\\\";
|
|
29
|
-
}
|
|
30
|
-
break;
|
|
31
|
-
case "genfrac":
|
|
32
|
-
res.type = "binaryFunc";
|
|
33
|
-
if (node["leftDelim"] === "(" && node["rightDelim"] === ")") {
|
|
34
|
-
res.content = "\\binom";
|
|
35
|
-
} else {
|
|
36
|
-
res.content = "\\frac";
|
|
37
|
-
}
|
|
38
|
-
res.args = [
|
|
39
|
-
katexNodeToTexNode(node["numer"]),
|
|
40
|
-
katexNodeToTexNode(node["denom"])
|
|
41
|
-
];
|
|
181
|
+
res.content = node.content;
|
|
42
182
|
break;
|
|
43
183
|
case "supsub":
|
|
44
184
|
res.type = "supsub";
|
|
45
185
|
res.irregularData = {};
|
|
46
186
|
if (node["base"]) {
|
|
47
|
-
res.irregularData.base =
|
|
187
|
+
res.irregularData.base = latexNodeToTexNode(node["base"]);
|
|
48
188
|
}
|
|
49
189
|
if (node["sup"]) {
|
|
50
|
-
res.irregularData.sup =
|
|
190
|
+
res.irregularData.sup = latexNodeToTexNode(node["sup"]);
|
|
51
191
|
}
|
|
52
192
|
if (node["sub"]) {
|
|
53
|
-
res.irregularData.sub =
|
|
193
|
+
res.irregularData.sub = latexNodeToTexNode(node["sub"]);
|
|
54
194
|
}
|
|
55
195
|
break;
|
|
56
|
-
case "
|
|
57
|
-
case "ordgroup":
|
|
58
|
-
res.type = "ordgroup";
|
|
59
|
-
res.args = node.body.map((n) => katexNodeToTexNode(n));
|
|
60
|
-
if (res.args.length === 1) {
|
|
61
|
-
res = res.args[0];
|
|
62
|
-
}
|
|
63
|
-
break;
|
|
64
|
-
case "leftright": {
|
|
65
|
-
const body = katexNodeToTexNode({
|
|
66
|
-
type: "ordgroup",
|
|
67
|
-
mode: "math",
|
|
68
|
-
body: node.body
|
|
69
|
-
});
|
|
196
|
+
case "leftright":
|
|
70
197
|
res.type = "leftright";
|
|
198
|
+
const body = latexNodeToTexNode(node.body);
|
|
71
199
|
let left = node["left"];
|
|
72
200
|
if (left === "\\{") {
|
|
73
201
|
left = "{";
|
|
@@ -83,113 +211,68 @@ function katexNodeToTexNode(node) {
|
|
|
83
211
|
{ type: is_atom(right) ? "atom" : "symbol", content: right }
|
|
84
212
|
];
|
|
85
213
|
break;
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
res.args = [
|
|
92
|
-
katexNodeToTexNode(node["body"])
|
|
93
|
-
];
|
|
94
|
-
break;
|
|
95
|
-
case "accent": {
|
|
96
|
-
res.type = "unaryFunc";
|
|
97
|
-
res.content = node["label"];
|
|
98
|
-
res.args = [
|
|
99
|
-
katexNodeToTexNode(node["base"])
|
|
100
|
-
];
|
|
101
|
-
break;
|
|
102
|
-
}
|
|
103
|
-
case "sqrt":
|
|
104
|
-
if (node["index"]) {
|
|
105
|
-
res.irregularData = katexNodeToTexNode(node["index"]);
|
|
106
|
-
}
|
|
107
|
-
case "font":
|
|
108
|
-
case "operatorname":
|
|
109
|
-
res.type = "unaryFunc";
|
|
110
|
-
res.content = "\\" + node.type;
|
|
111
|
-
if (node.type === "font") {
|
|
112
|
-
res.content = "\\" + node["font"];
|
|
214
|
+
case "beginend":
|
|
215
|
+
if (node.content?.startsWith("align")) {
|
|
216
|
+
res.type = "align";
|
|
217
|
+
} else {
|
|
218
|
+
res.type = "matrix";
|
|
113
219
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
220
|
+
res.content = node.content;
|
|
221
|
+
res.irregularData = node.body.map((row) => {
|
|
222
|
+
return row.map((n) => latexNodeToTexNode(n));
|
|
223
|
+
});
|
|
224
|
+
break;
|
|
225
|
+
case "command":
|
|
226
|
+
const num_args = get_command_param_num(node.content);
|
|
227
|
+
res.content = "\\" + node.content;
|
|
228
|
+
if (num_args === 0) {
|
|
229
|
+
res.type = "symbol";
|
|
230
|
+
} else if (num_args === 1) {
|
|
231
|
+
res.type = "unaryFunc";
|
|
120
232
|
res.args = [
|
|
121
|
-
|
|
233
|
+
latexNodeToTexNode(node.arg1)
|
|
122
234
|
];
|
|
123
|
-
|
|
235
|
+
if (node.content === "sqrt") {
|
|
236
|
+
if (node.exponent) {
|
|
237
|
+
res.irregularData = latexNodeToTexNode(node.exponent);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
} else if (num_args === 2) {
|
|
241
|
+
res.type = "binaryFunc";
|
|
124
242
|
res.args = [
|
|
125
|
-
|
|
243
|
+
latexNodeToTexNode(node.arg1),
|
|
244
|
+
latexNodeToTexNode(node.arg2)
|
|
126
245
|
];
|
|
127
|
-
}
|
|
128
|
-
break;
|
|
129
|
-
case "horizBrace":
|
|
130
|
-
res.type = "unaryFunc";
|
|
131
|
-
res.content = node["label"];
|
|
132
|
-
res.args = [
|
|
133
|
-
katexNodeToTexNode(node["base"])
|
|
134
|
-
];
|
|
135
|
-
break;
|
|
136
|
-
case "array":
|
|
137
|
-
if (node["colSeparationType"] === "align") {
|
|
138
|
-
res.type = "align";
|
|
139
246
|
} else {
|
|
140
|
-
|
|
247
|
+
throw new LatexNodeToTexNodeError("Invalid number of arguments", node);
|
|
141
248
|
}
|
|
142
|
-
res.irregularData = node.body.map((row) => {
|
|
143
|
-
return row.map((cell) => {
|
|
144
|
-
if (cell.type !== "styling" || cell.body.length !== 1) {
|
|
145
|
-
throw new KatexNodeToTexNodeError("Expecting cell.type==='\\styling' and cell.body.length===1", cell);
|
|
146
|
-
}
|
|
147
|
-
return katexNodeToTexNode(cell.body[0]);
|
|
148
|
-
});
|
|
149
|
-
});
|
|
150
249
|
break;
|
|
151
|
-
case "text":
|
|
250
|
+
case "text":
|
|
152
251
|
res.type = "text";
|
|
153
|
-
|
|
154
|
-
node.body.forEach((n) => {
|
|
155
|
-
if (n.mode !== "text") {
|
|
156
|
-
throw new KatexNodeToTexNodeError("Expecting node.mode==='text'", node);
|
|
157
|
-
}
|
|
158
|
-
str += n.text;
|
|
159
|
-
});
|
|
160
|
-
res.content = str;
|
|
252
|
+
res.content = node.content;
|
|
161
253
|
break;
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
254
|
+
case "comment":
|
|
255
|
+
res.type = "comment";
|
|
256
|
+
res.content = node.content;
|
|
257
|
+
break;
|
|
258
|
+
case "whitespace":
|
|
165
259
|
res.type = "empty";
|
|
166
|
-
res.content = " ";
|
|
167
260
|
break;
|
|
168
|
-
case "
|
|
169
|
-
|
|
170
|
-
|
|
261
|
+
case "newline":
|
|
262
|
+
res.type = "newline";
|
|
263
|
+
res.content = "\n";
|
|
264
|
+
break;
|
|
265
|
+
case "control":
|
|
266
|
+
if (node.content === "\\\\") {
|
|
171
267
|
res.type = "symbol";
|
|
172
|
-
res.content =
|
|
268
|
+
res.content = node.content;
|
|
173
269
|
break;
|
|
174
270
|
} else {
|
|
271
|
+
throw new LatexNodeToTexNodeError(`Unknown control sequence: ${node.content}`, node);
|
|
175
272
|
}
|
|
176
|
-
}
|
|
177
|
-
case "color":
|
|
178
|
-
if (Array.isArray(node.body) && node.body.length === 1) {
|
|
179
|
-
const sub_body = node.body[0];
|
|
180
|
-
if (sub_body.type === "text") {
|
|
181
|
-
res.type = "unknownMacro";
|
|
182
|
-
const joined = sub_body.body.map((n) => n.text).join("");
|
|
183
|
-
if (/^\\[a-zA-Z]+$/.test(joined)) {
|
|
184
|
-
res.content = joined.substring(1);
|
|
185
|
-
break;
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
throw new KatexNodeToTexNodeError(`Unknown error type in parsed result:`, node);
|
|
190
|
-
default:
|
|
191
|
-
throw new KatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
|
|
192
273
|
break;
|
|
274
|
+
default:
|
|
275
|
+
throw new LatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
|
|
193
276
|
}
|
|
194
277
|
return res;
|
|
195
278
|
} catch (e) {
|
|
@@ -197,46 +280,365 @@ function katexNodeToTexNode(node) {
|
|
|
197
280
|
}
|
|
198
281
|
}
|
|
199
282
|
/**
 * Parse a LaTeX math string into the TexNode tree consumed by the writer.
 *
 * Custom macros are applied first as plain textual substitutions: every
 * occurrence of each key is replaced by its value before parsing.
 *
 * @param {string} tex - LaTeX source.
 * @param {Object<string, string>} [customTexMacros] - Map from macro text
 *   (e.g. "\\R") to its replacement. May be omitted or null.
 * @returns {object} Root TexNode produced by `latexNodeToTexNode`.
 */
function parseTex(tex, customTexMacros) {
  let expanded = tex;
  // `?? {}` keeps a missing macro table from crashing Object.entries.
  for (const [macro, replacement] of Object.entries(customTexMacros ?? {})) {
    // A replacer function inserts the text verbatim; a string replacement
    // would interpret `$&`, `$$`, etc. as special patterns.
    expanded = expanded.replaceAll(macro, () => replacement);
  }
  const parser = new LatexParser();
  const node = parser.parse(expanded);
  return latexNodeToTexNode(node);
}
|
|
233
|
-
var
|
|
290
|
+
// Commands that consume exactly one argument (names without the backslash).
var UNARY_COMMANDS = [
  "sqrt", "text",
  "arccos", "arcsin", "arctan", "arg",
  "bar", "bold", "boldsymbol",
  "ddot", "det", "dim", "dot",
  "exp", "gcd", "hat", "ker",
  "mathbb", "mathbf", "mathcal", "mathscr", "mathsf", "mathtt", "mathrm",
  "max", "min", "mod",
  "operatorname", "overbrace", "overline",
  "pmb", "sup", "rm", "tilde",
  "underbrace", "underline",
  "vec", "widehat", "widetilde"
];
// Commands that consume exactly two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];
// Shared placeholder node for empty input or a missing sub/superscript base.
var EMPTY_NODE = { type: "empty", content: "" };
|
|
234
340
|
|
|
235
|
-
class
|
|
341
|
+
/**
 * Error raised for malformed LaTeX input encountered while parsing.
 */
class LatexParserError extends Error {
  // Own property so the error reports its class name instead of "Error".
  name = "LatexParserError";

  /**
   * @param {string} message - Description of the parse failure.
   */
  constructor(message) {
    super(message);
  }
}
|
|
347
|
+
|
|
348
|
+
/**
 * Hand-written recursive-descent parser that turns a LaTeX math string into
 * a tree of plain-object nodes (`{ type, content, ... }`).
 *
 * Every `parse*Expr` method takes the full source plus a start index and
 * returns a `[node, nextIndex]` pair.
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - Keep "whitespace" nodes in
   *   the output of `parse` instead of dropping them.
   * @param {boolean} [newline_sensitive=true] - Keep "newline" nodes
   *   instead of dropping them.
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parse a complete expression.
   *
   * @param {string} latex - Source text.
   * @returns {object} EMPTY_NODE when nothing was parsed, the node itself
   *   when there is exactly one, otherwise an "ordgroup" wrapping all nodes.
   * @throws {LatexParserError} On a bare `&` (alignment tabs are only valid
   *   inside an environment) or any error raised by the sub-parsers.
   */
  parse(latex) {
    const results = [];
    let pos = 0;
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    } else if (results.length === 1) {
      return results[0];
    } else {
      return { type: "ordgroup", args: results };
    }
  }

  /**
   * Parse one expression together with any trailing primes, subscript and
   * superscript.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index at which the expression begins.
   * @returns {[object, number]} The node (a "supsub" node when any script or
   *   prime is present, otherwise the bare base) and the index after it.
   * @throws {LatexParserError} "Double superscript" when primes follow an
   *   explicit `^` superscript.
   */
  parseNextExpr(latex, start) {
    let [base, pos] = this.parseNextExprWithoutSupSub(latex, start);
    let sub = null;
    let sup = null;
    let num_prime = 0;
    // Advance only by the primes just consumed; `num_prime` is the running
    // total and must not be re-added to `pos`.
    let eaten = eat_primes(latex, pos);
    num_prime += eaten;
    pos += eaten;
    if (pos < latex.length && latex[pos] === "_") {
      [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      eaten = eat_primes(latex, pos);
      num_prime += eaten;
      pos += eaten;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < latex.length && latex[pos] === "^") {
      [sup, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
      if (eat_primes(latex, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExprWithoutSupSub(latex, pos + 1);
        if (eat_primes(latex, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }
    if (sub === null && sup === null && num_prime === 0) {
      return [base, pos];
    }
    const res = { type: "supsub", base };
    if (sub) {
      res.sub = sub;
    }
    if (num_prime > 0) {
      // Primes become `\prime` commands placed before any explicit
      // superscript inside one group.
      res.sup = { type: "ordgroup", args: [] };
      for (let i = 0; i < num_prime; i++) {
        res.sup.args.push({ type: "command", content: "prime" });
      }
      if (sup) {
        res.sup.args.push(sup);
      }
      if (res.sup.args.length === 1) {
        res.sup = res.sup.args[0];
      }
    } else if (sup) {
      res.sup = sup;
    }
    return [res, pos];
  }

  /**
   * Parse a single token or group at `start`, without looking for scripts
   * that follow it.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the first character.
   * @returns {[object, number]} The node and the index after it. Characters
   *   not recognised by any rule yield a node of type "unknown".
   * @throws {LatexParserError} On a trailing lone backslash or on errors
   *   raised by the specialised sub-parsers.
   */
  parseNextExprWithoutSupSub(latex, start) {
    const firstChar = latex[start];
    if (firstChar === "{") {
      const posClosingBracket = find_closing_curly_bracket(latex, start);
      const exprInside = latex.slice(start + 1, posClosingBracket);
      return [this.parse(exprInside), posClosingBracket + 1];
    } else if (firstChar === "\\") {
      if (start + 1 >= latex.length) {
        throw new LatexParserError("Expecting command name after \\");
      }
      const firstTwoChars = latex.slice(start, start + 2);
      if (firstTwoChars === "\\\\") {
        return [{ type: "control", content: "\\\\" }, start + 2];
      } else if (firstTwoChars === "\\{" || firstTwoChars === "\\}") {
        return [{ type: "token-parenthesis", content: firstTwoChars }, start + 2];
      } else if (["\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
        return [{ type: "token", content: firstTwoChars }, start + 2];
      } else if (latex.startsWith("\\begin{", start)) {
        // startsWith with a position avoids copying the tail of the input.
        return this.parseBeginEndExpr(latex, start);
      } else if (latex.startsWith("\\left", start) && (start + 5 >= latex.length || !isalpha(latex[start + 5]))) {
        // The letter check keeps commands such as \leftarrow out of here.
        return this.parseLeftRightExpr(latex, start);
      } else {
        return this.parseCommandExpr(latex, start);
      }
    } else if (firstChar === "%") {
      // Comment: runs to the end of the line; the newline is not consumed.
      let pos = start + 1;
      while (pos < latex.length && latex[pos] !== "\n") {
        pos += 1;
      }
      return [{ type: "comment", content: latex.slice(start + 1, pos) }, pos];
    } else if (isdigit(firstChar)) {
      let pos = start;
      while (pos < latex.length && isdigit(latex[pos])) {
        pos += 1;
      }
      return [{ type: "token-number", content: latex.slice(start, pos) }, pos];
    } else if (isalpha(firstChar)) {
      return [{ type: "token-letter-var", content: firstChar }, start + 1];
    } else if ("+-*/=<>!".includes(firstChar)) {
      return [{ type: "token-operator", content: firstChar }, start + 1];
    } else if (".,;?".includes(firstChar)) {
      return [{ type: "atom", content: firstChar }, start + 1];
    } else if ("()[]".includes(firstChar)) {
      return [{ type: "token-parenthesis", content: firstChar }, start + 1];
    } else if (firstChar === "_") {
      // Script with no base: attach it to an empty node.
      let [sub, pos] = this.parseNextExpr(latex, start + 1);
      let sup = undefined;
      if (pos < latex.length && latex[pos] === "^") {
        [sup, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === "^") {
      let [sup, pos] = this.parseNextExpr(latex, start + 1);
      let sub = undefined;
      if (pos < latex.length && latex[pos] === "_") {
        [sub, pos] = this.parseNextExpr(latex, pos + 1);
      }
      return [{ type: "supsub", base: EMPTY_NODE, sub, sup }, pos];
    } else if (firstChar === " ") {
      let pos = start;
      while (pos < latex.length && latex[pos] === " ") {
        pos += 1;
      }
      return [{ type: "whitespace", content: latex.slice(start, pos) }, pos];
    } else if (firstChar === "\n") {
      return [{ type: "newline", content: "\n" }, start + 1];
    } else if (firstChar === "\r") {
      // Treat both "\r\n" and a lone "\r" as a single newline.
      if (start + 1 < latex.length && latex[start + 1] === "\n") {
        return [{ type: "newline", content: "\n" }, start + 2];
      } else {
        return [{ type: "newline", content: "\n" }, start + 1];
      }
    } else if (firstChar === "&") {
      return [{ type: "control", content: "&" }, start + 1];
    } else {
      return [{ type: "unknown", content: firstChar }, start + 1];
    }
  }

  /**
   * Parse `\name` plus the arguments it takes according to
   * `get_command_param_num`.
   *
   * Special cases: `\sqrt[n]{x}` stores the bracketed part as `exponent`;
   * `\text{...}` yields a "text" node holding the raw brace content.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash.
   * @returns {[object, number]} The command (or text) node and next index.
   * @throws {LatexParserError} If `start` is not a backslash, `\text` is not
   *   followed by `{`, or brackets are unmatched.
   */
  parseCommandExpr(latex, start) {
    assert(latex[start] === "\\");
    let pos = start + 1;
    const command = eat_command_name(latex, pos);
    pos += command.length;
    const paramNum = get_command_param_num(command);
    if (paramNum === 0) {
      return [{ type: "command", content: command }, pos];
    } else if (paramNum === 1) {
      if (command === "sqrt" && pos < latex.length && latex[pos] === "[") {
        const posLeftSquareBracket = pos;
        const posRightSquareBracket = find_closing_square_bracket(latex, pos);
        const exprInside = latex.slice(posLeftSquareBracket + 1, posRightSquareBracket);
        const exponent = this.parse(exprInside);
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, posRightSquareBracket + 1);
        return [{ type: "command", content: command, arg1, exponent }, newPos];
      } else if (command === "text") {
        assert(latex[pos] === "{");
        const posClosingBracket = find_closing_curly_bracket(latex, pos);
        const text = latex.slice(pos + 1, posClosingBracket);
        return [{ type: "text", content: text }, posClosingBracket + 1];
      } else {
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(latex, pos);
        return [{ type: "command", content: command, arg1 }, newPos];
      }
    } else if (paramNum === 2) {
      const [arg1, pos1] = this.parseNextExprWithoutSupSub(latex, pos);
      const [arg2, pos2] = this.parseNextExprWithoutSupSub(latex, pos1);
      return [{ type: "command", content: command, arg1, arg2 }, pos2];
    } else {
      // Not reachable while get_command_param_num only returns 0, 1 or 2.
      // Uses the parser's own error type; it still extends Error.
      throw new LatexParserError("Invalid number of parameters");
    }
  }

  /**
   * Parse `\left<delim> ... \right<delim>`.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash of `\left`.
   * @returns {[object, number]} A "leftright" node with `left`, `right` and
   *   parsed `body`, and the index after the right delimiter.
   * @throws {LatexParserError} On a missing or invalid delimiter, or when no
   *   matching `\right` exists.
   */
  parseLeftRightExpr(latex, start) {
    assert(latex.slice(start, start + 5) === "\\left");
    let pos = start + "\\left".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(latex, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos += leftDelimiter.length;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(latex, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + "\\right".length;
    pos += eat_whitespaces(latex, pos).length;
    if (pos >= latex.length) {
      throw new LatexParserError("Expecting delimiter after \\right");
    }
    const rightDelimiter = eat_parenthesis(latex, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos += rightDelimiter.length;
    const exprInside = latex.slice(exprInsideStart, exprInsideEnd);
    const body = this.parse(exprInside);
    const res = { type: "leftright", left: leftDelimiter, right: rightDelimiter, body };
    return [res, pos];
  }

  /**
   * Parse `\begin{env} ... \end{env}`.
   *
   * @param {string} latex - Source text.
   * @param {number} start - Index of the backslash of `\begin`.
   * @returns {[object, number]} A "beginend" node whose `content` is the
   *   environment name and whose `body` is the row/cell grid from
   *   `parseAligned`, plus the index after the closing `}` of `\end{env}`.
   * @throws {LatexParserError} On unmatched braces, a missing `\end`, or
   *   when the `\end` name differs from the `\begin` name.
   */
  parseBeginEndExpr(latex, start) {
    assert(latex.slice(start, start + 7) === "\\begin{");
    let pos = start + "\\begin".length;
    // find_closing_curly_bracket throws on failure, so no -1 check is needed.
    const idx = find_closing_curly_bracket(latex, pos);
    const envName = latex.slice(pos + 1, idx);
    pos = idx + 1;
    pos += eat_whitespaces(latex, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(latex, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + "\\end".length;
    const closingIdx = find_closing_curly_bracket(latex, pos);
    if (latex.slice(pos + 1, closingIdx) !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    let exprInside = latex.slice(exprInsideStart, exprInsideEnd);
    exprInside = exprInside.trimEnd();
    const body = this.parseAligned(exprInside);
    const res = { type: "beginend", content: envName, body };
    return [res, closingIdx + 1];
  }

  /**
   * Parse the body of an environment into rows and cells.
   *
   * `\\` starts a new row and `&` starts a new cell; whitespace is always
   * dropped and newlines are dropped unless `newline_sensitive` is set.
   *
   * @param {string} latex - Environment body.
   * @returns {object[][]} Rows of cells; each cell is an "ordgroup" node.
   *   There is always at least one row containing one (possibly empty) cell.
   */
  parseAligned(latex) {
    let pos = 0;
    const allRows = [];
    let row = [];
    allRows.push(row);
    let group = { type: "ordgroup", args: [] };
    row.push(group);
    while (pos < latex.length) {
      const [res, newPos] = this.parseNextExpr(latex, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        row = [];
        group = { type: "ordgroup", args: [] };
        row.push(group);
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = { type: "ordgroup", args: [] };
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
|
|
636
|
+
|
|
637
|
+
/**
 * Error raised when a parsed LaTeX node cannot be converted to a TexNode.
 * The offending node is kept on the `node` property.
 */
class LatexNodeToTexNodeError extends Error {
  node;
  name = "LatexNodeToTexNodeError";

  /**
   * @param {string} message - Description of the conversion failure.
   * @param {object} node - The node that could not be converted.
   */
  constructor(message, node) {
    super(message);
    this.node = node;
  }
}
|
|
@@ -263,6 +665,10 @@ var symbolMap = new Map([
|
|
|
263
665
|
["overline", "overline"],
|
|
264
666
|
["underline", "underline"],
|
|
265
667
|
["bar", "macron"],
|
|
668
|
+
["dbinom", "binom"],
|
|
669
|
+
["tbinom", "binom"],
|
|
670
|
+
["dfrac", "frac"],
|
|
671
|
+
["tfrac", "frac"],
|
|
266
672
|
["boldsymbol", "bold"],
|
|
267
673
|
["mathbf", "bold"],
|
|
268
674
|
["mathbb", "bb"],
|
|
@@ -514,7 +920,9 @@ function convertToken(token) {
|
|
|
514
920
|
if (/^[a-zA-Z0-9]$/.test(token)) {
|
|
515
921
|
return token;
|
|
516
922
|
} else if (token === "\\\\") {
|
|
517
|
-
return "
|
|
923
|
+
return "\\";
|
|
924
|
+
} else if (token == "/") {
|
|
925
|
+
return "\\/";
|
|
518
926
|
} else if (["\\$", "\\#", "\\&", "\\_"].includes(token)) {
|
|
519
927
|
return token;
|
|
520
928
|
} else if (token.startsWith("\\")) {
|
|
@@ -567,6 +975,7 @@ class TypstWriter {
|
|
|
567
975
|
no_need_space ||= str === "'";
|
|
568
976
|
no_need_space ||= /[0-9]$/.test(this.buffer) && /^[0-9]/.test(str);
|
|
569
977
|
no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(this.buffer) || this.buffer === "-" || this.buffer === "+";
|
|
978
|
+
no_need_space ||= str.startsWith("\n");
|
|
570
979
|
no_need_space ||= this.buffer === "";
|
|
571
980
|
no_need_space ||= /[\s"_^{\(]$/.test(this.buffer);
|
|
572
981
|
if (!no_need_space) {
|
|
@@ -602,7 +1011,7 @@ class TypstWriter {
|
|
|
602
1011
|
this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
|
|
603
1012
|
return;
|
|
604
1013
|
}
|
|
605
|
-
if (
|
|
1014
|
+
if (base.type === "empty") {
|
|
606
1015
|
this.queue.push({ type: "text", content: "" });
|
|
607
1016
|
} else {
|
|
608
1017
|
this.appendWithBracketsIfNeeded(base);
|
|
@@ -696,8 +1105,6 @@ class TypstWriter {
|
|
|
696
1105
|
}, "");
|
|
697
1106
|
if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
|
|
698
1107
|
this.queue.push({ type: "symbol", content: text });
|
|
699
|
-
} else if (text.startsWith("SyMb01-")) {
|
|
700
|
-
this.queue.push({ type: "symbol", content: "\\" + text.substring(7) });
|
|
701
1108
|
} else {
|
|
702
1109
|
this.queue.push({ type: "symbol", content: "op" });
|
|
703
1110
|
this.queue.push({ type: "atom", content: "(" });
|
|
@@ -712,6 +1119,9 @@ class TypstWriter {
|
|
|
712
1119
|
this.append(arg0);
|
|
713
1120
|
this.queue.push({ type: "atom", content: ")" });
|
|
714
1121
|
this.insideFunctionDepth--;
|
|
1122
|
+
} else if (node.type === "newline") {
|
|
1123
|
+
this.queue.push({ type: "newline", content: "\n" });
|
|
1124
|
+
return;
|
|
715
1125
|
} else if (node.type === "align") {
|
|
716
1126
|
const matrix = node.irregularData;
|
|
717
1127
|
matrix.forEach((row, i) => {
|
|
@@ -734,6 +1144,7 @@ class TypstWriter {
|
|
|
734
1144
|
matrix.forEach((row, i) => {
|
|
735
1145
|
row.forEach((cell, j) => {
|
|
736
1146
|
if (cell.type === "ordgroup" && cell.args.length === 0) {
|
|
1147
|
+
this.queue.push({ type: "atom", content: "," });
|
|
737
1148
|
return;
|
|
738
1149
|
}
|
|
739
1150
|
this.append(cell);
|
|
@@ -754,6 +1165,8 @@ class TypstWriter {
|
|
|
754
1165
|
} else {
|
|
755
1166
|
throw new TypstWriterError(`Unknown macro: ${node.content}`, node);
|
|
756
1167
|
}
|
|
1168
|
+
} else if (node.type === "comment") {
|
|
1169
|
+
this.queue.push({ type: "comment", content: node.content });
|
|
757
1170
|
} else {
|
|
758
1171
|
throw new TypstWriterError(`Unimplemented node type to append: ${node.type}`, node);
|
|
759
1172
|
}
|
|
@@ -775,6 +1188,12 @@ class TypstWriter {
|
|
|
775
1188
|
this.needSpaceAfterSingleItemScript = true;
|
|
776
1189
|
str = "";
|
|
777
1190
|
break;
|
|
1191
|
+
case "comment":
|
|
1192
|
+
str = `//${node.content}`;
|
|
1193
|
+
break;
|
|
1194
|
+
case "newline":
|
|
1195
|
+
str = "\n";
|
|
1196
|
+
break;
|
|
778
1197
|
default:
|
|
779
1198
|
throw new TypstWriterError(`Unexpected node type to stringify: ${node.type}`, node);
|
|
780
1199
|
}
|