tex2typst 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.js +868 -770
- package/dist/parser.d.ts +2 -5
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +23 -2
- package/dist/writer.d.ts +4 -3
- package/package.json +2 -2
- package/src/index.ts +5 -4
- package/src/parser.ts +122 -104
- package/src/types.ts +30 -2
- package/src/writer.ts +274 -189
package/dist/index.js
CHANGED
|
@@ -1,607 +1,3 @@
|
|
|
1
|
-
// src/parser.ts
|
|
2
|
-
/**
 * Parser-internal invariant check.
 *
 * @param {*} condition - Value that must be truthy for parsing to continue.
 * @param {string} [message=""] - Message carried by the thrown error.
 * @throws {LatexParserError} When `condition` is falsy.
 */
function assert(condition, message = "") {
  if (condition) {
    return;
  }
  throw new LatexParserError(message);
}
|
|
7
|
-
/**
 * Report how many arguments a command consumes.
 *
 * @param {string} command - Command name in the form stored in the
 *   UNARY_COMMANDS / BINARY_COMMANDS tables (e.g. "sqrt", "frac").
 * @returns {number} 1 for a unary command, 2 for a binary command, 0 otherwise.
 */
function get_command_param_num(command) {
  if (UNARY_COMMANDS.includes(command)) {
    return 1;
  }
  return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
|
|
16
|
-
/**
 * Locate the `}` control token that balances the `{` at `start`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index of the opening `{` token.
 * @returns {number} Index of the matching `}` token.
 * @throws {LatexParserError} If `tokens[start]` is not `{`, or the list ends
 *   before the bracket is balanced.
 */
function find_closing_curly_bracket(tokens, start) {
  assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
  let depth = 1;
  for (let i = start + 1; i < tokens.length; i++) {
    const tok = tokens[i];
    if (token_eq(tok, LEFT_CURLY_BRACKET)) {
      depth += 1;
    } else if (token_eq(tok, RIGHT_CURLY_BRACKET)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  throw new LatexParserError("Unmatched curly brackets");
}
|
|
33
|
-
/**
 * Locate the `]` element token that balances the `[` at `start`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index of the opening `[` token.
 * @returns {number} Index of the matching `]` token.
 * @throws {LatexParserError} If `tokens[start]` is not `[`, or the list ends
 *   before the bracket is balanced.
 */
function find_closing_square_bracket(tokens, start) {
  assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
  let depth = 1;
  for (let i = start + 1; i < tokens.length; i++) {
    const tok = tokens[i];
    if (token_eq(tok, LEFT_SQUARE_BRACKET)) {
      depth += 1;
    } else if (token_eq(tok, RIGHT_SQUARE_BRACKET)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  throw new LatexParserError("Unmatched square brackets");
}
|
|
50
|
-
/**
 * Test whether `char` is exactly one ASCII letter (a-z or A-Z).
 *
 * The previous `"abc…XYZ".includes(char)` form also returned true for the
 * empty string and for multi-character substrings such as "ab".
 *
 * @param {string} char - Candidate character.
 * @returns {boolean} True only for a single ASCII letter.
 */
function isalpha(char) {
  return typeof char === "string" && /^[A-Za-z]$/.test(char);
}
|
|
53
|
-
/**
 * Test whether `char` is exactly one ASCII decimal digit (0-9).
 *
 * The previous `"0123456789".includes(char)` form also returned true for the
 * empty string and for multi-character substrings such as "12".
 *
 * @param {string} char - Candidate character.
 * @returns {boolean} True only for a single ASCII digit.
 */
function isdigit(char) {
  return typeof char === "string" && /^[0-9]$/.test(char);
}
|
|
56
|
-
/**
 * Collect the run of consecutive "whitespace" / "newline" tokens that begins
 * at `start`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index at which to begin looking.
 * @returns {Array<{type: string, value: string}>} The blank tokens found
 *   (possibly empty); callers advance by its `length`.
 */
function eat_whitespaces(tokens, start) {
  const isBlank = (tok) => tok.type === "whitespace" || tok.type === "newline";
  let end = start;
  for (; end < tokens.length; end++) {
    if (!isBlank(tokens[end])) {
      break;
    }
  }
  return tokens.slice(start, end);
}
|
|
63
|
-
/**
 * Check whether the token at `start` is a delimiter accepted after
 * `\left` / `\right`. Nothing is consumed; the caller advances on success.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index of the candidate delimiter token.
 * @returns {{type: string, value: string} | null} The token itself when it is
 *   an accepted delimiter, otherwise null.
 */
function eat_parenthesis(tokens, start) {
  const candidate = tokens[start];
  switch (candidate.type) {
    case "element": {
      const elementDelimiters = ["(", ")", "[", "]", "|", "\\{", "\\}"];
      return elementDelimiters.includes(candidate.value) ? candidate : null;
    }
    case "command": {
      // Command values carry a leading backslash; compare the bare name.
      const commandDelimiters = ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle"];
      return commandDelimiters.includes(candidate.value.slice(1)) ? candidate : null;
    }
    default:
      return null;
  }
}
|
|
73
|
-
/**
 * Count the consecutive prime (`'`) element tokens starting at `start`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index at which to begin counting.
 * @returns {number} Number of prime tokens found (0 if none).
 */
function eat_primes(tokens, start) {
  const prime = { type: "element", value: "'" };
  let count = 0;
  while (start + count < tokens.length && token_eq(tokens[start + count], prime)) {
    count += 1;
  }
  return count;
}
|
|
80
|
-
/**
 * Read the run of letters (as judged by `isalpha`) that begins at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the first character after the backslash.
 * @returns {string} The letters read; empty when `latex[start]` is not a letter.
 */
function eat_command_name(latex, start) {
  let end = start;
  for (; end < latex.length; end += 1) {
    if (!isalpha(latex[end])) {
      break;
    }
  }
  return latex.substring(start, end);
}
|
|
87
|
-
/**
 * Find the `\right` command that balances an already-consumed `\left`.
 * Nested `\left` … `\right` pairs are skipped.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index of the first token after the opening
 *   `\left` and its delimiter.
 * @returns {number} Index of the matching `\right` token, or -1 if there is none.
 */
function find_closing_right_command(tokens, start) {
  let depth = 1;
  for (let i = start; i < tokens.length; i++) {
    if (token_eq(tokens[i], LEFT_COMMAND)) {
      depth += 1;
    } else if (token_eq(tokens[i], RIGHT_COMMAND)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  return -1;
}
|
|
103
|
-
/**
 * Find the `\end` command that balances an already-consumed `\begin`.
 * Nested `\begin` … `\end` pairs are skipped.
 *
 * @param {Array<{type: string, value: string}>} tokens - Token list.
 * @param {number} start - Index of the first token of the environment body.
 * @returns {number} Index of the matching `\end` token, or -1 if there is none.
 */
function find_closing_end_command(tokens, start) {
  let depth = 1;
  for (let i = start; i < tokens.length; i++) {
    if (token_eq(tokens[i], BEGIN_COMMAND)) {
      depth += 1;
    } else if (token_eq(tokens[i], END_COMMAND)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  return -1;
}
|
|
119
|
-
/**
 * Character-level counterpart of `find_closing_curly_bracket`: locate the `}`
 * that balances the `{` at `latex[start]`. The two-character sequences `\{`
 * and `\}` are treated as escaped and never counted.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`.
 * @throws {LatexParserError} If `latex[start]` is not `{`, or the text ends
 *   before the bracket is balanced.
 */
function find_closing_curly_bracket_char(latex, start) {
  assert(latex[start] === "{");
  let depth = 1;
  let i = start + 1;
  for (;;) {
    if (i >= latex.length) {
      throw new LatexParserError("Unmatched curly brackets");
    }
    const ch = latex[i];
    const next = latex[i + 1];
    if (ch === "\\" && i + 1 < latex.length && (next === "{" || next === "}")) {
      // Escaped brace: step over both characters.
      i += 2;
      continue;
    }
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
    i += 1;
  }
}
|
|
140
|
-
/**
 * Split a LaTeX math string into a flat list of `{ type, value }` tokens.
 *
 * Token types produced:
 *  - "comment":    text after `%` up to (not including) the end of the line
 *  - "control":    `{`, `}`, `_`, `^`, `&`, `\\` and `\,`
 *  - "newline":    `\n`, `\r\n` or a lone `\r` (value is always "\n")
 *  - "whitespace": a run of spaces
 *  - "element":    a run of digits, a single letter, one punctuation character,
 *                  or an escaped character such as `\{` or `\%`
 *  - "command":    a backslash followed by letters (value keeps the backslash)
 *  - "text":       the raw argument of `\text`, `\begin` or `\end`
 *  - "unknown":    any other single character
 *
 * Changes from the previous implementation:
 *  - A comment now also stops at `\r`, so CRLF input no longer leaves a stray
 *    carriage return inside the comment token.
 *  - Escapes inside a text argument are undone in a single pass. The previous
 *    sequence of `replaceAll` calls could unescape twice (`\\$` became `$`).
 *
 * @param {string} latex - Source text.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 * @throws {LatexParserError} On a trailing backslash, on `\text` / `\begin` /
 *   `\end` not immediately followed by `{`, or on an unbalanced text argument.
 */
function tokenize(latex) {
  const tokens = [];
  let pos = 0;
  while (pos < latex.length) {
    const firstChar = latex[pos];
    let token;
    switch (firstChar) {
      case "%": {
        let newPos = pos + 1;
        while (newPos < latex.length && latex[newPos] !== "\n" && latex[newPos] !== "\r") {
          newPos += 1;
        }
        token = { type: "comment", value: latex.slice(pos + 1, newPos) };
        pos = newPos;
        break;
      }
      case "{":
      case "}":
      case "_":
      case "^":
      case "&":
        token = { type: "control", value: firstChar };
        pos += 1;
        break;
      case "\n":
        token = { type: "newline", value: firstChar };
        pos += 1;
        break;
      case "\r":
        // Normalise CRLF and a lone CR to a single "\n" newline token.
        token = { type: "newline", value: "\n" };
        pos += latex[pos + 1] === "\n" ? 2 : 1;
        break;
      case " ": {
        let newPos = pos;
        while (newPos < latex.length && latex[newPos] === " ") {
          newPos += 1;
        }
        token = { type: "whitespace", value: latex.slice(pos, newPos) };
        pos = newPos;
        break;
      }
      case "\\": {
        if (pos + 1 >= latex.length) {
          throw new LatexParserError("Expecting command name after \\");
        }
        const firstTwoChars = latex.slice(pos, pos + 2);
        if (["\\\\", "\\,"].includes(firstTwoChars)) {
          token = { type: "control", value: firstTwoChars };
        } else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
          token = { type: "element", value: firstTwoChars };
        } else {
          // The name may be empty (e.g. "\!"); the token is then just "\".
          const command = eat_command_name(latex, pos + 1);
          token = { type: "command", value: "\\" + command };
        }
        pos += token.value.length;
        break;
      }
      default: {
        if (isdigit(firstChar)) {
          let newPos = pos;
          while (newPos < latex.length && isdigit(latex[newPos])) {
            newPos += 1;
          }
          token = { type: "element", value: latex.slice(pos, newPos) };
        } else if (isalpha(firstChar)) {
          token = { type: "element", value: firstChar };
        } else if ("+-*/=\'<>!.,;?()[]|".includes(firstChar)) {
          token = { type: "element", value: firstChar };
        } else {
          token = { type: "unknown", value: firstChar };
        }
        pos += token.value.length;
        break;
      }
    }
    tokens.push(token);

    // These commands take a verbatim argument: emit `{`, one text token, `}`.
    if (token.type === "command" && ["\\text", "\\begin", "\\end"].includes(token.value)) {
      if (pos >= latex.length || latex[pos] !== "{") {
        throw new LatexParserError(`No content for ${token.value} command`);
      }
      const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
      const rawText = latex.slice(pos + 1, posClosingBracket);
      // Single pass so that each escape sequence is consumed exactly once.
      const textInside = rawText.replace(/\\([{}\\$&#_%])/g, "$1");
      tokens.push({ type: "control", value: "{" });
      tokens.push({ type: "text", value: textInside });
      tokens.push({ type: "control", value: "}" });
      pos = posClosingBracket + 1;
    }
  }
  return tokens;
}
|
|
240
|
-
/**
 * Structural equality for tokens: same `type` and same `value`.
 *
 * Uses strict comparison, and treats a missing token (for example an
 * out-of-range `tokens[pos]`) as unequal instead of throwing a TypeError.
 *
 * @param {{type: string, value: string} | undefined | null} token1
 * @param {{type: string, value: string} | undefined | null} token2
 * @returns {boolean} True when both tokens exist and match.
 */
function token_eq(token1, token2) {
  if (token1 == null || token2 == null) {
    return false;
  }
  return token1.type === token2.type && token1.value === token2.value;
}
|
|
243
|
-
/**
 * Tokenize `tex`, expand user-defined macros, and parse the result.
 *
 * A command token whose value (including the leading backslash) is a key of
 * `customTexMacros` with a string value is replaced by the tokens of that
 * string. The expansion is tokenized once and is not re-scanned for macros.
 *
 * Changes from the previous implementation:
 *  - `customTexMacros` may be omitted or null (it used to throw a TypeError).
 *  - A macro defined as the empty string now expands to nothing instead of
 *    being ignored by a truthiness test.
 *
 * @param {string} tex - LaTeX source.
 * @param {Object<string, string>} [customTexMacros={}] - Map from command
 *   (e.g. "\\R") to its replacement LaTeX.
 * @returns {object} The node returned by `LatexParser#parse`.
 */
function parseTex(tex, customTexMacros = {}) {
  const macros = customTexMacros ?? {};
  const parser = new LatexParser();
  const processed_tokens = [];
  for (const token of tokenize(tex)) {
    const expansion = token.type === "command" ? macros[token.value] : undefined;
    if (typeof expansion === "string") {
      processed_tokens.push(...tokenize(expansion));
    } else {
      processed_tokens.push(token);
    }
  }
  return parser.parse(processed_tokens);
}
|
|
257
|
-
// Command names (without the leading backslash) that take one argument.
var UNARY_COMMANDS = [
  "sqrt", "text", "bar", "bold", "boldsymbol", "ddot", "dot", "hat",
  "mathbb", "mathbf", "mathcal", "mathfrak", "mathit", "mathrm", "mathscr",
  "mathsf", "mathtt", "operatorname", "overbrace", "overline", "pmb", "rm",
  "tilde", "underbrace", "underline", "vec", "widehat", "widetilde"
];

// Command names (without the leading backslash) that take two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];

// Node returned when there is nothing to parse.
var EMPTY_NODE = { type: "empty", content: "" };

// Reference tokens, compared against real tokens with token_eq.
var LEFT_CURLY_BRACKET = { type: "control", value: "{" };
var RIGHT_CURLY_BRACKET = { type: "control", value: "}" };
var LEFT_SQUARE_BRACKET = { type: "element", value: "[" };
var RIGHT_SQUARE_BRACKET = { type: "element", value: "]" };
var LEFT_COMMAND = { type: "command", value: "\\left" };
var RIGHT_COMMAND = { type: "command", value: "\\right" };
var BEGIN_COMMAND = { type: "command", value: "\\begin" };
var END_COMMAND = { type: "command", value: "\\end" };
|
|
304
|
-
|
|
305
|
-
/**
 * Error raised by the tokenizer and parser for malformed LaTeX input.
 * Instances have `name === "LatexParserError"`.
 */
class LatexParserError extends Error {
  name = "LatexParserError";

  /**
   * @param {string} message - Description of the problem.
   */
  constructor(message) {
    super(message);
  }
}
|
|
311
|
-
// Reference control tokens for the subscript (`_`) and superscript (`^`)
// markers, compared against real tokens with token_eq.
var [SUB_SYMBOL, SUP_SYMBOL] = ["_", "^"].map((value) => ({ type: "control", value }));
|
|
313
|
-
|
|
314
|
-
/**
 * Recursive-descent parser that turns the token list produced by `tokenize`
 * into a tree of `{ type, content, args?, data? }` nodes.
 *
 * Every `parseXxx(tokens, start)` method returns `[node, newPos]`, where
 * `newPos` is the index of the first token that was not consumed.
 *
 * Changes from the previous implementation:
 *  - `parseNextExpr` advanced `pos` by the cumulative prime count after a
 *    subscript, so input such as `f'_a' + b` skipped a token. It now advances
 *    only by the primes just read.
 *  - `parse` had a duplicated outer loop whose body always returned on its
 *    first iteration; it is now a single loop with the same results.
 *  - `parseNextExprWithoutSupSub` throws a LatexParserError when the input
 *    ends where an expression is required (e.g. `x^`), not a TypeError.
 *  - Lexical declarations inside `case` clauses are wrapped in blocks.
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - Keep whitespace nodes in `parse` output.
   * @param {boolean} [newline_sensitive=true] - Keep newline nodes in `parse` output.
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parse a complete token list.
   *
   * @param {Array<{type: string, value: string}>} tokens
   * @returns {object} EMPTY_NODE when nothing remains, the single node when
   *   there is exactly one, otherwise an "ordgroup" node holding all of them.
   * @throws {LatexParserError} On `&` outside of an alignment, or any error
   *   raised while parsing a sub-expression.
   */
  parse(tokens) {
    const results = [];
    let pos = 0;
    while (pos < tokens.length) {
      const [res, newPos] = this.parseNextExpr(tokens, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    }
    if (results.length === 1) {
      return results[0];
    }
    return { type: "ordgroup", content: "", args: results };
  }

  /**
   * Parse one expression together with any primes, subscript and superscript
   * attached to it.
   *
   * @returns {[object, number]} The base node itself when nothing is attached,
   *   otherwise a "supsub" node whose `data` is `{ base, sub?, sup? }`. Primes
   *   become `\prime` symbol nodes placed before any explicit superscript.
   * @throws {LatexParserError} "Double superscript" when a prime follows an
   *   explicit superscript (or the subscript after one).
   */
  parseNextExpr(tokens, start) {
    const [base, afterBase] = this.parseNextExprWithoutSupSub(tokens, start);
    let pos = afterBase;
    let sub = null;
    let sup = null;
    let num_prime = eat_primes(tokens, pos);
    pos += num_prime;
    if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
      [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
      // Advance only by the primes read here, not by the running total.
      const trailingPrimes = eat_primes(tokens, pos);
      num_prime += trailingPrimes;
      pos += trailingPrimes;
      if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
        [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
        if (eat_primes(tokens, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
      [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
      if (eat_primes(tokens, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
        [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
        if (eat_primes(tokens, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }

    if (sub === null && sup === null && num_prime === 0) {
      return [base, pos];
    }
    const res = { base };
    if (sub) {
      res.sub = sub;
    }
    if (num_prime > 0) {
      const supArgs = [];
      for (let i = 0; i < num_prime; i++) {
        supArgs.push({ type: "symbol", content: "\\prime" });
      }
      if (sup) {
        supArgs.push(sup);
      }
      // A lone prime is stored directly instead of in a one-element group.
      res.sup = supArgs.length === 1 ? supArgs[0] : { type: "ordgroup", content: "", args: supArgs };
    } else if (sup) {
      res.sup = sup;
    }
    return [{ type: "supsub", content: "", data: res }, pos];
  }

  /**
   * Parse a single expression without looking for trailing `_`, `^` or primes.
   *
   * @returns {[object, number]}
   * @throws {LatexParserError} On end of input, an unmatched `}`, an unknown
   *   control sequence, or a token type the parser does not handle.
   */
  parseNextExprWithoutSupSub(tokens, start) {
    if (start >= tokens.length) {
      throw new LatexParserError("Unexpected end of input");
    }
    const firstToken = tokens[start];
    const tokenType = firstToken.type;
    switch (tokenType) {
      case "element":
      case "text":
      case "comment":
      case "whitespace":
      case "newline":
        return [{ type: tokenType, content: firstToken.value }, start + 1];
      case "command":
        if (token_eq(firstToken, BEGIN_COMMAND)) {
          return this.parseBeginEndExpr(tokens, start);
        }
        if (token_eq(firstToken, LEFT_COMMAND)) {
          return this.parseLeftRightExpr(tokens, start);
        }
        return this.parseCommandExpr(tokens, start);
      case "control": {
        const controlChar = firstToken.value;
        switch (controlChar) {
          case "{": {
            const posClosingBracket = find_closing_curly_bracket(tokens, start);
            const exprInside = tokens.slice(start + 1, posClosingBracket);
            return [this.parse(exprInside), posClosingBracket + 1];
          }
          case "}":
            throw new LatexParserError("Unmatched '}'");
          case "\\\\":
            return [{ type: "control", content: "\\\\" }, start + 1];
          case "\\,":
            return [{ type: "control", content: "\\," }, start + 1];
          case "_": {
            // Subscript with no base: attach it to EMPTY_NODE.
            let [sub, pos] = this.parseNextExpr(tokens, start + 1);
            let sup = undefined;
            if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
              [sup, pos] = this.parseNextExpr(tokens, pos + 1);
            }
            const subData = { base: EMPTY_NODE, sub, sup };
            return [{ type: "supsub", content: "", data: subData }, pos];
          }
          case "^": {
            // Superscript with no base: attach it to EMPTY_NODE.
            let [sup, pos] = this.parseNextExpr(tokens, start + 1);
            let sub = undefined;
            if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
              [sub, pos] = this.parseNextExpr(tokens, pos + 1);
            }
            const supData = { base: EMPTY_NODE, sub, sup };
            return [{ type: "supsub", content: "", data: supData }, pos];
          }
          case "&":
            return [{ type: "control", content: "&" }, start + 1];
          default:
            throw new LatexParserError("Unknown control sequence");
        }
      }
      default:
        throw new LatexParserError("Unknown token type");
    }
  }

  /**
   * Parse a command token and the arguments it takes (0, 1 or 2, as reported
   * by `get_command_param_num`).
   *
   * @returns {[object, number]} A "symbol", "unaryFunc", "text" or
   *   "binaryFunc" node. For `\sqrt[n]{x}` the parsed `n` is stored in `data`.
   * @throws {LatexParserError} For `\left`, `\right`, `\begin`, `\end` (handled
   *   elsewhere) and for `\text` without its three argument tokens.
   */
  parseCommandExpr(tokens, start) {
    assert(tokens[start].type === "command");
    const command = tokens[start].value;
    const pos = start + 1;
    if (["left", "right", "begin", "end"].includes(command.slice(1))) {
      throw new LatexParserError("Unexpected command: " + command);
    }
    const paramNum = get_command_param_num(command.slice(1));
    switch (paramNum) {
      case 0:
        return [{ type: "symbol", content: command }, pos];
      case 1: {
        if (command === "\\sqrt" && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
          const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
          const exponent = this.parse(tokens.slice(pos + 1, posRightSquareBracket));
          const [radicand, afterRadicand] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
          return [{ type: "unaryFunc", content: command, args: [radicand], data: exponent }, afterRadicand];
        }
        if (command === "\\text") {
          if (pos + 2 >= tokens.length) {
            throw new LatexParserError("Expecting content for \\text command");
          }
          // The tokenizer emits `{`, one text token, `}` after \text.
          assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
          assert(tokens[pos + 1].type === "text");
          assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
          return [{ type: "text", content: tokens[pos + 1].value }, pos + 3];
        }
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
        return [{ type: "unaryFunc", content: command, args: [arg1] }, newPos];
      }
      case 2: {
        const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
        const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
        return [{ type: "binaryFunc", content: command, args: [arg1, arg2] }, pos2];
      }
      default:
        throw new Error("Invalid number of parameters");
    }
  }

  /**
   * Parse `\left<delim> … \right<delim>`.
   *
   * @returns {[object, number]} A "leftright" node whose `args` are
   *   `[leftDelimiter, body, rightDelimiter]`.
   * @throws {LatexParserError} On a missing or invalid delimiter, or when no
   *   matching `\right` exists.
   */
  parseLeftRightExpr(tokens, start) {
    assert(token_eq(tokens[start], LEFT_COMMAND));
    let pos = start + 1;
    pos += eat_whitespaces(tokens, pos).length;
    if (pos >= tokens.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(tokens, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos++;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(tokens, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + 1;
    pos += eat_whitespaces(tokens, pos).length;
    if (pos >= tokens.length) {
      throw new LatexParserError("Expecting \\right after \\left");
    }
    const rightDelimiter = eat_parenthesis(tokens, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos++;
    const body = this.parse(tokens.slice(exprInsideStart, exprInsideEnd));
    const args = [
      { type: "element", content: leftDelimiter.value },
      body,
      { type: "element", content: rightDelimiter.value }
    ];
    return [{ type: "leftright", content: "", args }, pos];
  }

  /**
   * Parse `\begin{env} … \end{env}`.
   *
   * @returns {[object, number]} A "beginend" node whose `content` is the
   *   environment name and whose `data` is the row/cell matrix produced by
   *   `parseAligned`.
   * @throws {LatexParserError} When no matching `\end` exists or the
   *   environment names differ.
   */
  parseBeginEndExpr(tokens, start) {
    assert(token_eq(tokens[start], BEGIN_COMMAND));
    let pos = start + 1;
    // The tokenizer emits `{`, one text token, `}` after \begin and \end.
    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
    assert(tokens[pos + 1].type === "text");
    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
    const envName = tokens[pos + 1].value;
    pos += 3;
    pos += eat_whitespaces(tokens, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(tokens, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + 1;
    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
    assert(tokens[pos + 1].type === "text");
    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
    if (tokens[pos + 1].value !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    pos += 3;
    const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
    // Drop trailing blank tokens before the \end.
    while (exprInside.length > 0 && ["whitespace", "newline"].includes(exprInside[exprInside.length - 1].type)) {
      exprInside.pop();
    }
    const body = this.parseAligned(exprInside);
    return [{ type: "beginend", content: envName, data: body }, pos];
  }

  /**
   * Parse the body of an environment into rows and cells. `\\` starts a new
   * row and `&` starts a new cell; each cell is an "ordgroup" node.
   * Whitespace is always dropped; newlines are dropped only when the parser
   * is not newline-sensitive.
   *
   * @param {Array<{type: string, value: string}>} tokens - Environment body.
   * @returns {Array<Array<object>>} Rows of cells; always at least one row
   *   holding one (possibly empty) cell.
   */
  parseAligned(tokens) {
    const newCell = () => ({ type: "ordgroup", content: "", args: [] });
    let pos = 0;
    let group = newCell();
    let row = [group];
    const allRows = [row];
    while (pos < tokens.length) {
      const [res, newPos] = this.parseNextExpr(tokens, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        group = newCell();
        row = [group];
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = newCell();
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
|
|
604
|
-
|
|
605
1
|
// src/map.ts
|
|
606
2
|
var symbolMap = new Map([
|
|
607
3
|
["nonumber", ""],
|
|
@@ -902,7 +298,788 @@ var symbolMap = new Map([
|
|
|
902
298
|
["TeX", "#TeX"]
|
|
903
299
|
]);
|
|
904
300
|
|
|
301
|
+
// src/parser.ts
|
|
302
|
+
/**
 * Parser-internal invariant check.
 *
 * @param {*} condition - Value that must be truthy for parsing to continue.
 * @param {string} [message=""] - Message carried by the thrown error.
 * @throws {LatexParserError} When `condition` is falsy.
 */
function assert(condition, message = "") {
  if (condition) {
    return;
  }
  throw new LatexParserError(message);
}
|
|
307
|
+
/**
 * Report how many arguments a command consumes.
 *
 * @param {string} command - Command name, compared against the entries of
 *   UNARY_COMMANDS and BINARY_COMMANDS.
 * @returns {number} 1 for a unary command, 2 for a binary command, 0 otherwise.
 */
function get_command_param_num(command) {
  if (UNARY_COMMANDS.includes(command)) {
    return 1;
  }
  return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
|
|
316
|
+
/**
 * Locate the token equal to RIGHT_CURLY_BRACKET that balances the
 * LEFT_CURLY_BRACKET token at `start`.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index of the opening bracket token.
 * @returns {number} Index of the matching closing bracket token.
 * @throws {LatexParserError} If `tokens[start]` is not the opening bracket,
 *   or the list ends before the bracket is balanced.
 */
function find_closing_curly_bracket(tokens, start) {
  assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
  let depth = 1;
  for (let i = start + 1; i < tokens.length; i++) {
    const tok = tokens[i];
    if (token_eq(tok, LEFT_CURLY_BRACKET)) {
      depth += 1;
    } else if (token_eq(tok, RIGHT_CURLY_BRACKET)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  throw new LatexParserError("Unmatched curly brackets");
}
|
|
333
|
+
/**
 * Locate the token equal to RIGHT_SQUARE_BRACKET that balances the
 * LEFT_SQUARE_BRACKET token at `start`.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index of the opening bracket token.
 * @returns {number} Index of the matching closing bracket token.
 * @throws {LatexParserError} If `tokens[start]` is not the opening bracket,
 *   or the list ends before the bracket is balanced.
 */
function find_closing_square_bracket(tokens, start) {
  assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
  let depth = 1;
  for (let i = start + 1; i < tokens.length; i++) {
    const tok = tokens[i];
    if (token_eq(tok, LEFT_SQUARE_BRACKET)) {
      depth += 1;
    } else if (token_eq(tok, RIGHT_SQUARE_BRACKET)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  throw new LatexParserError("Unmatched square brackets");
}
|
|
350
|
+
/**
 * Test whether `char` is exactly one ASCII letter (a-z or A-Z).
 *
 * The previous `"abc…XYZ".includes(char)` form also returned true for the
 * empty string and for multi-character substrings such as "ab".
 *
 * @param {string} char - Candidate character.
 * @returns {boolean} True only for a single ASCII letter.
 */
function isalpha(char) {
  return typeof char === "string" && /^[A-Za-z]$/.test(char);
}
|
|
353
|
+
/**
 * Test whether `char` is exactly one ASCII decimal digit (0-9).
 *
 * The previous `"0123456789".includes(char)` form also returned true for the
 * empty string and for multi-character substrings such as "12".
 *
 * @param {string} char - Candidate character.
 * @returns {boolean} True only for a single ASCII digit.
 */
function isdigit(char) {
  return typeof char === "string" && /^[0-9]$/.test(char);
}
|
|
356
|
+
/**
 * Collect the run of consecutive WHITESPACE (4) / NEWLINE (5) tokens that
 * begins at `start`.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index at which to begin looking.
 * @returns {Array<{type: number, value: string}>} The blank tokens found
 *   (possibly empty); callers advance by its `length`.
 */
function eat_whitespaces(tokens, start) {
  const isBlank = (tok) => tok.type === 4 /* WHITESPACE */ || tok.type === 5 /* NEWLINE */;
  let end = start;
  for (; end < tokens.length; end++) {
    if (!isBlank(tokens[end])) {
      break;
    }
  }
  return tokens.slice(start, end);
}
|
|
363
|
+
/**
 * Check whether the token at `start` is one of the accepted delimiters:
 * an ELEMENT (0) token from a fixed bracket list, or a COMMAND (1) token
 * naming a floor, ceiling or angle bracket. Nothing is consumed.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index of the candidate delimiter token.
 * @returns {{type: number, value: string} | null} The token itself when it is
 *   an accepted delimiter, otherwise null.
 */
function eat_parenthesis(tokens, start) {
  const candidate = tokens[start];
  switch (candidate.type) {
    case 0 /* ELEMENT */: {
      const elementDelimiters = ["(", ")", "[", "]", "|", "\\{", "\\}"];
      return elementDelimiters.includes(candidate.value) ? candidate : null;
    }
    case 1 /* COMMAND */: {
      // Command values carry a leading backslash; compare the bare name.
      const commandDelimiters = ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle"];
      return commandDelimiters.includes(candidate.value.slice(1)) ? candidate : null;
    }
    default:
      return null;
  }
}
|
|
373
|
+
/**
 * Count the consecutive prime (`'`) ELEMENT tokens starting at `start`.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index at which to begin counting.
 * @returns {number} Number of prime tokens found (0 if none).
 */
function eat_primes(tokens, start) {
  const prime = { type: 0 /* ELEMENT */, value: "'" };
  let count = 0;
  while (start + count < tokens.length && token_eq(tokens[start + count], prime)) {
    count += 1;
  }
  return count;
}
|
|
380
|
+
/**
 * Read the run of letters (as judged by `isalpha`) that begins at `start`.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the first character after the backslash.
 * @returns {string} The letters read; empty when `latex[start]` is not a letter.
 */
function eat_command_name(latex, start) {
  let end = start;
  for (; end < latex.length; end += 1) {
    if (!isalpha(latex[end])) {
      break;
    }
  }
  return latex.substring(start, end);
}
|
|
387
|
+
/**
 * Find the RIGHT_COMMAND token that balances an already-consumed
 * LEFT_COMMAND. Nested pairs are skipped.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index at which scanning begins (inside the pair).
 * @returns {number} Index of the matching token, or -1 if there is none.
 */
function find_closing_right_command(tokens, start) {
  let depth = 1;
  for (let i = start; i < tokens.length; i++) {
    if (token_eq(tokens[i], LEFT_COMMAND)) {
      depth += 1;
    } else if (token_eq(tokens[i], RIGHT_COMMAND)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  return -1;
}
|
|
403
|
+
/**
 * Find the END_COMMAND token that balances an already-consumed
 * BEGIN_COMMAND. Nested pairs are skipped.
 *
 * @param {Array<{type: number, value: string}>} tokens - Token list.
 * @param {number} start - Index at which scanning begins (inside the pair).
 * @returns {number} Index of the matching token, or -1 if there is none.
 */
function find_closing_end_command(tokens, start) {
  let depth = 1;
  for (let i = start; i < tokens.length; i++) {
    if (token_eq(tokens[i], BEGIN_COMMAND)) {
      depth += 1;
    } else if (token_eq(tokens[i], END_COMMAND)) {
      depth -= 1;
    }
    if (depth === 0) {
      return i;
    }
  }
  return -1;
}
|
|
419
|
+
/**
 * Character-level bracket matcher: locate the `}` that balances the `{` at
 * `latex[start]`. The two-character sequences `\{` and `\}` are treated as
 * escaped and never counted.
 *
 * @param {string} latex - Source text.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`.
 * @throws {LatexParserError} If `latex[start]` is not `{`, or the text ends
 *   before the bracket is balanced.
 */
function find_closing_curly_bracket_char(latex, start) {
  assert(latex[start] === "{");
  let depth = 1;
  let i = start + 1;
  for (;;) {
    if (i >= latex.length) {
      throw new LatexParserError("Unmatched curly brackets");
    }
    const ch = latex[i];
    const next = latex[i + 1];
    if (ch === "\\" && i + 1 < latex.length && (next === "{" || next === "}")) {
      // Escaped brace: step over both characters.
      i += 2;
      continue;
    }
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) {
        return i;
      }
    }
    i += 1;
  }
}
|
|
440
|
+
/**
 * Split a LaTeX math string into a flat list of `{ type, value }` tokens.
 *
 * `type` is a numeric token kind (the inline comments give the names):
 *  - 0 ELEMENT:    a run of digits, a single letter, one punctuation
 *                  character, or an escaped character such as `\{` or `\%`
 *  - 1 COMMAND:    a backslash followed by letters (value keeps the backslash)
 *  - 2 TEXT:       the raw argument of `\text`, `\operatorname`, `\begin`, `\end`
 *  - 3 COMMENT:    text after `%` up to (not including) the end of the line
 *  - 4 WHITESPACE: a run of spaces
 *  - 5 NEWLINE:    `\n`, `\r\n` or a lone `\r` (value is always "\n")
 *  - 6 CONTROL:    `{`, `}`, `_`, `^`, `&`, `\\` and `\,`
 *  - 7 UNKNOWN:    any other single character
 *
 * Changes from the previous implementation:
 *  - A comment now also stops at `\r`, so CRLF input no longer leaves a stray
 *    carriage return inside the comment token.
 *  - Escapes inside a text argument are undone in a single pass. The previous
 *    sequence of `replaceAll` calls could unescape twice (`\\$` became `$`).
 *
 * @param {string} latex - Source text.
 * @returns {Array<{type: number, value: string}>} Tokens in source order.
 * @throws {LatexParserError} On a trailing backslash, on a text-argument
 *   command not immediately followed by `{`, or on an unbalanced text argument.
 */
function tokenize(latex) {
  const tokens = [];
  let pos = 0;
  while (pos < latex.length) {
    const firstChar = latex[pos];
    let token;
    switch (firstChar) {
      case "%": {
        let newPos = pos + 1;
        while (newPos < latex.length && latex[newPos] !== "\n" && latex[newPos] !== "\r") {
          newPos += 1;
        }
        token = { type: 3 /* COMMENT */, value: latex.slice(pos + 1, newPos) };
        pos = newPos;
        break;
      }
      case "{":
      case "}":
      case "_":
      case "^":
      case "&":
        token = { type: 6 /* CONTROL */, value: firstChar };
        pos += 1;
        break;
      case "\n":
        token = { type: 5 /* NEWLINE */, value: firstChar };
        pos += 1;
        break;
      case "\r":
        // Normalise CRLF and a lone CR to a single "\n" newline token.
        token = { type: 5 /* NEWLINE */, value: "\n" };
        pos += latex[pos + 1] === "\n" ? 2 : 1;
        break;
      case " ": {
        let newPos = pos;
        while (newPos < latex.length && latex[newPos] === " ") {
          newPos += 1;
        }
        token = { type: 4 /* WHITESPACE */, value: latex.slice(pos, newPos) };
        pos = newPos;
        break;
      }
      case "\\": {
        if (pos + 1 >= latex.length) {
          throw new LatexParserError("Expecting command name after \\");
        }
        const firstTwoChars = latex.slice(pos, pos + 2);
        if (["\\\\", "\\,"].includes(firstTwoChars)) {
          token = { type: 6 /* CONTROL */, value: firstTwoChars };
        } else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
          token = { type: 0 /* ELEMENT */, value: firstTwoChars };
        } else {
          // The name may be empty (e.g. "\!"); the token is then just "\".
          const command = eat_command_name(latex, pos + 1);
          token = { type: 1 /* COMMAND */, value: "\\" + command };
        }
        pos += token.value.length;
        break;
      }
      default: {
        if (isdigit(firstChar)) {
          let newPos = pos;
          while (newPos < latex.length && isdigit(latex[newPos])) {
            newPos += 1;
          }
          token = { type: 0 /* ELEMENT */, value: latex.slice(pos, newPos) };
        } else if (isalpha(firstChar)) {
          token = { type: 0 /* ELEMENT */, value: firstChar };
        } else if ("+-*/=\'<>!.,;?()[]|".includes(firstChar)) {
          token = { type: 0 /* ELEMENT */, value: firstChar };
        } else {
          token = { type: 7 /* UNKNOWN */, value: firstChar };
        }
        pos += token.value.length;
        break;
      }
    }
    tokens.push(token);

    // These commands take a verbatim argument: emit `{`, one text token, `}`.
    if (token.type === 1 /* COMMAND */ && ["\\text", "\\operatorname", "\\begin", "\\end"].includes(token.value)) {
      if (pos >= latex.length || latex[pos] !== "{") {
        throw new LatexParserError(`No content for ${token.value} command`);
      }
      const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
      const rawText = latex.slice(pos + 1, posClosingBracket);
      // Single pass so that each escape sequence is consumed exactly once.
      const textInside = rawText.replace(/\\([{}\\$&#_%])/g, "$1");
      tokens.push({ type: 6 /* CONTROL */, value: "{" });
      tokens.push({ type: 2 /* TEXT */, value: textInside });
      tokens.push({ type: 6 /* CONTROL */, value: "}" });
      pos = posClosingBracket + 1;
    }
  }
  return tokens;
}
|
|
540
|
+
/**
 * Structural equality check for two lexer tokens.
 *
 * Two tokens are considered equal when both their `type` and their `value`
 * match. Strict equality is used so that values of different JavaScript types
 * (for example the string "1" and the number 1) are never treated as the same
 * token through implicit coercion.
 *
 * @param {{type: number, value: string}} token1 - First token.
 * @param {{type: number, value: string}} token2 - Second token.
 * @returns {boolean} `true` when both tokens have the same type and value.
 */
function token_eq(token1, token2) {
  return token1.type === token2.type && token1.value === token2.value;
}
|
|
543
|
+
/**
 * Token pass that drops whitespace touching a script mark.
 *
 * A whitespace token is removed when the token immediately after it, or the
 * token immediately before it, is the subscript mark (`_`) or the superscript
 * mark (`^`). Despite the function name, whitespace on both sides of a script
 * mark is discarded. All other tokens are kept in their original order.
 *
 * @param {Array<{type: number, value: string}>} tokens - Tokens to filter.
 * @returns {Array<{type: number, value: string}>} A new array without the
 *   whitespace tokens adjacent to script marks.
 */
function passIgnoreWhitespaceBeforeScriptMark(tokens) {
  const isScriptMark = (candidate) =>
    token_eq(candidate, SUB_SYMBOL) || token_eq(candidate, SUP_SYMBOL);

  return tokens.filter((current, index) => {
    if (current.type !== 4 /* WHITESPACE */) {
      return true;
    }
    // Look ahead first, then behind; neighbours are only inspected when they exist.
    const followedByMark = index + 1 < tokens.length && isScriptMark(tokens[index + 1]);
    if (followedByMark) {
      return false;
    }
    const precededByMark = index - 1 >= 0 && isScriptMark(tokens[index - 1]);
    return !precededByMark;
  });
}
|
|
557
|
+
/**
 * Token pass that substitutes user-defined TeX macros.
 *
 * Every COMMAND token whose value has a truthy entry in `customTexMacros` is
 * replaced by the tokens obtained from tokenizing that entry. The expansion is
 * a single pass: tokens produced by an expansion are not expanded again.
 *
 * @param {Array<{type: number, value: string}>} tokens - Input tokens.
 * @param {Object<string, string>} customTexMacros - Map from command text
 *   (including the leading backslash) to its replacement LaTeX source.
 * @returns {Array<{type: number, value: string}>} A new token array with the
 *   macros expanded in place.
 */
function passExpandCustomTexMacros(tokens, customTexMacros) {
  return tokens.flatMap((current) => {
    const isCustomMacro = current.type === 1 /* COMMAND */ && customTexMacros[current.value];
    // An array result is spliced into the output; a bare token is kept as-is.
    return isCustomMacro ? tokenize(customTexMacros[current.value]) : current;
  });
}
|
|
569
|
+
/**
 * Parses a LaTeX math string into a syntax tree.
 *
 * The source is tokenized, whitespace next to script marks is removed, custom
 * macros are expanded, and the resulting tokens are handed to a `LatexParser`
 * created with its default settings.
 *
 * @param {string} tex - LaTeX source.
 * @param {Object<string, string>} customTexMacros - User macro definitions
 *   forwarded to `passExpandCustomTexMacros`.
 * @returns {object} The root node returned by `LatexParser#parse`.
 */
function parseTex(tex, customTexMacros) {
  const parser = new LatexParser();
  const rawTokens = tokenize(tex);
  const scriptReadyTokens = passIgnoreWhitespaceBeforeScriptMark(rawTokens);
  const expandedTokens = passExpandCustomTexMacros(scriptReadyTokens, customTexMacros);
  return parser.parse(expandedTokens);
}
|
|
576
|
+
// Command names (without the leading backslash) that take exactly one argument.
// Consulted by get_command_param_num; element order is not significant for lookups
// but is kept unchanged.
var UNARY_COMMANDS = [
  "sqrt", "text", "bar", "bold", "boldsymbol", "ddot", "dot", "hat",
  "mathbb", "mathbf", "mathcal", "mathfrak", "mathit", "mathrm", "mathscr", "mathsf", "mathtt",
  "operatorname", "overbrace", "overline", "pmb", "rm", "tilde",
  "underbrace", "underline", "vec", "widehat", "widetilde"
];
|
|
606
|
+
// Command names (without the leading backslash) that take exactly two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];
|
|
614
|
+
// Placeholder tree node used wherever an expression is absent.
var EMPTY_NODE = {
  type: "empty",
  content: ""
};

// Sentinel tokens compared against lexer output with token_eq.
// Curly brackets are CONTROL tokens; square brackets are ordinary ELEMENT tokens.
var LEFT_CURLY_BRACKET = {
  type: 6 /* CONTROL */,
  value: "{"
};
var RIGHT_CURLY_BRACKET = {
  type: 6 /* CONTROL */,
  value: "}"
};
var LEFT_SQUARE_BRACKET = {
  type: 0 /* ELEMENT */,
  value: "["
};
var RIGHT_SQUARE_BRACKET = {
  type: 0 /* ELEMENT */,
  value: "]"
};

// Sentinel COMMAND tokens for the paired constructs \left...\right and \begin...\end.
var LEFT_COMMAND = {
  type: 1 /* COMMAND */,
  value: "\\left"
};
var RIGHT_COMMAND = {
  type: 1 /* COMMAND */,
  value: "\\right"
};
var BEGIN_COMMAND = {
  type: 1 /* COMMAND */,
  value: "\\begin"
};
var END_COMMAND = {
  type: 1 /* COMMAND */,
  value: "\\end"
};
|
|
623
|
+
|
|
624
|
+
/**
 * Error type raised for malformed LaTeX input during tokenizing or parsing.
 * Instances report "LatexParserError" as their `name`.
 */
class LatexParserError extends Error {
  // Own property, initialised right after the Error constructor has run.
  name = "LatexParserError";

  /**
   * @param {string} message - Human-readable description of the problem.
   */
  constructor(message) {
    super(message);
  }
}
|
|
630
|
+
// Sentinel CONTROL tokens for the subscript and superscript marks.
var SUB_SYMBOL = {
  type: 6 /* CONTROL */,
  value: "_"
};
var SUP_SYMBOL = {
  type: 6 /* CONTROL */,
  value: "^"
};
|
|
632
|
+
|
|
633
|
+
/**
 * Recursive-descent parser that turns a flat list of LaTeX tokens into a tree
 * of plain-object nodes (`{ type, content, args?, data? }`).
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - Keep whitespace nodes in the
   *   output of `parse` instead of dropping them.
   * @param {boolean} [newline_sensitive=true] - Keep newline nodes in the
   *   output of `parse` and `parseAligned` instead of dropping them.
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parses a complete token sequence.
   *
   * @param {Array<{type: number, value: string}>} tokens - Tokens to parse.
   * @returns {object} `EMPTY_NODE` when nothing remains after filtering, the
   *   single node when exactly one remains, otherwise an "ordgroup" node
   *   wrapping all of them.
   * @throws {LatexParserError} When an alignment mark `&` appears here, or on
   *   any error raised while parsing an expression.
   */
  parse(tokens) {
    const results = [];
    let pos = 0;
    while (pos < tokens.length) {
      const [res, newPos] = this.parseNextExpr(tokens, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    }
    if (results.length === 1) {
      return results[0];
    }
    return { type: "ordgroup", content: "", args: results };
  }

  /**
   * Parses one expression together with any primes, subscript and superscript
   * attached to it.
   *
   * @param {Array<{type: number, value: string}>} tokens - Token list.
   * @param {number} start - Index of the first token of the expression.
   * @returns {[object, number]} The parsed node and the index of the first
   *   token after it. A "supsub" node is produced only when a script or a
   *   prime is present; otherwise the bare base node is returned.
   * @throws {LatexParserError} On a prime following an explicit superscript
   *   ("Double superscript").
   */
  parseNextExpr(tokens, start) {
    let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
    let sub = null;
    let sup = null;
    let num_prime = 0;

    const leadingPrimes = eat_primes(tokens, pos);
    num_prime += leadingPrimes;
    pos += leadingPrimes;

    if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
      [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
      // Advance only past the primes found here. Adding the running total
      // would skip real tokens whenever primes also preceded the subscript.
      const trailingPrimes = eat_primes(tokens, pos);
      num_prime += trailingPrimes;
      pos += trailingPrimes;
      if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
        [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
        if (eat_primes(tokens, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
      [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
      if (eat_primes(tokens, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
        [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
        if (eat_primes(tokens, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }

    if (sub === null && sup === null && num_prime === 0) {
      return [base, pos];
    }

    const res = { base };
    if (sub) {
      res.sub = sub;
    }
    if (num_prime > 0) {
      // Primes become part of the superscript, ahead of any explicit one.
      res.sup = { type: "ordgroup", content: "", args: [] };
      for (let i = 0; i < num_prime; i++) {
        res.sup.args.push({ type: "element", content: "'" });
      }
      if (sup) {
        res.sup.args.push(sup);
      }
      if (res.sup.args.length === 1) {
        res.sup = res.sup.args[0];
      }
    } else if (sup) {
      res.sup = sup;
    }
    return [{ type: "supsub", content: "", data: res }, pos];
  }

  /**
   * Parses one expression without looking at scripts or primes after it.
   *
   * @param {Array<{type: number, value: string}>} tokens - Token list.
   * @param {number} start - Index of the token to parse.
   * @returns {[object, number]} The parsed node and the index after it. For a
   *   bare `_` or `^` the result is `EMPTY_NODE` with the index unchanged, so
   *   the caller can treat the mark as a script on an empty base.
   * @throws {LatexParserError} On end of input, an unmatched `}`, an unknown
   *   control sequence or an unknown token type.
   */
  parseNextExprWithoutSupSub(tokens, start) {
    if (start >= tokens.length) {
      // Without this guard a dangling "^", "_" or command argument would
      // surface as a raw TypeError from reading a property of undefined.
      throw new LatexParserError("Unexpected end of input");
    }
    const firstToken = tokens[start];
    const tokenType = firstToken.type;
    switch (tokenType) {
      case 0 /* ELEMENT */:
        return [{ type: "element", content: firstToken.value }, start + 1];
      case 2 /* TEXT */:
        return [{ type: "text", content: firstToken.value }, start + 1];
      case 3 /* COMMENT */:
        return [{ type: "comment", content: firstToken.value }, start + 1];
      case 4 /* WHITESPACE */:
        return [{ type: "whitespace", content: firstToken.value }, start + 1];
      case 5 /* NEWLINE */:
        return [{ type: "newline", content: firstToken.value }, start + 1];
      case 1 /* COMMAND */:
        if (token_eq(firstToken, BEGIN_COMMAND)) {
          return this.parseBeginEndExpr(tokens, start);
        } else if (token_eq(firstToken, LEFT_COMMAND)) {
          return this.parseLeftRightExpr(tokens, start);
        } else {
          return this.parseCommandExpr(tokens, start);
        }
      case 6 /* CONTROL */: {
        const controlChar = firstToken.value;
        switch (controlChar) {
          case "{": {
            const posClosingBracket = find_closing_curly_bracket(tokens, start);
            const exprInside = tokens.slice(start + 1, posClosingBracket);
            return [this.parse(exprInside), posClosingBracket + 1];
          }
          case "}":
            throw new LatexParserError("Unmatched '}'");
          case "\\\\":
            return [{ type: "control", content: "\\\\" }, start + 1];
          case "\\,":
            return [{ type: "control", content: "\\," }, start + 1];
          case "_":
          case "^":
            return [EMPTY_NODE, start];
          case "&":
            return [{ type: "control", content: "&" }, start + 1];
          default:
            throw new LatexParserError("Unknown control sequence");
        }
      }
      default:
        throw new LatexParserError("Unknown token type");
    }
  }

  /**
   * Parses a command token and the arguments it takes.
   *
   * The number of arguments comes from `get_command_param_num`. Commands with
   * no arguments become "symbol" nodes when `symbolMap` knows them and
   * "unknownMacro" nodes otherwise. `\sqrt` accepts an optional `[...]`
   * exponent stored in `data`, and `\text` yields a "text" node.
   *
   * @param {Array<{type: number, value: string}>} tokens - Token list.
   * @param {number} start - Index of the COMMAND token.
   * @returns {[object, number]} The parsed node and the index after it.
   * @throws {LatexParserError} For `\left`, `\right`, `\begin`, `\end` (which
   *   are handled elsewhere) or a `\text` command without content.
   */
  parseCommandExpr(tokens, start) {
    assert(tokens[start].type === 1 /* COMMAND */);
    const command = tokens[start].value;
    const pos = start + 1;
    if (["left", "right", "begin", "end"].includes(command.slice(1))) {
      throw new LatexParserError("Unexpected command: " + command);
    }
    const paramNum = get_command_param_num(command.slice(1));
    switch (paramNum) {
      case 0:
        if (!symbolMap.has(command.slice(1))) {
          return [{ type: "unknownMacro", content: command }, pos];
        }
        return [{ type: "symbol", content: command }, pos];
      case 1: {
        if (command === "\\sqrt" && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
          const posLeftSquareBracket = pos;
          const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
          const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
          const exponent = this.parse(exprInside);
          const [radicand, afterRadicand] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
          return [{ type: "unaryFunc", content: command, args: [radicand], data: exponent }, afterRadicand];
        } else if (command === "\\text") {
          if (pos + 2 >= tokens.length) {
            throw new LatexParserError("Expecting content for \\text command");
          }
          // The body is expected as the three tokens "{", TEXT, "}".
          assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
          assert(tokens[pos + 1].type === 2 /* TEXT */);
          assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
          const text = tokens[pos + 1].value;
          return [{ type: "text", content: text }, pos + 3];
        }
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
        return [{ type: "unaryFunc", content: command, args: [arg1] }, newPos];
      }
      case 2: {
        const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
        const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
        return [{ type: "binaryFunc", content: command, args: [arg1, arg2] }, pos2];
      }
      default:
        throw new Error("Invalid number of parameters");
    }
  }

  /**
   * Parses a `\left<delim> ... \right<delim>` construct.
   *
   * @param {Array<{type: number, value: string}>} tokens - Token list.
   * @param {number} start - Index of the `\left` token.
   * @returns {[object, number]} A "leftright" node whose `args` are the left
   *   delimiter, the parsed body and the right delimiter, plus the index
   *   after the right delimiter.
   * @throws {LatexParserError} When a delimiter is missing or invalid, or no
   *   matching `\right` is found.
   */
  parseLeftRightExpr(tokens, start) {
    assert(token_eq(tokens[start], LEFT_COMMAND));
    let pos = start + 1;
    pos += eat_whitespaces(tokens, pos).length;
    if (pos >= tokens.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(tokens, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos++;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(tokens, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + 1;
    pos += eat_whitespaces(tokens, pos).length;
    if (pos >= tokens.length) {
      throw new LatexParserError("Expecting \\right after \\left");
    }
    const rightDelimiter = eat_parenthesis(tokens, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos++;
    const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
    const body = this.parse(exprInside);
    const args = [
      { type: "element", content: leftDelimiter.value },
      body,
      { type: "element", content: rightDelimiter.value }
    ];
    const res = { type: "leftright", content: "", args };
    return [res, pos];
  }

  /**
   * Parses a `\begin{env} ... \end{env}` environment.
   *
   * @param {Array<{type: number, value: string}>} tokens - Token list.
   * @param {number} start - Index of the `\begin` token.
   * @returns {[object, number]} A "beginend" node with the environment name
   *   in `content` and the rows from `parseAligned` in `data`, plus the index
   *   after the closing `}` of `\end{env}`.
   * @throws {LatexParserError} When no matching `\end` exists or the
   *   environment names differ.
   */
  parseBeginEndExpr(tokens, start) {
    assert(token_eq(tokens[start], BEGIN_COMMAND));
    let pos = start + 1;
    // The environment name is expected as the three tokens "{", TEXT, "}".
    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
    assert(tokens[pos + 1].type === 2 /* TEXT */);
    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
    const envName = tokens[pos + 1].value;
    pos += 3;
    pos += eat_whitespaces(tokens, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(tokens, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + 1;
    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
    assert(tokens[pos + 1].type === 2 /* TEXT */);
    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
    if (tokens[pos + 1].value !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    pos += 3;
    const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
    // Trailing blank tokens before \end would otherwise leak into the last cell.
    while (exprInside.length > 0 && [4 /* WHITESPACE */, 5 /* NEWLINE */].includes(exprInside[exprInside.length - 1].type)) {
      exprInside.pop();
    }
    const body = this.parseAligned(exprInside);
    const res = { type: "beginend", content: envName, data: body };
    return [res, pos];
  }

  /**
   * Parses the body of an environment into rows and cells.
   *
   * `\\` starts a new row and `&` starts a new cell in the current row. Each
   * cell is an "ordgroup" node collecting the expressions it contains.
   * Whitespace is always dropped; newlines are dropped only when the parser
   * is not newline-sensitive.
   *
   * @param {Array<{type: number, value: string}>} tokens - Body tokens.
   * @returns {Array<Array<object>>} Rows of "ordgroup" cells; there is always
   *   at least one row containing one cell.
   */
  parseAligned(tokens) {
    let pos = 0;
    const allRows = [];
    let row = [];
    allRows.push(row);
    let group = { type: "ordgroup", content: "", args: [] };
    row.push(group);
    while (pos < tokens.length) {
      const [res, newPos] = this.parseNextExpr(tokens, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        row = [];
        group = { type: "ordgroup", content: "", args: [] };
        row.push(group);
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = { type: "ordgroup", content: "", args: [] };
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
|
|
921
|
+
|
|
905
922
|
// src/writer.ts
|
|
923
|
+
/**
 * Converts a parsed LaTeX tree node into the corresponding Typst tree node.
 *
 * The conversion is recursive and returns fresh objects; the input tree is
 * not modified. Each unary function argument is converted exactly once, so
 * the cost stays linear in the size of the tree even for deeply nested
 * commands.
 *
 * @param {object} node - LaTeX node (`{ type, content, args?, data? }`).
 * @returns {object} The Typst node.
 * @throws {TypstWriterError} When `\operatorname` has a non-text body, on a
 *   control sequence other than `\\` and `\,`, or on a node type that has no
 *   conversion.
 */
function convertTree(node) {
  switch (node.type) {
    case "empty":
    case "whitespace":
      return { type: "empty", content: "" };
    case "ordgroup":
      return {
        type: "group",
        content: "",
        args: node.args.map((child) => convertTree(child))
      };
    case "element":
    case "symbol":
      return { type: "symbol", content: convertToken(node.content) };
    case "text":
      return { type: "text", content: node.content };
    case "comment":
      return { type: "comment", content: node.content };
    case "supsub": {
      const { base, sup, sub } = node.data;
      // A brace with a script becomes a two-argument call: (body, annotation).
      if (base && base.type === "unaryFunc" && base.content === "\\overbrace" && sup) {
        return {
          type: "binaryFunc",
          content: "overbrace",
          args: [convertTree(base.args[0]), convertTree(sup)]
        };
      } else if (base && base.type === "unaryFunc" && base.content === "\\underbrace" && sub) {
        return {
          type: "binaryFunc",
          content: "underbrace",
          args: [convertTree(base.args[0]), convertTree(sub)]
        };
      }
      const data = {
        base: convertTree(base)
      };
      if (data.base.type === "empty") {
        // A script needs something to attach to; an empty text node stands in.
        data.base = { type: "text", content: "" };
      }
      if (sup) {
        data.sup = convertTree(sup);
      }
      if (sub) {
        data.sub = convertTree(sub);
      }
      return {
        type: "supsub",
        content: "",
        data
      };
    }
    case "leftright": {
      const [left, , right] = node.args;
      const group = {
        type: "group",
        content: "",
        args: node.args.map((child) => convertTree(child))
      };
      // These delimiter pairs are emitted as a plain group; all others are wrapped in lr(...).
      if (["[]", "()", "\\{\\}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
        return group;
      }
      return {
        type: "unaryFunc",
        content: "lr",
        args: [group]
      };
    }
    case "binaryFunc": {
      return {
        type: "binaryFunc",
        content: convertToken(node.content),
        args: node.args.map((child) => convertTree(child))
      };
    }
    case "unaryFunc": {
      const arg0 = convertTree(node.args[0]);
      if (node.content === "\\sqrt" && node.data) {
        // \sqrt[n]{x} -> root(n, x)
        const data = convertTree(node.data);
        return {
          type: "binaryFunc",
          content: "root",
          args: [data, arg0]
        };
      }
      if (node.content === "\\mathbf") {
        // \mathbf{x} -> upright(bold(x))
        const inner = {
          type: "unaryFunc",
          content: "bold",
          args: [arg0]
        };
        return {
          type: "unaryFunc",
          content: "upright",
          args: [inner]
        };
      }
      if (node.content === "\\mathbb" && arg0.type === "symbol" && /^[A-Z]$/.test(arg0.content)) {
        // A single capital letter maps to the doubled-letter symbol, e.g. R -> RR.
        return {
          type: "symbol",
          content: arg0.content + arg0.content
        };
      }
      if (node.content === "\\operatorname") {
        const body = node.args;
        if (body.length !== 1 || body[0].type !== "text") {
          throw new TypstWriterError(`Expecting body of \\operatorname to be text but got`, node);
        }
        const text = body[0].content;
        if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
          return {
            type: "symbol",
            content: text
          };
        } else {
          return {
            type: "unaryFunc",
            content: "op",
            args: [{ type: "text", content: text }]
          };
        }
      }
      return {
        type: "unaryFunc",
        content: convertToken(node.content),
        // Reuse the already converted first argument. Converting it a second
        // time would double the work at every nesting level.
        args: [arg0, ...node.args.slice(1).map((child) => convertTree(child))]
      };
    }
    case "newline":
      return { type: "newline", content: "\n" };
    case "beginend": {
      const matrix = node.data;
      const data = matrix.map((row) => row.map((cell) => convertTree(cell)));
      if (node.content.startsWith("align")) {
        return {
          type: "align",
          content: "",
          data
        };
      } else {
        return {
          type: "matrix",
          content: "mat",
          data
        };
      }
    }
    case "unknownMacro":
      return { type: "unknown", content: convertToken(node.content) };
    case "control":
      if (node.content === "\\\\") {
        return { type: "symbol", content: "\\" };
      } else if (node.content === "\\,") {
        return { type: "symbol", content: "thin" };
      } else {
        throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
      }
    default:
      throw new TypstWriterError(`Unimplemented node type: ${node.type}`, node);
  }
}
|
|
906
1083
|
function convertToken(token) {
|
|
907
1084
|
if (/^[a-zA-Z0-9]$/.test(token)) {
|
|
908
1085
|
return token;
|
|
@@ -975,142 +1152,74 @@ class TypstWriter {
|
|
|
975
1152
|
this.buffer += str;
|
|
976
1153
|
}
|
|
977
1154
|
append(node) {
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
} else if (node.type === "symbol") {
|
|
989
|
-
this.queue.push({ type: "symbol", content: node.content });
|
|
990
|
-
} else if (node.type === "text") {
|
|
991
|
-
this.queue.push(node);
|
|
992
|
-
} else if (node.type === "supsub") {
|
|
993
|
-
let { base, sup, sub } = node.data;
|
|
994
|
-
if (base && base.type === "unaryFunc" && base.content === "\\overbrace" && sup) {
|
|
995
|
-
this.append({ type: "binaryFunc", content: "\\overbrace", args: [base.args[0], sup] });
|
|
996
|
-
return;
|
|
997
|
-
} else if (base && base.type === "unaryFunc" && base.content === "\\underbrace" && sub) {
|
|
998
|
-
this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
|
|
999
|
-
return;
|
|
1155
|
+
switch (node.type) {
|
|
1156
|
+
case "empty":
|
|
1157
|
+
break;
|
|
1158
|
+
case "symbol": {
|
|
1159
|
+
let content = node.content;
|
|
1160
|
+
if (node.content === "," && this.insideFunctionDepth > 0) {
|
|
1161
|
+
content = "comma";
|
|
1162
|
+
}
|
|
1163
|
+
this.queue.push({ type: "symbol", content });
|
|
1164
|
+
break;
|
|
1000
1165
|
}
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1166
|
+
case "text":
|
|
1167
|
+
case "comment":
|
|
1168
|
+
case "newline":
|
|
1169
|
+
this.queue.push(node);
|
|
1170
|
+
break;
|
|
1171
|
+
case "group":
|
|
1172
|
+
for (const item of node.args) {
|
|
1173
|
+
this.append(item);
|
|
1174
|
+
}
|
|
1175
|
+
break;
|
|
1176
|
+
case "supsub": {
|
|
1177
|
+
let { base, sup, sub } = node.data;
|
|
1004
1178
|
this.appendWithBracketsIfNeeded(base);
|
|
1179
|
+
let trailing_space_needed = false;
|
|
1180
|
+
const has_prime = sup && sup.type === "symbol" && sup.content === "\'";
|
|
1181
|
+
if (has_prime) {
|
|
1182
|
+
this.queue.push({ type: "atom", content: "\'" });
|
|
1183
|
+
trailing_space_needed = false;
|
|
1184
|
+
}
|
|
1185
|
+
if (sub) {
|
|
1186
|
+
this.queue.push({ type: "atom", content: "_" });
|
|
1187
|
+
trailing_space_needed = this.appendWithBracketsIfNeeded(sub);
|
|
1188
|
+
}
|
|
1189
|
+
if (sup && !has_prime) {
|
|
1190
|
+
this.queue.push({ type: "atom", content: "^" });
|
|
1191
|
+
trailing_space_needed = this.appendWithBracketsIfNeeded(sup);
|
|
1192
|
+
}
|
|
1193
|
+
if (trailing_space_needed) {
|
|
1194
|
+
this.queue.push({ type: "softSpace", content: "" });
|
|
1195
|
+
}
|
|
1196
|
+
break;
|
|
1005
1197
|
}
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
this.queue.push({ type: "atom", content: "\'" });
|
|
1010
|
-
trailing_space_needed = false;
|
|
1011
|
-
}
|
|
1012
|
-
if (sub) {
|
|
1013
|
-
this.queue.push({ type: "atom", content: "_" });
|
|
1014
|
-
trailing_space_needed = this.appendWithBracketsIfNeeded(sub);
|
|
1015
|
-
}
|
|
1016
|
-
if (sup && !has_prime) {
|
|
1017
|
-
this.queue.push({ type: "atom", content: "^" });
|
|
1018
|
-
trailing_space_needed = this.appendWithBracketsIfNeeded(sup);
|
|
1019
|
-
}
|
|
1020
|
-
if (trailing_space_needed) {
|
|
1021
|
-
this.queue.push({ type: "softSpace", content: "" });
|
|
1022
|
-
}
|
|
1023
|
-
} else if (node.type === "leftright") {
|
|
1024
|
-
const [left, body, right] = node.args;
|
|
1025
|
-
if (["[]", "()", "\\{\\}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
|
|
1026
|
-
this.append(left);
|
|
1027
|
-
this.append(body);
|
|
1028
|
-
this.append(right);
|
|
1029
|
-
return;
|
|
1030
|
-
}
|
|
1031
|
-
const func_symbol = { type: "symbol", content: "lr" };
|
|
1032
|
-
this.queue.push(func_symbol);
|
|
1033
|
-
this.insideFunctionDepth++;
|
|
1034
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1035
|
-
this.append(left);
|
|
1036
|
-
this.append(body);
|
|
1037
|
-
this.append(right);
|
|
1038
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1039
|
-
this.insideFunctionDepth--;
|
|
1040
|
-
} else if (node.type === "binaryFunc") {
|
|
1041
|
-
const func_symbol = { type: "symbol", content: node.content };
|
|
1042
|
-
const [arg0, arg1] = node.args;
|
|
1043
|
-
this.queue.push(func_symbol);
|
|
1044
|
-
this.insideFunctionDepth++;
|
|
1045
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1046
|
-
this.append(arg0);
|
|
1047
|
-
this.queue.push({ type: "atom", content: "," });
|
|
1048
|
-
this.append(arg1);
|
|
1049
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1050
|
-
this.insideFunctionDepth--;
|
|
1051
|
-
} else if (node.type === "unaryFunc") {
|
|
1052
|
-
const func_symbol = { type: "symbol", content: node.content };
|
|
1053
|
-
const arg0 = node.args[0];
|
|
1054
|
-
if (node.content === "\\sqrt" && node.data) {
|
|
1055
|
-
func_symbol.content = "root";
|
|
1198
|
+
case "binaryFunc": {
|
|
1199
|
+
const func_symbol = { type: "symbol", content: node.content };
|
|
1200
|
+
const [arg0, arg1] = node.args;
|
|
1056
1201
|
this.queue.push(func_symbol);
|
|
1057
1202
|
this.insideFunctionDepth++;
|
|
1058
1203
|
this.queue.push({ type: "atom", content: "(" });
|
|
1059
|
-
this.append(node.data);
|
|
1060
|
-
this.queue.push({ type: "atom", content: "," });
|
|
1061
1204
|
this.append(arg0);
|
|
1205
|
+
this.queue.push({ type: "atom", content: "," });
|
|
1206
|
+
this.append(arg1);
|
|
1062
1207
|
this.queue.push({ type: "atom", content: ")" });
|
|
1063
1208
|
this.insideFunctionDepth--;
|
|
1064
|
-
|
|
1065
|
-
}
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1209
|
+
break;
|
|
1210
|
+
}
|
|
1211
|
+
case "unaryFunc": {
|
|
1212
|
+
const func_symbol = { type: "symbol", content: node.content };
|
|
1213
|
+
const arg0 = node.args[0];
|
|
1069
1214
|
this.queue.push(func_symbol);
|
|
1070
1215
|
this.insideFunctionDepth++;
|
|
1071
1216
|
this.queue.push({ type: "atom", content: "(" });
|
|
1072
1217
|
this.append(arg0);
|
|
1073
1218
|
this.queue.push({ type: "atom", content: ")" });
|
|
1074
1219
|
this.insideFunctionDepth--;
|
|
1075
|
-
|
|
1076
|
-
this.insideFunctionDepth--;
|
|
1077
|
-
return;
|
|
1078
|
-
} else if (node.content === "\\mathbb") {
|
|
1079
|
-
const body = node.args[0];
|
|
1080
|
-
if (body.type === "element" && /^[A-Z]$/.test(body.content)) {
|
|
1081
|
-
this.queue.push({ type: "symbol", content: body.content + body.content });
|
|
1082
|
-
return;
|
|
1083
|
-
}
|
|
1084
|
-
} else if (node.content === "\\operatorname") {
|
|
1085
|
-
let body = node.args;
|
|
1086
|
-
if (body.length === 1 && body[0].type == "ordgroup") {
|
|
1087
|
-
body = body[0].args;
|
|
1088
|
-
}
|
|
1089
|
-
const text = body.reduce((buff, n) => {
|
|
1090
|
-
buff += convertToken(n.content);
|
|
1091
|
-
return buff;
|
|
1092
|
-
}, "");
|
|
1093
|
-
if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
|
|
1094
|
-
this.queue.push({ type: "symbol", content: text });
|
|
1095
|
-
} else {
|
|
1096
|
-
this.queue.push({ type: "symbol", content: "op" });
|
|
1097
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1098
|
-
this.queue.push({ type: "text", content: text });
|
|
1099
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1100
|
-
}
|
|
1101
|
-
return;
|
|
1220
|
+
break;
|
|
1102
1221
|
}
|
|
1103
|
-
|
|
1104
|
-
this.insideFunctionDepth++;
|
|
1105
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1106
|
-
this.append(arg0);
|
|
1107
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1108
|
-
this.insideFunctionDepth--;
|
|
1109
|
-
} else if (node.type === "newline") {
|
|
1110
|
-
this.queue.push({ type: "newline", content: "\n" });
|
|
1111
|
-
return;
|
|
1112
|
-
} else if (node.type === "beginend") {
|
|
1113
|
-
if (node.content.startsWith("align")) {
|
|
1222
|
+
case "align": {
|
|
1114
1223
|
const matrix = node.data;
|
|
1115
1224
|
matrix.forEach((row, i) => {
|
|
1116
1225
|
row.forEach((cell, j) => {
|
|
@@ -1120,10 +1229,12 @@ class TypstWriter {
|
|
|
1120
1229
|
this.append(cell);
|
|
1121
1230
|
});
|
|
1122
1231
|
if (i < matrix.length - 1) {
|
|
1123
|
-
this.queue.push({ type: "symbol", content: "
|
|
1232
|
+
this.queue.push({ type: "symbol", content: "\\" });
|
|
1124
1233
|
}
|
|
1125
1234
|
});
|
|
1126
|
-
|
|
1235
|
+
break;
|
|
1236
|
+
}
|
|
1237
|
+
case "matrix": {
|
|
1127
1238
|
const matrix = node.data;
|
|
1128
1239
|
this.queue.push({ type: "symbol", content: "mat" });
|
|
1129
1240
|
this.insideFunctionDepth++;
|
|
@@ -1131,10 +1242,6 @@ class TypstWriter {
|
|
|
1131
1242
|
this.queue.push({ type: "symbol", content: "delim: #none, " });
|
|
1132
1243
|
matrix.forEach((row, i) => {
|
|
1133
1244
|
row.forEach((cell, j) => {
|
|
1134
|
-
if (cell.type === "ordgroup" && cell.args.length === 0) {
|
|
1135
|
-
this.queue.push({ type: "atom", content: "," });
|
|
1136
|
-
return;
|
|
1137
|
-
}
|
|
1138
1245
|
this.append(cell);
|
|
1139
1246
|
if (j < row.length - 1) {
|
|
1140
1247
|
this.queue.push({ type: "atom", content: "," });
|
|
@@ -1147,37 +1254,44 @@ class TypstWriter {
|
|
|
1147
1254
|
});
|
|
1148
1255
|
this.queue.push({ type: "atom", content: ")" });
|
|
1149
1256
|
this.insideFunctionDepth--;
|
|
1257
|
+
break;
|
|
1150
1258
|
}
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
} else if (node.type === "control") {
|
|
1159
|
-
if (node.content === "\\\\") {
|
|
1160
|
-
this.queue.push({ type: "symbol", content: node.content });
|
|
1161
|
-
} else if (node.content === "\\,") {
|
|
1162
|
-
this.queue.push({ type: "symbol", content: "thin" });
|
|
1163
|
-
} else {
|
|
1164
|
-
throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
|
|
1259
|
+
case "unknown": {
|
|
1260
|
+
if (this.nonStrict) {
|
|
1261
|
+
this.queue.push({ type: "symbol", content: node.content });
|
|
1262
|
+
} else {
|
|
1263
|
+
throw new TypstWriterError(`Unknown macro: ${node.content}`, node);
|
|
1264
|
+
}
|
|
1265
|
+
break;
|
|
1165
1266
|
}
|
|
1166
|
-
|
|
1167
|
-
|
|
1267
|
+
default:
|
|
1268
|
+
throw new TypstWriterError(`Unimplemented node type to append: ${node.type}`, node);
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
appendWithBracketsIfNeeded(node) {
|
|
1272
|
+
const is_single = !["group", "supsub", "empty"].includes(node.type);
|
|
1273
|
+
if (is_single) {
|
|
1274
|
+
this.append(node);
|
|
1168
1275
|
} else {
|
|
1169
|
-
|
|
1276
|
+
this.queue.push({
|
|
1277
|
+
type: "atom",
|
|
1278
|
+
content: "("
|
|
1279
|
+
});
|
|
1280
|
+
this.append(node);
|
|
1281
|
+
this.queue.push({
|
|
1282
|
+
type: "atom",
|
|
1283
|
+
content: ")"
|
|
1284
|
+
});
|
|
1170
1285
|
}
|
|
1286
|
+
return is_single;
|
|
1171
1287
|
}
|
|
1172
1288
|
flushQueue() {
|
|
1173
1289
|
this.queue.forEach((node) => {
|
|
1174
1290
|
let str = "";
|
|
1175
1291
|
switch (node.type) {
|
|
1176
1292
|
case "atom":
|
|
1177
|
-
str = node.content;
|
|
1178
|
-
break;
|
|
1179
1293
|
case "symbol":
|
|
1180
|
-
str =
|
|
1294
|
+
str = node.content;
|
|
1181
1295
|
break;
|
|
1182
1296
|
case "text":
|
|
1183
1297
|
str = `"${node.content}"`;
|
|
@@ -1201,23 +1315,6 @@ class TypstWriter {
|
|
|
1201
1315
|
});
|
|
1202
1316
|
this.queue = [];
|
|
1203
1317
|
}
|
|
1204
|
-
appendWithBracketsIfNeeded(node) {
|
|
1205
|
-
const is_single = ["symbol", "element", "unaryFunc", "binaryFunc", "leftright"].includes(node.type);
|
|
1206
|
-
if (is_single) {
|
|
1207
|
-
this.append(node);
|
|
1208
|
-
} else {
|
|
1209
|
-
this.queue.push({
|
|
1210
|
-
type: "atom",
|
|
1211
|
-
content: "("
|
|
1212
|
-
});
|
|
1213
|
-
this.append(node);
|
|
1214
|
-
this.queue.push({
|
|
1215
|
-
type: "atom",
|
|
1216
|
-
content: ")"
|
|
1217
|
-
});
|
|
1218
|
-
}
|
|
1219
|
-
return is_single;
|
|
1220
|
-
}
|
|
1221
1318
|
finalize() {
|
|
1222
1319
|
this.flushQueue();
|
|
1223
1320
|
const smartFloorPass = function(input) {
|
|
@@ -1239,7 +1336,7 @@ class TypstWriter {
|
|
|
1239
1336
|
// src/index.ts
|
|
1240
1337
|
function tex2typst(tex, options) {
|
|
1241
1338
|
const opt = {
|
|
1242
|
-
nonStrict:
|
|
1339
|
+
nonStrict: true,
|
|
1243
1340
|
preferTypstIntrinsic: true,
|
|
1244
1341
|
customTexMacros: {}
|
|
1245
1342
|
};
|
|
@@ -1254,9 +1351,10 @@ function tex2typst(tex, options) {
|
|
|
1254
1351
|
opt.customTexMacros = options.customTexMacros;
|
|
1255
1352
|
}
|
|
1256
1353
|
}
|
|
1257
|
-
const
|
|
1354
|
+
const texTree = parseTex(tex, opt.customTexMacros);
|
|
1355
|
+
const typstTree = convertTree(texTree);
|
|
1258
1356
|
const writer2 = new TypstWriter(opt.nonStrict, opt.preferTypstIntrinsic);
|
|
1259
|
-
writer2.append(
|
|
1357
|
+
writer2.append(typstTree);
|
|
1260
1358
|
return writer2.finalize();
|
|
1261
1359
|
}
|
|
1262
1360
|
export {
|