tex2typst 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +874 -777
- package/dist/parser.d.ts +1 -5
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +23 -2
- package/dist/writer.d.ts +4 -3
- package/package.json +2 -2
- package/src/index.ts +5 -4
- package/src/parser.ts +91 -84
- package/src/types.ts +30 -2
- package/src/writer.ts +285 -189
package/dist/index.js
CHANGED
|
@@ -1,614 +1,3 @@
|
|
|
1
|
-
// src/parser.ts
|
|
2
|
-
function assert(condition, message = "") {
|
|
3
|
-
if (!condition) {
|
|
4
|
-
throw new LatexParserError(message);
|
|
5
|
-
}
|
|
6
|
-
}
|
|
7
|
-
function get_command_param_num(command) {
|
|
8
|
-
if (UNARY_COMMANDS.includes(command)) {
|
|
9
|
-
return 1;
|
|
10
|
-
} else if (BINARY_COMMANDS.includes(command)) {
|
|
11
|
-
return 2;
|
|
12
|
-
} else {
|
|
13
|
-
return 0;
|
|
14
|
-
}
|
|
15
|
-
}
|
|
16
|
-
function find_closing_curly_bracket(tokens, start) {
|
|
17
|
-
assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
|
|
18
|
-
let count = 1;
|
|
19
|
-
let pos = start + 1;
|
|
20
|
-
while (count > 0) {
|
|
21
|
-
if (pos >= tokens.length) {
|
|
22
|
-
throw new LatexParserError("Unmatched curly brackets");
|
|
23
|
-
}
|
|
24
|
-
if (token_eq(tokens[pos], LEFT_CURLY_BRACKET)) {
|
|
25
|
-
count += 1;
|
|
26
|
-
} else if (token_eq(tokens[pos], RIGHT_CURLY_BRACKET)) {
|
|
27
|
-
count -= 1;
|
|
28
|
-
}
|
|
29
|
-
pos += 1;
|
|
30
|
-
}
|
|
31
|
-
return pos - 1;
|
|
32
|
-
}
|
|
33
|
-
function find_closing_square_bracket(tokens, start) {
|
|
34
|
-
assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
|
|
35
|
-
let count = 1;
|
|
36
|
-
let pos = start + 1;
|
|
37
|
-
while (count > 0) {
|
|
38
|
-
if (pos >= tokens.length) {
|
|
39
|
-
throw new LatexParserError("Unmatched square brackets");
|
|
40
|
-
}
|
|
41
|
-
if (token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
|
|
42
|
-
count += 1;
|
|
43
|
-
} else if (token_eq(tokens[pos], RIGHT_SQUARE_BRACKET)) {
|
|
44
|
-
count -= 1;
|
|
45
|
-
}
|
|
46
|
-
pos += 1;
|
|
47
|
-
}
|
|
48
|
-
return pos - 1;
|
|
49
|
-
}
|
|
50
|
-
function isalpha(char) {
|
|
51
|
-
return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".includes(char);
|
|
52
|
-
}
|
|
53
|
-
function isdigit(char) {
|
|
54
|
-
return "0123456789".includes(char);
|
|
55
|
-
}
|
|
56
|
-
function eat_whitespaces(tokens, start) {
|
|
57
|
-
let pos = start;
|
|
58
|
-
while (pos < tokens.length && ["whitespace", "newline"].includes(tokens[pos].type)) {
|
|
59
|
-
pos++;
|
|
60
|
-
}
|
|
61
|
-
return tokens.slice(start, pos);
|
|
62
|
-
}
|
|
63
|
-
function eat_parenthesis(tokens, start) {
|
|
64
|
-
const firstToken = tokens[start];
|
|
65
|
-
if (firstToken.type === "element" && ["(", ")", "[", "]", "|", "\\{", "\\}"].includes(firstToken.value)) {
|
|
66
|
-
return firstToken;
|
|
67
|
-
} else if (firstToken.type === "command" && ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle"].includes(firstToken.value.slice(1))) {
|
|
68
|
-
return firstToken;
|
|
69
|
-
} else {
|
|
70
|
-
return null;
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
function eat_primes(tokens, start) {
|
|
74
|
-
let pos = start;
|
|
75
|
-
while (pos < tokens.length && token_eq(tokens[pos], { type: "element", value: "'" })) {
|
|
76
|
-
pos += 1;
|
|
77
|
-
}
|
|
78
|
-
return pos - start;
|
|
79
|
-
}
|
|
80
|
-
function eat_command_name(latex, start) {
|
|
81
|
-
let pos = start;
|
|
82
|
-
while (pos < latex.length && isalpha(latex[pos])) {
|
|
83
|
-
pos += 1;
|
|
84
|
-
}
|
|
85
|
-
return latex.substring(start, pos);
|
|
86
|
-
}
|
|
87
|
-
function find_closing_right_command(tokens, start) {
|
|
88
|
-
let count = 1;
|
|
89
|
-
let pos = start;
|
|
90
|
-
while (count > 0) {
|
|
91
|
-
if (pos >= tokens.length) {
|
|
92
|
-
return -1;
|
|
93
|
-
}
|
|
94
|
-
if (token_eq(tokens[pos], LEFT_COMMAND)) {
|
|
95
|
-
count += 1;
|
|
96
|
-
} else if (token_eq(tokens[pos], RIGHT_COMMAND)) {
|
|
97
|
-
count -= 1;
|
|
98
|
-
}
|
|
99
|
-
pos += 1;
|
|
100
|
-
}
|
|
101
|
-
return pos - 1;
|
|
102
|
-
}
|
|
103
|
-
function find_closing_end_command(tokens, start) {
|
|
104
|
-
let count = 1;
|
|
105
|
-
let pos = start;
|
|
106
|
-
while (count > 0) {
|
|
107
|
-
if (pos >= tokens.length) {
|
|
108
|
-
return -1;
|
|
109
|
-
}
|
|
110
|
-
if (token_eq(tokens[pos], BEGIN_COMMAND)) {
|
|
111
|
-
count += 1;
|
|
112
|
-
} else if (token_eq(tokens[pos], END_COMMAND)) {
|
|
113
|
-
count -= 1;
|
|
114
|
-
}
|
|
115
|
-
pos += 1;
|
|
116
|
-
}
|
|
117
|
-
return pos - 1;
|
|
118
|
-
}
|
|
119
|
-
function find_closing_curly_bracket_char(latex, start) {
|
|
120
|
-
assert(latex[start] === "{");
|
|
121
|
-
let count = 1;
|
|
122
|
-
let pos = start + 1;
|
|
123
|
-
while (count > 0) {
|
|
124
|
-
if (pos >= latex.length) {
|
|
125
|
-
throw new LatexParserError("Unmatched curly brackets");
|
|
126
|
-
}
|
|
127
|
-
if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
|
|
128
|
-
pos += 2;
|
|
129
|
-
continue;
|
|
130
|
-
}
|
|
131
|
-
if (latex[pos] === "{") {
|
|
132
|
-
count += 1;
|
|
133
|
-
} else if (latex[pos] === "}") {
|
|
134
|
-
count -= 1;
|
|
135
|
-
}
|
|
136
|
-
pos += 1;
|
|
137
|
-
}
|
|
138
|
-
return pos - 1;
|
|
139
|
-
}
|
|
140
|
-
function tokenize(latex) {
|
|
141
|
-
const tokens = [];
|
|
142
|
-
let pos = 0;
|
|
143
|
-
while (pos < latex.length) {
|
|
144
|
-
const firstChar = latex[pos];
|
|
145
|
-
let token;
|
|
146
|
-
switch (firstChar) {
|
|
147
|
-
case "%": {
|
|
148
|
-
let newPos = pos + 1;
|
|
149
|
-
while (newPos < latex.length && latex[newPos] !== "\n") {
|
|
150
|
-
newPos += 1;
|
|
151
|
-
}
|
|
152
|
-
token = { type: "comment", value: latex.slice(pos + 1, newPos) };
|
|
153
|
-
pos = newPos;
|
|
154
|
-
break;
|
|
155
|
-
}
|
|
156
|
-
case "{":
|
|
157
|
-
case "}":
|
|
158
|
-
case "_":
|
|
159
|
-
case "^":
|
|
160
|
-
case "&":
|
|
161
|
-
token = { type: "control", value: firstChar };
|
|
162
|
-
pos++;
|
|
163
|
-
break;
|
|
164
|
-
case "\n":
|
|
165
|
-
token = { type: "newline", value: firstChar };
|
|
166
|
-
pos++;
|
|
167
|
-
break;
|
|
168
|
-
case "\r": {
|
|
169
|
-
if (pos + 1 < latex.length && latex[pos + 1] === "\n") {
|
|
170
|
-
token = { type: "newline", value: "\n" };
|
|
171
|
-
pos += 2;
|
|
172
|
-
} else {
|
|
173
|
-
token = { type: "newline", value: "\n" };
|
|
174
|
-
pos++;
|
|
175
|
-
}
|
|
176
|
-
break;
|
|
177
|
-
}
|
|
178
|
-
case " ": {
|
|
179
|
-
let newPos = pos;
|
|
180
|
-
while (newPos < latex.length && latex[newPos] === " ") {
|
|
181
|
-
newPos += 1;
|
|
182
|
-
}
|
|
183
|
-
token = { type: "whitespace", value: latex.slice(pos, newPos) };
|
|
184
|
-
pos = newPos;
|
|
185
|
-
break;
|
|
186
|
-
}
|
|
187
|
-
case "\\": {
|
|
188
|
-
if (pos + 1 >= latex.length) {
|
|
189
|
-
throw new LatexParserError("Expecting command name after \\");
|
|
190
|
-
}
|
|
191
|
-
const firstTwoChars = latex.slice(pos, pos + 2);
|
|
192
|
-
if (["\\\\", "\\,"].includes(firstTwoChars)) {
|
|
193
|
-
token = { type: "control", value: firstTwoChars };
|
|
194
|
-
} else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
|
|
195
|
-
token = { type: "element", value: firstTwoChars };
|
|
196
|
-
} else {
|
|
197
|
-
const command = eat_command_name(latex, pos + 1);
|
|
198
|
-
token = { type: "command", value: "\\" + command };
|
|
199
|
-
}
|
|
200
|
-
pos += token.value.length;
|
|
201
|
-
break;
|
|
202
|
-
}
|
|
203
|
-
default: {
|
|
204
|
-
if (isdigit(firstChar)) {
|
|
205
|
-
let newPos = pos;
|
|
206
|
-
while (newPos < latex.length && isdigit(latex[newPos])) {
|
|
207
|
-
newPos += 1;
|
|
208
|
-
}
|
|
209
|
-
token = { type: "element", value: latex.slice(pos, newPos) };
|
|
210
|
-
} else if (isalpha(firstChar)) {
|
|
211
|
-
token = { type: "element", value: firstChar };
|
|
212
|
-
} else if ("+-*/=\'<>!.,;?()[]|".includes(firstChar)) {
|
|
213
|
-
token = { type: "element", value: firstChar };
|
|
214
|
-
} else {
|
|
215
|
-
token = { type: "unknown", value: firstChar };
|
|
216
|
-
}
|
|
217
|
-
pos += token.value.length;
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
tokens.push(token);
|
|
221
|
-
if (token.type === "command" && ["\\text", "\\begin", "\\end"].includes(token.value)) {
|
|
222
|
-
if (pos >= latex.length || latex[pos] !== "{") {
|
|
223
|
-
throw new LatexParserError(`No content for ${token.value} command`);
|
|
224
|
-
}
|
|
225
|
-
tokens.push({ type: "control", value: "{" });
|
|
226
|
-
const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
|
|
227
|
-
pos++;
|
|
228
|
-
let textInside = latex.slice(pos, posClosingBracket);
|
|
229
|
-
const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
|
|
230
|
-
for (const char of chars) {
|
|
231
|
-
textInside = textInside.replaceAll("\\" + char, char);
|
|
232
|
-
}
|
|
233
|
-
tokens.push({ type: "text", value: textInside });
|
|
234
|
-
tokens.push({ type: "control", value: "}" });
|
|
235
|
-
pos = posClosingBracket + 1;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
return tokens;
|
|
239
|
-
}
|
|
240
|
-
function token_eq(token1, token2) {
|
|
241
|
-
return token1.type == token2.type && token1.value == token2.value;
|
|
242
|
-
}
|
|
243
|
-
function passIgnoreWhitespaceBeforeScriptMark(tokens) {
|
|
244
|
-
const is_script_mark = (token) => token_eq(token, SUB_SYMBOL) || token_eq(token, SUP_SYMBOL);
|
|
245
|
-
let out_tokens = [];
|
|
246
|
-
for (let i = 0;i < tokens.length; i++) {
|
|
247
|
-
if (tokens[i].type === "whitespace" && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) {
|
|
248
|
-
continue;
|
|
249
|
-
}
|
|
250
|
-
if (tokens[i].type === "whitespace" && i - 1 >= 0 && is_script_mark(tokens[i - 1])) {
|
|
251
|
-
continue;
|
|
252
|
-
}
|
|
253
|
-
out_tokens.push(tokens[i]);
|
|
254
|
-
}
|
|
255
|
-
return out_tokens;
|
|
256
|
-
}
|
|
257
|
-
function passExpandCustomTexMacros(tokens, customTexMacros) {
|
|
258
|
-
let out_tokens = [];
|
|
259
|
-
for (const token of tokens) {
|
|
260
|
-
if (token.type === "command" && customTexMacros[token.value]) {
|
|
261
|
-
const expanded_tokens = tokenize(customTexMacros[token.value]);
|
|
262
|
-
out_tokens = out_tokens.concat(expanded_tokens);
|
|
263
|
-
} else {
|
|
264
|
-
out_tokens.push(token);
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
return out_tokens;
|
|
268
|
-
}
|
|
269
|
-
function parseTex(tex, customTexMacros) {
|
|
270
|
-
const parser = new LatexParser;
|
|
271
|
-
let tokens = tokenize(tex);
|
|
272
|
-
tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
|
|
273
|
-
tokens = passExpandCustomTexMacros(tokens, customTexMacros);
|
|
274
|
-
return parser.parse(tokens);
|
|
275
|
-
}
|
|
276
|
-
var UNARY_COMMANDS = [
|
|
277
|
-
"sqrt",
|
|
278
|
-
"text",
|
|
279
|
-
"bar",
|
|
280
|
-
"bold",
|
|
281
|
-
"boldsymbol",
|
|
282
|
-
"ddot",
|
|
283
|
-
"dot",
|
|
284
|
-
"hat",
|
|
285
|
-
"mathbb",
|
|
286
|
-
"mathbf",
|
|
287
|
-
"mathcal",
|
|
288
|
-
"mathfrak",
|
|
289
|
-
"mathit",
|
|
290
|
-
"mathrm",
|
|
291
|
-
"mathscr",
|
|
292
|
-
"mathsf",
|
|
293
|
-
"mathtt",
|
|
294
|
-
"operatorname",
|
|
295
|
-
"overbrace",
|
|
296
|
-
"overline",
|
|
297
|
-
"pmb",
|
|
298
|
-
"rm",
|
|
299
|
-
"tilde",
|
|
300
|
-
"underbrace",
|
|
301
|
-
"underline",
|
|
302
|
-
"vec",
|
|
303
|
-
"widehat",
|
|
304
|
-
"widetilde"
|
|
305
|
-
];
|
|
306
|
-
var BINARY_COMMANDS = [
|
|
307
|
-
"frac",
|
|
308
|
-
"tfrac",
|
|
309
|
-
"binom",
|
|
310
|
-
"dbinom",
|
|
311
|
-
"dfrac",
|
|
312
|
-
"tbinom"
|
|
313
|
-
];
|
|
314
|
-
var EMPTY_NODE = { type: "empty", content: "" };
|
|
315
|
-
var LEFT_CURLY_BRACKET = { type: "control", value: "{" };
|
|
316
|
-
var RIGHT_CURLY_BRACKET = { type: "control", value: "}" };
|
|
317
|
-
var LEFT_SQUARE_BRACKET = { type: "element", value: "[" };
|
|
318
|
-
var RIGHT_SQUARE_BRACKET = { type: "element", value: "]" };
|
|
319
|
-
var LEFT_COMMAND = { type: "command", value: "\\left" };
|
|
320
|
-
var RIGHT_COMMAND = { type: "command", value: "\\right" };
|
|
321
|
-
var BEGIN_COMMAND = { type: "command", value: "\\begin" };
|
|
322
|
-
var END_COMMAND = { type: "command", value: "\\end" };
|
|
323
|
-
|
|
324
|
-
class LatexParserError extends Error {
|
|
325
|
-
constructor(message) {
|
|
326
|
-
super(message);
|
|
327
|
-
this.name = "LatexParserError";
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
var SUB_SYMBOL = { type: "control", value: "_" };
|
|
331
|
-
var SUP_SYMBOL = { type: "control", value: "^" };
|
|
332
|
-
|
|
333
|
-
class LatexParser {
|
|
334
|
-
space_sensitive;
|
|
335
|
-
newline_sensitive;
|
|
336
|
-
constructor(space_sensitive = false, newline_sensitive = true) {
|
|
337
|
-
this.space_sensitive = space_sensitive;
|
|
338
|
-
this.newline_sensitive = newline_sensitive;
|
|
339
|
-
}
|
|
340
|
-
parse(tokens) {
|
|
341
|
-
const results = [];
|
|
342
|
-
let pos = 0;
|
|
343
|
-
while (pos < tokens.length) {
|
|
344
|
-
const results2 = [];
|
|
345
|
-
let pos2 = 0;
|
|
346
|
-
while (pos2 < tokens.length) {
|
|
347
|
-
const [res, newPos] = this.parseNextExpr(tokens, pos2);
|
|
348
|
-
pos2 = newPos;
|
|
349
|
-
if (!this.space_sensitive && res.type === "whitespace") {
|
|
350
|
-
continue;
|
|
351
|
-
}
|
|
352
|
-
if (!this.newline_sensitive && res.type === "newline") {
|
|
353
|
-
continue;
|
|
354
|
-
}
|
|
355
|
-
if (res.type === "control" && res.content === "&") {
|
|
356
|
-
throw new LatexParserError("Unexpected & outside of an alignment");
|
|
357
|
-
}
|
|
358
|
-
results2.push(res);
|
|
359
|
-
}
|
|
360
|
-
if (results2.length === 0) {
|
|
361
|
-
return EMPTY_NODE;
|
|
362
|
-
} else if (results2.length === 1) {
|
|
363
|
-
return results2[0];
|
|
364
|
-
} else {
|
|
365
|
-
return { type: "ordgroup", content: "", args: results2 };
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
if (results.length === 0) {
|
|
369
|
-
return EMPTY_NODE;
|
|
370
|
-
} else if (results.length === 1) {
|
|
371
|
-
return results[0];
|
|
372
|
-
} else {
|
|
373
|
-
return { type: "ordgroup", content: "", args: results };
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
|
-
parseNextExpr(tokens, start) {
|
|
377
|
-
let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
|
|
378
|
-
let sub = null;
|
|
379
|
-
let sup = null;
|
|
380
|
-
let num_prime = 0;
|
|
381
|
-
num_prime += eat_primes(tokens, pos);
|
|
382
|
-
pos += num_prime;
|
|
383
|
-
if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
|
|
384
|
-
[sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
385
|
-
num_prime += eat_primes(tokens, pos);
|
|
386
|
-
pos += num_prime;
|
|
387
|
-
if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
|
|
388
|
-
[sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
389
|
-
if (eat_primes(tokens, pos) > 0) {
|
|
390
|
-
throw new LatexParserError("Double superscript");
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
} else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
|
|
394
|
-
[sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
395
|
-
if (eat_primes(tokens, pos) > 0) {
|
|
396
|
-
throw new LatexParserError("Double superscript");
|
|
397
|
-
}
|
|
398
|
-
if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
|
|
399
|
-
[sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
400
|
-
if (eat_primes(tokens, pos) > 0) {
|
|
401
|
-
throw new LatexParserError("Double superscript");
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
}
|
|
405
|
-
if (sub !== null || sup !== null || num_prime > 0) {
|
|
406
|
-
const res = { base };
|
|
407
|
-
if (sub) {
|
|
408
|
-
res.sub = sub;
|
|
409
|
-
}
|
|
410
|
-
if (num_prime > 0) {
|
|
411
|
-
res.sup = { type: "ordgroup", content: "", args: [] };
|
|
412
|
-
for (let i = 0;i < num_prime; i++) {
|
|
413
|
-
res.sup.args.push({ type: "symbol", content: "\\prime" });
|
|
414
|
-
}
|
|
415
|
-
if (sup) {
|
|
416
|
-
res.sup.args.push(sup);
|
|
417
|
-
}
|
|
418
|
-
if (res.sup.args.length === 1) {
|
|
419
|
-
res.sup = res.sup.args[0];
|
|
420
|
-
}
|
|
421
|
-
} else if (sup) {
|
|
422
|
-
res.sup = sup;
|
|
423
|
-
}
|
|
424
|
-
return [{ type: "supsub", content: "", data: res }, pos];
|
|
425
|
-
} else {
|
|
426
|
-
return [base, pos];
|
|
427
|
-
}
|
|
428
|
-
}
|
|
429
|
-
parseNextExprWithoutSupSub(tokens, start) {
|
|
430
|
-
const firstToken = tokens[start];
|
|
431
|
-
const tokenType = firstToken.type;
|
|
432
|
-
switch (tokenType) {
|
|
433
|
-
case "element":
|
|
434
|
-
case "text":
|
|
435
|
-
case "comment":
|
|
436
|
-
case "whitespace":
|
|
437
|
-
case "newline":
|
|
438
|
-
return [{ type: tokenType, content: firstToken.value }, start + 1];
|
|
439
|
-
case "command":
|
|
440
|
-
if (token_eq(firstToken, BEGIN_COMMAND)) {
|
|
441
|
-
return this.parseBeginEndExpr(tokens, start);
|
|
442
|
-
} else if (token_eq(firstToken, LEFT_COMMAND)) {
|
|
443
|
-
return this.parseLeftRightExpr(tokens, start);
|
|
444
|
-
} else {
|
|
445
|
-
return this.parseCommandExpr(tokens, start);
|
|
446
|
-
}
|
|
447
|
-
case "control":
|
|
448
|
-
const controlChar = firstToken.value;
|
|
449
|
-
switch (controlChar) {
|
|
450
|
-
case "{":
|
|
451
|
-
const posClosingBracket = find_closing_curly_bracket(tokens, start);
|
|
452
|
-
const exprInside = tokens.slice(start + 1, posClosingBracket);
|
|
453
|
-
return [this.parse(exprInside), posClosingBracket + 1];
|
|
454
|
-
case "}":
|
|
455
|
-
throw new LatexParserError("Unmatched '}'");
|
|
456
|
-
case "\\\\":
|
|
457
|
-
return [{ type: "control", content: "\\\\" }, start + 1];
|
|
458
|
-
case "\\,":
|
|
459
|
-
return [{ type: "control", content: "\\," }, start + 1];
|
|
460
|
-
case "_": {
|
|
461
|
-
return [EMPTY_NODE, start];
|
|
462
|
-
}
|
|
463
|
-
case "^": {
|
|
464
|
-
return [EMPTY_NODE, start];
|
|
465
|
-
}
|
|
466
|
-
case "&":
|
|
467
|
-
return [{ type: "control", content: "&" }, start + 1];
|
|
468
|
-
default:
|
|
469
|
-
throw new LatexParserError("Unknown control sequence");
|
|
470
|
-
}
|
|
471
|
-
default:
|
|
472
|
-
throw new LatexParserError("Unknown token type");
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
parseCommandExpr(tokens, start) {
|
|
476
|
-
assert(tokens[start].type === "command");
|
|
477
|
-
const command = tokens[start].value;
|
|
478
|
-
let pos = start + 1;
|
|
479
|
-
if (["left", "right", "begin", "end"].includes(command.slice(1))) {
|
|
480
|
-
throw new LatexParserError("Unexpected command: " + command);
|
|
481
|
-
}
|
|
482
|
-
const paramNum = get_command_param_num(command.slice(1));
|
|
483
|
-
if (paramNum === 0) {
|
|
484
|
-
return [{ type: "symbol", content: command }, pos];
|
|
485
|
-
} else if (paramNum === 1) {
|
|
486
|
-
if (command === "\\sqrt" && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
|
|
487
|
-
const posLeftSquareBracket = pos;
|
|
488
|
-
const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
|
|
489
|
-
const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
|
|
490
|
-
const exponent = this.parse(exprInside);
|
|
491
|
-
const [arg12, newPos2] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
|
|
492
|
-
return [{ type: "unaryFunc", content: command, args: [arg12], data: exponent }, newPos2];
|
|
493
|
-
} else if (command === "\\text") {
|
|
494
|
-
if (pos + 2 >= tokens.length) {
|
|
495
|
-
throw new LatexParserError("Expecting content for \\text command");
|
|
496
|
-
}
|
|
497
|
-
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
498
|
-
assert(tokens[pos + 1].type === "text");
|
|
499
|
-
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
500
|
-
const text = tokens[pos + 1].value;
|
|
501
|
-
return [{ type: "text", content: text }, pos + 3];
|
|
502
|
-
}
|
|
503
|
-
let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
504
|
-
return [{ type: "unaryFunc", content: command, args: [arg1] }, newPos];
|
|
505
|
-
} else if (paramNum === 2) {
|
|
506
|
-
const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
507
|
-
const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
|
|
508
|
-
return [{ type: "binaryFunc", content: command, args: [arg1, arg2] }, pos2];
|
|
509
|
-
} else {
|
|
510
|
-
throw new Error("Invalid number of parameters");
|
|
511
|
-
}
|
|
512
|
-
}
|
|
513
|
-
parseLeftRightExpr(tokens, start) {
|
|
514
|
-
assert(token_eq(tokens[start], LEFT_COMMAND));
|
|
515
|
-
let pos = start + 1;
|
|
516
|
-
pos += eat_whitespaces(tokens, pos).length;
|
|
517
|
-
if (pos >= tokens.length) {
|
|
518
|
-
throw new LatexParserError("Expecting delimiter after \\left");
|
|
519
|
-
}
|
|
520
|
-
const leftDelimiter = eat_parenthesis(tokens, pos);
|
|
521
|
-
if (leftDelimiter === null) {
|
|
522
|
-
throw new LatexParserError("Invalid delimiter after \\left");
|
|
523
|
-
}
|
|
524
|
-
pos++;
|
|
525
|
-
const exprInsideStart = pos;
|
|
526
|
-
const idx = find_closing_right_command(tokens, pos);
|
|
527
|
-
if (idx === -1) {
|
|
528
|
-
throw new LatexParserError("No matching \\right");
|
|
529
|
-
}
|
|
530
|
-
const exprInsideEnd = idx;
|
|
531
|
-
pos = idx + 1;
|
|
532
|
-
pos += eat_whitespaces(tokens, pos).length;
|
|
533
|
-
if (pos >= tokens.length) {
|
|
534
|
-
throw new LatexParserError("Expecting \\right after \\left");
|
|
535
|
-
}
|
|
536
|
-
const rightDelimiter = eat_parenthesis(tokens, pos);
|
|
537
|
-
if (rightDelimiter === null) {
|
|
538
|
-
throw new LatexParserError("Invalid delimiter after \\right");
|
|
539
|
-
}
|
|
540
|
-
pos++;
|
|
541
|
-
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
542
|
-
const body = this.parse(exprInside);
|
|
543
|
-
const args = [
|
|
544
|
-
{ type: "element", content: leftDelimiter.value },
|
|
545
|
-
body,
|
|
546
|
-
{ type: "element", content: rightDelimiter.value }
|
|
547
|
-
];
|
|
548
|
-
const res = { type: "leftright", content: "", args };
|
|
549
|
-
return [res, pos];
|
|
550
|
-
}
|
|
551
|
-
parseBeginEndExpr(tokens, start) {
|
|
552
|
-
assert(token_eq(tokens[start], BEGIN_COMMAND));
|
|
553
|
-
let pos = start + 1;
|
|
554
|
-
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
555
|
-
assert(tokens[pos + 1].type === "text");
|
|
556
|
-
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
557
|
-
const envName = tokens[pos + 1].value;
|
|
558
|
-
pos += 3;
|
|
559
|
-
pos += eat_whitespaces(tokens, pos).length;
|
|
560
|
-
const exprInsideStart = pos;
|
|
561
|
-
const endIdx = find_closing_end_command(tokens, pos);
|
|
562
|
-
if (endIdx === -1) {
|
|
563
|
-
throw new LatexParserError("No matching \\end");
|
|
564
|
-
}
|
|
565
|
-
const exprInsideEnd = endIdx;
|
|
566
|
-
pos = endIdx + 1;
|
|
567
|
-
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
568
|
-
assert(tokens[pos + 1].type === "text");
|
|
569
|
-
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
570
|
-
if (tokens[pos + 1].value !== envName) {
|
|
571
|
-
throw new LatexParserError("Mismatched \\begin and \\end environments");
|
|
572
|
-
}
|
|
573
|
-
pos += 3;
|
|
574
|
-
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
575
|
-
while (exprInside.length > 0 && ["whitespace", "newline"].includes(exprInside[exprInside.length - 1].type)) {
|
|
576
|
-
exprInside.pop();
|
|
577
|
-
}
|
|
578
|
-
const body = this.parseAligned(exprInside);
|
|
579
|
-
const res = { type: "beginend", content: envName, data: body };
|
|
580
|
-
return [res, pos];
|
|
581
|
-
}
|
|
582
|
-
parseAligned(tokens) {
|
|
583
|
-
let pos = 0;
|
|
584
|
-
const allRows = [];
|
|
585
|
-
let row = [];
|
|
586
|
-
allRows.push(row);
|
|
587
|
-
let group = { type: "ordgroup", content: "", args: [] };
|
|
588
|
-
row.push(group);
|
|
589
|
-
while (pos < tokens.length) {
|
|
590
|
-
const [res, newPos] = this.parseNextExpr(tokens, pos);
|
|
591
|
-
pos = newPos;
|
|
592
|
-
if (res.type === "whitespace") {
|
|
593
|
-
continue;
|
|
594
|
-
} else if (res.type === "newline" && !this.newline_sensitive) {
|
|
595
|
-
continue;
|
|
596
|
-
} else if (res.type === "control" && res.content === "\\\\") {
|
|
597
|
-
row = [];
|
|
598
|
-
group = { type: "ordgroup", content: "", args: [] };
|
|
599
|
-
row.push(group);
|
|
600
|
-
allRows.push(row);
|
|
601
|
-
} else if (res.type === "control" && res.content === "&") {
|
|
602
|
-
group = { type: "ordgroup", content: "", args: [] };
|
|
603
|
-
row.push(group);
|
|
604
|
-
} else {
|
|
605
|
-
group.args.push(res);
|
|
606
|
-
}
|
|
607
|
-
}
|
|
608
|
-
return allRows;
|
|
609
|
-
}
|
|
610
|
-
}
|
|
611
|
-
|
|
612
1
|
// src/map.ts
|
|
613
2
|
var symbolMap = new Map([
|
|
614
3
|
["nonumber", ""],
|
|
@@ -909,7 +298,792 @@ var symbolMap = new Map([
|
|
|
909
298
|
["TeX", "#TeX"]
|
|
910
299
|
]);
|
|
911
300
|
|
|
301
|
+
// src/parser.ts
|
|
302
|
+
function assert(condition, message = "") {
|
|
303
|
+
if (!condition) {
|
|
304
|
+
throw new LatexParserError(message);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
function get_command_param_num(command) {
|
|
308
|
+
if (UNARY_COMMANDS.includes(command)) {
|
|
309
|
+
return 1;
|
|
310
|
+
} else if (BINARY_COMMANDS.includes(command)) {
|
|
311
|
+
return 2;
|
|
312
|
+
} else {
|
|
313
|
+
return 0;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
function find_closing_curly_bracket(tokens, start) {
|
|
317
|
+
assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
|
|
318
|
+
let count = 1;
|
|
319
|
+
let pos = start + 1;
|
|
320
|
+
while (count > 0) {
|
|
321
|
+
if (pos >= tokens.length) {
|
|
322
|
+
throw new LatexParserError("Unmatched curly brackets");
|
|
323
|
+
}
|
|
324
|
+
if (token_eq(tokens[pos], LEFT_CURLY_BRACKET)) {
|
|
325
|
+
count += 1;
|
|
326
|
+
} else if (token_eq(tokens[pos], RIGHT_CURLY_BRACKET)) {
|
|
327
|
+
count -= 1;
|
|
328
|
+
}
|
|
329
|
+
pos += 1;
|
|
330
|
+
}
|
|
331
|
+
return pos - 1;
|
|
332
|
+
}
|
|
333
|
+
function find_closing_square_bracket(tokens, start) {
|
|
334
|
+
assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
|
|
335
|
+
let count = 1;
|
|
336
|
+
let pos = start + 1;
|
|
337
|
+
while (count > 0) {
|
|
338
|
+
if (pos >= tokens.length) {
|
|
339
|
+
throw new LatexParserError("Unmatched square brackets");
|
|
340
|
+
}
|
|
341
|
+
if (token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
|
|
342
|
+
count += 1;
|
|
343
|
+
} else if (token_eq(tokens[pos], RIGHT_SQUARE_BRACKET)) {
|
|
344
|
+
count -= 1;
|
|
345
|
+
}
|
|
346
|
+
pos += 1;
|
|
347
|
+
}
|
|
348
|
+
return pos - 1;
|
|
349
|
+
}
|
|
350
|
+
function isalpha(char) {
|
|
351
|
+
return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".includes(char);
|
|
352
|
+
}
|
|
353
|
+
function isdigit(char) {
|
|
354
|
+
return "0123456789".includes(char);
|
|
355
|
+
}
|
|
356
|
+
function eat_whitespaces(tokens, start) {
|
|
357
|
+
let pos = start;
|
|
358
|
+
while (pos < tokens.length && [4 /* WHITESPACE */, 5 /* NEWLINE */].includes(tokens[pos].type)) {
|
|
359
|
+
pos++;
|
|
360
|
+
}
|
|
361
|
+
return tokens.slice(start, pos);
|
|
362
|
+
}
|
|
363
|
+
function eat_parenthesis(tokens, start) {
|
|
364
|
+
const firstToken = tokens[start];
|
|
365
|
+
if (firstToken.type === 0 /* ELEMENT */ && ["(", ")", "[", "]", "|", "\\{", "\\}"].includes(firstToken.value)) {
|
|
366
|
+
return firstToken;
|
|
367
|
+
} else if (firstToken.type === 1 /* COMMAND */ && ["lfloor", "rfloor", "lceil", "rceil", "langle", "rangle"].includes(firstToken.value.slice(1))) {
|
|
368
|
+
return firstToken;
|
|
369
|
+
} else {
|
|
370
|
+
return null;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
function eat_primes(tokens, start) {
|
|
374
|
+
let pos = start;
|
|
375
|
+
while (pos < tokens.length && token_eq(tokens[pos], { type: 0 /* ELEMENT */, value: "'" })) {
|
|
376
|
+
pos += 1;
|
|
377
|
+
}
|
|
378
|
+
return pos - start;
|
|
379
|
+
}
|
|
380
|
+
function eat_command_name(latex, start) {
|
|
381
|
+
let pos = start;
|
|
382
|
+
while (pos < latex.length && isalpha(latex[pos])) {
|
|
383
|
+
pos += 1;
|
|
384
|
+
}
|
|
385
|
+
return latex.substring(start, pos);
|
|
386
|
+
}
|
|
387
|
+
function find_closing_right_command(tokens, start) {
|
|
388
|
+
let count = 1;
|
|
389
|
+
let pos = start;
|
|
390
|
+
while (count > 0) {
|
|
391
|
+
if (pos >= tokens.length) {
|
|
392
|
+
return -1;
|
|
393
|
+
}
|
|
394
|
+
if (token_eq(tokens[pos], LEFT_COMMAND)) {
|
|
395
|
+
count += 1;
|
|
396
|
+
} else if (token_eq(tokens[pos], RIGHT_COMMAND)) {
|
|
397
|
+
count -= 1;
|
|
398
|
+
}
|
|
399
|
+
pos += 1;
|
|
400
|
+
}
|
|
401
|
+
return pos - 1;
|
|
402
|
+
}
|
|
403
|
+
function find_closing_end_command(tokens, start) {
|
|
404
|
+
let count = 1;
|
|
405
|
+
let pos = start;
|
|
406
|
+
while (count > 0) {
|
|
407
|
+
if (pos >= tokens.length) {
|
|
408
|
+
return -1;
|
|
409
|
+
}
|
|
410
|
+
if (token_eq(tokens[pos], BEGIN_COMMAND)) {
|
|
411
|
+
count += 1;
|
|
412
|
+
} else if (token_eq(tokens[pos], END_COMMAND)) {
|
|
413
|
+
count -= 1;
|
|
414
|
+
}
|
|
415
|
+
pos += 1;
|
|
416
|
+
}
|
|
417
|
+
return pos - 1;
|
|
418
|
+
}
|
|
419
|
+
function find_closing_curly_bracket_char(latex, start) {
|
|
420
|
+
assert(latex[start] === "{");
|
|
421
|
+
let count = 1;
|
|
422
|
+
let pos = start + 1;
|
|
423
|
+
while (count > 0) {
|
|
424
|
+
if (pos >= latex.length) {
|
|
425
|
+
throw new LatexParserError("Unmatched curly brackets");
|
|
426
|
+
}
|
|
427
|
+
if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
|
|
428
|
+
pos += 2;
|
|
429
|
+
continue;
|
|
430
|
+
}
|
|
431
|
+
if (latex[pos] === "{") {
|
|
432
|
+
count += 1;
|
|
433
|
+
} else if (latex[pos] === "}") {
|
|
434
|
+
count -= 1;
|
|
435
|
+
}
|
|
436
|
+
pos += 1;
|
|
437
|
+
}
|
|
438
|
+
return pos - 1;
|
|
439
|
+
}
|
|
440
|
+
function tokenize(latex) {
|
|
441
|
+
const tokens = [];
|
|
442
|
+
let pos = 0;
|
|
443
|
+
while (pos < latex.length) {
|
|
444
|
+
const firstChar = latex[pos];
|
|
445
|
+
let token;
|
|
446
|
+
switch (firstChar) {
|
|
447
|
+
case "%": {
|
|
448
|
+
let newPos = pos + 1;
|
|
449
|
+
while (newPos < latex.length && latex[newPos] !== "\n") {
|
|
450
|
+
newPos += 1;
|
|
451
|
+
}
|
|
452
|
+
token = { type: 3 /* COMMENT */, value: latex.slice(pos + 1, newPos) };
|
|
453
|
+
pos = newPos;
|
|
454
|
+
break;
|
|
455
|
+
}
|
|
456
|
+
case "{":
|
|
457
|
+
case "}":
|
|
458
|
+
case "_":
|
|
459
|
+
case "^":
|
|
460
|
+
case "&":
|
|
461
|
+
token = { type: 6 /* CONTROL */, value: firstChar };
|
|
462
|
+
pos++;
|
|
463
|
+
break;
|
|
464
|
+
case "\n":
|
|
465
|
+
token = { type: 5 /* NEWLINE */, value: firstChar };
|
|
466
|
+
pos++;
|
|
467
|
+
break;
|
|
468
|
+
case "\r": {
|
|
469
|
+
if (pos + 1 < latex.length && latex[pos + 1] === "\n") {
|
|
470
|
+
token = { type: 5 /* NEWLINE */, value: "\n" };
|
|
471
|
+
pos += 2;
|
|
472
|
+
} else {
|
|
473
|
+
token = { type: 5 /* NEWLINE */, value: "\n" };
|
|
474
|
+
pos++;
|
|
475
|
+
}
|
|
476
|
+
break;
|
|
477
|
+
}
|
|
478
|
+
case " ": {
|
|
479
|
+
let newPos = pos;
|
|
480
|
+
while (newPos < latex.length && latex[newPos] === " ") {
|
|
481
|
+
newPos += 1;
|
|
482
|
+
}
|
|
483
|
+
token = { type: 4 /* WHITESPACE */, value: latex.slice(pos, newPos) };
|
|
484
|
+
pos = newPos;
|
|
485
|
+
break;
|
|
486
|
+
}
|
|
487
|
+
case "\\": {
|
|
488
|
+
if (pos + 1 >= latex.length) {
|
|
489
|
+
throw new LatexParserError("Expecting command name after \\");
|
|
490
|
+
}
|
|
491
|
+
const firstTwoChars = latex.slice(pos, pos + 2);
|
|
492
|
+
if (["\\\\", "\\,"].includes(firstTwoChars)) {
|
|
493
|
+
token = { type: 6 /* CONTROL */, value: firstTwoChars };
|
|
494
|
+
} else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
|
|
495
|
+
token = { type: 0 /* ELEMENT */, value: firstTwoChars };
|
|
496
|
+
} else {
|
|
497
|
+
const command = eat_command_name(latex, pos + 1);
|
|
498
|
+
token = { type: 1 /* COMMAND */, value: "\\" + command };
|
|
499
|
+
}
|
|
500
|
+
pos += token.value.length;
|
|
501
|
+
break;
|
|
502
|
+
}
|
|
503
|
+
default: {
|
|
504
|
+
if (isdigit(firstChar)) {
|
|
505
|
+
let newPos = pos;
|
|
506
|
+
while (newPos < latex.length && isdigit(latex[newPos])) {
|
|
507
|
+
newPos += 1;
|
|
508
|
+
}
|
|
509
|
+
token = { type: 0 /* ELEMENT */, value: latex.slice(pos, newPos) };
|
|
510
|
+
} else if (isalpha(firstChar)) {
|
|
511
|
+
token = { type: 0 /* ELEMENT */, value: firstChar };
|
|
512
|
+
} else if ("+-*/=\'<>!.,;?()[]|".includes(firstChar)) {
|
|
513
|
+
token = { type: 0 /* ELEMENT */, value: firstChar };
|
|
514
|
+
} else {
|
|
515
|
+
token = { type: 7 /* UNKNOWN */, value: firstChar };
|
|
516
|
+
}
|
|
517
|
+
pos += token.value.length;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
tokens.push(token);
|
|
521
|
+
if (token.type === 1 /* COMMAND */ && ["\\text", "\\operatorname", "\\begin", "\\end"].includes(token.value)) {
|
|
522
|
+
if (pos >= latex.length || latex[pos] !== "{") {
|
|
523
|
+
throw new LatexParserError(`No content for ${token.value} command`);
|
|
524
|
+
}
|
|
525
|
+
tokens.push({ type: 6 /* CONTROL */, value: "{" });
|
|
526
|
+
const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
|
|
527
|
+
pos++;
|
|
528
|
+
let textInside = latex.slice(pos, posClosingBracket);
|
|
529
|
+
const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
|
|
530
|
+
for (const char of chars) {
|
|
531
|
+
textInside = textInside.replaceAll("\\" + char, char);
|
|
532
|
+
}
|
|
533
|
+
tokens.push({ type: 2 /* TEXT */, value: textInside });
|
|
534
|
+
tokens.push({ type: 6 /* CONTROL */, value: "}" });
|
|
535
|
+
pos = posClosingBracket + 1;
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
return tokens;
|
|
539
|
+
}
|
|
540
|
+
/**
 * Structural equality for lexer tokens.
 *
 * Two tokens are equal when both their `type` and their `value` match.
 * Strict comparison is used so that values of different JavaScript types
 * (for example the string "1" and the number 1) never compare equal.
 *
 * @param {{type: number, value: string}} token1 - first token
 * @param {{type: number, value: string}} token2 - second token
 * @returns {boolean} true when both fields are strictly equal
 */
function token_eq(token1, token2) {
  return token1.type === token2.type && token1.value === token2.value;
}
|
|
543
|
+
/**
 * Token pass that removes whitespace adjacent to a script mark.
 *
 * A WHITESPACE token is dropped when the token right after it, or the token
 * right before it, is the subscript mark `_` or the superscript mark `^`.
 * Every other token is kept, in order. The input array is not modified.
 *
 * @param {Array<{type: number, value: string}>} tokens - token stream
 * @returns {Array<{type: number, value: string}>} a new, filtered token array
 */
function passIgnoreWhitespaceBeforeScriptMark(tokens) {
  const isScriptMark = (tok) => token_eq(tok, SUB_SYMBOL) || token_eq(tok, SUP_SYMBOL);
  return tokens.filter((tok, idx) => {
    if (tok.type !== 4 /* WHITESPACE */) {
      return true;
    }
    const followedByMark = idx + 1 < tokens.length && isScriptMark(tokens[idx + 1]);
    const precededByMark = idx - 1 >= 0 && isScriptMark(tokens[idx - 1]);
    return !(followedByMark || precededByMark);
  });
}
|
|
557
|
+
/**
 * Token pass that expands user-defined macros.
 *
 * Every COMMAND token whose value (including the leading backslash) is a key
 * of `customTexMacros` with a string replacement is substituted by the tokens
 * obtained from tokenizing that replacement. All other tokens are copied
 * through unchanged. Expansion is a single pass: tokens produced by an
 * expansion are not expanded again.
 *
 * @param {Array<{type: number, value: string}>} tokens - token stream
 * @param {Object<string, string>} [customTexMacros] - map from command
 *   (e.g. "\\R") to its LaTeX replacement text; may be omitted or null
 * @returns {Array<{type: number, value: string}>} a new token array
 */
function passExpandCustomTexMacros(tokens, customTexMacros = {}) {
  // Tolerate an explicit null as well as a missing argument.
  const macros = customTexMacros ?? {};
  const out_tokens = [];
  for (const token of tokens) {
    const expansion = token.type === 1 /* COMMAND */ ? macros[token.value] : undefined;
    // Test the type rather than truthiness so that a macro defined as the
    // empty string expands to nothing instead of being left in place.
    if (typeof expansion === "string") {
      out_tokens.push(...tokenize(expansion));
    } else {
      out_tokens.push(token);
    }
  }
  return out_tokens;
}
|
|
569
|
+
/**
 * Parse a LaTeX math string into a syntax tree.
 *
 * The input is tokenized, whitespace next to `_` / `^` is removed, custom
 * macros are expanded, and the resulting tokens are handed to a
 * default-configured LatexParser.
 *
 * @param {string} tex - LaTeX source
 * @param {Object<string, string>} customTexMacros - macro replacement table
 * @returns {object} the node returned by LatexParser#parse
 */
function parseTex(tex, customTexMacros) {
  const parser = new LatexParser();
  const rawTokens = tokenize(tex);
  const scriptReadyTokens = passIgnoreWhitespaceBeforeScriptMark(rawTokens);
  const expandedTokens = passExpandCustomTexMacros(scriptReadyTokens, customTexMacros);
  return parser.parse(expandedTokens);
}
|
|
576
|
+
// Command names (without the leading backslash) that the parser treats as
// taking exactly one argument.
var UNARY_COMMANDS = [
  "sqrt", "text", "bar", "bold", "boldsymbol", "ddot", "dot", "hat",
  "mathbb", "mathbf", "mathcal", "mathfrak", "mathit", "mathrm", "mathscr", "mathsf", "mathtt",
  "operatorname", "overbrace", "overline", "pmb", "rm", "tilde",
  "underbrace", "underline", "vec", "widehat", "widetilde"
];
// Command names (without the leading backslash) that the parser treats as
// taking exactly two arguments.
var BINARY_COMMANDS = ["frac", "tfrac", "binom", "dbinom", "dfrac", "tbinom"];
|
|
614
|
+
// Shared placeholder node returned wherever an expression is absent.
var EMPTY_NODE = { type: "empty", content: "" };

// Reference tokens used with token_eq() to recognise structural markers.
// Curly brackets are CONTROL tokens; square brackets are ordinary ELEMENTs.
var LEFT_CURLY_BRACKET = { type: 6 /* CONTROL */, value: "{" };
var RIGHT_CURLY_BRACKET = { type: 6 /* CONTROL */, value: "}" };
var LEFT_SQUARE_BRACKET = { type: 0 /* ELEMENT */, value: "[" };
var RIGHT_SQUARE_BRACKET = { type: 0 /* ELEMENT */, value: "]" };

// COMMAND tokens that open or close a delimited construct.
var LEFT_COMMAND = { type: 1 /* COMMAND */, value: "\\left" };
var RIGHT_COMMAND = { type: 1 /* COMMAND */, value: "\\right" };
var BEGIN_COMMAND = { type: 1 /* COMMAND */, value: "\\begin" };
var END_COMMAND = { type: 1 /* COMMAND */, value: "\\end" };
|
|
623
|
+
|
|
624
|
+
/**
 * Error type raised for malformed LaTeX input.
 * Instances carry `name === "LatexParserError"` so they can be told apart
 * from other errors by name as well as by `instanceof`.
 */
class LatexParserError extends Error {
  // Own property, set on each instance once the Error constructor has run.
  name = "LatexParserError";

  /**
   * @param {string} message - human-readable description of the problem
   */
  constructor(message) {
    super(message);
  }
}
|
|
630
|
+
// Reference CONTROL tokens for the script marks: "_" introduces a subscript,
// "^" introduces a superscript. Compared against with token_eq().
var SUB_SYMBOL = { type: 6 /* CONTROL */, value: "_" };
var SUP_SYMBOL = { type: 6 /* CONTROL */, value: "^" };
|
|
632
|
+
|
|
633
|
+
/**
 * Recursive-descent parser that turns a token array into a LaTeX syntax tree.
 *
 * Nodes are plain objects of the form `{ type, content, args?, data? }`.
 * Every `parseXxx(tokens, start)` method returns a pair
 * `[node, positionAfterNode]`.
 */
class LatexParser {
  space_sensitive;
  newline_sensitive;

  /**
   * @param {boolean} [space_sensitive=false] - keep whitespace nodes in parse() output
   * @param {boolean} [newline_sensitive=true] - keep newline nodes in parse() output
   */
  constructor(space_sensitive = false, newline_sensitive = true) {
    this.space_sensitive = space_sensitive;
    this.newline_sensitive = newline_sensitive;
  }

  /**
   * Parse a complete token sequence.
   *
   * @param {Array<{type: number, value: string}>} tokens
   * @returns {object} EMPTY_NODE when nothing was parsed, the single node when
   *   exactly one was parsed, otherwise an "ordgroup" node wrapping all of them
   * @throws {LatexParserError} on a stray `&` or any malformed construct
   */
  parse(tokens) {
    const results = [];
    let pos = 0;
    while (pos < tokens.length) {
      const [res, newPos] = this.parseNextExpr(tokens, pos);
      pos = newPos;
      if (!this.space_sensitive && res.type === "whitespace") {
        continue;
      }
      if (!this.newline_sensitive && res.type === "newline") {
        continue;
      }
      // Alignment tabs are only meaningful to parseAligned().
      if (res.type === "control" && res.content === "&") {
        throw new LatexParserError("Unexpected & outside of an alignment");
      }
      results.push(res);
    }
    if (results.length === 0) {
      return EMPTY_NODE;
    }
    if (results.length === 1) {
      return results[0];
    }
    return { type: "ordgroup", content: "", args: results };
  }

  /**
   * Parse one expression together with any primes, subscript and superscript
   * attached to it.
   *
   * @param {Array<{type: number, value: string}>} tokens
   * @param {number} start - index of the first token of the expression
   * @returns {[object, number]} the base node (or a "supsub" node wrapping it)
   *   and the index of the first unconsumed token
   * @throws {LatexParserError} when primes follow an explicit superscript
   */
  parseNextExpr(tokens, start) {
    let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
    let sub = null;
    let sup = null;
    let num_prime = eat_primes(tokens, pos);
    pos += num_prime;
    if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
      [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
      // Advance only past the primes found here; the ones counted before the
      // subscript have already been consumed.
      const primes_after_sub = eat_primes(tokens, pos);
      num_prime += primes_after_sub;
      pos += primes_after_sub;
      if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
        [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
        if (eat_primes(tokens, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    } else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
      [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
      if (eat_primes(tokens, pos) > 0) {
        throw new LatexParserError("Double superscript");
      }
      if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
        [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
        if (eat_primes(tokens, pos) > 0) {
          throw new LatexParserError("Double superscript");
        }
      }
    }

    if (sub === null && sup === null && num_prime === 0) {
      return [base, pos];
    }

    const res = { base };
    if (sub) {
      res.sub = sub;
    }
    if (num_prime > 0) {
      // Primes become part of the superscript: one "'" element per prime,
      // followed by the explicit superscript when there is one.
      res.sup = { type: "ordgroup", content: "", args: [] };
      for (let i = 0; i < num_prime; i++) {
        res.sup.args.push({ type: "element", content: "'" });
      }
      if (sup) {
        res.sup.args.push(sup);
      }
      if (res.sup.args.length === 1) {
        res.sup = res.sup.args[0];
      }
    } else if (sup) {
      res.sup = sup;
    }
    return [{ type: "supsub", content: "", data: res }, pos];
  }

  /**
   * Parse a single expression without looking at trailing scripts.
   *
   * For the script marks `_` and `^` this returns EMPTY_NODE without consuming
   * the token, so that parseNextExpr() attaches the script to an empty base.
   *
   * @param {Array<{type: number, value: string}>} tokens
   * @param {number} start - index of the token to parse
   * @returns {[object, number]} parsed node and next position
   * @throws {LatexParserError} at end of input, on an unmatched `}`, or on an
   *   unrecognised token
   */
  parseNextExprWithoutSupSub(tokens, start) {
    // Reached e.g. for "x^" or "\frac{a}": an operand is required but the
    // input is exhausted. Report it as a parse error instead of a TypeError.
    if (start >= tokens.length) {
      throw new LatexParserError("Unexpected end of input");
    }
    const firstToken = tokens[start];
    switch (firstToken.type) {
      case 0 /* ELEMENT */:
        return [{ type: "element", content: firstToken.value }, start + 1];
      case 2 /* TEXT */:
        return [{ type: "text", content: firstToken.value }, start + 1];
      case 3 /* COMMENT */:
        return [{ type: "comment", content: firstToken.value }, start + 1];
      case 4 /* WHITESPACE */:
        return [{ type: "whitespace", content: firstToken.value }, start + 1];
      case 5 /* NEWLINE */:
        return [{ type: "newline", content: firstToken.value }, start + 1];
      case 1 /* COMMAND */:
        if (token_eq(firstToken, BEGIN_COMMAND)) {
          return this.parseBeginEndExpr(tokens, start);
        } else if (token_eq(firstToken, LEFT_COMMAND)) {
          return this.parseLeftRightExpr(tokens, start);
        } else {
          return this.parseCommandExpr(tokens, start);
        }
      case 6 /* CONTROL */: {
        switch (firstToken.value) {
          case "{": {
            const posClosingBracket = find_closing_curly_bracket(tokens, start);
            const exprInside = tokens.slice(start + 1, posClosingBracket);
            return [this.parse(exprInside), posClosingBracket + 1];
          }
          case "}":
            throw new LatexParserError("Unmatched '}'");
          case "\\\\":
            return [{ type: "control", content: "\\\\" }, start + 1];
          case "\\,":
            return [{ type: "control", content: "\\," }, start + 1];
          case "_":
          case "^":
            return [EMPTY_NODE, start];
          case "&":
            return [{ type: "control", content: "&" }, start + 1];
          default:
            throw new LatexParserError("Unknown control sequence");
        }
      }
      default:
        throw new LatexParserError("Unknown token type");
    }
  }

  /**
   * Parse a command token and the arguments it takes.
   *
   * The number of arguments comes from get_command_param_num(). Zero-argument
   * commands become "symbol" nodes when known to symbolMap and "unknownMacro"
   * nodes otherwise.
   *
   * @param {Array<{type: number, value: string}>} tokens
   * @param {number} start - index of the COMMAND token
   * @returns {[object, number]} parsed node and next position
   * @throws {LatexParserError} for \left, \right, \begin, \end appearing here,
   *   or for a \text command without content
   */
  parseCommandExpr(tokens, start) {
    assert(tokens[start].type === 1 /* COMMAND */);
    const command = tokens[start].value;
    const commandName = command.slice(1);
    const pos = start + 1;
    if (["left", "right", "begin", "end"].includes(commandName)) {
      throw new LatexParserError("Unexpected command: " + command);
    }
    const paramNum = get_command_param_num(commandName);
    switch (paramNum) {
      case 0:
        if (!symbolMap.has(commandName)) {
          return [{ type: "unknownMacro", content: command }, pos];
        }
        return [{ type: "symbol", content: command }, pos];
      case 1: {
        if (command === "\\sqrt" && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
          // \sqrt[n]{x}: the bracketed part is stored in `data`.
          const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
          const exponent = this.parse(tokens.slice(pos + 1, posRightSquareBracket));
          const [radicand, posAfter] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
          return [{ type: "unaryFunc", content: command, args: [radicand], data: exponent }, posAfter];
        }
        if (command === "\\text") {
          // The token stream is expected to hold exactly "{", TEXT, "}" here.
          if (pos + 2 >= tokens.length) {
            throw new LatexParserError("Expecting content for \\text command");
          }
          assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
          assert(tokens[pos + 1].type === 2 /* TEXT */);
          assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
          return [{ type: "text", content: tokens[pos + 1].value }, pos + 3];
        }
        const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
        return [{ type: "unaryFunc", content: command, args: [arg1] }, newPos];
      }
      case 2: {
        const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
        const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
        return [{ type: "binaryFunc", content: command, args: [arg1, arg2] }, pos2];
      }
      default:
        throw new Error("Invalid number of parameters");
    }
  }

  /**
   * Parse a `\left<delim> ... \right<delim>` construct.
   *
   * @param {Array<{type: number, value: string}>} tokens
   * @param {number} start - index of the \left token
   * @returns {[object, number]} a "leftright" node whose args are
   *   [left delimiter element, body, right delimiter element], and next position
   * @throws {LatexParserError} when a delimiter or the matching \right is missing
   */
  parseLeftRightExpr(tokens, start) {
    assert(token_eq(tokens[start], LEFT_COMMAND));
    let pos = start + 1;
    pos += eat_whitespaces(tokens, pos).length;
    if (pos >= tokens.length) {
      throw new LatexParserError("Expecting delimiter after \\left");
    }
    const leftDelimiter = eat_parenthesis(tokens, pos);
    if (leftDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\left");
    }
    pos++;
    const exprInsideStart = pos;
    const idx = find_closing_right_command(tokens, pos);
    if (idx === -1) {
      throw new LatexParserError("No matching \\right");
    }
    const exprInsideEnd = idx;
    pos = idx + 1;
    pos += eat_whitespaces(tokens, pos).length;
    if (pos >= tokens.length) {
      // \right was found, but the input ends before its delimiter.
      throw new LatexParserError("Expecting delimiter after \\right");
    }
    const rightDelimiter = eat_parenthesis(tokens, pos);
    if (rightDelimiter === null) {
      throw new LatexParserError("Invalid delimiter after \\right");
    }
    pos++;
    const body = this.parse(tokens.slice(exprInsideStart, exprInsideEnd));
    const args = [
      { type: "element", content: leftDelimiter.value },
      body,
      { type: "element", content: rightDelimiter.value }
    ];
    return [{ type: "leftright", content: "", args }, pos];
  }

  /**
   * Parse a `\begin{env} ... \end{env}` construct.
   *
   * @param {Array<{type: number, value: string}>} tokens
   * @param {number} start - index of the \begin token
   * @returns {[object, number]} a "beginend" node whose `content` is the
   *   environment name and whose `data` is the row/cell matrix produced by
   *   parseAligned(), and next position
   * @throws {LatexParserError} when \end is missing or names another environment
   */
  parseBeginEndExpr(tokens, start) {
    assert(token_eq(tokens[start], BEGIN_COMMAND));
    let pos = start + 1;
    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
    assert(tokens[pos + 1].type === 2 /* TEXT */);
    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
    const envName = tokens[pos + 1].value;
    pos += 3;
    pos += eat_whitespaces(tokens, pos).length;
    const exprInsideStart = pos;
    const endIdx = find_closing_end_command(tokens, pos);
    if (endIdx === -1) {
      throw new LatexParserError("No matching \\end");
    }
    const exprInsideEnd = endIdx;
    pos = endIdx + 1;
    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
    assert(tokens[pos + 1].type === 2 /* TEXT */);
    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
    if (tokens[pos + 1].value !== envName) {
      throw new LatexParserError("Mismatched \\begin and \\end environments");
    }
    pos += 3;
    const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
    // Drop trailing blanks so they do not end up in the last cell.
    while (exprInside.length > 0 && [4 /* WHITESPACE */, 5 /* NEWLINE */].includes(exprInside[exprInside.length - 1].type)) {
      exprInside.pop();
    }
    const body = this.parseAligned(exprInside);
    return [{ type: "beginend", content: envName, data: body }, pos];
  }

  /**
   * Parse the body of an environment into rows and cells.
   *
   * `\\` starts a new row and `&` starts a new cell in the current row.
   * Whitespace is always skipped; newlines are skipped only when the parser
   * is not newline-sensitive.
   *
   * @param {Array<{type: number, value: string}>} tokens - environment body
   * @returns {Array<Array<object>>} rows of "ordgroup" cells (at least one row
   *   containing one cell)
   */
  parseAligned(tokens) {
    let pos = 0;
    const allRows = [];
    let row = [];
    allRows.push(row);
    let group = { type: "ordgroup", content: "", args: [] };
    row.push(group);
    while (pos < tokens.length) {
      const [res, newPos] = this.parseNextExpr(tokens, pos);
      pos = newPos;
      if (res.type === "whitespace") {
        continue;
      } else if (res.type === "newline" && !this.newline_sensitive) {
        continue;
      } else if (res.type === "control" && res.content === "\\\\") {
        row = [];
        group = { type: "ordgroup", content: "", args: [] };
        row.push(group);
        allRows.push(row);
      } else if (res.type === "control" && res.content === "&") {
        group = { type: "ordgroup", content: "", args: [] };
        row.push(group);
      } else {
        group.args.push(res);
      }
    }
    return allRows;
  }
}
|
|
921
|
+
|
|
912
922
|
// src/writer.ts
|
|
923
|
+
/**
 * Tell whether a node is an "atom" whose content is a bracket-like delimiter:
 * parentheses, square brackets, curly brackets, the vertical bar, or the
 * floor / ceiling corner characters.
 *
 * @param {{type: string, content: string}} c - node to inspect
 * @returns {boolean} true for delimiter atoms, false otherwise
 */
function is_delimiter(c) {
  if (c.type !== "atom") {
    return false;
  }
  const delimiters = ["(", ")", "[", "]", "{", "}", "|", "\u230A", "\u230B", "\u2308", "\u2309"];
  return delimiters.includes(c.content);
}
|
|
926
|
+
/**
 * Convert a LaTeX syntax-tree node (as produced by LatexParser) into the
 * corresponding Typst tree node. The conversion is recursive and does not
 * modify the input.
 *
 * @param {object} node - LaTeX node `{ type, content, args?, data? }`
 * @returns {object} Typst node `{ type, content, args?, data? }`
 * @throws {TypstWriterError} for an unsupported node type, an unsupported
 *   control sequence, or an \operatorname whose body is not a single text node
 */
function convertTree(node) {
  switch (node.type) {
    case "empty":
    case "whitespace":
      return { type: "empty", content: "" };
    case "ordgroup":
      return {
        type: "group",
        content: "",
        args: node.args.map(convertTree)
      };
    case "element":
      return { type: "atom", content: convertToken(node.content) };
    case "symbol":
      return { type: "symbol", content: convertToken(node.content) };
    case "text":
      return { type: "text", content: node.content };
    case "comment":
      return { type: "comment", content: node.content };
    case "supsub": {
      const { base, sup, sub } = node.data;
      // \overbrace{x}^{y} and \underbrace{x}_{y} become two-argument calls.
      if (base && base.type === "unaryFunc" && base.content === "\\overbrace" && sup) {
        return {
          type: "binaryFunc",
          content: "overbrace",
          args: [convertTree(base.args[0]), convertTree(sup)]
        };
      } else if (base && base.type === "unaryFunc" && base.content === "\\underbrace" && sub) {
        return {
          type: "binaryFunc",
          content: "underbrace",
          args: [convertTree(base.args[0]), convertTree(sub)]
        };
      }
      const data = {
        base: convertTree(base)
      };
      // A script with no base is attached to an empty text node.
      if (data.base.type === "empty") {
        data.base = { type: "text", content: "" };
      }
      if (sup) {
        data.sup = convertTree(sup);
      }
      if (sub) {
        data.sub = convertTree(sub);
      }
      return {
        type: "supsub",
        content: "",
        data
      };
    }
    case "leftright": {
      const [left, , right] = node.args;
      const group = {
        type: "group",
        content: "",
        args: node.args.map(convertTree)
      };
      // These delimiter pairs are emitted as a plain group; every other
      // pairing is wrapped in lr(...).
      if (["[]", "()", "\\{\\}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
        return group;
      }
      return {
        type: "unaryFunc",
        content: "lr",
        args: [group]
      };
    }
    case "binaryFunc": {
      return {
        type: "binaryFunc",
        content: convertToken(node.content),
        args: node.args.map(convertTree)
      };
    }
    case "unaryFunc": {
      const arg0 = convertTree(node.args[0]);
      if (node.content === "\\sqrt" && node.data) {
        // \sqrt[n]{x} -> root(n, x)
        const data = convertTree(node.data);
        return {
          type: "binaryFunc",
          content: "root",
          args: [data, arg0]
        };
      }
      if (node.content === "\\mathbf") {
        // \mathbf{x} -> upright(bold(x))
        const inner = {
          type: "unaryFunc",
          content: "bold",
          args: [arg0]
        };
        return {
          type: "unaryFunc",
          content: "upright",
          args: [inner]
        };
      }
      if (node.content === "\\mathbb" && arg0.type === "atom" && /^[A-Z]$/.test(arg0.content)) {
        // A single capital letter maps to the doubled-letter symbol, e.g. R -> RR.
        return {
          type: "symbol",
          content: arg0.content + arg0.content
        };
      }
      if (node.content === "\\operatorname") {
        const body = node.args;
        if (body.length !== 1 || body[0].type !== "text") {
          throw new TypstWriterError(`Expecting body of \\operatorname to be text but got`, node);
        }
        const text = body[0].content;
        if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
          return {
            type: "symbol",
            content: text
          };
        } else {
          return {
            type: "unaryFunc",
            content: "op",
            args: [{ type: "text", content: text }]
          };
        }
      }
      // Reuse the already converted first argument. Converting it a second
      // time here would double the work at every nesting level.
      return {
        type: "unaryFunc",
        content: convertToken(node.content),
        args: [arg0, ...node.args.slice(1).map((extra) => convertTree(extra))]
      };
    }
    case "newline":
      return { type: "newline", content: "\n" };
    case "beginend": {
      const matrix = node.data;
      const data = matrix.map((row) => row.map((cell) => convertTree(cell)));
      if (node.content.startsWith("align")) {
        return {
          type: "align",
          content: "",
          data
        };
      } else {
        return {
          type: "matrix",
          content: "mat",
          data
        };
      }
    }
    case "unknownMacro":
      return { type: "unknown", content: convertToken(node.content) };
    case "control":
      if (node.content === "\\\\") {
        return { type: "symbol", content: "\\" };
      } else if (node.content === "\\,") {
        return { type: "symbol", content: "thin" };
      } else {
        throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
      }
    default:
      throw new TypstWriterError(`Unimplemented node type: ${node.type}`, node);
  }
}
|
|
913
1087
|
function convertToken(token) {
|
|
914
1088
|
if (/^[a-zA-Z0-9]$/.test(token)) {
|
|
915
1089
|
return token;
|
|
@@ -971,7 +1145,7 @@ class TypstWriter {
|
|
|
971
1145
|
no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(this.buffer) || this.buffer === "-" || this.buffer === "+";
|
|
972
1146
|
no_need_space ||= str.startsWith("\n");
|
|
973
1147
|
no_need_space ||= this.buffer === "";
|
|
974
|
-
no_need_space ||= /[\
|
|
1148
|
+
no_need_space ||= /[\s_^{\(]$/.test(this.buffer);
|
|
975
1149
|
if (!no_need_space) {
|
|
976
1150
|
this.buffer += " ";
|
|
977
1151
|
}
|
|
@@ -982,142 +1156,75 @@ class TypstWriter {
|
|
|
982
1156
|
this.buffer += str;
|
|
983
1157
|
}
|
|
984
1158
|
append(node) {
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
} else if (node.type === "symbol") {
|
|
996
|
-
this.queue.push({ type: "symbol", content: node.content });
|
|
997
|
-
} else if (node.type === "text") {
|
|
998
|
-
this.queue.push(node);
|
|
999
|
-
} else if (node.type === "supsub") {
|
|
1000
|
-
let { base, sup, sub } = node.data;
|
|
1001
|
-
if (base && base.type === "unaryFunc" && base.content === "\\overbrace" && sup) {
|
|
1002
|
-
this.append({ type: "binaryFunc", content: "\\overbrace", args: [base.args[0], sup] });
|
|
1003
|
-
return;
|
|
1004
|
-
} else if (base && base.type === "unaryFunc" && base.content === "\\underbrace" && sub) {
|
|
1005
|
-
this.append({ type: "binaryFunc", content: "\\underbrace", args: [base.args[0], sub] });
|
|
1006
|
-
return;
|
|
1159
|
+
switch (node.type) {
|
|
1160
|
+
case "empty":
|
|
1161
|
+
break;
|
|
1162
|
+
case "atom": {
|
|
1163
|
+
if (node.content === "," && this.insideFunctionDepth > 0) {
|
|
1164
|
+
this.queue.push({ type: "symbol", content: "comma" });
|
|
1165
|
+
} else {
|
|
1166
|
+
this.queue.push({ type: "atom", content: node.content });
|
|
1167
|
+
}
|
|
1168
|
+
break;
|
|
1007
1169
|
}
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1170
|
+
case "symbol":
|
|
1171
|
+
case "text":
|
|
1172
|
+
case "comment":
|
|
1173
|
+
case "newline":
|
|
1174
|
+
this.queue.push(node);
|
|
1175
|
+
break;
|
|
1176
|
+
case "group":
|
|
1177
|
+
for (const item of node.args) {
|
|
1178
|
+
this.append(item);
|
|
1179
|
+
}
|
|
1180
|
+
break;
|
|
1181
|
+
case "supsub": {
|
|
1182
|
+
let { base, sup, sub } = node.data;
|
|
1011
1183
|
this.appendWithBracketsIfNeeded(base);
|
|
1184
|
+
let trailing_space_needed = false;
|
|
1185
|
+
const has_prime = sup && sup.type === "atom" && sup.content === "\'";
|
|
1186
|
+
if (has_prime) {
|
|
1187
|
+
this.queue.push({ type: "atom", content: "\'" });
|
|
1188
|
+
trailing_space_needed = false;
|
|
1189
|
+
}
|
|
1190
|
+
if (sub) {
|
|
1191
|
+
this.queue.push({ type: "atom", content: "_" });
|
|
1192
|
+
trailing_space_needed = this.appendWithBracketsIfNeeded(sub);
|
|
1193
|
+
}
|
|
1194
|
+
if (sup && !has_prime) {
|
|
1195
|
+
this.queue.push({ type: "atom", content: "^" });
|
|
1196
|
+
trailing_space_needed = this.appendWithBracketsIfNeeded(sup);
|
|
1197
|
+
}
|
|
1198
|
+
if (trailing_space_needed) {
|
|
1199
|
+
this.queue.push({ type: "softSpace", content: "" });
|
|
1200
|
+
}
|
|
1201
|
+
break;
|
|
1012
1202
|
}
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
this.queue.push({ type: "atom", content: "\'" });
|
|
1017
|
-
trailing_space_needed = false;
|
|
1018
|
-
}
|
|
1019
|
-
if (sub) {
|
|
1020
|
-
this.queue.push({ type: "atom", content: "_" });
|
|
1021
|
-
trailing_space_needed = this.appendWithBracketsIfNeeded(sub);
|
|
1022
|
-
}
|
|
1023
|
-
if (sup && !has_prime) {
|
|
1024
|
-
this.queue.push({ type: "atom", content: "^" });
|
|
1025
|
-
trailing_space_needed = this.appendWithBracketsIfNeeded(sup);
|
|
1026
|
-
}
|
|
1027
|
-
if (trailing_space_needed) {
|
|
1028
|
-
this.queue.push({ type: "softSpace", content: "" });
|
|
1029
|
-
}
|
|
1030
|
-
} else if (node.type === "leftright") {
|
|
1031
|
-
const [left, body, right] = node.args;
|
|
1032
|
-
if (["[]", "()", "\\{\\}", "\\lfloor\\rfloor", "\\lceil\\rceil"].includes(left.content + right.content)) {
|
|
1033
|
-
this.append(left);
|
|
1034
|
-
this.append(body);
|
|
1035
|
-
this.append(right);
|
|
1036
|
-
return;
|
|
1037
|
-
}
|
|
1038
|
-
const func_symbol = { type: "symbol", content: "lr" };
|
|
1039
|
-
this.queue.push(func_symbol);
|
|
1040
|
-
this.insideFunctionDepth++;
|
|
1041
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1042
|
-
this.append(left);
|
|
1043
|
-
this.append(body);
|
|
1044
|
-
this.append(right);
|
|
1045
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1046
|
-
this.insideFunctionDepth--;
|
|
1047
|
-
} else if (node.type === "binaryFunc") {
|
|
1048
|
-
const func_symbol = { type: "symbol", content: node.content };
|
|
1049
|
-
const [arg0, arg1] = node.args;
|
|
1050
|
-
this.queue.push(func_symbol);
|
|
1051
|
-
this.insideFunctionDepth++;
|
|
1052
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1053
|
-
this.append(arg0);
|
|
1054
|
-
this.queue.push({ type: "atom", content: "," });
|
|
1055
|
-
this.append(arg1);
|
|
1056
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1057
|
-
this.insideFunctionDepth--;
|
|
1058
|
-
} else if (node.type === "unaryFunc") {
|
|
1059
|
-
const func_symbol = { type: "symbol", content: node.content };
|
|
1060
|
-
const arg0 = node.args[0];
|
|
1061
|
-
if (node.content === "\\sqrt" && node.data) {
|
|
1062
|
-
func_symbol.content = "root";
|
|
1203
|
+
case "binaryFunc": {
|
|
1204
|
+
const func_symbol = { type: "symbol", content: node.content };
|
|
1205
|
+
const [arg0, arg1] = node.args;
|
|
1063
1206
|
this.queue.push(func_symbol);
|
|
1064
1207
|
this.insideFunctionDepth++;
|
|
1065
1208
|
this.queue.push({ type: "atom", content: "(" });
|
|
1066
|
-
this.append(node.data);
|
|
1067
|
-
this.queue.push({ type: "atom", content: "," });
|
|
1068
1209
|
this.append(arg0);
|
|
1210
|
+
this.queue.push({ type: "atom", content: "," });
|
|
1211
|
+
this.append(arg1);
|
|
1069
1212
|
this.queue.push({ type: "atom", content: ")" });
|
|
1070
1213
|
this.insideFunctionDepth--;
|
|
1071
|
-
|
|
1072
|
-
}
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1214
|
+
break;
|
|
1215
|
+
}
|
|
1216
|
+
case "unaryFunc": {
|
|
1217
|
+
const func_symbol = { type: "symbol", content: node.content };
|
|
1218
|
+
const arg0 = node.args[0];
|
|
1076
1219
|
this.queue.push(func_symbol);
|
|
1077
1220
|
this.insideFunctionDepth++;
|
|
1078
1221
|
this.queue.push({ type: "atom", content: "(" });
|
|
1079
1222
|
this.append(arg0);
|
|
1080
1223
|
this.queue.push({ type: "atom", content: ")" });
|
|
1081
1224
|
this.insideFunctionDepth--;
|
|
1082
|
-
|
|
1083
|
-
this.insideFunctionDepth--;
|
|
1084
|
-
return;
|
|
1085
|
-
} else if (node.content === "\\mathbb") {
|
|
1086
|
-
const body = node.args[0];
|
|
1087
|
-
if (body.type === "element" && /^[A-Z]$/.test(body.content)) {
|
|
1088
|
-
this.queue.push({ type: "symbol", content: body.content + body.content });
|
|
1089
|
-
return;
|
|
1090
|
-
}
|
|
1091
|
-
} else if (node.content === "\\operatorname") {
|
|
1092
|
-
let body = node.args;
|
|
1093
|
-
if (body.length === 1 && body[0].type == "ordgroup") {
|
|
1094
|
-
body = body[0].args;
|
|
1095
|
-
}
|
|
1096
|
-
const text = body.reduce((buff, n) => {
|
|
1097
|
-
buff += convertToken(n.content);
|
|
1098
|
-
return buff;
|
|
1099
|
-
}, "");
|
|
1100
|
-
if (this.preferTypstIntrinsic && TYPST_INTRINSIC_SYMBOLS.includes(text)) {
|
|
1101
|
-
this.queue.push({ type: "symbol", content: text });
|
|
1102
|
-
} else {
|
|
1103
|
-
this.queue.push({ type: "symbol", content: "op" });
|
|
1104
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1105
|
-
this.queue.push({ type: "text", content: text });
|
|
1106
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1107
|
-
}
|
|
1108
|
-
return;
|
|
1225
|
+
break;
|
|
1109
1226
|
}
|
|
1110
|
-
|
|
1111
|
-
this.insideFunctionDepth++;
|
|
1112
|
-
this.queue.push({ type: "atom", content: "(" });
|
|
1113
|
-
this.append(arg0);
|
|
1114
|
-
this.queue.push({ type: "atom", content: ")" });
|
|
1115
|
-
this.insideFunctionDepth--;
|
|
1116
|
-
} else if (node.type === "newline") {
|
|
1117
|
-
this.queue.push({ type: "newline", content: "\n" });
|
|
1118
|
-
return;
|
|
1119
|
-
} else if (node.type === "beginend") {
|
|
1120
|
-
if (node.content.startsWith("align")) {
|
|
1227
|
+
case "align": {
|
|
1121
1228
|
const matrix = node.data;
|
|
1122
1229
|
matrix.forEach((row, i) => {
|
|
1123
1230
|
row.forEach((cell, j) => {
|
|
@@ -1127,10 +1234,12 @@ class TypstWriter {
|
|
|
1127
1234
|
this.append(cell);
|
|
1128
1235
|
});
|
|
1129
1236
|
if (i < matrix.length - 1) {
|
|
1130
|
-
this.queue.push({ type: "symbol", content: "
|
|
1237
|
+
this.queue.push({ type: "symbol", content: "\\" });
|
|
1131
1238
|
}
|
|
1132
1239
|
});
|
|
1133
|
-
|
|
1240
|
+
break;
|
|
1241
|
+
}
|
|
1242
|
+
case "matrix": {
|
|
1134
1243
|
const matrix = node.data;
|
|
1135
1244
|
this.queue.push({ type: "symbol", content: "mat" });
|
|
1136
1245
|
this.insideFunctionDepth++;
|
|
@@ -1138,10 +1247,6 @@ class TypstWriter {
|
|
|
1138
1247
|
this.queue.push({ type: "symbol", content: "delim: #none, " });
|
|
1139
1248
|
matrix.forEach((row, i) => {
|
|
1140
1249
|
row.forEach((cell, j) => {
|
|
1141
|
-
if (cell.type === "ordgroup" && cell.args.length === 0) {
|
|
1142
|
-
this.queue.push({ type: "atom", content: "," });
|
|
1143
|
-
return;
|
|
1144
|
-
}
|
|
1145
1250
|
this.append(cell);
|
|
1146
1251
|
if (j < row.length - 1) {
|
|
1147
1252
|
this.queue.push({ type: "atom", content: "," });
|
|
@@ -1154,37 +1259,45 @@ class TypstWriter {
|
|
|
1154
1259
|
});
|
|
1155
1260
|
this.queue.push({ type: "atom", content: ")" });
|
|
1156
1261
|
this.insideFunctionDepth--;
|
|
1262
|
+
break;
|
|
1157
1263
|
}
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1264
|
+
case "unknown": {
|
|
1265
|
+
if (this.nonStrict) {
|
|
1266
|
+
this.queue.push({ type: "symbol", content: node.content });
|
|
1267
|
+
} else {
|
|
1268
|
+
throw new TypstWriterError(`Unknown macro: ${node.content}`, node);
|
|
1269
|
+
}
|
|
1270
|
+
break;
|
|
1164
1271
|
}
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1272
|
+
default:
|
|
1273
|
+
throw new TypstWriterError(`Unimplemented node type to append: ${node.type}`, node);
|
|
1274
|
+
}
|
|
1275
|
+
}
|
|
1276
|
+
appendWithBracketsIfNeeded(node) {
|
|
1277
|
+
let need_to_wrap = ["group", "supsub", "empty"].includes(node.type);
|
|
1278
|
+
if (node.type === "group") {
|
|
1279
|
+
const first = node.args[0];
|
|
1280
|
+
const last = node.args[node.args.length - 1];
|
|
1281
|
+
if (is_delimiter(first) && is_delimiter(last)) {
|
|
1282
|
+
need_to_wrap = false;
|
|
1172
1283
|
}
|
|
1173
|
-
}
|
|
1174
|
-
|
|
1284
|
+
}
|
|
1285
|
+
if (need_to_wrap) {
|
|
1286
|
+
this.queue.push({ type: "atom", content: "(" });
|
|
1287
|
+
this.append(node);
|
|
1288
|
+
this.queue.push({ type: "atom", content: ")" });
|
|
1175
1289
|
} else {
|
|
1176
|
-
|
|
1290
|
+
this.append(node);
|
|
1177
1291
|
}
|
|
1292
|
+
return !need_to_wrap;
|
|
1178
1293
|
}
|
|
1179
1294
|
flushQueue() {
|
|
1180
1295
|
this.queue.forEach((node) => {
|
|
1181
1296
|
let str = "";
|
|
1182
1297
|
switch (node.type) {
|
|
1183
1298
|
case "atom":
|
|
1184
|
-
str = node.content;
|
|
1185
|
-
break;
|
|
1186
1299
|
case "symbol":
|
|
1187
|
-
str =
|
|
1300
|
+
str = node.content;
|
|
1188
1301
|
break;
|
|
1189
1302
|
case "text":
|
|
1190
1303
|
str = `"${node.content}"`;
|
|
@@ -1208,23 +1321,6 @@ class TypstWriter {
|
|
|
1208
1321
|
});
|
|
1209
1322
|
this.queue = [];
|
|
1210
1323
|
}
|
|
1211
|
-
appendWithBracketsIfNeeded(node) {
|
|
1212
|
-
const is_single = ["symbol", "element", "unaryFunc", "binaryFunc", "leftright"].includes(node.type);
|
|
1213
|
-
if (is_single) {
|
|
1214
|
-
this.append(node);
|
|
1215
|
-
} else {
|
|
1216
|
-
this.queue.push({
|
|
1217
|
-
type: "atom",
|
|
1218
|
-
content: "("
|
|
1219
|
-
});
|
|
1220
|
-
this.append(node);
|
|
1221
|
-
this.queue.push({
|
|
1222
|
-
type: "atom",
|
|
1223
|
-
content: ")"
|
|
1224
|
-
});
|
|
1225
|
-
}
|
|
1226
|
-
return is_single;
|
|
1227
|
-
}
|
|
1228
1324
|
finalize() {
|
|
1229
1325
|
this.flushQueue();
|
|
1230
1326
|
const smartFloorPass = function(input) {
|
|
@@ -1246,7 +1342,7 @@ class TypstWriter {
|
|
|
1246
1342
|
// src/index.ts
|
|
1247
1343
|
function tex2typst(tex, options) {
|
|
1248
1344
|
const opt = {
|
|
1249
|
-
nonStrict:
|
|
1345
|
+
nonStrict: true,
|
|
1250
1346
|
preferTypstIntrinsic: true,
|
|
1251
1347
|
customTexMacros: {}
|
|
1252
1348
|
};
|
|
@@ -1261,9 +1357,10 @@ function tex2typst(tex, options) {
|
|
|
1261
1357
|
opt.customTexMacros = options.customTexMacros;
|
|
1262
1358
|
}
|
|
1263
1359
|
}
|
|
1264
|
-
const
|
|
1360
|
+
const texTree = parseTex(tex, opt.customTexMacros);
|
|
1361
|
+
const typstTree = convertTree(texTree);
|
|
1265
1362
|
const writer2 = new TypstWriter(opt.nonStrict, opt.preferTypstIntrinsic);
|
|
1266
|
-
writer2.append(
|
|
1363
|
+
writer2.append(typstTree);
|
|
1267
1364
|
return writer2.finalize();
|
|
1268
1365
|
}
|
|
1269
1366
|
export {
|