tex2typst 0.1.20 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/index.js +635 -313
- package/dist/parser.d.ts +21 -5
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +1 -8
- package/package.json +2 -4
- package/src/map.ts +4 -0
- package/src/parser.ts +652 -302
- package/src/types.ts +1 -9
- package/src/writer.ts +61 -59
- package/tsconfig.json +1 -1
package/src/parser.ts
CHANGED
|
@@ -1,354 +1,704 @@
|
|
|
1
|
-
|
|
2
|
-
import katex from 'katex';
|
|
3
|
-
import { TexNode, KatexParseNode, TexSupsubData } from './types';
|
|
1
|
+
import { TexNode, TexSupsubData } from "./types";
|
|
4
2
|
|
|
5
3
|
|
|
6
|
-
const
|
|
4
|
+
// Command names (written without the leading backslash) for which
// get_command_param_num() reports exactly one argument.
const UNARY_COMMANDS = [
    'sqrt',
    'text',

    'arccos',
    'arcsin',
    'arctan',
    'arg',
    'bar',
    'bold',
    'boldsymbol',
    'ddot',
    'det',
    'dim',
    'dot',
    'exp',
    'gcd',
    'hat',
    'ker',
    'mathbb',
    'mathbf',
    'mathcal',
    'mathscr',
    'mathsf',
    'mathtt',
    'mathrm',
    'max',
    'min',
    'mod',
    'operatorname',
    'overbrace',
    'overline',
    'pmb',
    'sup',
    'rm',
    'tilde',
    'underbrace',
    'underline',
    'vec',
    'widehat',
    'widetilde',
]
|
|
10
46
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
47
|
+
// Command names (written without the leading backslash) for which
// get_command_param_num() reports exactly two arguments.
const BINARY_COMMANDS = [
    'frac',
    'tfrac',
    'binom',
    'dbinom',
    'dfrac',
    'tbinom',
]
|
|
55
|
+
|
|
56
|
+
// Shared placeholder node: returned by LatexParser.parse() for an empty token
// list and used as the base of a sub/superscript that has nothing in front of it.
const EMPTY_NODE = { 'type': 'empty', 'content': '' }
|
|
57
|
+
|
|
58
|
+
/**
 * Internal invariant check used throughout the parser.
 *
 * @param condition - the invariant that is expected to hold
 * @param message - text carried by the error when the invariant is violated
 * @throws LatexParserError when `condition` is false
 */
function assert(condition: boolean, message: string = ''): void {
    if (condition) {
        return;
    }
    throw new LatexParserError(message);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Number of arguments a command consumes.
 *
 * @param command - command name without the leading backslash
 * @returns 1 for names listed in UNARY_COMMANDS, 2 for names listed in
 *          BINARY_COMMANDS, and 0 for every other name
 */
function get_command_param_num(command: string): number {
    if (UNARY_COMMANDS.includes(command)) {
        return 1;
    }
    return BINARY_COMMANDS.includes(command) ? 2 : 0;
}
|
|
73
|
+
|
|
74
|
+
// Grouping braces. The tokenizer emits unescaped '{' and '}' as 'control' tokens.
const LEFT_CURLY_BRACKET: Token = {type: 'control', value: '{'};
const RIGHT_CURLY_BRACKET: Token = {type: 'control', value: '}'};
|
|
76
|
+
|
|
77
|
+
/**
 * Locates the '}' token that closes the '{' token at `start`, honouring nesting.
 *
 * @param tokens - token list being parsed
 * @param start - index of the opening '{' control token
 * @returns index of the matching '}' control token
 * @throws LatexParserError if `tokens[start]` is not '{' or no matching '}' exists
 */
function find_closing_curly_bracket(tokens: Token[], start: number): number {
    assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));

    let depth = 1;
    let cursor = start;
    while (depth > 0) {
        cursor += 1;
        if (cursor >= tokens.length) {
            throw new LatexParserError('Unmatched curly brackets');
        }
        const current = tokens[cursor];
        if (token_eq(current, LEFT_CURLY_BRACKET)) {
            depth += 1;
        } else if (token_eq(current, RIGHT_CURLY_BRACKET)) {
            depth -= 1;
        }
    }

    // The loop stops on the token that brought the depth back to zero.
    return cursor;
}
|
|
96
|
+
|
|
97
|
+
// Square brackets. Unlike curly braces, the tokenizer emits '[' and ']' as
// ordinary 'element' tokens.
const LEFT_SQUARE_BRACKET: Token = {type: 'element', value: '['};
const RIGHT_SQUARE_BRACKET: Token = {type: 'element', value: ']'};
|
|
99
|
+
|
|
100
|
+
/**
 * Locates the ']' token that closes the '[' token at `start`, honouring nesting.
 *
 * @param tokens - token list being parsed
 * @param start - index of the opening '[' element token
 * @returns index of the matching ']' element token
 * @throws LatexParserError if `tokens[start]` is not '[' or no matching ']' exists
 */
function find_closing_square_bracket(tokens: Token[], start: number): number {
    assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));

    let depth = 1;
    let cursor = start;
    while (depth > 0) {
        cursor += 1;
        if (cursor >= tokens.length) {
            throw new LatexParserError('Unmatched square brackets');
        }
        const current = tokens[cursor];
        if (token_eq(current, LEFT_SQUARE_BRACKET)) {
            depth += 1;
        } else if (token_eq(current, RIGHT_SQUARE_BRACKET)) {
            depth -= 1;
        }
    }

    // The loop stops on the token that brought the depth back to zero.
    return cursor;
}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
/**
 * Tests whether `char` is an ASCII letter (a-z or A-Z).
 *
 * Implemented as a substring search, so callers are expected to pass a
 * single character.
 */
function isalpha(char: string): boolean {
    const letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
    return letters.indexOf(char) !== -1;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Tests whether `char` is an ASCII decimal digit (0-9).
 *
 * Implemented as a substring search, so callers are expected to pass a
 * single character.
 */
function isdigit(char: string): boolean {
    const digits = '0123456789';
    return digits.indexOf(char) !== -1;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Collects the run of consecutive 'whitespace' / 'newline' tokens that begins
 * at `start`.
 *
 * @param tokens - token list being parsed
 * @param start - index at which the run may begin
 * @returns the tokens of the run (empty when `tokens[start]` is neither kind)
 */
function eat_whitespaces(tokens: Token[], start: number): Token[] {
    let end = start;
    for (; end < tokens.length; end++) {
        const kind = tokens[end].type;
        if (kind !== 'whitespace' && kind !== 'newline') {
            break;
        }
    }
    return tokens.slice(start, end);
}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
/**
 * Checks whether the token at `start` is a delimiter.
 *
 * Accepted delimiters are the element tokens ( ) [ ] | \{ \} and the commands
 * \lfloor \rfloor \lceil \rceil \langle \rangle.
 *
 * @param tokens - token list being parsed
 * @param start - index of the candidate token (must be within range)
 * @returns the token itself when it is a delimiter, otherwise null
 */
function eat_parenthesis(tokens: Token[], start: number): Token | null {
    const candidate = tokens[start];
    switch (candidate.type) {
        case 'element':
            return ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(candidate.value) ? candidate : null;
        case 'command':
            // Command values carry the leading backslash; compare the bare name.
            return ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(candidate.value.slice(1))
                ? candidate
                : null;
        default:
            return null;
    }
}
|
|
17
148
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
149
|
+
/**
 * Counts the consecutive prime marks (the element token "'") starting at `start`.
 *
 * @param tokens - token list being parsed
 * @param start - index at which the run of primes may begin
 * @returns how many prime tokens were found (0 when there are none)
 */
function eat_primes(tokens: Token[], start: number): number {
    const prime: Token = { type: 'element', value: "'" };
    let count = 0;
    while (start + count < tokens.length && token_eq(tokens[start + count], prime)) {
        count += 1;
    }
    return count;
}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
/**
 * Reads the longest run of ASCII letters in `latex` beginning at `start`.
 *
 * @param latex - the source string
 * @param start - index of the first character after the backslash
 * @returns the letters read; empty when `latex[start]` is not a letter
 */
function eat_command_name(latex: string, start: number): string {
    let end = start;
    for (; end < latex.length; end++) {
        if (!isalpha(latex[end])) {
            break;
        }
    }
    return latex.substring(start, end);
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
// The \left and \right commands, which bracket an expression between a pair
// of delimiters (see LatexParser.parseLeftRightExpr).
const LEFT_COMMAND: Token = { type: 'command', value: '\\left' };
const RIGHT_COMMAND: Token = { type: 'command', value: '\\right' };
|
|
171
|
+
|
|
172
|
+
/**
 * Finds the \right command that closes an already-opened \left, honouring
 * nested \left ... \right pairs.
 *
 * @param tokens - token list being parsed
 * @param start - index of the first token inside the \left ... \right body
 * @returns index of the matching \right token, or -1 when there is none
 */
function find_closing_right_command(tokens: Token[], start: number): number {
    // One \left is already open when scanning begins.
    let depth = 1;
    for (let idx = start; idx < tokens.length; idx++) {
        if (token_eq(tokens[idx], LEFT_COMMAND)) {
            depth += 1;
        } else if (token_eq(tokens[idx], RIGHT_COMMAND)) {
            depth -= 1;
            if (depth === 0) {
                return idx;
            }
        }
    }
    return -1;
}
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
// The \begin and \end commands, which delimit an environment
// (see LatexParser.parseBeginEndExpr).
const BEGIN_COMMAND: Token = { type: 'command', value: '\\begin' };
const END_COMMAND: Token = { type: 'command', value: '\\end' };
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
/**
 * Finds the \end command that closes an already-opened \begin, honouring
 * nested \begin ... \end pairs.
 *
 * @param tokens - token list being parsed
 * @param start - index of the first token inside the environment body
 * @returns index of the matching \end token, or -1 when there is none
 */
function find_closing_end_command(tokens: Token[], start: number): number {
    // One \begin is already open when scanning begins.
    let depth = 1;
    for (let idx = start; idx < tokens.length; idx++) {
        if (token_eq(tokens[idx], BEGIN_COMMAND)) {
            depth += 1;
        } else if (token_eq(tokens[idx], END_COMMAND)) {
            depth -= 1;
            if (depth === 0) {
                return idx;
            }
        }
    }
    return -1;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Character-level counterpart of find_closing_curly_bracket: locates the '}'
 * that closes the '{' at `latex[start]`, honouring nesting.
 *
 * Escape pairs are skipped as a unit. Besides `\{` and `\}` this includes
 * `\\`; otherwise the second backslash of `\\` followed by `}` would be
 * misread as an escaped brace and the real closing brace would be missed.
 *
 * @param latex - the source string
 * @param start - index of the opening '{'
 * @returns index of the matching '}'
 * @throws LatexParserError if `latex[start]` is not '{' or no matching '}' exists
 */
function find_closing_curly_bracket_char(latex: string, start: number): number {
    assert(latex[start] === '{');
    let count = 1;
    let pos = start + 1;

    while (count > 0) {
        if (pos >= latex.length) {
            throw new LatexParserError('Unmatched curly brackets');
        }
        // Escaped braces and escaped backslashes never open or close a group.
        if (pos + 1 < latex.length && ['\\{', '\\}', '\\\\'].includes(latex.substring(pos, pos + 2))) {
            pos += 2;
            continue;
        }
        if (latex[pos] === '{') {
            count += 1;
        } else if (latex[pos] === '}') {
            count -= 1;
        }
        pos += 1;
    }

    // pos has already stepped past the closing brace.
    return pos - 1;
}
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
/** A lexical unit produced by tokenize(). */
interface Token {
    /** Category assigned by the tokenizer. */
    type:
        | 'element'
        | 'command'
        | 'text'
        | 'comment'
        | 'whitespace'
        | 'newline'
        | 'control'
        | 'unknown';
    /** Text of the token; command tokens keep their leading backslash. */
    value: string;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Splits a LaTeX math string into tokens.
 *
 * Token kinds produced:
 * - 'comment': text after '%' up to (not including) the end of the line
 * - 'control': one of { } _ ^ & and the row separator `\\`
 * - 'newline': "\n", "\r\n" or a lone "\r" (value is always "\n")
 * - 'whitespace': a run of spaces and/or tabs
 * - 'element': a run of digits, a single letter, one of the listed
 *   punctuation characters, or an escaped character such as `\{` or `\%`
 * - 'command': a backslash followed by a run of letters
 * - 'unknown': any other character
 *
 * The commands \text, \begin and \end must be followed immediately by a
 * braced argument; its content is emitted verbatim as '{', a 'text' token,
 * and '}'.
 *
 * @param latex - source string
 * @returns the token list
 * @throws LatexParserError on a trailing backslash, on a \text, \begin or
 *         \end that is not followed by '{', or on an unclosed braced argument
 */
function tokenize(latex: string): Token[] {
    const tokens: Token[] = [];
    let pos = 0;

    while (pos < latex.length) {
        const firstChar = latex[pos];
        let token: Token;
        switch (firstChar) {
            case '%': {
                // Stop at '\r' as well as '\n' so that CRLF input does not
                // leave a stray '\r' at the end of the comment text.
                let newPos = pos + 1;
                while (newPos < latex.length && latex[newPos] !== '\n' && latex[newPos] !== '\r') {
                    newPos += 1;
                }
                token = { type: 'comment', value: latex.slice(pos + 1, newPos) };
                pos = newPos;
                break;
            }
            case '{':
            case '}':
            case '_':
            case '^':
            case '&':
                token = { type: 'control', value: firstChar };
                pos++;
                break;
            case '\n':
                token = { type: 'newline', value: firstChar };
                pos++;
                break;
            case '\r': {
                // "\r\n" and a lone "\r" are both normalised to a single "\n".
                const isCrLf = pos + 1 < latex.length && latex[pos + 1] === '\n';
                token = { type: 'newline', value: '\n' };
                pos += isCrLf ? 2 : 1;
                break;
            }
            case ' ':
            case '\t': {
                // Tabs are horizontal whitespace too; without this they would
                // become 'unknown' tokens, which the parser rejects.
                let newPos = pos;
                while (newPos < latex.length && (latex[newPos] === ' ' || latex[newPos] === '\t')) {
                    newPos += 1;
                }
                token = { type: 'whitespace', value: latex.slice(pos, newPos) };
                pos = newPos;
                break;
            }
            case '\\': {
                if (pos + 1 >= latex.length) {
                    throw new LatexParserError('Expecting command name after \\');
                }
                const firstTwoChars = latex.slice(pos, pos + 2);
                if (firstTwoChars === '\\\\') {
                    token = { type: 'control', value: '\\\\' };
                    pos += 2;
                } else if (['\\{', '\\}', '\\%', '\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
                    token = { type: 'element', value: firstTwoChars };
                    pos += 2;
                } else {
                    // NOTE(review): a backslash followed by a non-letter that is
                    // not listed above (e.g. `\,`) yields the bare command '\'
                    // and leaves the next character to be tokenized on its own.
                    const command = eat_command_name(latex, pos + 1);
                    token = { type: 'command', value: '\\' + command };
                    pos += 1 + command.length;
                }
                break;
            }
            default: {
                if (isdigit(firstChar)) {
                    let newPos = pos;
                    while (newPos < latex.length && isdigit(latex[newPos])) {
                        newPos += 1;
                    }
                    token = { type: 'element', value: latex.slice(pos, newPos) };
                } else if (isalpha(firstChar)) {
                    token = { type: 'element', value: firstChar };
                } else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
                    token = { type: 'element', value: firstChar };
                } else {
                    token = { type: 'unknown', value: firstChar };
                }
                pos += token.value.length;
            }
        }

        tokens.push(token);

        if (token.type === 'command' && ['\\text', '\\begin', '\\end'].includes(token.value)) {
            if (pos >= latex.length || latex[pos] !== '{') {
                throw new LatexParserError(`No content for ${token.value} command`);
            }
            tokens.push({ type: 'control', value: '{' });
            const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
            pos++;
            // Unescape in a single pass so that the output of one replacement
            // is never re-scanned as the start of another escape sequence.
            const textInside = latex.slice(pos, posClosingBracket).replace(/\\([{}\\$&#_%])/g, '$1');
            tokens.push({ type: 'text', value: textInside });
            tokens.push({ type: 'control', value: '}' });
            pos = posClosingBracket + 1;
        }
    }
    return tokens;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Structural equality for tokens.
 *
 * @returns true when both tokens have the same `type` and the same `value`
 */
function token_eq(token1: Token, token2: Token): boolean {
    // Strict comparison: both fields are strings, so no coercion is wanted.
    return token1.type === token2.type && token1.value === token2.value;
}
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
/**
 * Error type thrown by the tokenizer and the parser in this file when the
 * LaTeX input cannot be processed.
 */
export class LatexParserError extends Error {
    constructor(message: string) {
        super(message);
        // Report the subclass name instead of the generic 'Error'.
        this.name = 'LatexParserError';
    }
}
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
// A parsed node paired with the index of the first token that was not consumed.
type ParseResult = [TexNode, number];

// Control tokens that introduce a subscript and a superscript, respectively.
const SUB_SYMBOL:Token = { type: 'control', value: '_' };
const SUP_SYMBOL:Token = { type: 'control', value: '^' };
|
|
369
|
+
|
|
370
|
+
/**
 * Recursive-descent parser that turns a token list (see tokenize) into a
 * TexNode tree.
 */
export class LatexParser {
    /** When false, parse() drops 'whitespace' nodes from its result. */
    space_sensitive: boolean;
    /** When false, parse() and parseAligned() drop 'newline' nodes. */
    newline_sensitive: boolean;

    constructor(space_sensitive: boolean = false, newline_sensitive: boolean = true) {
        this.space_sensitive = space_sensitive;
        this.newline_sensitive = newline_sensitive;
    }

    /**
     * Parses a complete token list.
     *
     * @returns EMPTY_NODE when nothing was parsed, the node itself when exactly
     *          one node was parsed, otherwise an 'ordgroup' wrapping all nodes
     * @throws LatexParserError on malformed input, including an '&' that
     *         appears outside of an alignment
     */
    parse(tokens: Token[]): TexNode {
        const results: TexNode[] = [];
        let pos = 0;

        while (pos < tokens.length) {
            const [res, newPos] = this.parseNextExpr(tokens, pos);
            pos = newPos;
            if (!this.space_sensitive && res.type === 'whitespace') {
                continue;
            }
            if (!this.newline_sensitive && res.type === 'newline') {
                continue;
            }
            if (res.type === 'control' && res.content === '&') {
                throw new LatexParserError('Unexpected & outside of an alignment');
            }
            results.push(res);
        }

        if (results.length === 0) {
            return EMPTY_NODE;
        } else if (results.length === 1) {
            return results[0];
        } else {
            return { type: 'ordgroup', content: '', args: results };
        }
    }

    /**
     * Parses one expression together with any primes, subscript and
     * superscript attached to it.
     *
     * Primes are folded into the superscript as `\prime` symbols; when an
     * explicit superscript is also present the two are combined in an
     * 'ordgroup'.
     *
     * @returns the parsed node (a 'supsub' node when anything was attached)
     *          and the index of the next unconsumed token
     * @throws LatexParserError with 'Double superscript' when a prime follows
     *         an explicit superscript
     */
    parseNextExpr(tokens: Token[], start: number): ParseResult {
        let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
        let sub: TexNode | null = null;
        let sup: TexNode | null = null;
        let num_prime = 0;

        // Advance by the number of primes just read, never by the running
        // total: the total may already include primes consumed earlier.
        let primes = eat_primes(tokens, pos);
        num_prime += primes;
        pos += primes;

        if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
            [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
            primes = eat_primes(tokens, pos);
            num_prime += primes;
            pos += primes;
            if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
                [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
                if (eat_primes(tokens, pos) > 0) {
                    throw new LatexParserError('Double superscript');
                }
            }
        } else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
            [sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
            if (eat_primes(tokens, pos) > 0) {
                throw new LatexParserError('Double superscript');
            }
            if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
                [sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
                if (eat_primes(tokens, pos) > 0) {
                    throw new LatexParserError('Double superscript');
                }
            }
        }

        if (sub !== null || sup !== null || num_prime > 0) {
            const res: TexSupsubData = { base };
            if (sub) {
                res.sub = sub;
            }
            if (num_prime > 0) {
                res.sup = { type: 'ordgroup', content: '', args: [] };
                for (let i = 0; i < num_prime; i++) {
                    res.sup.args!.push({ type: 'symbol', content: '\\prime' });
                }
                if (sup) {
                    res.sup.args!.push(sup);
                }
                // A lone prime does not need the wrapping group.
                if (res.sup.args!.length === 1) {
                    res.sup = res.sup.args![0];
                }
            } else if (sup) {
                res.sup = sup;
            }
            return [{ type: 'supsub', content: '', data: res }, pos];
        } else {
            return [base, pos];
        }
    }

    /**
     * Parses one expression without looking at any sub/superscript that
     * follows it.
     *
     * @returns the parsed node and the index of the next unconsumed token
     * @throws LatexParserError when `start` is past the end of the input, on
     *         an unmatched '}', or on an unknown token or control sequence
     */
    parseNextExprWithoutSupSub(tokens: Token[], start: number): ParseResult {
        // A command or script marker at the very end of the input would
        // otherwise read tokens[start] === undefined and raise a TypeError.
        if (start >= tokens.length) {
            throw new LatexParserError('Unexpected end of input');
        }
        const firstToken = tokens[start];
        const tokenType = firstToken.type;
        switch (tokenType) {
            case 'element':
            case 'text':
            case 'comment':
            case 'whitespace':
            case 'newline':
                return [{ type: tokenType, content: firstToken.value }, start + 1];
            case 'command':
                if (token_eq(firstToken, BEGIN_COMMAND)) {
                    return this.parseBeginEndExpr(tokens, start);
                } else if (token_eq(firstToken, LEFT_COMMAND)) {
                    return this.parseLeftRightExpr(tokens, start);
                } else {
                    return this.parseCommandExpr(tokens, start);
                }
            case 'control': {
                const controlChar = firstToken.value;
                switch (controlChar) {
                    case '{': {
                        const posClosingBracket = find_closing_curly_bracket(tokens, start);
                        const exprInside = tokens.slice(start + 1, posClosingBracket);
                        return [this.parse(exprInside), posClosingBracket + 1];
                    }
                    case '}':
                        throw new LatexParserError("Unmatched '}'");
                    case '\\\\':
                        return [{ type: 'control', content: '\\\\' }, start + 1];
                    case '_': {
                        // Subscript with nothing in front of it: attach it to an empty base.
                        // NOTE(review): the script is read with parseNextExpr, so in `_a^b`
                        // the `^b` binds to `a` rather than to the empty base - confirm intended.
                        let [sub, pos] = this.parseNextExpr(tokens, start + 1);
                        let sup: TexNode | undefined = undefined;
                        if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
                            [sup, pos] = this.parseNextExpr(tokens, pos + 1);
                        }
                        const subData = { base: EMPTY_NODE, sub, sup };
                        return [{ type: 'supsub', content: '', data: subData }, pos];
                    }
                    case '^': {
                        // Superscript with nothing in front of it: attach it to an empty base.
                        let [sup, pos] = this.parseNextExpr(tokens, start + 1);
                        let sub: TexNode | undefined = undefined;
                        if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
                            [sub, pos] = this.parseNextExpr(tokens, pos + 1);
                        }
                        const supData = { base: EMPTY_NODE, sub, sup };
                        return [{ type: 'supsub', content: '', data: supData }, pos];
                    }
                    case '&':
                        return [{ type: 'control', content: '&' }, start + 1];
                    default:
                        throw new LatexParserError('Unknown control sequence');
                }
            }
            default:
                throw new LatexParserError('Unknown token type');
        }
    }

    /**
     * Parses a command other than \left, \right, \begin and \end, together
     * with the arguments it takes according to get_command_param_num.
     *
     * - no argument: a 'symbol' node
     * - one argument: a 'unaryFunc' node; `\sqrt[n]{x}` stores the bracketed
     *   part in `data`, and `\text{...}` becomes a 'text' node
     * - two arguments: a 'binaryFunc' node
     *
     * @throws LatexParserError for \left, \right, \begin, \end, or when the
     *         expected arguments are missing or malformed
     */
    parseCommandExpr(tokens: Token[], start: number): ParseResult {
        assert(tokens[start].type === 'command');

        const command = tokens[start].value; // command name starts with a \

        let pos = start + 1;

        if (['left', 'right', 'begin', 'end'].includes(command.slice(1))) {
            throw new LatexParserError('Unexpected command: ' + command);
        }

        const paramNum = get_command_param_num(command.slice(1));
        if (paramNum === 0) {
            return [{ type: 'symbol', content: command }, pos];
        } else if (paramNum === 1) {
            if (command === '\\sqrt' && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
                const posLeftSquareBracket = pos;
                const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
                const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
                const exponent = this.parse(exprInside);
                const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
                return [{ type: 'unaryFunc', content: command, args: [arg1], data: exponent }, newPos];
            } else if (command === '\\text') {
                // The tokenizer emits \text as: command, '{', text token, '}'.
                if (pos + 2 >= tokens.length) {
                    throw new LatexParserError('Expecting content for \\text command');
                }
                assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
                assert(tokens[pos + 1].type === 'text');
                assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
                const text = tokens[pos + 1].value;
                return [{ type: 'text', content: text }, pos + 3];
            }
            const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
            return [{ type: 'unaryFunc', content: command, args: [arg1] }, newPos];
        } else if (paramNum === 2) {
            const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
            const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
            return [{ type: 'binaryFunc', content: command, args: [arg1, arg2] }, pos2];
        } else {
            // LatexParserError extends Error, so existing handlers still match.
            throw new LatexParserError('Invalid number of parameters');
        }
    }

    /**
     * Parses `\left<delim> ... \right<delim>` starting at the \left token.
     *
     * @returns a 'leftright' node whose args are [left delimiter, body,
     *          right delimiter], and the index after the right delimiter
     * @throws LatexParserError when a delimiter is missing or invalid, or when
     *         there is no matching \right
     */
    parseLeftRightExpr(tokens: Token[], start: number): ParseResult {
        assert(token_eq(tokens[start], LEFT_COMMAND));

        let pos = start + 1;
        pos += eat_whitespaces(tokens, pos).length;

        if (pos >= tokens.length) {
            throw new LatexParserError('Expecting delimiter after \\left');
        }

        const leftDelimiter = eat_parenthesis(tokens, pos);
        if (leftDelimiter === null) {
            throw new LatexParserError('Invalid delimiter after \\left');
        }
        pos++;
        const exprInsideStart = pos;
        const idx = find_closing_right_command(tokens, pos);
        if (idx === -1) {
            throw new LatexParserError('No matching \\right');
        }
        const exprInsideEnd = idx;
        pos = idx + 1;

        pos += eat_whitespaces(tokens, pos).length;
        if (pos >= tokens.length) {
            // \right was found; what is missing here is its delimiter.
            throw new LatexParserError('Expecting delimiter after \\right');
        }

        const rightDelimiter = eat_parenthesis(tokens, pos);
        if (rightDelimiter === null) {
            throw new LatexParserError('Invalid delimiter after \\right');
        }
        pos++;

        const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
        const body = this.parse(exprInside);
        const args = [
            { type: 'element', content: leftDelimiter.value },
            body,
            { type: 'element', content: rightDelimiter.value }
        ]
        const res = { type: 'leftright', content: '', args: args };
        return [res, pos];
    }

    /**
     * Parses `\begin{env} ... \end{env}` starting at the \begin token.
     *
     * @returns a 'beginend' node whose content is the environment name and
     *          whose data is the body as rows of cells (see parseAligned),
     *          and the index after the closing `\end{env}`
     * @throws LatexParserError when the environment name is missing, there is
     *         no matching \end, or the two names differ
     */
    parseBeginEndExpr(tokens: Token[], start: number): ParseResult {
        assert(token_eq(tokens[start], BEGIN_COMMAND));

        let pos = start + 1;
        // \begin is followed by: '{', text token (environment name), '}'.
        if (pos + 2 >= tokens.length) {
            throw new LatexParserError('Expecting environment name after \\begin');
        }
        assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
        assert(tokens[pos + 1].type === 'text');
        assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
        const envName = tokens[pos + 1].value;
        pos += 3;

        pos += eat_whitespaces(tokens, pos).length; // ignore whitespaces and '\n' after \begin{envName}

        const exprInsideStart = pos;

        const endIdx = find_closing_end_command(tokens, pos);
        if (endIdx === -1) {
            throw new LatexParserError('No matching \\end');
        }
        const exprInsideEnd = endIdx;
        pos = endIdx + 1;

        if (pos + 2 >= tokens.length) {
            throw new LatexParserError('Expecting environment name after \\end');
        }
        assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
        assert(tokens[pos + 1].type === 'text');
        assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
        if (tokens[pos + 1].value !== envName) {
            throw new LatexParserError('Mismatched \\begin and \\end environments');
        }
        pos += 3;

        const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
        // ignore whitespaces and '\n' before \end{envName}
        while (exprInside.length > 0 && ['whitespace', 'newline'].includes(exprInside[exprInside.length - 1].type)) {
            exprInside.pop();
        }
        const body = this.parseAligned(exprInside);
        const res = { type: 'beginend', content: envName, data: body };
        return [res, pos];
    }

    /**
     * Parses the body of an environment into rows and cells.
     *
     * `\\` starts a new row and '&' starts a new cell; every cell is an
     * 'ordgroup' node. Whitespace is always dropped; newlines are dropped
     * when `newline_sensitive` is false.
     *
     * @returns the rows, each a list of cells; there is always at least one
     *          row containing one (possibly empty) cell
     */
    parseAligned(tokens: Token[]): TexNode[][] {
        let pos = 0;
        const allRows: TexNode[][] = [];
        let row: TexNode[] = [];
        allRows.push(row);
        let group: TexNode = { type: 'ordgroup', content: '', args: [] };
        row.push(group);

        while (pos < tokens.length) {
            const [res, newPos] = this.parseNextExpr(tokens, pos);
            pos = newPos;
            if (res.type === 'whitespace') {
                continue;
            } else if (res.type === 'newline' && !this.newline_sensitive) {
                continue;
            } else if (res.type === 'control' && res.content === '\\\\') {
                row = [];
                group = { type: 'ordgroup', content: '', args: [] };
                row.push(group);
                allRows.push(row);
            } else if (res.type === 'control' && res.content === '&') {
                group = { type: 'ordgroup', content: '', args: [] };
                row.push(group);
            } else {
                group.args!.push(res);
            }
        }
        return allRows;
    }
}
|
|
299
690
|
|
|
300
691
|
/**
 * Tokenizes and parses a LaTeX math string into a TexNode tree.
 *
 * Before parsing, every command token whose value is a key of
 * `customTexMacros` is replaced by the tokens of the mapped string.
 * Expansion is a single pass: commands inside an expansion are not expanded
 * again.
 *
 * @param tex - LaTeX source
 * @param customTexMacros - maps a command including its leading backslash to
 *        its replacement text; an empty replacement removes the command
 * @returns the root node produced by LatexParser.parse
 * @throws LatexParserError when the input cannot be tokenized or parsed
 */
export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
    const parser = new LatexParser();
    const macros = customTexMacros ?? {};
    const processed_tokens: Token[] = [];
    for (const token of tokenize(tex)) {
        const expansion = token.type === 'command' ? macros[token.value] : undefined;
        // Test the type rather than truthiness so that a macro defined as ''
        // is still expanded (to nothing).
        if (typeof expansion === 'string') {
            for (const expanded of tokenize(expansion)) {
                processed_tokens.push(expanded);
            }
        } else {
            processed_tokens.push(token);
        }
    }
    return parser.parse(processed_tokens);
}
|