tex2typst 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +401 -424
- package/dist/parser.d.ts +16 -13
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +1 -13
- package/package.json +2 -4
- package/src/parser.ts +413 -480
- package/src/types.ts +1 -15
- package/src/writer.ts +57 -50
package/src/parser.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { TexNode,
|
|
1
|
+
import { TexNode, TexSupsubData } from "./types";
|
|
2
|
+
|
|
2
3
|
|
|
3
4
|
const UNARY_COMMANDS = [
|
|
4
5
|
'sqrt',
|
|
@@ -60,7 +61,6 @@ function assert(condition: boolean, message: string = ''): void {
|
|
|
60
61
|
}
|
|
61
62
|
}
|
|
62
63
|
|
|
63
|
-
|
|
64
64
|
function get_command_param_num(command: string): number {
|
|
65
65
|
if (UNARY_COMMANDS.includes(command)) {
|
|
66
66
|
return 1;
|
|
@@ -71,22 +71,21 @@ function get_command_param_num(command: string): number {
|
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
const LEFT_CURLY_BRACKET: Token = {type: 'control', value: '{'};
|
|
75
|
+
const RIGHT_CURLY_BRACKET: Token = {type: 'control', value: '}'};
|
|
76
|
+
|
|
77
|
+
function find_closing_curly_bracket(tokens: Token[], start: number): number {
|
|
78
|
+
assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
|
|
76
79
|
let count = 1;
|
|
77
80
|
let pos = start + 1;
|
|
78
81
|
|
|
79
82
|
while (count > 0) {
|
|
80
|
-
if (pos >=
|
|
83
|
+
if (pos >= tokens.length) {
|
|
81
84
|
throw new LatexParserError('Unmatched curly brackets');
|
|
82
85
|
}
|
|
83
|
-
if
|
|
84
|
-
pos += 2;
|
|
85
|
-
continue;
|
|
86
|
-
}
|
|
87
|
-
if (latex[pos] === '{') {
|
|
86
|
+
if (token_eq(tokens[pos], LEFT_CURLY_BRACKET)) {
|
|
88
87
|
count += 1;
|
|
89
|
-
} else if (
|
|
88
|
+
} else if (token_eq(tokens[pos], RIGHT_CURLY_BRACKET)) {
|
|
90
89
|
count -= 1;
|
|
91
90
|
}
|
|
92
91
|
pos += 1;
|
|
@@ -95,18 +94,21 @@ function find_closing_curly_bracket(latex: string, start: number): number {
|
|
|
95
94
|
return pos - 1;
|
|
96
95
|
}
|
|
97
96
|
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
const LEFT_SQUARE_BRACKET: Token = {type: 'element', value: '['};
|
|
98
|
+
const RIGHT_SQUARE_BRACKET: Token = {type: 'element', value: ']'};
|
|
99
|
+
|
|
100
|
+
function find_closing_square_bracket(tokens: Token[], start: number): number {
|
|
101
|
+
assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
|
|
100
102
|
let count = 1;
|
|
101
103
|
let pos = start + 1;
|
|
102
104
|
|
|
103
105
|
while (count > 0) {
|
|
104
|
-
if (pos >=
|
|
106
|
+
if (pos >= tokens.length) {
|
|
105
107
|
throw new LatexParserError('Unmatched square brackets');
|
|
106
108
|
}
|
|
107
|
-
if (
|
|
109
|
+
if (token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
|
|
108
110
|
count += 1;
|
|
109
|
-
} else if (
|
|
111
|
+
} else if (token_eq(tokens[pos], RIGHT_SQUARE_BRACKET)) {
|
|
110
112
|
count -= 1;
|
|
111
113
|
}
|
|
112
114
|
pos += 1;
|
|
@@ -124,135 +126,235 @@ function isdigit(char: string): boolean {
|
|
|
124
126
|
return '0123456789'.includes(char);
|
|
125
127
|
}
|
|
126
128
|
|
|
129
|
+
function eat_whitespaces(tokens: Token[], start: number): Token[] {
|
|
130
|
+
let pos = start;
|
|
131
|
+
while (pos < tokens.length && ['whitespace', 'newline'].includes(tokens[pos].type)) {
|
|
132
|
+
pos++;
|
|
133
|
+
}
|
|
134
|
+
return tokens.slice(start, pos);
|
|
135
|
+
}
|
|
127
136
|
|
|
128
137
|
|
|
129
|
-
function
|
|
130
|
-
const
|
|
138
|
+
function eat_parenthesis(tokens: Token[], start: number): Token | null {
|
|
139
|
+
const firstToken = tokens[start];
|
|
140
|
+
if (firstToken.type === 'element' && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
|
|
141
|
+
return firstToken;
|
|
142
|
+
} else if (firstToken.type === 'command' && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
|
|
143
|
+
return firstToken;
|
|
144
|
+
} else {
|
|
145
|
+
return null;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
function eat_primes(tokens: Token[], start: number): number {
|
|
131
150
|
let pos = start;
|
|
151
|
+
while (pos < tokens.length && token_eq(tokens[pos], { type: 'element', value: "'" })) {
|
|
152
|
+
pos += 1;
|
|
153
|
+
}
|
|
154
|
+
return pos - start;
|
|
155
|
+
}
|
|
132
156
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
157
|
+
|
|
158
|
+
function eat_command_name(latex: string, start: number): string {
|
|
159
|
+
let pos = start;
|
|
160
|
+
while (pos < latex.length && isalpha(latex[pos])) {
|
|
161
|
+
pos += 1;
|
|
162
|
+
}
|
|
163
|
+
return latex.substring(start, pos);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
const LEFT_COMMAND: Token = { type: 'command', value: '\\left' };
|
|
170
|
+
const RIGHT_COMMAND: Token = { type: 'command', value: '\\right' };
|
|
171
|
+
|
|
172
|
+
function find_closing_right_command(tokens: Token[], start: number): number {
|
|
173
|
+
let count = 1;
|
|
174
|
+
let pos = start;
|
|
175
|
+
|
|
176
|
+
while (count > 0) {
|
|
177
|
+
if (pos >= tokens.length) {
|
|
136
178
|
return -1;
|
|
137
179
|
}
|
|
138
|
-
if (
|
|
139
|
-
|
|
140
|
-
} else {
|
|
141
|
-
|
|
180
|
+
if (token_eq(tokens[pos], LEFT_COMMAND)) {
|
|
181
|
+
count += 1;
|
|
182
|
+
} else if (token_eq(tokens[pos], RIGHT_COMMAND)) {
|
|
183
|
+
count -= 1;
|
|
142
184
|
}
|
|
185
|
+
pos += 1;
|
|
143
186
|
}
|
|
144
187
|
|
|
145
|
-
return -1;
|
|
188
|
+
return pos - 1;
|
|
146
189
|
}
|
|
147
190
|
|
|
148
|
-
|
|
191
|
+
|
|
192
|
+
const BEGIN_COMMAND: Token = { type: 'command', value: '\\begin' };
|
|
193
|
+
const END_COMMAND: Token = { type: 'command', value: '\\end' };
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
function find_closing_end_command(tokens: Token[], start: number): number {
|
|
149
197
|
let count = 1;
|
|
150
198
|
let pos = start;
|
|
151
199
|
|
|
152
200
|
while (count > 0) {
|
|
153
|
-
if (pos >=
|
|
154
|
-
return -1;
|
|
155
|
-
}
|
|
156
|
-
const left_idx = find_command(latex, pos, 'left');
|
|
157
|
-
const right_idx = find_command(latex, pos, 'right');
|
|
158
|
-
|
|
159
|
-
if (right_idx === -1) {
|
|
201
|
+
if (pos >= tokens.length) {
|
|
160
202
|
return -1;
|
|
161
203
|
}
|
|
162
|
-
|
|
163
|
-
if (left_idx === -1 || left_idx > right_idx) {
|
|
164
|
-
// a \right is ahead
|
|
165
|
-
count -= 1;
|
|
166
|
-
pos = right_idx + '\\right'.length;
|
|
167
|
-
} else {
|
|
168
|
-
// a \left is ahead
|
|
204
|
+
if (token_eq(tokens[pos], BEGIN_COMMAND)) {
|
|
169
205
|
count += 1;
|
|
170
|
-
|
|
206
|
+
} else if (token_eq(tokens[pos], END_COMMAND)) {
|
|
207
|
+
count -= 1;
|
|
171
208
|
}
|
|
209
|
+
pos += 1;
|
|
172
210
|
}
|
|
173
211
|
|
|
174
|
-
return pos -
|
|
212
|
+
return pos - 1;
|
|
175
213
|
}
|
|
176
214
|
|
|
177
|
-
function
|
|
215
|
+
function find_closing_curly_bracket_char(latex: string, start: number): number {
|
|
216
|
+
assert(latex[start] === '{');
|
|
178
217
|
let count = 1;
|
|
179
|
-
let pos = start;
|
|
218
|
+
let pos = start + 1;
|
|
180
219
|
|
|
181
220
|
while (count > 0) {
|
|
182
221
|
if (pos >= latex.length) {
|
|
183
|
-
|
|
222
|
+
throw new LatexParserError('Unmatched curly brackets');
|
|
184
223
|
}
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
if (end_idx === -1) {
|
|
189
|
-
return -1;
|
|
224
|
+
if(pos + 1 < latex.length && (['\\{', '\\}'].includes(latex.substring(pos, pos + 2)))) {
|
|
225
|
+
pos += 2;
|
|
226
|
+
continue;
|
|
190
227
|
}
|
|
191
|
-
|
|
192
|
-
if (begin_idx === -1 || begin_idx > end_idx) {
|
|
193
|
-
// an \end is ahead
|
|
194
|
-
count -= 1;
|
|
195
|
-
pos = end_idx + '\\end'.length;
|
|
196
|
-
} else {
|
|
197
|
-
// a \begin is ahead
|
|
228
|
+
if (latex[pos] === '{') {
|
|
198
229
|
count += 1;
|
|
199
|
-
|
|
230
|
+
} else if (latex[pos] === '}') {
|
|
231
|
+
count -= 1;
|
|
200
232
|
}
|
|
233
|
+
pos += 1;
|
|
201
234
|
}
|
|
202
235
|
|
|
203
|
-
return pos -
|
|
236
|
+
return pos - 1;
|
|
204
237
|
}
|
|
205
238
|
|
|
206
|
-
function eat_whitespaces(latex: string, start: number): string {
|
|
207
|
-
let pos = start;
|
|
208
|
-
while (pos < latex.length && [' ', '\t', '\n'].includes(latex[pos])) {
|
|
209
|
-
pos += 1;
|
|
210
|
-
}
|
|
211
|
-
return latex.substring(start, pos);
|
|
212
|
-
}
|
|
213
239
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
pos += 1;
|
|
218
|
-
}
|
|
219
|
-
return latex.substring(start, pos);
|
|
240
|
+
interface Token {
|
|
241
|
+
type: 'element' | 'command' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'unknown';
|
|
242
|
+
value: string;
|
|
220
243
|
}
|
|
221
244
|
|
|
222
|
-
function
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
pos += 1;
|
|
226
|
-
}
|
|
227
|
-
return latex.substring(start, pos);
|
|
228
|
-
}
|
|
245
|
+
function tokenize(latex: string): Token[] {
|
|
246
|
+
const tokens: Token[] = [];
|
|
247
|
+
let pos = 0;
|
|
229
248
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
249
|
+
while (pos < latex.length) {
|
|
250
|
+
const firstChar = latex[pos];
|
|
251
|
+
let token: Token;
|
|
252
|
+
switch (firstChar) {
|
|
253
|
+
case '%': {
|
|
254
|
+
let newPos = pos + 1;
|
|
255
|
+
while (newPos < latex.length && latex[newPos] !== '\n') {
|
|
256
|
+
newPos += 1;
|
|
257
|
+
}
|
|
258
|
+
token = { type: 'comment', value: latex.slice(pos + 1, newPos) };
|
|
259
|
+
pos = newPos;
|
|
260
|
+
break;
|
|
261
|
+
}
|
|
262
|
+
case '{':
|
|
263
|
+
case '}':
|
|
264
|
+
case '_':
|
|
265
|
+
case '^':
|
|
266
|
+
case '&':
|
|
267
|
+
token = { type: 'control', value: firstChar};
|
|
268
|
+
pos++;
|
|
269
|
+
break;
|
|
270
|
+
case '\n':
|
|
271
|
+
token = { type: 'newline', value: firstChar};
|
|
272
|
+
pos++;
|
|
273
|
+
break;
|
|
274
|
+
case '\r': {
|
|
275
|
+
if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
|
|
276
|
+
token = { type: 'newline', value: '\n' };
|
|
277
|
+
pos += 2;
|
|
278
|
+
} else {
|
|
279
|
+
token = { type: 'newline', value: '\n' };
|
|
280
|
+
pos ++;
|
|
281
|
+
}
|
|
282
|
+
break;
|
|
283
|
+
}
|
|
284
|
+
case ' ': {
|
|
285
|
+
let newPos = pos;
|
|
286
|
+
while (newPos < latex.length && latex[newPos] === ' ') {
|
|
287
|
+
newPos += 1;
|
|
288
|
+
}
|
|
289
|
+
token = {type: 'whitespace', value: latex.slice(pos, newPos)};
|
|
290
|
+
pos = newPos;
|
|
291
|
+
break;
|
|
292
|
+
}
|
|
293
|
+
case '\\': {
|
|
294
|
+
if (pos + 1 >= latex.length) {
|
|
295
|
+
throw new LatexParserError('Expecting command name after \\');
|
|
296
|
+
}
|
|
297
|
+
const firstTwoChars = latex.slice(pos, pos + 2);
|
|
298
|
+
if (firstTwoChars === '\\\\') {
|
|
299
|
+
token = { type: 'control', value: '\\\\' };
|
|
300
|
+
pos += 2;
|
|
301
|
+
} else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
|
|
302
|
+
token = { type: 'element', value: firstTwoChars };
|
|
303
|
+
pos += 2;
|
|
304
|
+
} else {
|
|
305
|
+
const command = eat_command_name(latex, pos + 1);
|
|
306
|
+
token = { type: 'command', value: '\\' + command};
|
|
307
|
+
pos += 1 + command.length;
|
|
308
|
+
}
|
|
309
|
+
break;
|
|
310
|
+
}
|
|
311
|
+
default: {
|
|
312
|
+
if (isdigit(firstChar)) {
|
|
313
|
+
let newPos = pos;
|
|
314
|
+
while (newPos < latex.length && isdigit(latex[newPos])) {
|
|
315
|
+
newPos += 1;
|
|
316
|
+
}
|
|
317
|
+
token = { type: 'element', value: latex.slice(pos, newPos) }
|
|
318
|
+
} else if (isalpha(firstChar)) {
|
|
319
|
+
token = { type: 'element', value: firstChar };
|
|
320
|
+
} else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
|
|
321
|
+
token = { type: 'element', value: firstChar }
|
|
322
|
+
} else {
|
|
323
|
+
token = { type: 'unknown', value: firstChar };
|
|
324
|
+
}
|
|
325
|
+
pos += token.value.length;
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
tokens.push(token);
|
|
330
|
+
|
|
331
|
+
if (token.type === 'command' && ['\\text', '\\begin', '\\end'].includes(token.value)) {
|
|
332
|
+
if (pos >= latex.length || latex[pos] !== '{') {
|
|
333
|
+
throw new LatexParserError(`No content for ${token.value} command`);
|
|
334
|
+
}
|
|
335
|
+
tokens.push({ type: 'control', value: '{' });
|
|
336
|
+
const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
|
|
337
|
+
pos++;
|
|
338
|
+
let textInside = latex.slice(pos, posClosingBracket);
|
|
339
|
+
// replace all escape characters with their actual characters
|
|
340
|
+
const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
|
|
341
|
+
for (const char of chars) {
|
|
342
|
+
textInside = textInside.replaceAll('\\' + char, char);
|
|
343
|
+
}
|
|
344
|
+
tokens.push({ type: 'text', value: textInside });
|
|
345
|
+
tokens.push({ type: 'control', value: '}' });
|
|
346
|
+
pos = posClosingBracket + 1;
|
|
347
|
+
}
|
|
243
348
|
}
|
|
349
|
+
return tokens;
|
|
244
350
|
}
|
|
245
351
|
|
|
246
|
-
function
|
|
247
|
-
|
|
248
|
-
while (pos < latex.length && latex[pos] === "'") {
|
|
249
|
-
pos += 1;
|
|
250
|
-
}
|
|
251
|
-
return pos - start;
|
|
352
|
+
function token_eq(token1: Token, token2: Token) {
|
|
353
|
+
return token1.type == token2.type && token1.value == token2.value;
|
|
252
354
|
}
|
|
253
355
|
|
|
254
356
|
|
|
255
|
-
class LatexParserError extends Error {
|
|
357
|
+
export class LatexParserError extends Error {
|
|
256
358
|
constructor(message: string) {
|
|
257
359
|
super(message);
|
|
258
360
|
this.name = 'LatexParserError';
|
|
@@ -260,7 +362,10 @@ class LatexParserError extends Error {
|
|
|
260
362
|
}
|
|
261
363
|
|
|
262
364
|
|
|
263
|
-
type ParseResult = [
|
|
365
|
+
type ParseResult = [TexNode, number];
|
|
366
|
+
|
|
367
|
+
const SUB_SYMBOL:Token = { type: 'control', value: '_' };
|
|
368
|
+
const SUP_SYMBOL:Token = { type: 'control', value: '^' };
|
|
264
369
|
|
|
265
370
|
export class LatexParser {
|
|
266
371
|
space_sensitive: boolean;
|
|
@@ -271,74 +376,87 @@ export class LatexParser {
|
|
|
271
376
|
this.newline_sensitive = newline_sensitive;
|
|
272
377
|
}
|
|
273
378
|
|
|
274
|
-
parse(
|
|
275
|
-
const results:
|
|
379
|
+
parse(tokens: Token[]): TexNode {
|
|
380
|
+
const results: TexNode[] = [];
|
|
276
381
|
let pos = 0;
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
382
|
+
while (pos < tokens.length) {
|
|
383
|
+
const results: TexNode[] = [];
|
|
384
|
+
let pos = 0;
|
|
385
|
+
|
|
386
|
+
while (pos < tokens.length) {
|
|
387
|
+
const [res, newPos] = this.parseNextExpr(tokens, pos);
|
|
388
|
+
pos = newPos;
|
|
389
|
+
if (!this.space_sensitive && res.type === 'whitespace') {
|
|
390
|
+
continue;
|
|
391
|
+
}
|
|
392
|
+
if (!this.newline_sensitive && res.type === 'newline') {
|
|
393
|
+
continue;
|
|
394
|
+
}
|
|
395
|
+
if (res.type === 'control' && res.content === '&') {
|
|
396
|
+
throw new LatexParserError('Unexpected & outside of an alignment');
|
|
397
|
+
}
|
|
398
|
+
results.push(res);
|
|
286
399
|
}
|
|
287
|
-
|
|
288
|
-
|
|
400
|
+
|
|
401
|
+
if (results.length === 0) {
|
|
402
|
+
return EMPTY_NODE;
|
|
403
|
+
} else if (results.length === 1) {
|
|
404
|
+
return results[0];
|
|
405
|
+
} else {
|
|
406
|
+
return { type: 'ordgroup', content: '', args: results };
|
|
289
407
|
}
|
|
290
|
-
results.push(res);
|
|
291
408
|
}
|
|
292
409
|
|
|
410
|
+
|
|
293
411
|
if (results.length === 0) {
|
|
294
412
|
return EMPTY_NODE;
|
|
295
413
|
} else if (results.length === 1) {
|
|
296
414
|
return results[0];
|
|
297
415
|
} else {
|
|
298
|
-
return { type: 'ordgroup', args: results };
|
|
416
|
+
return { type: 'ordgroup', content: '', args: results };
|
|
299
417
|
}
|
|
300
418
|
}
|
|
301
419
|
|
|
302
|
-
parseNextExpr(
|
|
303
|
-
let [base, pos] = this.parseNextExprWithoutSupSub(
|
|
304
|
-
let sub:
|
|
305
|
-
let sup:
|
|
420
|
+
parseNextExpr(tokens: Token[], start: number): ParseResult {
|
|
421
|
+
let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
|
|
422
|
+
let sub: TexNode | null = null;
|
|
423
|
+
let sup: TexNode | null = null;
|
|
306
424
|
let num_prime = 0;
|
|
307
425
|
|
|
308
|
-
num_prime += eat_primes(
|
|
426
|
+
num_prime += eat_primes(tokens, pos);
|
|
309
427
|
pos += num_prime;
|
|
310
|
-
if (pos <
|
|
311
|
-
[sub, pos] = this.parseNextExprWithoutSupSub(
|
|
312
|
-
num_prime += eat_primes(
|
|
428
|
+
if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
|
|
429
|
+
[sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
430
|
+
num_prime += eat_primes(tokens, pos);
|
|
313
431
|
pos += num_prime;
|
|
314
|
-
if (pos <
|
|
315
|
-
[sup, pos] = this.parseNextExprWithoutSupSub(
|
|
316
|
-
if (eat_primes(
|
|
432
|
+
if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
|
|
433
|
+
[sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
434
|
+
if (eat_primes(tokens, pos) > 0) {
|
|
317
435
|
throw new LatexParserError('Double superscript');
|
|
318
436
|
}
|
|
319
437
|
}
|
|
320
|
-
} else if (pos <
|
|
321
|
-
[sup, pos] = this.parseNextExprWithoutSupSub(
|
|
322
|
-
if (eat_primes(
|
|
438
|
+
} else if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
|
|
439
|
+
[sup, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
440
|
+
if (eat_primes(tokens, pos) > 0) {
|
|
323
441
|
throw new LatexParserError('Double superscript');
|
|
324
442
|
}
|
|
325
|
-
if (pos <
|
|
326
|
-
[sub, pos] = this.parseNextExprWithoutSupSub(
|
|
327
|
-
if (eat_primes(
|
|
443
|
+
if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
|
|
444
|
+
[sub, pos] = this.parseNextExprWithoutSupSub(tokens, pos + 1);
|
|
445
|
+
if (eat_primes(tokens, pos) > 0) {
|
|
328
446
|
throw new LatexParserError('Double superscript');
|
|
329
447
|
}
|
|
330
448
|
}
|
|
331
449
|
}
|
|
332
450
|
|
|
333
451
|
if (sub !== null || sup !== null || num_prime > 0) {
|
|
334
|
-
const res = {
|
|
452
|
+
const res: TexSupsubData = { base };
|
|
335
453
|
if (sub) {
|
|
336
454
|
res.sub = sub;
|
|
337
455
|
}
|
|
338
456
|
if (num_prime > 0) {
|
|
339
|
-
res.sup = { type: 'ordgroup', args: [] };
|
|
457
|
+
res.sup = { type: 'ordgroup', content: '', args: [] };
|
|
340
458
|
for (let i = 0; i < num_prime; i++) {
|
|
341
|
-
res.sup.args!.push({ type: '
|
|
459
|
+
res.sup.args!.push({ type: 'symbol', content: '\\prime' });
|
|
342
460
|
}
|
|
343
461
|
if (sup) {
|
|
344
462
|
res.sup.args!.push(sup);
|
|
@@ -349,201 +467,206 @@ export class LatexParser {
|
|
|
349
467
|
} else if (sup) {
|
|
350
468
|
res.sup = sup;
|
|
351
469
|
}
|
|
352
|
-
return [res, pos];
|
|
470
|
+
return [{type: 'supsub', content: '', data: res }, pos];
|
|
353
471
|
} else {
|
|
354
472
|
return [base, pos];
|
|
355
473
|
}
|
|
356
474
|
}
|
|
357
475
|
|
|
358
|
-
parseNextExprWithoutSupSub(
|
|
359
|
-
const
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
if (pos < latex.length && latex[pos] === '_') {
|
|
413
|
-
[sub, pos] = this.parseNextExpr(latex, pos + 1);
|
|
414
|
-
}
|
|
415
|
-
return [{ type: 'supsub', base: EMPTY_NODE, sub, sup }, pos];
|
|
416
|
-
} else if (firstChar === ' ') {
|
|
417
|
-
let pos = start;
|
|
418
|
-
while (pos < latex.length && latex[pos] === ' ') {
|
|
419
|
-
pos += 1;
|
|
420
|
-
}
|
|
421
|
-
return [{ type: 'whitespace', content: latex.slice(start, pos) }, pos];
|
|
422
|
-
} else if (firstChar === '\n') {
|
|
423
|
-
return [{ type: 'newline', content: '\n' }, start + 1];
|
|
424
|
-
} else if (firstChar === '\r') {
|
|
425
|
-
if (start + 1 < latex.length && latex[start + 1] === '\n') {
|
|
426
|
-
return [{ type: 'newline', content: '\n' }, start + 2];
|
|
427
|
-
} else {
|
|
428
|
-
return [{ type: 'newline', content: '\n' }, start + 1];
|
|
429
|
-
}
|
|
430
|
-
} else if (firstChar === '&') {
|
|
431
|
-
return [{ type: 'control', content: '&' }, start + 1];
|
|
432
|
-
} else {
|
|
433
|
-
return [{ type: 'unknown', content: firstChar }, start + 1];
|
|
476
|
+
parseNextExprWithoutSupSub(tokens: Token[], start: number): ParseResult {
|
|
477
|
+
const firstToken = tokens[start];
|
|
478
|
+
const tokenType = firstToken.type;
|
|
479
|
+
switch (tokenType) {
|
|
480
|
+
case 'element':
|
|
481
|
+
case 'text':
|
|
482
|
+
case 'comment':
|
|
483
|
+
case 'whitespace':
|
|
484
|
+
case 'newline':
|
|
485
|
+
return [{ type: tokenType, content: firstToken.value }, start + 1];
|
|
486
|
+
case 'command':
|
|
487
|
+
if (token_eq(firstToken, BEGIN_COMMAND)) {
|
|
488
|
+
return this.parseBeginEndExpr(tokens, start);
|
|
489
|
+
} else if (token_eq(firstToken, LEFT_COMMAND)) {
|
|
490
|
+
return this.parseLeftRightExpr(tokens, start);
|
|
491
|
+
} else {
|
|
492
|
+
return this.parseCommandExpr(tokens, start);
|
|
493
|
+
}
|
|
494
|
+
case 'control':
|
|
495
|
+
const controlChar = firstToken.value;
|
|
496
|
+
switch (controlChar) {
|
|
497
|
+
case '{':
|
|
498
|
+
const posClosingBracket = find_closing_curly_bracket(tokens, start);
|
|
499
|
+
const exprInside = tokens.slice(start + 1, posClosingBracket);
|
|
500
|
+
return [this.parse(exprInside), posClosingBracket + 1];
|
|
501
|
+
case '}':
|
|
502
|
+
throw new LatexParserError("Unmatched '}'");
|
|
503
|
+
case '\\\\':
|
|
504
|
+
return [{ type: 'control', content: '\\\\' }, start + 1];
|
|
505
|
+
case '_': {
|
|
506
|
+
let [sub, pos] = this.parseNextExpr(tokens, start + 1);
|
|
507
|
+
let sup: TexNode | undefined = undefined;
|
|
508
|
+
if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
|
|
509
|
+
[sup, pos] = this.parseNextExpr(tokens, pos + 1);
|
|
510
|
+
}
|
|
511
|
+
const subData = { base: EMPTY_NODE, sub, sup };
|
|
512
|
+
return [{ type: 'supsub', content: '', data: subData }, pos];
|
|
513
|
+
}
|
|
514
|
+
case '^': {
|
|
515
|
+
let [sup, pos] = this.parseNextExpr(tokens, start + 1);
|
|
516
|
+
let sub: TexNode | undefined = undefined;
|
|
517
|
+
if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
|
|
518
|
+
[sub, pos] = this.parseNextExpr(tokens, pos + 1);
|
|
519
|
+
}
|
|
520
|
+
const supData = { base: EMPTY_NODE, sub, sup };
|
|
521
|
+
return [{ type: 'supsub', content: '', data: supData }, pos];
|
|
522
|
+
}
|
|
523
|
+
case '&':
|
|
524
|
+
return [{ type: 'control', content: '&' }, start + 1];
|
|
525
|
+
default:
|
|
526
|
+
throw new LatexParserError('Unknown control sequence');
|
|
527
|
+
}
|
|
528
|
+
default:
|
|
529
|
+
throw new LatexParserError('Unknown token type');
|
|
434
530
|
}
|
|
435
531
|
}
|
|
436
532
|
|
|
437
|
-
parseCommandExpr(
|
|
438
|
-
assert(
|
|
533
|
+
parseCommandExpr(tokens: Token[], start: number): ParseResult {
|
|
534
|
+
assert(tokens[start].type === 'command');
|
|
535
|
+
|
|
536
|
+
const command = tokens[start].value; // command name starts with a \
|
|
537
|
+
|
|
439
538
|
let pos = start + 1;
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
539
|
+
|
|
540
|
+
if (['left', 'right', 'begin', 'end'].includes(command.slice(1))) {
|
|
541
|
+
throw new LatexParserError('Unexpected command: ' + command);
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
const paramNum = get_command_param_num(command.slice(1));
|
|
443
545
|
if (paramNum === 0) {
|
|
444
|
-
return [{ type: '
|
|
546
|
+
return [{ type: 'symbol', content: command }, pos];
|
|
445
547
|
} else if (paramNum === 1) {
|
|
446
|
-
if (command === 'sqrt' && pos <
|
|
548
|
+
if (command === '\\sqrt' && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
|
|
447
549
|
const posLeftSquareBracket = pos;
|
|
448
|
-
const posRightSquareBracket = find_closing_square_bracket(
|
|
449
|
-
const exprInside =
|
|
550
|
+
const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
|
|
551
|
+
const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
|
|
450
552
|
const exponent = this.parse(exprInside);
|
|
451
|
-
const [arg1, newPos] = this.parseNextExprWithoutSupSub(
|
|
452
|
-
return [{ type: '
|
|
453
|
-
} else if (command === 'text') {
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
553
|
+
const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
|
|
554
|
+
return [{ type: 'unaryFunc', content: command, args: [arg1], data: exponent }, newPos];
|
|
555
|
+
} else if (command === '\\text') {
|
|
556
|
+
if (pos + 2 >= tokens.length) {
|
|
557
|
+
throw new LatexParserError('Expecting content for \\text command');
|
|
558
|
+
}
|
|
559
|
+
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
560
|
+
assert(tokens[pos + 1].type === 'text');
|
|
561
|
+
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
562
|
+
const text = tokens[pos + 1].value;
|
|
563
|
+
return [{ type: 'text', content: text }, pos + 3];
|
|
461
564
|
}
|
|
565
|
+
let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
566
|
+
return [{ type: 'unaryFunc', content: command, args: [arg1] }, newPos];
|
|
462
567
|
} else if (paramNum === 2) {
|
|
463
|
-
const [arg1, pos1] = this.parseNextExprWithoutSupSub(
|
|
464
|
-
const [arg2, pos2] = this.parseNextExprWithoutSupSub(
|
|
465
|
-
return [{ type: '
|
|
568
|
+
const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
569
|
+
const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
|
|
570
|
+
return [{ type: 'binaryFunc', content: command, args: [arg1, arg2] }, pos2];
|
|
466
571
|
} else {
|
|
467
572
|
throw new Error( 'Invalid number of parameters');
|
|
468
573
|
}
|
|
469
574
|
}
|
|
470
575
|
|
|
471
|
-
parseLeftRightExpr(
|
|
472
|
-
assert(
|
|
473
|
-
|
|
474
|
-
pos
|
|
475
|
-
|
|
576
|
+
parseLeftRightExpr(tokens: Token[], start: number): ParseResult {
|
|
577
|
+
assert(token_eq(tokens[start], LEFT_COMMAND));
|
|
578
|
+
|
|
579
|
+
let pos = start + 1;
|
|
580
|
+
pos += eat_whitespaces(tokens, pos).length;
|
|
581
|
+
|
|
582
|
+
if (pos >= tokens.length) {
|
|
476
583
|
throw new LatexParserError('Expecting delimiter after \\left');
|
|
477
584
|
}
|
|
478
|
-
|
|
585
|
+
|
|
586
|
+
const leftDelimiter = eat_parenthesis(tokens, pos);
|
|
479
587
|
if (leftDelimiter === null) {
|
|
480
588
|
throw new LatexParserError('Invalid delimiter after \\left');
|
|
481
589
|
}
|
|
482
|
-
pos
|
|
590
|
+
pos++;
|
|
483
591
|
const exprInsideStart = pos;
|
|
484
|
-
const idx = find_closing_right_command(
|
|
592
|
+
const idx = find_closing_right_command(tokens, pos);
|
|
485
593
|
if (idx === -1) {
|
|
486
594
|
throw new LatexParserError('No matching \\right');
|
|
487
595
|
}
|
|
488
596
|
const exprInsideEnd = idx;
|
|
489
|
-
pos = idx +
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
597
|
+
pos = idx + 1;
|
|
598
|
+
|
|
599
|
+
pos += eat_whitespaces(tokens, pos).length;
|
|
600
|
+
if (pos >= tokens.length) {
|
|
601
|
+
throw new LatexParserError('Expecting \\right after \\left');
|
|
493
602
|
}
|
|
494
|
-
|
|
603
|
+
|
|
604
|
+
const rightDelimiter = eat_parenthesis(tokens, pos);
|
|
495
605
|
if (rightDelimiter === null) {
|
|
496
606
|
throw new LatexParserError('Invalid delimiter after \\right');
|
|
497
607
|
}
|
|
498
|
-
pos
|
|
499
|
-
|
|
608
|
+
pos++;
|
|
609
|
+
|
|
610
|
+
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
500
611
|
const body = this.parse(exprInside);
|
|
501
|
-
const
|
|
612
|
+
const args = [
|
|
613
|
+
{ type: 'element', content: leftDelimiter.value },
|
|
614
|
+
body,
|
|
615
|
+
{ type: 'element', content: rightDelimiter.value }
|
|
616
|
+
]
|
|
617
|
+
const res = { type: 'leftright', content: '', args: args };
|
|
502
618
|
return [res, pos];
|
|
503
619
|
}
|
|
504
620
|
|
|
621
|
+
parseBeginEndExpr(tokens: Token[], start: number): ParseResult {
|
|
622
|
+
assert(token_eq(tokens[start], BEGIN_COMMAND));
|
|
505
623
|
|
|
506
|
-
|
|
507
|
-
assert(
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
pos += eat_whitespaces(latex, pos).length; // ignore whitespaces and '\n' after \begin{envName}
|
|
624
|
+
let pos = start + 1;
|
|
625
|
+
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
626
|
+
assert(tokens[pos + 1].type === 'text');
|
|
627
|
+
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
628
|
+
const envName = tokens[pos + 1].value;
|
|
629
|
+
pos += 3;
|
|
630
|
+
|
|
631
|
+
pos += eat_whitespaces(tokens, pos).length; // ignore whitespaces and '\n' after \begin{envName}
|
|
632
|
+
|
|
516
633
|
const exprInsideStart = pos;
|
|
517
|
-
|
|
634
|
+
|
|
635
|
+
const endIdx = find_closing_end_command(tokens, pos);
|
|
518
636
|
if (endIdx === -1) {
|
|
519
637
|
throw new LatexParserError('No matching \\end');
|
|
520
638
|
}
|
|
521
639
|
const exprInsideEnd = endIdx;
|
|
522
|
-
pos = endIdx +
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
if (
|
|
640
|
+
pos = endIdx + 1;
|
|
641
|
+
|
|
642
|
+
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
643
|
+
assert(tokens[pos + 1].type === 'text');
|
|
644
|
+
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
645
|
+
if (tokens[pos + 1].value !== envName) {
|
|
528
646
|
throw new LatexParserError('Mismatched \\begin and \\end environments');
|
|
529
647
|
}
|
|
530
|
-
|
|
531
|
-
|
|
648
|
+
pos += 3;
|
|
649
|
+
|
|
650
|
+
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
651
|
+
// ignore whitespaces and '\n' before \end{envName}
|
|
652
|
+
while(exprInside.length > 0 && ['whitespace', 'newline'].includes(exprInside[exprInside.length - 1].type)) {
|
|
653
|
+
exprInside.pop();
|
|
654
|
+
}
|
|
532
655
|
const body = this.parseAligned(exprInside);
|
|
533
|
-
const res = { type: 'beginend', content: envName, body };
|
|
534
|
-
return [res,
|
|
656
|
+
const res = { type: 'beginend', content: envName, data: body };
|
|
657
|
+
return [res, pos];
|
|
535
658
|
}
|
|
536
659
|
|
|
537
|
-
parseAligned(
|
|
660
|
+
parseAligned(tokens: Token[]): TexNode[][] {
|
|
538
661
|
let pos = 0;
|
|
539
|
-
const allRows:
|
|
540
|
-
let row:
|
|
662
|
+
const allRows: TexNode[][] = [];
|
|
663
|
+
let row: TexNode[] = [];
|
|
541
664
|
allRows.push(row);
|
|
542
|
-
let group:
|
|
665
|
+
let group: TexNode = { type: 'ordgroup', content: '', args: [] };
|
|
543
666
|
row.push(group);
|
|
544
667
|
|
|
545
|
-
while (pos <
|
|
546
|
-
const [res, newPos] = this.parseNextExpr(
|
|
668
|
+
while (pos < tokens.length) {
|
|
669
|
+
const [res, newPos] = this.parseNextExpr(tokens, pos);
|
|
547
670
|
pos = newPos;
|
|
548
671
|
if (res.type === 'whitespace') {
|
|
549
672
|
continue;
|
|
@@ -551,221 +674,31 @@ export class LatexParser {
|
|
|
551
674
|
continue;
|
|
552
675
|
} else if (res.type === 'control' && res.content === '\\\\') {
|
|
553
676
|
row = [];
|
|
554
|
-
group = { type: 'ordgroup', args: [] };
|
|
677
|
+
group = { type: 'ordgroup', content: '', args: [] };
|
|
555
678
|
row.push(group);
|
|
556
679
|
allRows.push(row);
|
|
557
680
|
} else if (res.type === 'control' && res.content === '&') {
|
|
558
|
-
group = { type: 'ordgroup', args: [] };
|
|
681
|
+
group = { type: 'ordgroup', content: '', args: [] };
|
|
559
682
|
row.push(group);
|
|
560
683
|
} else {
|
|
561
684
|
group.args!.push(res);
|
|
562
685
|
}
|
|
563
686
|
}
|
|
564
|
-
|
|
565
687
|
return allRows;
|
|
566
688
|
}
|
|
567
689
|
}
|
|
568
690
|
|
|
569
|
-
// Split tex into a list of tex strings and comments.
|
|
570
|
-
// Each item in the returned list is either a tex snippet or a comment.
|
|
571
|
-
// Each comment item is a string starting with '%'.
|
|
572
|
-
function splitTex(tex: string): string[] {
|
|
573
|
-
const lines = tex.split("\n");
|
|
574
|
-
const out_tex_list: string[] = [];
|
|
575
|
-
let current_tex = "";
|
|
576
|
-
// let inside_begin_depth = 0;
|
|
577
|
-
for (let i = 0; i < lines.length; i++) {
|
|
578
|
-
const line = lines[i];
|
|
579
|
-
// if (line.includes('\\begin{')) {
|
|
580
|
-
// inside_begin_depth += line.split('\\begin{').length - 1;
|
|
581
|
-
// }
|
|
582
|
-
|
|
583
|
-
let index = -1;
|
|
584
|
-
while (index + 1 < line.length) {
|
|
585
|
-
index = line.indexOf('%', index + 1);
|
|
586
|
-
if (index === -1) {
|
|
587
|
-
// No comment in this line
|
|
588
|
-
break;
|
|
589
|
-
}
|
|
590
|
-
if (index === 0 || line[index - 1] !== '\\') {
|
|
591
|
-
// Found a comment
|
|
592
|
-
break;
|
|
593
|
-
}
|
|
594
|
-
}
|
|
595
|
-
if (index !== -1) {
|
|
596
|
-
current_tex += line.substring(0, index);
|
|
597
|
-
const comment = line.substring(index);
|
|
598
|
-
out_tex_list.push(current_tex);
|
|
599
|
-
current_tex = "";
|
|
600
|
-
out_tex_list.push(comment);
|
|
601
|
-
} else {
|
|
602
|
-
current_tex += line;
|
|
603
|
-
}
|
|
604
|
-
if (i < lines.length - 1) {
|
|
605
|
-
const has_begin_command = line.includes('\\begin{');
|
|
606
|
-
const followed_by_end_command = lines[i + 1].includes('\\end{');
|
|
607
|
-
if(!has_begin_command && !followed_by_end_command) {
|
|
608
|
-
current_tex += '\n';
|
|
609
|
-
}
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
// if (line.includes('\\end{')) {
|
|
613
|
-
// inside_begin_depth -= line.split('\\end{').length - 1;
|
|
614
|
-
// }
|
|
615
|
-
}
|
|
616
|
-
|
|
617
|
-
if (current_tex.length > 0) {
|
|
618
|
-
out_tex_list.push(current_tex);
|
|
619
|
-
}
|
|
620
|
-
|
|
621
|
-
return out_tex_list;
|
|
622
|
-
}
|
|
623
|
-
|
|
624
|
-
export class LatexNodeToTexNodeError extends Error {
|
|
625
|
-
node: LatexParseNode;
|
|
626
|
-
|
|
627
|
-
constructor(message: string, node: LatexParseNode) {
|
|
628
|
-
super(message);
|
|
629
|
-
this.name = "LatexNodeToTexNodeError";
|
|
630
|
-
this.node = node;
|
|
631
|
-
}
|
|
632
|
-
}
|
|
633
|
-
|
|
634
|
-
function latexNodeToTexNode(node: LatexParseNode): TexNode {
|
|
635
|
-
try {
|
|
636
|
-
let res = {} as TexNode;
|
|
637
|
-
switch (node.type) {
|
|
638
|
-
case 'ordgroup':
|
|
639
|
-
res.type = 'ordgroup';
|
|
640
|
-
res.args = (node.args as LatexParseNode[]).map((n: LatexParseNode) => latexNodeToTexNode(n));
|
|
641
|
-
if (res.args!.length === 1) {
|
|
642
|
-
res = res.args![0] as TexNode;
|
|
643
|
-
}
|
|
644
|
-
break;
|
|
645
|
-
case 'empty':
|
|
646
|
-
res.type = 'empty';
|
|
647
|
-
res.content = '';
|
|
648
|
-
break;
|
|
649
|
-
case 'atom':
|
|
650
|
-
res.type = 'atom';
|
|
651
|
-
res.content = node.content!;
|
|
652
|
-
break;
|
|
653
|
-
case 'token':
|
|
654
|
-
case 'token-letter-var':
|
|
655
|
-
case 'token-number':
|
|
656
|
-
case 'token-operator':
|
|
657
|
-
case 'token-parenthesis':
|
|
658
|
-
res.type = 'symbol';
|
|
659
|
-
res.content = node.content!;
|
|
660
|
-
break;
|
|
661
|
-
case 'supsub':
|
|
662
|
-
res.type = 'supsub';
|
|
663
|
-
res.irregularData = {} as TexSupsubData;
|
|
664
|
-
if (node['base']) {
|
|
665
|
-
res.irregularData.base = latexNodeToTexNode(node['base']);
|
|
666
|
-
}
|
|
667
|
-
if (node['sup']) {
|
|
668
|
-
res.irregularData.sup = latexNodeToTexNode(node['sup']);
|
|
669
|
-
}
|
|
670
|
-
if (node['sub']) {
|
|
671
|
-
res.irregularData.sub = latexNodeToTexNode(node['sub']);
|
|
672
|
-
}
|
|
673
|
-
break;
|
|
674
|
-
case 'leftright':
|
|
675
|
-
res.type = 'leftright';
|
|
676
|
-
|
|
677
|
-
const body = latexNodeToTexNode(node.body as LatexParseNode);
|
|
678
|
-
|
|
679
|
-
let left: string = node['left']!;
|
|
680
|
-
if (left === "\\{") {
|
|
681
|
-
left = "{";
|
|
682
|
-
}
|
|
683
|
-
let right: string = node['right']!;
|
|
684
|
-
if (right === "\\}") {
|
|
685
|
-
right = "}";
|
|
686
|
-
}
|
|
687
|
-
const is_atom = (str:string) => (['(', ')', '[', ']', '{', '}'].includes(str));
|
|
688
|
-
res.args = [
|
|
689
|
-
{ type: is_atom(left)? 'atom': 'symbol', content: left },
|
|
690
|
-
body,
|
|
691
|
-
{ type: is_atom(right)? 'atom': 'symbol', content: right}
|
|
692
|
-
];
|
|
693
|
-
break;
|
|
694
|
-
case 'beginend':
|
|
695
|
-
if (node.content?.startsWith('align')) {
|
|
696
|
-
// align, align*, alignat, alignat*, aligned, etc.
|
|
697
|
-
res.type = 'align';
|
|
698
|
-
} else {
|
|
699
|
-
res.type = 'matrix';
|
|
700
|
-
}
|
|
701
|
-
res.content = node.content!;
|
|
702
|
-
res.irregularData = (node.body as LatexParseNode[][]).map((row: LatexParseNode[]) => {
|
|
703
|
-
return row.map((n: LatexParseNode) => latexNodeToTexNode(n));
|
|
704
|
-
});
|
|
705
|
-
break;
|
|
706
|
-
case 'command':
|
|
707
|
-
const num_args = get_command_param_num(node.content!);
|
|
708
|
-
res.content = '\\' + node.content!;
|
|
709
|
-
if (num_args === 0) {
|
|
710
|
-
res.type = 'symbol';
|
|
711
|
-
} else if (num_args === 1) {
|
|
712
|
-
res.type = 'unaryFunc';
|
|
713
|
-
res.args = [
|
|
714
|
-
latexNodeToTexNode(node.arg1 as LatexParseNode)
|
|
715
|
-
]
|
|
716
|
-
if (node.content === 'sqrt') {
|
|
717
|
-
if (node.exponent) {
|
|
718
|
-
res.irregularData = latexNodeToTexNode(node.exponent) as TexNode;
|
|
719
|
-
}
|
|
720
|
-
}
|
|
721
|
-
} else if (num_args === 2) {
|
|
722
|
-
res.type = 'binaryFunc';
|
|
723
|
-
res.args = [
|
|
724
|
-
latexNodeToTexNode(node.arg1 as LatexParseNode),
|
|
725
|
-
latexNodeToTexNode(node.arg2 as LatexParseNode)
|
|
726
|
-
]
|
|
727
|
-
} else {
|
|
728
|
-
throw new LatexNodeToTexNodeError('Invalid number of arguments', node);
|
|
729
|
-
}
|
|
730
|
-
break;
|
|
731
|
-
case 'text':
|
|
732
|
-
res.type = 'text';
|
|
733
|
-
res.content = node.content!;
|
|
734
|
-
break;
|
|
735
|
-
case 'comment':
|
|
736
|
-
res.type = 'comment';
|
|
737
|
-
res.content = node.content!;
|
|
738
|
-
break;
|
|
739
|
-
case 'whitespace':
|
|
740
|
-
res.type = 'empty';
|
|
741
|
-
break;
|
|
742
|
-
case 'newline':
|
|
743
|
-
res.type = 'newline';
|
|
744
|
-
res.content = '\n';
|
|
745
|
-
break;
|
|
746
|
-
case 'control':
|
|
747
|
-
if (node.content === '\\\\') {
|
|
748
|
-
res.type = 'symbol';
|
|
749
|
-
res.content = node.content!;
|
|
750
|
-
break;
|
|
751
|
-
} else {
|
|
752
|
-
throw new LatexNodeToTexNodeError(`Unknown control sequence: ${node.content}`, node);
|
|
753
|
-
}
|
|
754
|
-
break;
|
|
755
|
-
default:
|
|
756
|
-
throw new LatexNodeToTexNodeError(`Unknown node type: ${node.type}`, node);
|
|
757
|
-
}
|
|
758
|
-
return res as TexNode;
|
|
759
|
-
} catch (e) {
|
|
760
|
-
throw e;
|
|
761
|
-
}
|
|
762
|
-
}
|
|
763
|
-
|
|
764
691
|
export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
|
|
765
692
|
const parser = new LatexParser();
|
|
766
|
-
|
|
767
|
-
|
|
693
|
+
const original_tokens = tokenize(tex);
|
|
694
|
+
let processed_tokens: Token[] = [];
|
|
695
|
+
for (const token of original_tokens) {
|
|
696
|
+
if (token.type === 'command' && customTexMacros[token.value]) {
|
|
697
|
+
const expanded_tokens = tokenize(customTexMacros[token.value]);
|
|
698
|
+
processed_tokens = processed_tokens.concat(expanded_tokens);
|
|
699
|
+
} else {
|
|
700
|
+
processed_tokens.push(token);
|
|
701
|
+
}
|
|
768
702
|
}
|
|
769
|
-
|
|
770
|
-
return latexNodeToTexNode(node);
|
|
703
|
+
return parser.parse(processed_tokens);
|
|
771
704
|
}
|