tex2typst 0.2.12 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +307 -245
- package/dist/tex-parser.d.ts +28 -0
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +47 -6
- package/dist/writer.d.ts +7 -7
- package/package.json +2 -2
- package/src/index.ts +4 -3
- package/src/{parser.ts → tex-parser.ts} +119 -111
- package/src/types.ts +99 -10
- package/src/writer.ts +255 -197
- package/dist/parser.d.ts +0 -28
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { symbolMap } from "./map";
|
|
2
|
-
import { TexNode, TexSupsubData,
|
|
2
|
+
import { TexNode, TexSupsubData, TexTokenType } from "./types";
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
const UNARY_COMMANDS = [
|
|
@@ -41,24 +41,25 @@ const BINARY_COMMANDS = [
|
|
|
41
41
|
'dbinom',
|
|
42
42
|
'dfrac',
|
|
43
43
|
'tbinom',
|
|
44
|
+
'overset',
|
|
44
45
|
]
|
|
45
46
|
|
|
46
47
|
|
|
47
|
-
export class
|
|
48
|
-
type:
|
|
48
|
+
export class TexToken {
|
|
49
|
+
type: TexTokenType;
|
|
49
50
|
value: string;
|
|
50
51
|
|
|
51
|
-
constructor(type:
|
|
52
|
+
constructor(type: TexTokenType, value: string) {
|
|
52
53
|
this.type = type;
|
|
53
54
|
this.value = value;
|
|
54
55
|
}
|
|
55
56
|
|
|
56
|
-
public eq(token:
|
|
57
|
+
public eq(token: TexToken): boolean {
|
|
57
58
|
return this.type === token.type && this.value === token.value;
|
|
58
59
|
}
|
|
59
60
|
}
|
|
60
61
|
|
|
61
|
-
const EMPTY_NODE: TexNode =
|
|
62
|
+
const EMPTY_NODE: TexNode = new TexNode('empty', '');
|
|
62
63
|
|
|
63
64
|
function assert(condition: boolean, message: string = ''): void {
|
|
64
65
|
if (!condition) {
|
|
@@ -76,10 +77,10 @@ function get_command_param_num(command: string): number {
|
|
|
76
77
|
}
|
|
77
78
|
}
|
|
78
79
|
|
|
79
|
-
const LEFT_CURLY_BRACKET:
|
|
80
|
-
const RIGHT_CURLY_BRACKET:
|
|
80
|
+
const LEFT_CURLY_BRACKET: TexToken = new TexToken(TexTokenType.CONTROL, '{');
|
|
81
|
+
const RIGHT_CURLY_BRACKET: TexToken = new TexToken(TexTokenType.CONTROL, '}');
|
|
81
82
|
|
|
82
|
-
function find_closing_curly_bracket(tokens:
|
|
83
|
+
function find_closing_curly_bracket(tokens: TexToken[], start: number): number {
|
|
83
84
|
assert(tokens[start].eq(LEFT_CURLY_BRACKET));
|
|
84
85
|
let count = 1;
|
|
85
86
|
let pos = start + 1;
|
|
@@ -99,10 +100,10 @@ function find_closing_curly_bracket(tokens: Token[], start: number): number {
|
|
|
99
100
|
return pos - 1;
|
|
100
101
|
}
|
|
101
102
|
|
|
102
|
-
const LEFT_SQUARE_BRACKET:
|
|
103
|
-
const RIGHT_SQUARE_BRACKET:
|
|
103
|
+
const LEFT_SQUARE_BRACKET: TexToken = new TexToken(TexTokenType.ELEMENT, '[');
|
|
104
|
+
const RIGHT_SQUARE_BRACKET: TexToken = new TexToken(TexTokenType.ELEMENT, ']');
|
|
104
105
|
|
|
105
|
-
function find_closing_square_bracket(tokens:
|
|
106
|
+
function find_closing_square_bracket(tokens: TexToken[], start: number): number {
|
|
106
107
|
assert(tokens[start].eq(LEFT_SQUARE_BRACKET));
|
|
107
108
|
let count = 1;
|
|
108
109
|
let pos = start + 1;
|
|
@@ -131,29 +132,29 @@ function isdigit(char: string): boolean {
|
|
|
131
132
|
return '0123456789'.includes(char);
|
|
132
133
|
}
|
|
133
134
|
|
|
134
|
-
function eat_whitespaces(tokens:
|
|
135
|
+
function eat_whitespaces(tokens: TexToken[], start: number): TexToken[] {
|
|
135
136
|
let pos = start;
|
|
136
|
-
while (pos < tokens.length && [
|
|
137
|
+
while (pos < tokens.length && [TexTokenType.SPACE, TexTokenType.NEWLINE].includes(tokens[pos].type)) {
|
|
137
138
|
pos++;
|
|
138
139
|
}
|
|
139
140
|
return tokens.slice(start, pos);
|
|
140
141
|
}
|
|
141
142
|
|
|
142
143
|
|
|
143
|
-
function eat_parenthesis(tokens:
|
|
144
|
+
function eat_parenthesis(tokens: TexToken[], start: number): TexToken | null {
|
|
144
145
|
const firstToken = tokens[start];
|
|
145
|
-
if (firstToken.type ===
|
|
146
|
+
if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
|
|
146
147
|
return firstToken;
|
|
147
|
-
} else if (firstToken.type ===
|
|
148
|
+
} else if (firstToken.type === TexTokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
|
|
148
149
|
return firstToken;
|
|
149
150
|
} else {
|
|
150
151
|
return null;
|
|
151
152
|
}
|
|
152
153
|
}
|
|
153
154
|
|
|
154
|
-
function eat_primes(tokens:
|
|
155
|
+
function eat_primes(tokens: TexToken[], start: number): number {
|
|
155
156
|
let pos = start;
|
|
156
|
-
while (pos < tokens.length && tokens[pos].eq(new
|
|
157
|
+
while (pos < tokens.length && tokens[pos].eq(new TexToken(TexTokenType.ELEMENT, "'"))) {
|
|
157
158
|
pos += 1;
|
|
158
159
|
}
|
|
159
160
|
return pos - start;
|
|
@@ -169,10 +170,10 @@ function eat_command_name(latex: string, start: number): string {
|
|
|
169
170
|
}
|
|
170
171
|
|
|
171
172
|
|
|
172
|
-
const LEFT_COMMAND:
|
|
173
|
-
const RIGHT_COMMAND:
|
|
173
|
+
const LEFT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\left');
|
|
174
|
+
const RIGHT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\right');
|
|
174
175
|
|
|
175
|
-
function find_closing_right_command(tokens:
|
|
176
|
+
function find_closing_right_command(tokens: TexToken[], start: number): number {
|
|
176
177
|
let count = 1;
|
|
177
178
|
let pos = start;
|
|
178
179
|
|
|
@@ -192,11 +193,11 @@ function find_closing_right_command(tokens: Token[], start: number): number {
|
|
|
192
193
|
}
|
|
193
194
|
|
|
194
195
|
|
|
195
|
-
const BEGIN_COMMAND:
|
|
196
|
-
const END_COMMAND:
|
|
196
|
+
const BEGIN_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\begin');
|
|
197
|
+
const END_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\end');
|
|
197
198
|
|
|
198
199
|
|
|
199
|
-
function find_closing_end_command(tokens:
|
|
200
|
+
function find_closing_end_command(tokens: TexToken[], start: number): number {
|
|
200
201
|
let count = 1;
|
|
201
202
|
let pos = start;
|
|
202
203
|
|
|
@@ -240,20 +241,20 @@ function find_closing_curly_bracket_char(latex: string, start: number): number {
|
|
|
240
241
|
}
|
|
241
242
|
|
|
242
243
|
|
|
243
|
-
export function tokenize(latex: string):
|
|
244
|
-
const tokens:
|
|
244
|
+
export function tokenize(latex: string): TexToken[] {
|
|
245
|
+
const tokens: TexToken[] = [];
|
|
245
246
|
let pos = 0;
|
|
246
247
|
|
|
247
248
|
while (pos < latex.length) {
|
|
248
249
|
const firstChar = latex[pos];
|
|
249
|
-
let token:
|
|
250
|
+
let token: TexToken;
|
|
250
251
|
switch (firstChar) {
|
|
251
252
|
case '%': {
|
|
252
253
|
let newPos = pos + 1;
|
|
253
254
|
while (newPos < latex.length && latex[newPos] !== '\n') {
|
|
254
255
|
newPos += 1;
|
|
255
256
|
}
|
|
256
|
-
token = new
|
|
257
|
+
token = new TexToken(TexTokenType.COMMENT, latex.slice(pos + 1, newPos));
|
|
257
258
|
pos = newPos;
|
|
258
259
|
break;
|
|
259
260
|
}
|
|
@@ -262,19 +263,19 @@ export function tokenize(latex: string): Token[] {
|
|
|
262
263
|
case '_':
|
|
263
264
|
case '^':
|
|
264
265
|
case '&':
|
|
265
|
-
token = new
|
|
266
|
+
token = new TexToken(TexTokenType.CONTROL, firstChar);
|
|
266
267
|
pos++;
|
|
267
268
|
break;
|
|
268
269
|
case '\n':
|
|
269
|
-
token = new
|
|
270
|
+
token = new TexToken(TexTokenType.NEWLINE, firstChar);
|
|
270
271
|
pos++;
|
|
271
272
|
break;
|
|
272
273
|
case '\r': {
|
|
273
274
|
if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
|
|
274
|
-
token = new
|
|
275
|
+
token = new TexToken(TexTokenType.NEWLINE, '\n');
|
|
275
276
|
pos += 2;
|
|
276
277
|
} else {
|
|
277
|
-
token = new
|
|
278
|
+
token = new TexToken(TexTokenType.NEWLINE, '\n');
|
|
278
279
|
pos ++;
|
|
279
280
|
}
|
|
280
281
|
break;
|
|
@@ -284,7 +285,7 @@ export function tokenize(latex: string): Token[] {
|
|
|
284
285
|
while (newPos < latex.length && latex[newPos] === ' ') {
|
|
285
286
|
newPos += 1;
|
|
286
287
|
}
|
|
287
|
-
token = new
|
|
288
|
+
token = new TexToken(TexTokenType.SPACE, latex.slice(pos, newPos));
|
|
288
289
|
pos = newPos;
|
|
289
290
|
break;
|
|
290
291
|
}
|
|
@@ -294,12 +295,12 @@ export function tokenize(latex: string): Token[] {
|
|
|
294
295
|
}
|
|
295
296
|
const firstTwoChars = latex.slice(pos, pos + 2);
|
|
296
297
|
if (['\\\\', '\\,'].includes(firstTwoChars)) {
|
|
297
|
-
token = new
|
|
298
|
-
} else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
|
|
299
|
-
token = new
|
|
298
|
+
token = new TexToken(TexTokenType.CONTROL, firstTwoChars);
|
|
299
|
+
} else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_', '\\|'].includes(firstTwoChars)) {
|
|
300
|
+
token = new TexToken(TexTokenType.ELEMENT, firstTwoChars);
|
|
300
301
|
} else {
|
|
301
302
|
const command = eat_command_name(latex, pos + 1);
|
|
302
|
-
token = new
|
|
303
|
+
token = new TexToken(TexTokenType.COMMAND, '\\' + command);
|
|
303
304
|
}
|
|
304
305
|
pos += token.value.length;
|
|
305
306
|
break;
|
|
@@ -310,13 +311,13 @@ export function tokenize(latex: string): Token[] {
|
|
|
310
311
|
while (newPos < latex.length && isdigit(latex[newPos])) {
|
|
311
312
|
newPos += 1;
|
|
312
313
|
}
|
|
313
|
-
token = new
|
|
314
|
+
token = new TexToken(TexTokenType.ELEMENT, latex.slice(pos, newPos));
|
|
314
315
|
} else if (isalpha(firstChar)) {
|
|
315
|
-
token = new
|
|
316
|
+
token = new TexToken(TexTokenType.ELEMENT, firstChar);
|
|
316
317
|
} else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
|
|
317
|
-
token = new
|
|
318
|
+
token = new TexToken(TexTokenType.ELEMENT, firstChar)
|
|
318
319
|
} else {
|
|
319
|
-
token = new
|
|
320
|
+
token = new TexToken(TexTokenType.UNKNOWN, firstChar);
|
|
320
321
|
}
|
|
321
322
|
pos += token.value.length;
|
|
322
323
|
}
|
|
@@ -324,11 +325,11 @@ export function tokenize(latex: string): Token[] {
|
|
|
324
325
|
|
|
325
326
|
tokens.push(token);
|
|
326
327
|
|
|
327
|
-
if (token.type ===
|
|
328
|
+
if (token.type === TexTokenType.COMMAND && ['\\text', '\\operatorname', '\\begin', '\\end'].includes(token.value)) {
|
|
328
329
|
if (pos >= latex.length || latex[pos] !== '{') {
|
|
329
330
|
throw new LatexParserError(`No content for ${token.value} command`);
|
|
330
331
|
}
|
|
331
|
-
tokens.push(new
|
|
332
|
+
tokens.push(new TexToken(TexTokenType.CONTROL, '{'));
|
|
332
333
|
const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
|
|
333
334
|
pos++;
|
|
334
335
|
let textInside = latex.slice(pos, posClosingBracket);
|
|
@@ -337,8 +338,8 @@ export function tokenize(latex: string): Token[] {
|
|
|
337
338
|
for (const char of chars) {
|
|
338
339
|
textInside = textInside.replaceAll('\\' + char, char);
|
|
339
340
|
}
|
|
340
|
-
tokens.push(new
|
|
341
|
-
tokens.push(new
|
|
341
|
+
tokens.push(new TexToken(TexTokenType.TEXT, textInside));
|
|
342
|
+
tokens.push(new TexToken(TexTokenType.CONTROL, '}'));
|
|
342
343
|
pos = posClosingBracket + 1;
|
|
343
344
|
}
|
|
344
345
|
}
|
|
@@ -356,8 +357,8 @@ export class LatexParserError extends Error {
|
|
|
356
357
|
|
|
357
358
|
type ParseResult = [TexNode, number];
|
|
358
359
|
|
|
359
|
-
const SUB_SYMBOL:
|
|
360
|
-
const SUP_SYMBOL:
|
|
360
|
+
const SUB_SYMBOL:TexToken = new TexToken(TexTokenType.CONTROL, '_');
|
|
361
|
+
const SUP_SYMBOL:TexToken = new TexToken(TexTokenType.CONTROL, '^');
|
|
361
362
|
|
|
362
363
|
export class LatexParser {
|
|
363
364
|
space_sensitive: boolean;
|
|
@@ -368,7 +369,7 @@ export class LatexParser {
|
|
|
368
369
|
this.newline_sensitive = newline_sensitive;
|
|
369
370
|
}
|
|
370
371
|
|
|
371
|
-
parse(tokens:
|
|
372
|
+
parse(tokens: TexToken[]): TexNode {
|
|
372
373
|
const results: TexNode[] = [];
|
|
373
374
|
let pos = 0;
|
|
374
375
|
while (pos < tokens.length) {
|
|
@@ -378,11 +379,13 @@ export class LatexParser {
|
|
|
378
379
|
while (pos < tokens.length) {
|
|
379
380
|
const [res, newPos] = this.parseNextExpr(tokens, pos);
|
|
380
381
|
pos = newPos;
|
|
381
|
-
if
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
382
|
+
if(res.type === 'whitespace') {
|
|
383
|
+
if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
|
|
384
|
+
continue;
|
|
385
|
+
}
|
|
386
|
+
if (!this.newline_sensitive && res.content === '\n') {
|
|
387
|
+
continue;
|
|
388
|
+
}
|
|
386
389
|
}
|
|
387
390
|
if (res.type === 'control' && res.content === '&') {
|
|
388
391
|
throw new LatexParserError('Unexpected & outside of an alignment');
|
|
@@ -395,7 +398,7 @@ export class LatexParser {
|
|
|
395
398
|
} else if (results.length === 1) {
|
|
396
399
|
return results[0];
|
|
397
400
|
} else {
|
|
398
|
-
return
|
|
401
|
+
return new TexNode('ordgroup', '', results);
|
|
399
402
|
}
|
|
400
403
|
}
|
|
401
404
|
|
|
@@ -405,11 +408,11 @@ export class LatexParser {
|
|
|
405
408
|
} else if (results.length === 1) {
|
|
406
409
|
return results[0];
|
|
407
410
|
} else {
|
|
408
|
-
return
|
|
411
|
+
return new TexNode('ordgroup', '', results);
|
|
409
412
|
}
|
|
410
413
|
}
|
|
411
414
|
|
|
412
|
-
parseNextExpr(tokens:
|
|
415
|
+
parseNextExpr(tokens: TexToken[], start: number): ParseResult {
|
|
413
416
|
let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
|
|
414
417
|
let sub: TexNode | null = null;
|
|
415
418
|
let sup: TexNode | null = null;
|
|
@@ -446,9 +449,9 @@ export class LatexParser {
|
|
|
446
449
|
res.sub = sub;
|
|
447
450
|
}
|
|
448
451
|
if (num_prime > 0) {
|
|
449
|
-
res.sup =
|
|
452
|
+
res.sup = new TexNode('ordgroup', '', []);
|
|
450
453
|
for (let i = 0; i < num_prime; i++) {
|
|
451
|
-
res.sup.args!.push(
|
|
454
|
+
res.sup.args!.push(new TexNode('element', "'"));
|
|
452
455
|
}
|
|
453
456
|
if (sup) {
|
|
454
457
|
res.sup.args!.push(sup);
|
|
@@ -459,27 +462,26 @@ export class LatexParser {
|
|
|
459
462
|
} else if (sup) {
|
|
460
463
|
res.sup = sup;
|
|
461
464
|
}
|
|
462
|
-
return [
|
|
465
|
+
return [new TexNode('supsub', '', [], res), pos];
|
|
463
466
|
} else {
|
|
464
467
|
return [base, pos];
|
|
465
468
|
}
|
|
466
469
|
}
|
|
467
470
|
|
|
468
|
-
parseNextExprWithoutSupSub(tokens:
|
|
471
|
+
parseNextExprWithoutSupSub(tokens: TexToken[], start: number): ParseResult {
|
|
469
472
|
const firstToken = tokens[start];
|
|
470
473
|
const tokenType = firstToken.type;
|
|
471
474
|
switch (tokenType) {
|
|
472
|
-
case
|
|
473
|
-
return [
|
|
474
|
-
case
|
|
475
|
-
return [
|
|
476
|
-
case
|
|
477
|
-
return [
|
|
478
|
-
case
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
case TokenType.COMMAND:
|
|
475
|
+
case TexTokenType.ELEMENT:
|
|
476
|
+
return [new TexNode('element', firstToken.value), start + 1];
|
|
477
|
+
case TexTokenType.TEXT:
|
|
478
|
+
return [new TexNode('text', firstToken.value), start + 1];
|
|
479
|
+
case TexTokenType.COMMENT:
|
|
480
|
+
return [new TexNode('comment', firstToken.value), start + 1];
|
|
481
|
+
case TexTokenType.SPACE:
|
|
482
|
+
case TexTokenType.NEWLINE:
|
|
483
|
+
return [new TexNode('whitespace', firstToken.value), start + 1];
|
|
484
|
+
case TexTokenType.COMMAND:
|
|
483
485
|
if (firstToken.eq(BEGIN_COMMAND)) {
|
|
484
486
|
return this.parseBeginEndExpr(tokens, start);
|
|
485
487
|
} else if (firstToken.eq(LEFT_COMMAND)) {
|
|
@@ -487,7 +489,7 @@ export class LatexParser {
|
|
|
487
489
|
} else {
|
|
488
490
|
return this.parseCommandExpr(tokens, start);
|
|
489
491
|
}
|
|
490
|
-
case
|
|
492
|
+
case TexTokenType.CONTROL:
|
|
491
493
|
const controlChar = firstToken.value;
|
|
492
494
|
switch (controlChar) {
|
|
493
495
|
case '{':
|
|
@@ -497,9 +499,9 @@ export class LatexParser {
|
|
|
497
499
|
case '}':
|
|
498
500
|
throw new LatexParserError("Unmatched '}'");
|
|
499
501
|
case '\\\\':
|
|
500
|
-
return [
|
|
502
|
+
return [new TexNode('control', '\\\\'), start + 1];
|
|
501
503
|
case '\\,':
|
|
502
|
-
return [
|
|
504
|
+
return [new TexNode('control', '\\,'), start + 1];
|
|
503
505
|
case '_': {
|
|
504
506
|
return [ EMPTY_NODE, start];
|
|
505
507
|
}
|
|
@@ -507,7 +509,7 @@ export class LatexParser {
|
|
|
507
509
|
return [ EMPTY_NODE, start];
|
|
508
510
|
}
|
|
509
511
|
case '&':
|
|
510
|
-
return [
|
|
512
|
+
return [new TexNode('control', '&'), start + 1];
|
|
511
513
|
default:
|
|
512
514
|
throw new LatexParserError('Unknown control sequence');
|
|
513
515
|
}
|
|
@@ -516,8 +518,8 @@ export class LatexParser {
|
|
|
516
518
|
}
|
|
517
519
|
}
|
|
518
520
|
|
|
519
|
-
parseCommandExpr(tokens:
|
|
520
|
-
assert(tokens[start].type ===
|
|
521
|
+
parseCommandExpr(tokens: TexToken[], start: number): ParseResult {
|
|
522
|
+
assert(tokens[start].type === TexTokenType.COMMAND);
|
|
521
523
|
|
|
522
524
|
const command = tokens[start].value; // command name starts with a \
|
|
523
525
|
|
|
@@ -532,9 +534,9 @@ export class LatexParser {
|
|
|
532
534
|
switch (paramNum) {
|
|
533
535
|
case 0:
|
|
534
536
|
if (!symbolMap.has(command.slice(1))) {
|
|
535
|
-
return [
|
|
537
|
+
return [new TexNode('unknownMacro', command), pos];
|
|
536
538
|
}
|
|
537
|
-
return [
|
|
539
|
+
return [new TexNode('symbol', command), pos];
|
|
538
540
|
case 1: {
|
|
539
541
|
if (command === '\\sqrt' && pos < tokens.length && tokens[pos].eq(LEFT_SQUARE_BRACKET)) {
|
|
540
542
|
const posLeftSquareBracket = pos;
|
|
@@ -542,31 +544,31 @@ export class LatexParser {
|
|
|
542
544
|
const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
|
|
543
545
|
const exponent = this.parse(exprInside);
|
|
544
546
|
const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
|
|
545
|
-
return [
|
|
547
|
+
return [new TexNode('unaryFunc', command, [arg1], exponent), newPos];
|
|
546
548
|
} else if (command === '\\text') {
|
|
547
549
|
if (pos + 2 >= tokens.length) {
|
|
548
550
|
throw new LatexParserError('Expecting content for \\text command');
|
|
549
551
|
}
|
|
550
552
|
assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
|
|
551
|
-
assert(tokens[pos + 1].type ===
|
|
553
|
+
assert(tokens[pos + 1].type === TexTokenType.TEXT);
|
|
552
554
|
assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
|
|
553
555
|
const text = tokens[pos + 1].value;
|
|
554
|
-
return [
|
|
556
|
+
return [new TexNode('text', text), pos + 3];
|
|
555
557
|
}
|
|
556
558
|
let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
557
|
-
return [
|
|
559
|
+
return [new TexNode('unaryFunc', command, [arg1]), newPos];
|
|
558
560
|
}
|
|
559
561
|
case 2: {
|
|
560
562
|
const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
561
563
|
const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
|
|
562
|
-
return [
|
|
564
|
+
return [new TexNode('binaryFunc', command, [arg1, arg2]), pos2];
|
|
563
565
|
}
|
|
564
566
|
default:
|
|
565
567
|
throw new Error( 'Invalid number of parameters');
|
|
566
568
|
}
|
|
567
569
|
}
|
|
568
570
|
|
|
569
|
-
parseLeftRightExpr(tokens:
|
|
571
|
+
parseLeftRightExpr(tokens: TexToken[], start: number): ParseResult {
|
|
570
572
|
assert(tokens[start].eq(LEFT_COMMAND));
|
|
571
573
|
|
|
572
574
|
let pos = start + 1;
|
|
@@ -603,20 +605,20 @@ export class LatexParser {
|
|
|
603
605
|
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
604
606
|
const body = this.parse(exprInside);
|
|
605
607
|
const args: TexNode[] = [
|
|
606
|
-
|
|
608
|
+
new TexNode('element', leftDelimiter.value),
|
|
607
609
|
body,
|
|
608
|
-
|
|
610
|
+
new TexNode('element', rightDelimiter.value)
|
|
609
611
|
]
|
|
610
|
-
const res
|
|
612
|
+
const res = new TexNode('leftright', '', args);
|
|
611
613
|
return [res, pos];
|
|
612
614
|
}
|
|
613
615
|
|
|
614
|
-
parseBeginEndExpr(tokens:
|
|
616
|
+
parseBeginEndExpr(tokens: TexToken[], start: number): ParseResult {
|
|
615
617
|
assert(tokens[start].eq(BEGIN_COMMAND));
|
|
616
618
|
|
|
617
619
|
let pos = start + 1;
|
|
618
620
|
assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
|
|
619
|
-
assert(tokens[pos + 1].type ===
|
|
621
|
+
assert(tokens[pos + 1].type === TexTokenType.TEXT);
|
|
620
622
|
assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
|
|
621
623
|
const envName = tokens[pos + 1].value;
|
|
622
624
|
pos += 3;
|
|
@@ -633,7 +635,7 @@ export class LatexParser {
|
|
|
633
635
|
pos = endIdx + 1;
|
|
634
636
|
|
|
635
637
|
assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
|
|
636
|
-
assert(tokens[pos + 1].type ===
|
|
638
|
+
assert(tokens[pos + 1].type === TexTokenType.TEXT);
|
|
637
639
|
assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
|
|
638
640
|
if (tokens[pos + 1].value !== envName) {
|
|
639
641
|
throw new LatexParserError('Mismatched \\begin and \\end environments');
|
|
@@ -641,37 +643,43 @@ export class LatexParser {
|
|
|
641
643
|
pos += 3;
|
|
642
644
|
|
|
643
645
|
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
644
|
-
// ignore
|
|
645
|
-
while(exprInside.length > 0 && [
|
|
646
|
+
// ignore spaces and '\n' before \end{envName}
|
|
647
|
+
while(exprInside.length > 0 && [TexTokenType.SPACE, TexTokenType.NEWLINE].includes(exprInside[exprInside.length - 1].type)) {
|
|
646
648
|
exprInside.pop();
|
|
647
649
|
}
|
|
648
650
|
const body = this.parseAligned(exprInside);
|
|
649
|
-
const res
|
|
651
|
+
const res = new TexNode('beginend', envName, [], body);
|
|
650
652
|
return [res, pos];
|
|
651
653
|
}
|
|
652
654
|
|
|
653
|
-
parseAligned(tokens:
|
|
655
|
+
parseAligned(tokens: TexToken[]): TexNode[][] {
|
|
654
656
|
let pos = 0;
|
|
655
657
|
const allRows: TexNode[][] = [];
|
|
656
658
|
let row: TexNode[] = [];
|
|
657
659
|
allRows.push(row);
|
|
658
|
-
let group
|
|
660
|
+
let group = new TexNode('ordgroup', '', []);
|
|
659
661
|
row.push(group);
|
|
660
662
|
|
|
661
663
|
while (pos < tokens.length) {
|
|
662
664
|
const [res, newPos] = this.parseNextExpr(tokens, pos);
|
|
663
665
|
pos = newPos;
|
|
666
|
+
|
|
664
667
|
if (res.type === 'whitespace') {
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
668
|
+
if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
|
|
669
|
+
continue;
|
|
670
|
+
}
|
|
671
|
+
if (!this.newline_sensitive && res.content === '\n') {
|
|
672
|
+
continue;
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
if (res.type === 'control' && res.content === '\\\\') {
|
|
669
677
|
row = [];
|
|
670
|
-
group =
|
|
678
|
+
group = new TexNode('ordgroup', '', []);
|
|
671
679
|
row.push(group);
|
|
672
680
|
allRows.push(row);
|
|
673
681
|
} else if (res.type === 'control' && res.content === '&') {
|
|
674
|
-
group =
|
|
682
|
+
group = new TexNode('ordgroup', '', []);
|
|
675
683
|
row.push(group);
|
|
676
684
|
} else {
|
|
677
685
|
group.args!.push(res);
|
|
@@ -682,14 +690,14 @@ export class LatexParser {
|
|
|
682
690
|
}
|
|
683
691
|
|
|
684
692
|
// Remove all whitespace before or after _ or ^
|
|
685
|
-
function passIgnoreWhitespaceBeforeScriptMark(tokens:
|
|
686
|
-
const is_script_mark = (token:
|
|
687
|
-
let out_tokens:
|
|
693
|
+
function passIgnoreWhitespaceBeforeScriptMark(tokens: TexToken[]): TexToken[] {
|
|
694
|
+
const is_script_mark = (token: TexToken) => token.eq(SUB_SYMBOL) || token.eq(SUP_SYMBOL);
|
|
695
|
+
let out_tokens: TexToken[] = [];
|
|
688
696
|
for (let i = 0; i < tokens.length; i++) {
|
|
689
|
-
if (tokens[i].type ===
|
|
697
|
+
if (tokens[i].type === TexTokenType.SPACE && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) {
|
|
690
698
|
continue;
|
|
691
699
|
}
|
|
692
|
-
if (tokens[i].type ===
|
|
700
|
+
if (tokens[i].type === TexTokenType.SPACE && i - 1 >= 0 && is_script_mark(tokens[i - 1])) {
|
|
693
701
|
continue;
|
|
694
702
|
}
|
|
695
703
|
out_tokens.push(tokens[i]);
|
|
@@ -698,10 +706,10 @@ function passIgnoreWhitespaceBeforeScriptMark(tokens: Token[]): Token[] {
|
|
|
698
706
|
}
|
|
699
707
|
|
|
700
708
|
// expand custom tex macros
|
|
701
|
-
function passExpandCustomTexMacros(tokens:
|
|
702
|
-
let out_tokens:
|
|
709
|
+
function passExpandCustomTexMacros(tokens: TexToken[], customTexMacros: {[key: string]: string}): TexToken[] {
|
|
710
|
+
let out_tokens: TexToken[] = [];
|
|
703
711
|
for (const token of tokens) {
|
|
704
|
-
if (token.type ===
|
|
712
|
+
if (token.type === TexTokenType.COMMAND && customTexMacros[token.value]) {
|
|
705
713
|
const expanded_tokens = tokenize(customTexMacros[token.value]);
|
|
706
714
|
out_tokens = out_tokens.concat(expanded_tokens);
|
|
707
715
|
} else {
|