tex2typst 0.2.13 → 0.2.15
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/dist/index.js +278 -269
- package/dist/tex-parser.d.ts +28 -0
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +44 -6
- package/dist/writer.d.ts +7 -7
- package/package.json +1 -1
- package/src/index.ts +4 -3
- package/src/{parser.ts → tex-parser.ts} +117 -110
- package/src/types.ts +97 -10
- package/src/writer.ts +224 -225
- package/dist/parser.d.ts +0 -28
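Most of this release is a rename inside the parser: src/parser.ts becomes src/tex-parser.ts, the Token class and TokenType enum become TexToken and TexTokenType, and TexNode values are now built through a class constructor. The sketch below mirrors those shapes so the diff that follows is easier to read. It is only a sketch: the TexTokenType members and the TexNode constructor signature are inferred from call sites visible in the diff (for example new TexToken(TexTokenType.ELEMENT, '[') and new TexNode('unaryFunc', command, [arg1], exponent)), not taken from the published dist/types.d.ts.

    // Minimal sketch of the renamed token/node API, assuming only what the
    // diff below shows. Enum members and constructor arity are inferred from
    // call sites in tex-parser.ts, not copied from the package's typings.
    enum TexTokenType { ELEMENT, COMMAND, TEXT, COMMENT, SPACE, NEWLINE, CONTROL, UNKNOWN }

    class TexToken {
        constructor(public type: TexTokenType, public value: string) {}
        // Value equality, as used for constants like LEFT_CURLY_BRACKET.
        public eq(other: TexToken): boolean {
            return this.type === other.type && this.value === other.value;
        }
    }

    class TexNode {
        constructor(
            public type: string,        // 'element', 'ordgroup', 'supsub', 'beginend', ...
            public content: string,
            public args?: TexNode[],    // child nodes, e.g. the arguments of \frac
            public data?: unknown       // extra payload, e.g. sub/sup parts or environment rows
        ) {}
    }

    // 0.2.13 used the same shapes under the names Token and TokenType.
    const tok = new TexToken(TexTokenType.ELEMENT, 'x');
    const node = new TexNode('element', tok.value);
    console.log(tok.eq(new TexToken(TexTokenType.ELEMENT, 'x')), node.type); // prints: true element

Because eq compares type and value rather than object identity, the parser can keep shared token constants (LEFT_CURLY_BRACKET, BEGIN_COMMAND, SUB_SYMBOL, and so on) and match freshly tokenized input against them, which is how the rewritten tex-parser.ts uses them in the diff below.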
package/src/{parser.ts → tex-parser.ts}

@@ -1,5 +1,5 @@
 import { symbolMap } from "./map";
-import { TexNode, TexSupsubData,
+import { TexNode, TexSupsubData, TexTokenType } from "./types";


 const UNARY_COMMANDS = [
@@ -45,21 +45,21 @@ const BINARY_COMMANDS = [
 ]


-export class
-    type:
+export class TexToken {
+    type: TexTokenType;
     value: string;

-    constructor(type:
+    constructor(type: TexTokenType, value: string) {
         this.type = type;
         this.value = value;
     }

-    public eq(token:
+    public eq(token: TexToken): boolean {
         return this.type === token.type && this.value === token.value;
     }
 }

-const EMPTY_NODE: TexNode =
+const EMPTY_NODE: TexNode = new TexNode('empty', '');

 function assert(condition: boolean, message: string = ''): void {
     if (!condition) {
@@ -77,10 +77,10 @@ function get_command_param_num(command: string): number {
     }
 }

-const LEFT_CURLY_BRACKET:
-const RIGHT_CURLY_BRACKET:
+const LEFT_CURLY_BRACKET: TexToken = new TexToken(TexTokenType.CONTROL, '{');
+const RIGHT_CURLY_BRACKET: TexToken = new TexToken(TexTokenType.CONTROL, '}');

-function find_closing_curly_bracket(tokens:
+function find_closing_curly_bracket(tokens: TexToken[], start: number): number {
     assert(tokens[start].eq(LEFT_CURLY_BRACKET));
     let count = 1;
     let pos = start + 1;
@@ -100,10 +100,10 @@ function find_closing_curly_bracket(tokens: Token[], start: number): number {
     return pos - 1;
 }

-const LEFT_SQUARE_BRACKET:
-const RIGHT_SQUARE_BRACKET:
+const LEFT_SQUARE_BRACKET: TexToken = new TexToken(TexTokenType.ELEMENT, '[');
+const RIGHT_SQUARE_BRACKET: TexToken = new TexToken(TexTokenType.ELEMENT, ']');

-function find_closing_square_bracket(tokens:
+function find_closing_square_bracket(tokens: TexToken[], start: number): number {
     assert(tokens[start].eq(LEFT_SQUARE_BRACKET));
     let count = 1;
     let pos = start + 1;
@@ -132,29 +132,29 @@ function isdigit(char: string): boolean {
     return '0123456789'.includes(char);
 }

-function eat_whitespaces(tokens:
+function eat_whitespaces(tokens: TexToken[], start: number): TexToken[] {
     let pos = start;
-    while (pos < tokens.length && [
+    while (pos < tokens.length && [TexTokenType.SPACE, TexTokenType.NEWLINE].includes(tokens[pos].type)) {
         pos++;
     }
     return tokens.slice(start, pos);
 }


-function eat_parenthesis(tokens:
+function eat_parenthesis(tokens: TexToken[], start: number): TexToken | null {
     const firstToken = tokens[start];
-    if (firstToken.type ===
+    if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
         return firstToken;
-    } else if (firstToken.type ===
+    } else if (firstToken.type === TexTokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
         return firstToken;
     } else {
         return null;
     }
 }

-function eat_primes(tokens:
+function eat_primes(tokens: TexToken[], start: number): number {
     let pos = start;
-    while (pos < tokens.length && tokens[pos].eq(new
+    while (pos < tokens.length && tokens[pos].eq(new TexToken(TexTokenType.ELEMENT, "'"))) {
         pos += 1;
     }
     return pos - start;
@@ -170,10 +170,10 @@ function eat_command_name(latex: string, start: number): string {
 }


-const LEFT_COMMAND:
-const RIGHT_COMMAND:
+const LEFT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\left');
+const RIGHT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\right');

-function find_closing_right_command(tokens:
+function find_closing_right_command(tokens: TexToken[], start: number): number {
     let count = 1;
     let pos = start;

@@ -193,11 +193,11 @@ function find_closing_right_command(tokens: Token[], start: number): number {
 }


-const BEGIN_COMMAND:
-const END_COMMAND:
+const BEGIN_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\begin');
+const END_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\end');


-function find_closing_end_command(tokens:
+function find_closing_end_command(tokens: TexToken[], start: number): number {
     let count = 1;
     let pos = start;

@@ -241,20 +241,20 @@ function find_closing_curly_bracket_char(latex: string, start: number): number {
 }


-export function tokenize(latex: string):
-    const tokens:
+export function tokenize(latex: string): TexToken[] {
+    const tokens: TexToken[] = [];
     let pos = 0;

     while (pos < latex.length) {
         const firstChar = latex[pos];
-        let token:
+        let token: TexToken;
         switch (firstChar) {
             case '%': {
                 let newPos = pos + 1;
                 while (newPos < latex.length && latex[newPos] !== '\n') {
                     newPos += 1;
                 }
-                token = new
+                token = new TexToken(TexTokenType.COMMENT, latex.slice(pos + 1, newPos));
                 pos = newPos;
                 break;
             }
@@ -263,19 +263,19 @@ export function tokenize(latex: string): Token[] {
             case '_':
             case '^':
             case '&':
-                token = new
+                token = new TexToken(TexTokenType.CONTROL, firstChar);
                 pos++;
                 break;
             case '\n':
-                token = new
+                token = new TexToken(TexTokenType.NEWLINE, firstChar);
                 pos++;
                 break;
             case '\r': {
                 if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
-                    token = new
+                    token = new TexToken(TexTokenType.NEWLINE, '\n');
                     pos += 2;
                 } else {
-                    token = new
+                    token = new TexToken(TexTokenType.NEWLINE, '\n');
                     pos ++;
                 }
                 break;
@@ -285,7 +285,7 @@ export function tokenize(latex: string): Token[] {
                 while (newPos < latex.length && latex[newPos] === ' ') {
                     newPos += 1;
                 }
-                token = new
+                token = new TexToken(TexTokenType.SPACE, latex.slice(pos, newPos));
                 pos = newPos;
                 break;
             }
@@ -295,12 +295,12 @@ export function tokenize(latex: string): Token[] {
                 }
                 const firstTwoChars = latex.slice(pos, pos + 2);
                 if (['\\\\', '\\,'].includes(firstTwoChars)) {
-                    token = new
+                    token = new TexToken(TexTokenType.CONTROL, firstTwoChars);
                 } else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_', '\\|'].includes(firstTwoChars)) {
-                    token = new
+                    token = new TexToken(TexTokenType.ELEMENT, firstTwoChars);
                 } else {
                     const command = eat_command_name(latex, pos + 1);
-                    token = new
+                    token = new TexToken(TexTokenType.COMMAND, '\\' + command);
                 }
                 pos += token.value.length;
                 break;
@@ -311,13 +311,13 @@ export function tokenize(latex: string): Token[] {
                     while (newPos < latex.length && isdigit(latex[newPos])) {
                         newPos += 1;
                     }
-                    token = new
+                    token = new TexToken(TexTokenType.ELEMENT, latex.slice(pos, newPos));
                 } else if (isalpha(firstChar)) {
-                    token = new
+                    token = new TexToken(TexTokenType.ELEMENT, firstChar);
                 } else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
-                    token = new
+                    token = new TexToken(TexTokenType.ELEMENT, firstChar)
                 } else {
-                    token = new
+                    token = new TexToken(TexTokenType.UNKNOWN, firstChar);
                 }
                 pos += token.value.length;
             }
@@ -325,11 +325,11 @@ export function tokenize(latex: string): Token[] {

         tokens.push(token);

-        if (token.type ===
+        if (token.type === TexTokenType.COMMAND && ['\\text', '\\operatorname', '\\begin', '\\end'].includes(token.value)) {
             if (pos >= latex.length || latex[pos] !== '{') {
                 throw new LatexParserError(`No content for ${token.value} command`);
             }
-            tokens.push(new
+            tokens.push(new TexToken(TexTokenType.CONTROL, '{'));
             const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
             pos++;
             let textInside = latex.slice(pos, posClosingBracket);
@@ -338,8 +338,8 @@ export function tokenize(latex: string): Token[] {
             for (const char of chars) {
                 textInside = textInside.replaceAll('\\' + char, char);
             }
-            tokens.push(new
-            tokens.push(new
+            tokens.push(new TexToken(TexTokenType.TEXT, textInside));
+            tokens.push(new TexToken(TexTokenType.CONTROL, '}'));
             pos = posClosingBracket + 1;
         }
     }
@@ -357,8 +357,8 @@ export class LatexParserError extends Error {

 type ParseResult = [TexNode, number];

-const SUB_SYMBOL:
-const SUP_SYMBOL:
+const SUB_SYMBOL:TexToken = new TexToken(TexTokenType.CONTROL, '_');
+const SUP_SYMBOL:TexToken = new TexToken(TexTokenType.CONTROL, '^');

 export class LatexParser {
     space_sensitive: boolean;
@@ -369,7 +369,7 @@ export class LatexParser {
         this.newline_sensitive = newline_sensitive;
     }

-    parse(tokens:
+    parse(tokens: TexToken[]): TexNode {
         const results: TexNode[] = [];
         let pos = 0;
         while (pos < tokens.length) {
@@ -379,11 +379,13 @@
         while (pos < tokens.length) {
             const [res, newPos] = this.parseNextExpr(tokens, pos);
             pos = newPos;
-            if
-
-
-
-
+            if(res.type === 'whitespace') {
+                if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
+                    continue;
+                }
+                if (!this.newline_sensitive && res.content === '\n') {
+                    continue;
+                }
             }
             if (res.type === 'control' && res.content === '&') {
                 throw new LatexParserError('Unexpected & outside of an alignment');
@@ -396,7 +398,7 @@
         } else if (results.length === 1) {
             return results[0];
         } else {
-            return
+            return new TexNode('ordgroup', '', results);
         }
     }

@@ -406,11 +408,11 @@
         } else if (results.length === 1) {
             return results[0];
         } else {
-            return
+            return new TexNode('ordgroup', '', results);
         }
     }

-    parseNextExpr(tokens:
+    parseNextExpr(tokens: TexToken[], start: number): ParseResult {
         let [base, pos] = this.parseNextExprWithoutSupSub(tokens, start);
         let sub: TexNode | null = null;
         let sup: TexNode | null = null;
@@ -447,9 +449,9 @@
                 res.sub = sub;
             }
             if (num_prime > 0) {
-                res.sup =
+                res.sup = new TexNode('ordgroup', '', []);
                 for (let i = 0; i < num_prime; i++) {
-                    res.sup.args!.push(
+                    res.sup.args!.push(new TexNode('element', "'"));
                 }
                 if (sup) {
                     res.sup.args!.push(sup);
@@ -460,27 +462,26 @@
             } else if (sup) {
                 res.sup = sup;
             }
-            return [
+            return [new TexNode('supsub', '', [], res), pos];
         } else {
             return [base, pos];
         }
     }

-    parseNextExprWithoutSupSub(tokens:
+    parseNextExprWithoutSupSub(tokens: TexToken[], start: number): ParseResult {
         const firstToken = tokens[start];
         const tokenType = firstToken.type;
         switch (tokenType) {
-            case
-                return [
-            case
-                return [
-            case
-                return [
-            case
-
-
-
-            case TokenType.COMMAND:
+            case TexTokenType.ELEMENT:
+                return [new TexNode('element', firstToken.value), start + 1];
+            case TexTokenType.TEXT:
+                return [new TexNode('text', firstToken.value), start + 1];
+            case TexTokenType.COMMENT:
+                return [new TexNode('comment', firstToken.value), start + 1];
+            case TexTokenType.SPACE:
+            case TexTokenType.NEWLINE:
+                return [new TexNode('whitespace', firstToken.value), start + 1];
+            case TexTokenType.COMMAND:
                 if (firstToken.eq(BEGIN_COMMAND)) {
                     return this.parseBeginEndExpr(tokens, start);
                 } else if (firstToken.eq(LEFT_COMMAND)) {
@@ -488,7 +489,7 @@
                 } else {
                     return this.parseCommandExpr(tokens, start);
                 }
-            case
+            case TexTokenType.CONTROL:
                 const controlChar = firstToken.value;
                 switch (controlChar) {
                     case '{':
@@ -498,9 +499,9 @@
                     case '}':
                         throw new LatexParserError("Unmatched '}'");
                     case '\\\\':
-                        return [
+                        return [new TexNode('control', '\\\\'), start + 1];
                     case '\\,':
-                        return [
+                        return [new TexNode('control', '\\,'), start + 1];
                     case '_': {
                         return [ EMPTY_NODE, start];
                     }
@@ -508,7 +509,7 @@
                         return [ EMPTY_NODE, start];
                     }
                     case '&':
-                        return [
+                        return [new TexNode('control', '&'), start + 1];
                     default:
                         throw new LatexParserError('Unknown control sequence');
                 }
@@ -517,8 +518,8 @@
         }
     }

-    parseCommandExpr(tokens:
-        assert(tokens[start].type ===
+    parseCommandExpr(tokens: TexToken[], start: number): ParseResult {
+        assert(tokens[start].type === TexTokenType.COMMAND);

         const command = tokens[start].value; // command name starts with a \

@@ -533,9 +534,9 @@
         switch (paramNum) {
             case 0:
                 if (!symbolMap.has(command.slice(1))) {
-                    return [
+                    return [new TexNode('unknownMacro', command), pos];
                 }
-                return [
+                return [new TexNode('symbol', command), pos];
             case 1: {
                 if (command === '\\sqrt' && pos < tokens.length && tokens[pos].eq(LEFT_SQUARE_BRACKET)) {
                     const posLeftSquareBracket = pos;
@@ -543,31 +544,31 @@
                     const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
                     const exponent = this.parse(exprInside);
                     const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
-                    return [
+                    return [new TexNode('unaryFunc', command, [arg1], exponent), newPos];
                 } else if (command === '\\text') {
                     if (pos + 2 >= tokens.length) {
                         throw new LatexParserError('Expecting content for \\text command');
                     }
                     assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
-                    assert(tokens[pos + 1].type ===
+                    assert(tokens[pos + 1].type === TexTokenType.TEXT);
                     assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
                     const text = tokens[pos + 1].value;
-                    return [
+                    return [new TexNode('text', text), pos + 3];
                 }
                 let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
-                return [
+                return [new TexNode('unaryFunc', command, [arg1]), newPos];
             }
             case 2: {
                 const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
                 const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
-                return [
+                return [new TexNode('binaryFunc', command, [arg1, arg2]), pos2];
             }
             default:
                 throw new Error( 'Invalid number of parameters');
         }
     }

-    parseLeftRightExpr(tokens:
+    parseLeftRightExpr(tokens: TexToken[], start: number): ParseResult {
         assert(tokens[start].eq(LEFT_COMMAND));

         let pos = start + 1;
@@ -604,20 +605,20 @@
         const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
         const body = this.parse(exprInside);
         const args: TexNode[] = [
-
+            new TexNode('element', leftDelimiter.value),
             body,
-
+            new TexNode('element', rightDelimiter.value)
         ]
-        const res
+        const res = new TexNode('leftright', '', args);
         return [res, pos];
     }

-    parseBeginEndExpr(tokens:
+    parseBeginEndExpr(tokens: TexToken[], start: number): ParseResult {
         assert(tokens[start].eq(BEGIN_COMMAND));

         let pos = start + 1;
         assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
-        assert(tokens[pos + 1].type ===
+        assert(tokens[pos + 1].type === TexTokenType.TEXT);
         assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
         const envName = tokens[pos + 1].value;
         pos += 3;
@@ -634,7 +635,7 @@
         pos = endIdx + 1;

         assert(tokens[pos].eq(LEFT_CURLY_BRACKET));
-        assert(tokens[pos + 1].type ===
+        assert(tokens[pos + 1].type === TexTokenType.TEXT);
         assert(tokens[pos + 2].eq(RIGHT_CURLY_BRACKET));
         if (tokens[pos + 1].value !== envName) {
             throw new LatexParserError('Mismatched \\begin and \\end environments');
@@ -642,37 +643,43 @@
         pos += 3;

         const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
-        // ignore
-        while(exprInside.length > 0 && [
+        // ignore spaces and '\n' before \end{envName}
+        while(exprInside.length > 0 && [TexTokenType.SPACE, TexTokenType.NEWLINE].includes(exprInside[exprInside.length - 1].type)) {
             exprInside.pop();
         }
         const body = this.parseAligned(exprInside);
-        const res
+        const res = new TexNode('beginend', envName, [], body);
         return [res, pos];
     }

-    parseAligned(tokens:
+    parseAligned(tokens: TexToken[]): TexNode[][] {
         let pos = 0;
         const allRows: TexNode[][] = [];
         let row: TexNode[] = [];
         allRows.push(row);
-        let group
+        let group = new TexNode('ordgroup', '', []);
         row.push(group);

         while (pos < tokens.length) {
             const [res, newPos] = this.parseNextExpr(tokens, pos);
             pos = newPos;
+
             if (res.type === 'whitespace') {
-
-
-
-
+                if (!this.space_sensitive && res.content.replace(/ /g, '').length === 0) {
+                    continue;
+                }
+                if (!this.newline_sensitive && res.content === '\n') {
+                    continue;
+                }
+            }
+
+            if (res.type === 'control' && res.content === '\\\\') {
                 row = [];
-                group =
+                group = new TexNode('ordgroup', '', []);
                 row.push(group);
                 allRows.push(row);
             } else if (res.type === 'control' && res.content === '&') {
-                group =
+                group = new TexNode('ordgroup', '', []);
                 row.push(group);
             } else {
                 group.args!.push(res);
@@ -683,14 +690,14 @@
 }

 // Remove all whitespace before or after _ or ^
-function passIgnoreWhitespaceBeforeScriptMark(tokens:
-    const is_script_mark = (token:
-    let out_tokens:
+function passIgnoreWhitespaceBeforeScriptMark(tokens: TexToken[]): TexToken[] {
+    const is_script_mark = (token: TexToken) => token.eq(SUB_SYMBOL) || token.eq(SUP_SYMBOL);
+    let out_tokens: TexToken[] = [];
     for (let i = 0; i < tokens.length; i++) {
-        if (tokens[i].type ===
+        if (tokens[i].type === TexTokenType.SPACE && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) {
             continue;
         }
-        if (tokens[i].type ===
+        if (tokens[i].type === TexTokenType.SPACE && i - 1 >= 0 && is_script_mark(tokens[i - 1])) {
             continue;
         }
         out_tokens.push(tokens[i]);
@@ -699,10 +706,10 @@ function passIgnoreWhitespaceBeforeScriptMark(tokens: Token[]): Token[] {
 }

 // expand custom tex macros
-function passExpandCustomTexMacros(tokens:
-    let out_tokens:
+function passExpandCustomTexMacros(tokens: TexToken[], customTexMacros: {[key: string]: string}): TexToken[] {
+    let out_tokens: TexToken[] = [];
     for (const token of tokens) {
-        if (token.type ===
+        if (token.type === TexTokenType.COMMAND && customTexMacros[token.value]) {
             const expanded_tokens = tokenize(customTexMacros[token.value]);
             out_tokens = out_tokens.concat(expanded_tokens);
         } else {