tex2typst 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.js +868 -770
- package/dist/parser.d.ts +2 -5
- package/dist/tex2typst.min.js +1 -1
- package/dist/types.d.ts +23 -2
- package/dist/writer.d.ts +4 -3
- package/package.json +2 -2
- package/src/index.ts +5 -4
- package/src/parser.ts +122 -104
- package/src/types.ts +30 -2
- package/src/writer.ts +274 -189
package/src/parser.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { symbolMap } from "./map";
|
|
2
|
+
import { TexNode, TexSupsubData, Token, TokenType } from "./types";
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
const UNARY_COMMANDS = [
|
|
@@ -42,7 +43,7 @@ const BINARY_COMMANDS = [
|
|
|
42
43
|
'tbinom',
|
|
43
44
|
]
|
|
44
45
|
|
|
45
|
-
const EMPTY_NODE = {
|
|
46
|
+
const EMPTY_NODE: TexNode = { type: 'empty', content: '' };
|
|
46
47
|
|
|
47
48
|
function assert(condition: boolean, message: string = ''): void {
|
|
48
49
|
if (!condition) {
|
|
@@ -60,8 +61,8 @@ function get_command_param_num(command: string): number {
|
|
|
60
61
|
}
|
|
61
62
|
}
|
|
62
63
|
|
|
63
|
-
const LEFT_CURLY_BRACKET: Token = {type:
|
|
64
|
-
const RIGHT_CURLY_BRACKET: Token = {type:
|
|
64
|
+
const LEFT_CURLY_BRACKET: Token = {type: TokenType.CONTROL, value: '{'};
|
|
65
|
+
const RIGHT_CURLY_BRACKET: Token = {type: TokenType.CONTROL, value: '}'};
|
|
65
66
|
|
|
66
67
|
function find_closing_curly_bracket(tokens: Token[], start: number): number {
|
|
67
68
|
assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
|
|
@@ -83,8 +84,8 @@ function find_closing_curly_bracket(tokens: Token[], start: number): number {
|
|
|
83
84
|
return pos - 1;
|
|
84
85
|
}
|
|
85
86
|
|
|
86
|
-
const LEFT_SQUARE_BRACKET: Token = {type:
|
|
87
|
-
const RIGHT_SQUARE_BRACKET: Token = {type:
|
|
87
|
+
const LEFT_SQUARE_BRACKET: Token = {type: TokenType.ELEMENT, value: '['};
|
|
88
|
+
const RIGHT_SQUARE_BRACKET: Token = {type: TokenType.ELEMENT, value: ']'};
|
|
88
89
|
|
|
89
90
|
function find_closing_square_bracket(tokens: Token[], start: number): number {
|
|
90
91
|
assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
|
|
@@ -117,7 +118,7 @@ function isdigit(char: string): boolean {
|
|
|
117
118
|
|
|
118
119
|
function eat_whitespaces(tokens: Token[], start: number): Token[] {
|
|
119
120
|
let pos = start;
|
|
120
|
-
while (pos < tokens.length && [
|
|
121
|
+
while (pos < tokens.length && [TokenType.WHITESPACE, TokenType.NEWLINE].includes(tokens[pos].type)) {
|
|
121
122
|
pos++;
|
|
122
123
|
}
|
|
123
124
|
return tokens.slice(start, pos);
|
|
@@ -126,9 +127,9 @@ function eat_whitespaces(tokens: Token[], start: number): Token[] {
|
|
|
126
127
|
|
|
127
128
|
function eat_parenthesis(tokens: Token[], start: number): Token | null {
|
|
128
129
|
const firstToken = tokens[start];
|
|
129
|
-
if (firstToken.type ===
|
|
130
|
+
if (firstToken.type === TokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
|
|
130
131
|
return firstToken;
|
|
131
|
-
} else if (firstToken.type ===
|
|
132
|
+
} else if (firstToken.type === TokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
|
|
132
133
|
return firstToken;
|
|
133
134
|
} else {
|
|
134
135
|
return null;
|
|
@@ -137,7 +138,7 @@ function eat_parenthesis(tokens: Token[], start: number): Token | null {
|
|
|
137
138
|
|
|
138
139
|
function eat_primes(tokens: Token[], start: number): number {
|
|
139
140
|
let pos = start;
|
|
140
|
-
while (pos < tokens.length && token_eq(tokens[pos], { type:
|
|
141
|
+
while (pos < tokens.length && token_eq(tokens[pos], { type: TokenType.ELEMENT, value: "'" })) {
|
|
141
142
|
pos += 1;
|
|
142
143
|
}
|
|
143
144
|
return pos - start;
|
|
@@ -155,8 +156,8 @@ function eat_command_name(latex: string, start: number): string {
|
|
|
155
156
|
|
|
156
157
|
|
|
157
158
|
|
|
158
|
-
const LEFT_COMMAND: Token = { type:
|
|
159
|
-
const RIGHT_COMMAND: Token = { type:
|
|
159
|
+
const LEFT_COMMAND: Token = { type: TokenType.COMMAND, value: '\\left' };
|
|
160
|
+
const RIGHT_COMMAND: Token = { type: TokenType.COMMAND, value: '\\right' };
|
|
160
161
|
|
|
161
162
|
function find_closing_right_command(tokens: Token[], start: number): number {
|
|
162
163
|
let count = 1;
|
|
@@ -178,8 +179,8 @@ function find_closing_right_command(tokens: Token[], start: number): number {
|
|
|
178
179
|
}
|
|
179
180
|
|
|
180
181
|
|
|
181
|
-
const BEGIN_COMMAND: Token = { type:
|
|
182
|
-
const END_COMMAND: Token = { type:
|
|
182
|
+
const BEGIN_COMMAND: Token = { type: TokenType.COMMAND, value: '\\begin' };
|
|
183
|
+
const END_COMMAND: Token = { type: TokenType.COMMAND, value: '\\end' };
|
|
183
184
|
|
|
184
185
|
|
|
185
186
|
function find_closing_end_command(tokens: Token[], start: number): number {
|
|
@@ -226,12 +227,7 @@ function find_closing_curly_bracket_char(latex: string, start: number): number {
|
|
|
226
227
|
}
|
|
227
228
|
|
|
228
229
|
|
|
229
|
-
|
|
230
|
-
type: 'element' | 'command' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'unknown';
|
|
231
|
-
value: string;
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
function tokenize(latex: string): Token[] {
|
|
230
|
+
export function tokenize(latex: string): Token[] {
|
|
235
231
|
const tokens: Token[] = [];
|
|
236
232
|
let pos = 0;
|
|
237
233
|
|
|
@@ -244,7 +240,7 @@ function tokenize(latex: string): Token[] {
|
|
|
244
240
|
while (newPos < latex.length && latex[newPos] !== '\n') {
|
|
245
241
|
newPos += 1;
|
|
246
242
|
}
|
|
247
|
-
token = { type:
|
|
243
|
+
token = { type: TokenType.COMMENT, value: latex.slice(pos + 1, newPos) };
|
|
248
244
|
pos = newPos;
|
|
249
245
|
break;
|
|
250
246
|
}
|
|
@@ -253,19 +249,19 @@ function tokenize(latex: string): Token[] {
|
|
|
253
249
|
case '_':
|
|
254
250
|
case '^':
|
|
255
251
|
case '&':
|
|
256
|
-
token = { type:
|
|
252
|
+
token = { type: TokenType.CONTROL, value: firstChar};
|
|
257
253
|
pos++;
|
|
258
254
|
break;
|
|
259
255
|
case '\n':
|
|
260
|
-
token = { type:
|
|
256
|
+
token = { type: TokenType.NEWLINE, value: firstChar};
|
|
261
257
|
pos++;
|
|
262
258
|
break;
|
|
263
259
|
case '\r': {
|
|
264
260
|
if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
|
|
265
|
-
token = { type:
|
|
261
|
+
token = { type: TokenType.NEWLINE, value: '\n' };
|
|
266
262
|
pos += 2;
|
|
267
263
|
} else {
|
|
268
|
-
token = { type:
|
|
264
|
+
token = { type: TokenType.NEWLINE, value: '\n' };
|
|
269
265
|
pos ++;
|
|
270
266
|
}
|
|
271
267
|
break;
|
|
@@ -275,7 +271,7 @@ function tokenize(latex: string): Token[] {
|
|
|
275
271
|
while (newPos < latex.length && latex[newPos] === ' ') {
|
|
276
272
|
newPos += 1;
|
|
277
273
|
}
|
|
278
|
-
token = {type:
|
|
274
|
+
token = {type: TokenType.WHITESPACE, value: latex.slice(pos, newPos)};
|
|
279
275
|
pos = newPos;
|
|
280
276
|
break;
|
|
281
277
|
}
|
|
@@ -285,12 +281,12 @@ function tokenize(latex: string): Token[] {
|
|
|
285
281
|
}
|
|
286
282
|
const firstTwoChars = latex.slice(pos, pos + 2);
|
|
287
283
|
if (['\\\\', '\\,'].includes(firstTwoChars)) {
|
|
288
|
-
token = { type:
|
|
284
|
+
token = { type: TokenType.CONTROL, value: firstTwoChars };
|
|
289
285
|
} else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
|
|
290
|
-
token = { type:
|
|
286
|
+
token = { type: TokenType.ELEMENT, value: firstTwoChars };
|
|
291
287
|
} else {
|
|
292
288
|
const command = eat_command_name(latex, pos + 1);
|
|
293
|
-
token = { type:
|
|
289
|
+
token = { type: TokenType.COMMAND, value: '\\' + command};
|
|
294
290
|
}
|
|
295
291
|
pos += token.value.length;
|
|
296
292
|
break;
|
|
@@ -301,13 +297,13 @@ function tokenize(latex: string): Token[] {
|
|
|
301
297
|
while (newPos < latex.length && isdigit(latex[newPos])) {
|
|
302
298
|
newPos += 1;
|
|
303
299
|
}
|
|
304
|
-
token = { type:
|
|
300
|
+
token = { type: TokenType.ELEMENT, value: latex.slice(pos, newPos) }
|
|
305
301
|
} else if (isalpha(firstChar)) {
|
|
306
|
-
token = { type:
|
|
302
|
+
token = { type: TokenType.ELEMENT, value: firstChar };
|
|
307
303
|
} else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
|
|
308
|
-
token = { type:
|
|
304
|
+
token = { type: TokenType.ELEMENT, value: firstChar }
|
|
309
305
|
} else {
|
|
310
|
-
token = { type:
|
|
306
|
+
token = { type: TokenType.UNKNOWN, value: firstChar };
|
|
311
307
|
}
|
|
312
308
|
pos += token.value.length;
|
|
313
309
|
}
|
|
@@ -315,11 +311,11 @@ function tokenize(latex: string): Token[] {
|
|
|
315
311
|
|
|
316
312
|
tokens.push(token);
|
|
317
313
|
|
|
318
|
-
if (token.type ===
|
|
314
|
+
if (token.type === TokenType.COMMAND && ['\\text', '\\operatorname', '\\begin', '\\end'].includes(token.value)) {
|
|
319
315
|
if (pos >= latex.length || latex[pos] !== '{') {
|
|
320
316
|
throw new LatexParserError(`No content for ${token.value} command`);
|
|
321
317
|
}
|
|
322
|
-
tokens.push({ type:
|
|
318
|
+
tokens.push({ type: TokenType.CONTROL, value: '{' });
|
|
323
319
|
const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
|
|
324
320
|
pos++;
|
|
325
321
|
let textInside = latex.slice(pos, posClosingBracket);
|
|
@@ -328,8 +324,8 @@ function tokenize(latex: string): Token[] {
|
|
|
328
324
|
for (const char of chars) {
|
|
329
325
|
textInside = textInside.replaceAll('\\' + char, char);
|
|
330
326
|
}
|
|
331
|
-
tokens.push({ type:
|
|
332
|
-
tokens.push({ type:
|
|
327
|
+
tokens.push({ type: TokenType.TEXT, value: textInside });
|
|
328
|
+
tokens.push({ type: TokenType.CONTROL, value: '}' });
|
|
333
329
|
pos = posClosingBracket + 1;
|
|
334
330
|
}
|
|
335
331
|
}
|
|
@@ -351,8 +347,8 @@ export class LatexParserError extends Error {
|
|
|
351
347
|
|
|
352
348
|
type ParseResult = [TexNode, number];
|
|
353
349
|
|
|
354
|
-
const SUB_SYMBOL:Token = { type:
|
|
355
|
-
const SUP_SYMBOL:Token = { type:
|
|
350
|
+
const SUB_SYMBOL:Token = { type: TokenType.CONTROL, value: '_' };
|
|
351
|
+
const SUP_SYMBOL:Token = { type: TokenType.CONTROL, value: '^' };
|
|
356
352
|
|
|
357
353
|
export class LatexParser {
|
|
358
354
|
space_sensitive: boolean;
|
|
@@ -443,7 +439,7 @@ export class LatexParser {
|
|
|
443
439
|
if (num_prime > 0) {
|
|
444
440
|
res.sup = { type: 'ordgroup', content: '', args: [] };
|
|
445
441
|
for (let i = 0; i < num_prime; i++) {
|
|
446
|
-
res.sup.args!.push({ type: '
|
|
442
|
+
res.sup.args!.push({ type: 'element', content: "'" });
|
|
447
443
|
}
|
|
448
444
|
if (sup) {
|
|
449
445
|
res.sup.args!.push(sup);
|
|
@@ -464,13 +460,17 @@ export class LatexParser {
|
|
|
464
460
|
const firstToken = tokens[start];
|
|
465
461
|
const tokenType = firstToken.type;
|
|
466
462
|
switch (tokenType) {
|
|
467
|
-
case
|
|
468
|
-
|
|
469
|
-
case
|
|
470
|
-
|
|
471
|
-
case
|
|
472
|
-
return [{ type:
|
|
473
|
-
case
|
|
463
|
+
case TokenType.ELEMENT:
|
|
464
|
+
return [{ type: 'element', content: firstToken.value }, start + 1];
|
|
465
|
+
case TokenType.TEXT:
|
|
466
|
+
return [{ type: 'text', content: firstToken.value }, start + 1];
|
|
467
|
+
case TokenType.COMMENT:
|
|
468
|
+
return [{ type: 'comment', content: firstToken.value }, start + 1];
|
|
469
|
+
case TokenType.WHITESPACE:
|
|
470
|
+
return [{ type: 'whitespace', content: firstToken.value }, start + 1];
|
|
471
|
+
case TokenType.NEWLINE:
|
|
472
|
+
return [{ type: 'newline', content: firstToken.value }, start + 1];
|
|
473
|
+
case TokenType.COMMAND:
|
|
474
474
|
if (token_eq(firstToken, BEGIN_COMMAND)) {
|
|
475
475
|
return this.parseBeginEndExpr(tokens, start);
|
|
476
476
|
} else if (token_eq(firstToken, LEFT_COMMAND)) {
|
|
@@ -478,7 +478,7 @@ export class LatexParser {
|
|
|
478
478
|
} else {
|
|
479
479
|
return this.parseCommandExpr(tokens, start);
|
|
480
480
|
}
|
|
481
|
-
case
|
|
481
|
+
case TokenType.CONTROL:
|
|
482
482
|
const controlChar = firstToken.value;
|
|
483
483
|
switch (controlChar) {
|
|
484
484
|
case '{':
|
|
@@ -492,22 +492,10 @@ export class LatexParser {
|
|
|
492
492
|
case '\\,':
|
|
493
493
|
return [{ type: 'control', content: '\\,' }, start + 1];
|
|
494
494
|
case '_': {
|
|
495
|
-
|
|
496
|
-
let sup: TexNode | undefined = undefined;
|
|
497
|
-
if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
|
|
498
|
-
[sup, pos] = this.parseNextExpr(tokens, pos + 1);
|
|
499
|
-
}
|
|
500
|
-
const subData = { base: EMPTY_NODE, sub, sup };
|
|
501
|
-
return [{ type: 'supsub', content: '', data: subData }, pos];
|
|
495
|
+
return [ EMPTY_NODE, start];
|
|
502
496
|
}
|
|
503
497
|
case '^': {
|
|
504
|
-
|
|
505
|
-
let sub: TexNode | undefined = undefined;
|
|
506
|
-
if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
|
|
507
|
-
[sub, pos] = this.parseNextExpr(tokens, pos + 1);
|
|
508
|
-
}
|
|
509
|
-
const supData = { base: EMPTY_NODE, sub, sup };
|
|
510
|
-
return [{ type: 'supsub', content: '', data: supData }, pos];
|
|
498
|
+
return [ EMPTY_NODE, start];
|
|
511
499
|
}
|
|
512
500
|
case '&':
|
|
513
501
|
return [{ type: 'control', content: '&' }, start + 1];
|
|
@@ -520,7 +508,7 @@ export class LatexParser {
|
|
|
520
508
|
}
|
|
521
509
|
|
|
522
510
|
parseCommandExpr(tokens: Token[], start: number): ParseResult {
|
|
523
|
-
assert(tokens[start].type ===
|
|
511
|
+
assert(tokens[start].type === TokenType.COMMAND);
|
|
524
512
|
|
|
525
513
|
const command = tokens[start].value; // command name starts with a \
|
|
526
514
|
|
|
@@ -530,35 +518,42 @@ export class LatexParser {
|
|
|
530
518
|
throw new LatexParserError('Unexpected command: ' + command);
|
|
531
519
|
}
|
|
532
520
|
|
|
521
|
+
|
|
533
522
|
const paramNum = get_command_param_num(command.slice(1));
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
const posLeftSquareBracket = pos;
|
|
539
|
-
const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
|
|
540
|
-
const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
|
|
541
|
-
const exponent = this.parse(exprInside);
|
|
542
|
-
const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
|
|
543
|
-
return [{ type: 'unaryFunc', content: command, args: [arg1], data: exponent }, newPos];
|
|
544
|
-
} else if (command === '\\text') {
|
|
545
|
-
if (pos + 2 >= tokens.length) {
|
|
546
|
-
throw new LatexParserError('Expecting content for \\text command');
|
|
523
|
+
switch (paramNum) {
|
|
524
|
+
case 0:
|
|
525
|
+
if (!symbolMap.has(command.slice(1))) {
|
|
526
|
+
return [{ type: 'unknownMacro', content: command }, pos];
|
|
547
527
|
}
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
528
|
+
return [{ type: 'symbol', content: command }, pos];
|
|
529
|
+
case 1: {
|
|
530
|
+
if (command === '\\sqrt' && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
|
|
531
|
+
const posLeftSquareBracket = pos;
|
|
532
|
+
const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
|
|
533
|
+
const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
|
|
534
|
+
const exponent = this.parse(exprInside);
|
|
535
|
+
const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
|
|
536
|
+
return [{ type: 'unaryFunc', content: command, args: [arg1], data: exponent }, newPos];
|
|
537
|
+
} else if (command === '\\text') {
|
|
538
|
+
if (pos + 2 >= tokens.length) {
|
|
539
|
+
throw new LatexParserError('Expecting content for \\text command');
|
|
540
|
+
}
|
|
541
|
+
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
542
|
+
assert(tokens[pos + 1].type === TokenType.TEXT);
|
|
543
|
+
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
544
|
+
const text = tokens[pos + 1].value;
|
|
545
|
+
return [{ type: 'text', content: text }, pos + 3];
|
|
546
|
+
}
|
|
547
|
+
let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
548
|
+
return [{ type: 'unaryFunc', content: command, args: [arg1] }, newPos];
|
|
553
549
|
}
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
throw new Error( 'Invalid number of parameters');
|
|
550
|
+
case 2: {
|
|
551
|
+
const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
|
|
552
|
+
const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
|
|
553
|
+
return [{ type: 'binaryFunc', content: command, args: [arg1, arg2] }, pos2];
|
|
554
|
+
}
|
|
555
|
+
default:
|
|
556
|
+
throw new Error( 'Invalid number of parameters');
|
|
562
557
|
}
|
|
563
558
|
}
|
|
564
559
|
|
|
@@ -598,12 +593,12 @@ export class LatexParser {
|
|
|
598
593
|
|
|
599
594
|
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
600
595
|
const body = this.parse(exprInside);
|
|
601
|
-
const args = [
|
|
596
|
+
const args: TexNode[] = [
|
|
602
597
|
{ type: 'element', content: leftDelimiter.value },
|
|
603
598
|
body,
|
|
604
599
|
{ type: 'element', content: rightDelimiter.value }
|
|
605
600
|
]
|
|
606
|
-
const res = { type: 'leftright', content: '', args: args };
|
|
601
|
+
const res: TexNode = { type: 'leftright', content: '', args: args };
|
|
607
602
|
return [res, pos];
|
|
608
603
|
}
|
|
609
604
|
|
|
@@ -612,7 +607,7 @@ export class LatexParser {
|
|
|
612
607
|
|
|
613
608
|
let pos = start + 1;
|
|
614
609
|
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
615
|
-
assert(tokens[pos + 1].type ===
|
|
610
|
+
assert(tokens[pos + 1].type === TokenType.TEXT);
|
|
616
611
|
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
617
612
|
const envName = tokens[pos + 1].value;
|
|
618
613
|
pos += 3;
|
|
@@ -629,7 +624,7 @@ export class LatexParser {
|
|
|
629
624
|
pos = endIdx + 1;
|
|
630
625
|
|
|
631
626
|
assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
|
|
632
|
-
assert(tokens[pos + 1].type ===
|
|
627
|
+
assert(tokens[pos + 1].type === TokenType.TEXT);
|
|
633
628
|
assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
|
|
634
629
|
if (tokens[pos + 1].value !== envName) {
|
|
635
630
|
throw new LatexParserError('Mismatched \\begin and \\end environments');
|
|
@@ -638,11 +633,11 @@ export class LatexParser {
|
|
|
638
633
|
|
|
639
634
|
const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
|
|
640
635
|
// ignore whitespaces and '\n' before \end{envName}
|
|
641
|
-
while(exprInside.length > 0 && [
|
|
636
|
+
while(exprInside.length > 0 && [TokenType.WHITESPACE, TokenType.NEWLINE].includes(exprInside[exprInside.length - 1].type)) {
|
|
642
637
|
exprInside.pop();
|
|
643
638
|
}
|
|
644
639
|
const body = this.parseAligned(exprInside);
|
|
645
|
-
const res = { type: 'beginend', content: envName, data: body };
|
|
640
|
+
const res: TexNode = { type: 'beginend', content: envName, data: body };
|
|
646
641
|
return [res, pos];
|
|
647
642
|
}
|
|
648
643
|
|
|
@@ -677,17 +672,40 @@ export class LatexParser {
|
|
|
677
672
|
}
|
|
678
673
|
}
|
|
679
674
|
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
const
|
|
683
|
-
let
|
|
684
|
-
for (
|
|
685
|
-
if (
|
|
675
|
+
// Remove all whitespace before or after _ or ^
|
|
676
|
+
function passIgnoreWhitespaceBeforeScriptMark(tokens: Token[]): Token[] {
|
|
677
|
+
const is_script_mark = (token: Token) => token_eq(token, SUB_SYMBOL) || token_eq(token, SUP_SYMBOL);
|
|
678
|
+
let out_tokens: Token[] = [];
|
|
679
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
680
|
+
if (tokens[i].type === TokenType.WHITESPACE && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) {
|
|
681
|
+
continue;
|
|
682
|
+
}
|
|
683
|
+
if (tokens[i].type === TokenType.WHITESPACE && i - 1 >= 0 && is_script_mark(tokens[i - 1])) {
|
|
684
|
+
continue;
|
|
685
|
+
}
|
|
686
|
+
out_tokens.push(tokens[i]);
|
|
687
|
+
}
|
|
688
|
+
return out_tokens;
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
// expand custom tex macros
|
|
692
|
+
function passExpandCustomTexMacros(tokens: Token[], customTexMacros: {[key: string]: string}): Token[] {
|
|
693
|
+
let out_tokens: Token[] = [];
|
|
694
|
+
for (const token of tokens) {
|
|
695
|
+
if (token.type === TokenType.COMMAND && customTexMacros[token.value]) {
|
|
686
696
|
const expanded_tokens = tokenize(customTexMacros[token.value]);
|
|
687
|
-
|
|
697
|
+
out_tokens = out_tokens.concat(expanded_tokens);
|
|
688
698
|
} else {
|
|
689
|
-
|
|
699
|
+
out_tokens.push(token);
|
|
690
700
|
}
|
|
691
701
|
}
|
|
692
|
-
return
|
|
702
|
+
return out_tokens;
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
|
|
706
|
+
const parser = new LatexParser();
|
|
707
|
+
let tokens = tokenize(tex);
|
|
708
|
+
tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
|
|
709
|
+
tokens = passExpandCustomTexMacros(tokens, customTexMacros);
|
|
710
|
+
return parser.parse(tokens);
|
|
693
711
|
}
|
package/src/types.ts
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
export enum TokenType {
|
|
2
|
+
ELEMENT,
|
|
3
|
+
COMMAND,
|
|
4
|
+
TEXT,
|
|
5
|
+
COMMENT,
|
|
6
|
+
WHITESPACE,
|
|
7
|
+
NEWLINE,
|
|
8
|
+
CONTROL,
|
|
9
|
+
UNKNOWN,
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export interface Token {
|
|
13
|
+
type: TokenType;
|
|
14
|
+
value: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
1
18
|
export interface TexSupsubData {
|
|
2
19
|
base: TexNode;
|
|
3
20
|
sup?: TexNode;
|
|
@@ -9,7 +26,8 @@ export type TexSqrtData = TexNode;
|
|
|
9
26
|
export type TexArrayData = TexNode[][];
|
|
10
27
|
|
|
11
28
|
export interface TexNode {
|
|
12
|
-
type:
|
|
29
|
+
type: 'element' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'ordgroup' | 'supsub'
|
|
30
|
+
| 'unaryFunc' | 'binaryFunc' | 'leftright' | 'beginend' | 'symbol' | 'empty' | 'unknownMacro';
|
|
13
31
|
content: string;
|
|
14
32
|
args?: TexNode[];
|
|
15
33
|
// position?: Position;
|
|
@@ -19,10 +37,20 @@ export interface TexNode {
|
|
|
19
37
|
data?: TexSqrtData | TexSupsubData | TexArrayData;
|
|
20
38
|
}
|
|
21
39
|
|
|
40
|
+
export interface TypstSupsubData {
|
|
41
|
+
base: TypstNode;
|
|
42
|
+
sup?: TypstNode;
|
|
43
|
+
sub?: TypstNode;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export type TypstArrayData = TypstNode[][];
|
|
47
|
+
|
|
22
48
|
export interface TypstNode {
|
|
23
|
-
type: 'atom' | 'symbol' | 'text' | 'softSpace' | 'comment' | 'newline'
|
|
49
|
+
type: 'atom' | 'symbol' | 'text' | 'softSpace' | 'comment' | 'newline'
|
|
50
|
+
| 'empty' | 'group' | 'supsub' | 'unaryFunc' | 'binaryFunc' | 'align' | 'matrix' | 'unknown';
|
|
24
51
|
content: string;
|
|
25
52
|
args?: TypstNode[];
|
|
53
|
+
data?: TypstSupsubData | TypstArrayData;
|
|
26
54
|
}
|
|
27
55
|
|
|
28
56
|
export interface Tex2TypstOptions {
|