tex2typst 0.3.17 → 0.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/index.js +230 -172
- package/dist/tex-parser.d.ts +0 -1
- package/dist/tex-tokenizer.d.ts +4 -0
- package/dist/tex2typst.min.js +11 -11
- package/dist/types.d.ts +13 -7
- package/dist/typst-parser.d.ts +0 -1
- package/dist/typst-tokenizer.d.ts +2 -0
- package/dist/typst-writer.d.ts +3 -1
- package/package.json +1 -1
- package/src/convert.ts +129 -69
- package/src/index.ts +1 -1
- package/src/map.ts +2 -0
- package/src/tex-parser.ts +6 -137
- package/src/tex-tokenizer.ts +138 -0
- package/src/types.ts +20 -7
- package/src/typst-parser.ts +1 -74
- package/src/typst-tokenizer.ts +76 -0
- package/src/typst-writer.ts +36 -18
- package/TODO.md +0 -1
- package/docs/api-reference.md +0 -64
- package/tools/make-shorthand-map.py +0 -33
- package/tools/make-symbol-map.py +0 -35
package/src/convert.ts
CHANGED
|
@@ -22,9 +22,6 @@ function tex_token_to_typst(token: string): string {
|
|
|
22
22
|
return token;
|
|
23
23
|
} else if (token === '/') {
|
|
24
24
|
return '\\/';
|
|
25
|
-
} else if (token === '\\|') {
|
|
26
|
-
// \| in LaTeX is double vertical bar looks like ||
|
|
27
|
-
return 'parallel';
|
|
28
25
|
} else if (token === '\\\\') {
|
|
29
26
|
return '\\';
|
|
30
27
|
} else if (['\\$', '\\#', '\\&', '\\_'].includes(token)) {
|
|
@@ -49,40 +46,52 @@ function tex_token_to_typst(token: string): string {
|
|
|
49
46
|
function convert_overset(node: TexNode, options: Tex2TypstOptions): TypstNode {
|
|
50
47
|
const [sup, base] = node.args!;
|
|
51
48
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
}
|
|
56
|
-
// \overset{def}{=} is also considered as eq.def
|
|
57
|
-
if (n.type === 'ordgroup' && n.args!.length === 3) {
|
|
58
|
-
const [a1, a2, a3] = n.args!;
|
|
59
|
-
const d = new TexNode('element', 'd');
|
|
60
|
-
const e = new TexNode('element', 'e');
|
|
61
|
-
const f = new TexNode('element', 'f');
|
|
62
|
-
if (a1.eq(d) && a2.eq(e) && a3.eq(f)) {
|
|
49
|
+
if (options.optimize) {
|
|
50
|
+
const is_def = (n: TexNode): boolean => {
|
|
51
|
+
if (n.eq(new TexNode('text', 'def'))) {
|
|
63
52
|
return true;
|
|
64
53
|
}
|
|
54
|
+
// \overset{def}{=} is also considered as eq.def
|
|
55
|
+
if (n.type === 'ordgroup' && n.args!.length === 3) {
|
|
56
|
+
const [a1, a2, a3] = n.args!;
|
|
57
|
+
const d = new TexNode('element', 'd');
|
|
58
|
+
const e = new TexNode('element', 'e');
|
|
59
|
+
const f = new TexNode('element', 'f');
|
|
60
|
+
if (a1.eq(d) && a2.eq(e) && a3.eq(f)) {
|
|
61
|
+
return true;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return false;
|
|
65
|
+
};
|
|
66
|
+
const is_eq = (n: TexNode): boolean => n.eq(new TexNode('element', '='));
|
|
67
|
+
if (is_def(sup) && is_eq(base)) {
|
|
68
|
+
return new TypstNode('symbol', 'eq.def');
|
|
65
69
|
}
|
|
66
|
-
return false;
|
|
67
|
-
};
|
|
68
|
-
const is_eq = (n: TexNode): boolean => n.eq(new TexNode('element', '='));
|
|
69
|
-
if (is_def(sup) && is_eq(base)) {
|
|
70
|
-
return new TypstNode('symbol', 'eq.def');
|
|
71
70
|
}
|
|
72
71
|
const limits_call = new TypstNode(
|
|
73
72
|
'funcCall',
|
|
74
73
|
'limits',
|
|
75
74
|
[convert_tex_node_to_typst(base, options)]
|
|
76
75
|
);
|
|
77
|
-
return new TypstNode(
|
|
78
|
-
'supsub',
|
|
79
|
-
'',
|
|
80
|
-
[],
|
|
81
|
-
{
|
|
76
|
+
return new TypstNode('supsub', '', [], {
|
|
82
77
|
base: limits_call,
|
|
83
78
|
sup: convert_tex_node_to_typst(sup, options),
|
|
84
|
-
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// \underset{X}{Y} -> limits(Y)_X
|
|
83
|
+
function convert_underset(node: TexNode, options: Tex2TypstOptions): TypstNode {
|
|
84
|
+
const [sub, base] = node.args!;
|
|
85
|
+
|
|
86
|
+
const limits_call = new TypstNode(
|
|
87
|
+
'funcCall',
|
|
88
|
+
'limits',
|
|
89
|
+
[convert_tex_node_to_typst(base, options)]
|
|
85
90
|
);
|
|
91
|
+
return new TypstNode('supsub', '', [], {
|
|
92
|
+
base: limits_call,
|
|
93
|
+
sub: convert_tex_node_to_typst(sub, options),
|
|
94
|
+
});
|
|
86
95
|
}
|
|
87
96
|
|
|
88
97
|
|
|
@@ -117,7 +126,7 @@ export function convert_tex_node_to_typst(node: TexNode, options: Tex2TypstOptio
|
|
|
117
126
|
case 'supsub': {
|
|
118
127
|
let { base, sup, sub } = node.data as TexSupsubData;
|
|
119
128
|
|
|
120
|
-
//
|
|
129
|
+
// special hook for overbrace
|
|
121
130
|
if (base && base.type === 'unaryFunc' && base.content === '\\overbrace' && sup) {
|
|
122
131
|
return new TypstNode(
|
|
123
132
|
'funcCall',
|
|
@@ -150,40 +159,61 @@ export function convert_tex_node_to_typst(node: TexNode, options: Tex2TypstOptio
|
|
|
150
159
|
return new TypstNode('supsub', '', [], data);
|
|
151
160
|
}
|
|
152
161
|
case 'leftright': {
|
|
153
|
-
const [left,
|
|
154
|
-
|
|
155
|
-
|
|
162
|
+
const [left, _body, right] = node.args!;
|
|
163
|
+
const [typ_left, typ_body, typ_right] = node.args!.map((n) => convert_tex_node_to_typst(n, options));
|
|
164
|
+
|
|
165
|
+
if (options.optimize) {
|
|
166
|
+
// optimization off: "lr(bar.v.double a + 1/2 bar.v.double)"
|
|
167
|
+
// optimization on : "norm(a + 1/2)"
|
|
168
|
+
if (left.content === '\\|' && right.content === '\\|') {
|
|
169
|
+
return new TypstNode('funcCall', 'norm', [typ_body]);
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// These pairs will be handled by Typst compiler by default. No need to add lr()
|
|
173
|
+
if ([
|
|
174
|
+
"[]", "()", "\\{\\}",
|
|
175
|
+
"\\lfloor\\rfloor",
|
|
176
|
+
"\\lceil\\rceil",
|
|
177
|
+
"\\lfloor\\rceil",
|
|
178
|
+
].includes(left.content + right.content)) {
|
|
179
|
+
return new TypstNode('group', '', [typ_left, typ_body, typ_right]);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
const group = new TypstNode(
|
|
156
184
|
'group',
|
|
157
185
|
'',
|
|
158
|
-
|
|
186
|
+
[typ_left, typ_body, typ_right]
|
|
159
187
|
);
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
188
|
+
|
|
189
|
+
// "\left\{ a + \frac{1}{3} \right." -> "lr(\{ a + 1/3)"
|
|
190
|
+
// "\left. a + \frac{1}{3} \right\}" -> "lr( a + \frac{1}{3} \})"
|
|
191
|
+
// Note that: In lr(), if one side of delimiter doesn't present (i.e. derived from "\\left." or "\\right."),
|
|
192
|
+
// "(", ")", "{", "[", should be escaped with "\" to be the other side of delimiter.
|
|
193
|
+
// Simple "lr({ a+1/3)" doesn't compile in Typst.
|
|
194
|
+
const escape_curly_or_paren = function(s: string): string {
|
|
195
|
+
if (["(", ")", "{", "["].includes(s)) {
|
|
196
|
+
return "\\" + s;
|
|
197
|
+
} else {
|
|
198
|
+
return s;
|
|
199
|
+
}
|
|
200
|
+
};
|
|
170
201
|
if (right.content === '.') {
|
|
171
|
-
|
|
172
|
-
|
|
202
|
+
typ_left.content = escape_curly_or_paren(typ_left.content);
|
|
203
|
+
group.args = [typ_left, typ_body];
|
|
173
204
|
} else if (left.content === '.') {
|
|
174
|
-
|
|
175
|
-
|
|
205
|
+
typ_right.content = escape_curly_or_paren(typ_right.content);
|
|
206
|
+
group.args = [typ_body, typ_right];
|
|
176
207
|
}
|
|
177
|
-
return new TypstNode(
|
|
178
|
-
'funcCall',
|
|
179
|
-
'lr',
|
|
180
|
-
[group]
|
|
181
|
-
);
|
|
208
|
+
return new TypstNode('funcCall', 'lr', [group]);
|
|
182
209
|
}
|
|
183
210
|
case 'binaryFunc': {
|
|
184
211
|
if (node.content === '\\overset') {
|
|
185
212
|
return convert_overset(node, options);
|
|
186
213
|
}
|
|
214
|
+
if (node.content === '\\underset') {
|
|
215
|
+
return convert_underset(node, options);
|
|
216
|
+
}
|
|
187
217
|
// \frac{a}{b} -> a / b
|
|
188
218
|
if (node.content === '\\frac') {
|
|
189
219
|
if (options.fracToSlash) {
|
|
@@ -246,17 +276,14 @@ export function convert_tex_node_to_typst(node: TexNode, options: Tex2TypstOptio
|
|
|
246
276
|
}
|
|
247
277
|
// \operatorname{opname} -> op("opname")
|
|
248
278
|
if (node.content === '\\operatorname') {
|
|
249
|
-
const text = arg0.content;
|
|
250
279
|
|
|
251
|
-
if (
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
'op',
|
|
257
|
-
[arg0]
|
|
258
|
-
);
|
|
280
|
+
if (options.optimize) {
|
|
281
|
+
const text = arg0.content;
|
|
282
|
+
if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
|
|
283
|
+
return new TypstNode('symbol', text);
|
|
284
|
+
}
|
|
259
285
|
}
|
|
286
|
+
return new TypstNode('funcCall', 'op', [arg0]);
|
|
260
287
|
}
|
|
261
288
|
// \hspace{1cm} -> #h(1cm)
|
|
262
289
|
// TODO: reverse conversion support for this
|
|
@@ -409,6 +436,8 @@ const TYPST_UNARY_FUNCTIONS: string[] = [
|
|
|
409
436
|
'frak',
|
|
410
437
|
'floor',
|
|
411
438
|
'ceil',
|
|
439
|
+
'norm',
|
|
440
|
+
'limits',
|
|
412
441
|
];
|
|
413
442
|
|
|
414
443
|
const TYPST_BINARY_FUNCTIONS: string[] = [
|
|
@@ -428,8 +457,6 @@ function apply_escape_if_needed(c: string) {
|
|
|
428
457
|
function typst_token_to_tex(token: string): string {
|
|
429
458
|
if (/^[a-zA-Z0-9]$/.test(token)) {
|
|
430
459
|
return token;
|
|
431
|
-
} else if (token === 'thin') {
|
|
432
|
-
return '\\,';
|
|
433
460
|
} else if (reverseSymbolMap.has(token)) {
|
|
434
461
|
return '\\' + reverseSymbolMap.get(token)!;
|
|
435
462
|
}
|
|
@@ -491,6 +518,8 @@ export function convert_typst_node_to_tex(node: TypstNode): TexNode {
|
|
|
491
518
|
let left_delim = apply_escape_if_needed(data.leftDelim);
|
|
492
519
|
assert(data.rightDelim !== null, "leftDelim has value but rightDelim not");
|
|
493
520
|
let right_delim = apply_escape_if_needed(data.rightDelim!);
|
|
521
|
+
// TODO: should be TeXNode('leftright', ...)
|
|
522
|
+
// But currently writer will output `\left |` while people commonly prefer `\left|`.
|
|
494
523
|
return new TexNode('ordgroup', '', [
|
|
495
524
|
new TexNode('element', '\\left' + left_delim),
|
|
496
525
|
...node.args!.map(convert_typst_node_to_tex),
|
|
@@ -500,17 +529,29 @@ export function convert_typst_node_to_tex(node: TypstNode): TexNode {
|
|
|
500
529
|
return new TexNode('ordgroup', '', node.args!.map(convert_typst_node_to_tex));
|
|
501
530
|
}
|
|
502
531
|
}
|
|
532
|
+
// special hook for norm
|
|
533
|
+
// `\| a \|` <- `norm(a)`
|
|
534
|
+
// `\left\| a + \frac{1}{3} \right\|` <- `norm(a + 1/3)`
|
|
535
|
+
if (node.content === 'norm') {
|
|
536
|
+
const arg0 = node.args![0];
|
|
537
|
+
const tex_node_type = node.isOverHigh() ? 'leftright' : 'ordgroup';
|
|
538
|
+
return new TexNode(tex_node_type, '', [
|
|
539
|
+
new TexNode('symbol', "\\|"),
|
|
540
|
+
convert_typst_node_to_tex(arg0),
|
|
541
|
+
new TexNode('symbol', "\\|")
|
|
542
|
+
]);
|
|
543
|
+
}
|
|
503
544
|
// special hook for floor, ceil
|
|
504
|
-
//
|
|
545
|
+
// `\lfloor a \rfloor` <- `floor(a)`
|
|
546
|
+
// `\lceil a \rceil` <- `ceil(a)`
|
|
547
|
+
// `\left\lfloor a \right\rfloor` <- `floor(a)`
|
|
548
|
+
// `\left\lceil a \right\rceil` <- `ceil(a)`
|
|
505
549
|
if (node.content === 'floor' || node.content === 'ceil') {
|
|
506
|
-
|
|
507
|
-
|
|
550
|
+
const left = "\\l" + node.content;
|
|
551
|
+
const right = "\\r" + node.content;
|
|
508
552
|
const arg0 = node.args![0];
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
right = "\\right" + right;
|
|
512
|
-
}
|
|
513
|
-
return new TexNode('ordgroup', '', [
|
|
553
|
+
const tex_node_type = node.isOverHigh() ? 'leftright' : 'ordgroup';
|
|
554
|
+
return new TexNode(tex_node_type, '', [
|
|
514
555
|
new TexNode('symbol', left),
|
|
515
556
|
convert_typst_node_to_tex(arg0),
|
|
516
557
|
new TexNode('symbol', right)
|
|
@@ -552,15 +593,34 @@ export function convert_typst_node_to_tex(node: TypstNode): TexNode {
|
|
|
552
593
|
}
|
|
553
594
|
case 'supsub': {
|
|
554
595
|
const { base, sup, sub } = node.data as TypstSupsubData;
|
|
555
|
-
const base_tex = convert_typst_node_to_tex(base);
|
|
556
596
|
let sup_tex: TexNode | undefined;
|
|
557
597
|
let sub_tex: TexNode | undefined;
|
|
598
|
+
|
|
558
599
|
if (sup) {
|
|
559
600
|
sup_tex = convert_typst_node_to_tex(sup);
|
|
560
601
|
}
|
|
561
602
|
if (sub) {
|
|
562
603
|
sub_tex = convert_typst_node_to_tex(sub);
|
|
563
604
|
}
|
|
605
|
+
|
|
606
|
+
// special hook for limits
|
|
607
|
+
// `limits(+)^a` -> `\overset{a}{+}`
|
|
608
|
+
// `limits(+)_a` -> `\underset{a}{+}`
|
|
609
|
+
// `limits(+)_a^b` -> `\overset{b}{\underset{a}{+}}`
|
|
610
|
+
if (base.eq(new TypstNode('funcCall', 'limits'))) {
|
|
611
|
+
const body_in_limits = convert_typst_node_to_tex(base.args![0]);
|
|
612
|
+
if (sup_tex !== undefined && sub_tex === undefined) {
|
|
613
|
+
return new TexNode('binaryFunc', '\\overset', [sup_tex, body_in_limits]);
|
|
614
|
+
} else if (sup_tex === undefined && sub_tex !== undefined) {
|
|
615
|
+
return new TexNode('binaryFunc', '\\underset', [sub_tex, body_in_limits]);
|
|
616
|
+
} else {
|
|
617
|
+
const underset_call = new TexNode('binaryFunc', '\\underset', [sub_tex!, body_in_limits]);
|
|
618
|
+
return new TexNode('binaryFunc', '\\overset', [sup_tex!, underset_call]);
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
const base_tex = convert_typst_node_to_tex(base);
|
|
623
|
+
|
|
564
624
|
const res = new TexNode('supsub', '', [], {
|
|
565
625
|
base: base_tex,
|
|
566
626
|
sup: sup_tex,
|
package/src/index.ts
CHANGED
|
@@ -11,11 +11,11 @@ import { shorthandMap } from "./typst-shorthands";
|
|
|
11
11
|
export function tex2typst(tex: string, options?: Tex2TypstOptions): string {
|
|
12
12
|
const opt: Tex2TypstOptions = {
|
|
13
13
|
nonStrict: true,
|
|
14
|
-
preferTypstIntrinsic: true,
|
|
15
14
|
preferShorthands: true,
|
|
16
15
|
keepSpaces: false,
|
|
17
16
|
fracToSlash: true,
|
|
18
17
|
inftyToOo: false,
|
|
18
|
+
optimize: true,
|
|
19
19
|
nonAsciiWrapper: "",
|
|
20
20
|
customTexMacros: {}
|
|
21
21
|
};
|
package/src/map.ts
CHANGED
package/src/tex-parser.ts
CHANGED
|
@@ -1,53 +1,8 @@
|
|
|
1
1
|
import { symbolMap } from "./map";
|
|
2
2
|
import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
|
|
3
3
|
import { assert } from "./util";
|
|
4
|
-
import { JSLex, Scanner } from "./jslex";
|
|
5
4
|
import { array_find } from "./generic";
|
|
6
|
-
|
|
7
|
-
const UNARY_COMMANDS = [
|
|
8
|
-
'sqrt',
|
|
9
|
-
'text',
|
|
10
|
-
|
|
11
|
-
'bar',
|
|
12
|
-
'bold',
|
|
13
|
-
'boldsymbol',
|
|
14
|
-
'ddot',
|
|
15
|
-
'dot',
|
|
16
|
-
'hat',
|
|
17
|
-
'mathbb',
|
|
18
|
-
'mathbf',
|
|
19
|
-
'mathcal',
|
|
20
|
-
'mathfrak',
|
|
21
|
-
'mathit',
|
|
22
|
-
'mathrm',
|
|
23
|
-
'mathscr',
|
|
24
|
-
'mathsf',
|
|
25
|
-
'mathtt',
|
|
26
|
-
'operatorname',
|
|
27
|
-
'overbrace',
|
|
28
|
-
'overline',
|
|
29
|
-
'pmb',
|
|
30
|
-
'rm',
|
|
31
|
-
'tilde',
|
|
32
|
-
'underbrace',
|
|
33
|
-
'underline',
|
|
34
|
-
'vec',
|
|
35
|
-
'widehat',
|
|
36
|
-
'widetilde',
|
|
37
|
-
'overleftarrow',
|
|
38
|
-
'overrightarrow',
|
|
39
|
-
'hspace',
|
|
40
|
-
]
|
|
41
|
-
|
|
42
|
-
const BINARY_COMMANDS = [
|
|
43
|
-
'frac',
|
|
44
|
-
'tfrac',
|
|
45
|
-
'binom',
|
|
46
|
-
'dbinom',
|
|
47
|
-
'dfrac',
|
|
48
|
-
'tbinom',
|
|
49
|
-
'overset',
|
|
50
|
-
]
|
|
5
|
+
import { TEX_BINARY_COMMANDS, TEX_UNARY_COMMANDS, tokenize_tex } from "./tex-tokenizer";
|
|
51
6
|
|
|
52
7
|
const IGNORED_COMMANDS = [
|
|
53
8
|
'bigl', 'bigr',
|
|
@@ -59,9 +14,9 @@ const IGNORED_COMMANDS = [
|
|
|
59
14
|
const EMPTY_NODE: TexNode = new TexNode('empty', '');
|
|
60
15
|
|
|
61
16
|
function get_command_param_num(command: string): number {
|
|
62
|
-
if (
|
|
17
|
+
if (TEX_UNARY_COMMANDS.includes(command)) {
|
|
63
18
|
return 1;
|
|
64
|
-
} else if (
|
|
19
|
+
} else if (TEX_BINARY_COMMANDS.includes(command)) {
|
|
65
20
|
return 2;
|
|
66
21
|
} else {
|
|
67
22
|
return 0;
|
|
@@ -86,7 +41,7 @@ function eat_whitespaces(tokens: TexToken[], start: number): TexToken[] {
|
|
|
86
41
|
|
|
87
42
|
function eat_parenthesis(tokens: TexToken[], start: number): TexToken | null {
|
|
88
43
|
const firstToken = tokens[start];
|
|
89
|
-
if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}', '.'].includes(firstToken.value)) {
|
|
44
|
+
if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}', '.', '\\|'].includes(firstToken.value)) {
|
|
90
45
|
return firstToken;
|
|
91
46
|
} else if (firstToken.type === TexTokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
|
|
92
47
|
return firstToken;
|
|
@@ -142,93 +97,6 @@ function find_closing_end_command(tokens: TexToken[], start: number): number {
|
|
|
142
97
|
}
|
|
143
98
|
|
|
144
99
|
|
|
145
|
-
function unescape(str: string): string {
|
|
146
|
-
const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
|
|
147
|
-
for (const char of chars) {
|
|
148
|
-
str = str.replaceAll('\\' + char, char);
|
|
149
|
-
}
|
|
150
|
-
return str;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
|
|
154
|
-
[
|
|
155
|
-
String.raw`\\(text|operatorname|begin|end|hspace){.+?}`, (s) => {
|
|
156
|
-
const text = s.text()!;
|
|
157
|
-
const command = text.substring(0, text.indexOf('{'));
|
|
158
|
-
const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
|
|
159
|
-
return [
|
|
160
|
-
new TexToken(TexTokenType.COMMAND, command),
|
|
161
|
-
new TexToken(TexTokenType.CONTROL, '{'),
|
|
162
|
-
new TexToken(TexTokenType.TEXT, unescape(text_inside)),
|
|
163
|
-
new TexToken(TexTokenType.CONTROL, '}')
|
|
164
|
-
]
|
|
165
|
-
}
|
|
166
|
-
],
|
|
167
|
-
[String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
|
|
168
|
-
[String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
|
|
169
|
-
[String.raw`\\[\\,:; ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
|
|
170
|
-
[String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
|
|
171
|
-
[String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
|
|
172
|
-
[String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
173
|
-
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
|
|
174
|
-
const text = s.text()!;
|
|
175
|
-
const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
|
|
176
|
-
const match = text.match(regex);
|
|
177
|
-
assert(match !== null);
|
|
178
|
-
const command = match![1];
|
|
179
|
-
if (BINARY_COMMANDS.includes(command.substring(1))) {
|
|
180
|
-
const arg1 = match![2].trimStart();
|
|
181
|
-
const arg2 = match![3];
|
|
182
|
-
return [
|
|
183
|
-
new TexToken(TexTokenType.COMMAND, command),
|
|
184
|
-
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
185
|
-
new TexToken(TexTokenType.ELEMENT, arg2),
|
|
186
|
-
];
|
|
187
|
-
} else {
|
|
188
|
-
s.reject();
|
|
189
|
-
return [];
|
|
190
|
-
}
|
|
191
|
-
}],
|
|
192
|
-
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
|
|
193
|
-
const text = s.text()!;
|
|
194
|
-
const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
|
|
195
|
-
const match = text.match(regex);
|
|
196
|
-
assert(match !== null);
|
|
197
|
-
const command = match![1];
|
|
198
|
-
if (UNARY_COMMANDS.includes(command.substring(1))) {
|
|
199
|
-
const arg1 = match![2].trimStart();
|
|
200
|
-
return [
|
|
201
|
-
new TexToken(TexTokenType.COMMAND, command),
|
|
202
|
-
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
203
|
-
];
|
|
204
|
-
} else {
|
|
205
|
-
s.reject();
|
|
206
|
-
return [];
|
|
207
|
-
}
|
|
208
|
-
}],
|
|
209
|
-
[String.raw`\\[a-zA-Z]+`, (s) => {
|
|
210
|
-
const command = s.text()!;
|
|
211
|
-
return [ new TexToken(TexTokenType.COMMAND, command), ];
|
|
212
|
-
}],
|
|
213
|
-
// Numbers like "123", "3.14"
|
|
214
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
215
|
-
[String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
216
|
-
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
217
|
-
// non-ASCII characters
|
|
218
|
-
[String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
219
|
-
[String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
|
|
220
|
-
]);
|
|
221
|
-
|
|
222
|
-
const spec = {
|
|
223
|
-
"start": rules_map
|
|
224
|
-
};
|
|
225
|
-
|
|
226
|
-
export function tokenize_tex(input: string): TexToken[] {
|
|
227
|
-
const lexer = new JSLex<TexToken>(spec);
|
|
228
|
-
return lexer.collect(input);
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
|
|
232
100
|
export class LatexParserError extends Error {
|
|
233
101
|
constructor(message: string) {
|
|
234
102
|
super(message);
|
|
@@ -394,6 +262,7 @@ export class LatexParser {
|
|
|
394
262
|
case '}':
|
|
395
263
|
throw new LatexParserError("Unmatched '}'");
|
|
396
264
|
case '\\\\':
|
|
265
|
+
case '\\!':
|
|
397
266
|
case '\\,':
|
|
398
267
|
case '\\:':
|
|
399
268
|
case '\\;':
|
|
@@ -552,7 +421,7 @@ export class LatexParser {
|
|
|
552
421
|
while (pos < tokens.length) {
|
|
553
422
|
const whitespaceCount = eat_whitespaces(tokens, pos).length;
|
|
554
423
|
pos += whitespaceCount;
|
|
555
|
-
|
|
424
|
+
|
|
556
425
|
if (pos >= tokens.length || !tokens[pos].eq(LEFT_CURLY_BRACKET)) {
|
|
557
426
|
break;
|
|
558
427
|
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { TexToken, TexTokenType } from "./types";
|
|
2
|
+
import { assert } from "./util";
|
|
3
|
+
import { JSLex, Scanner } from "./jslex";
|
|
4
|
+
|
|
5
|
+
export const TEX_UNARY_COMMANDS = [
|
|
6
|
+
'sqrt',
|
|
7
|
+
'text',
|
|
8
|
+
|
|
9
|
+
'bar',
|
|
10
|
+
'bold',
|
|
11
|
+
'boldsymbol',
|
|
12
|
+
'ddot',
|
|
13
|
+
'dot',
|
|
14
|
+
'hat',
|
|
15
|
+
'mathbb',
|
|
16
|
+
'mathbf',
|
|
17
|
+
'mathcal',
|
|
18
|
+
'mathfrak',
|
|
19
|
+
'mathit',
|
|
20
|
+
'mathrm',
|
|
21
|
+
'mathscr',
|
|
22
|
+
'mathsf',
|
|
23
|
+
'mathtt',
|
|
24
|
+
'operatorname',
|
|
25
|
+
'overbrace',
|
|
26
|
+
'overline',
|
|
27
|
+
'pmb',
|
|
28
|
+
'rm',
|
|
29
|
+
'tilde',
|
|
30
|
+
'underbrace',
|
|
31
|
+
'underline',
|
|
32
|
+
'vec',
|
|
33
|
+
'widehat',
|
|
34
|
+
'widetilde',
|
|
35
|
+
'overleftarrow',
|
|
36
|
+
'overrightarrow',
|
|
37
|
+
'hspace',
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
export const TEX_BINARY_COMMANDS = [
|
|
41
|
+
'frac',
|
|
42
|
+
'tfrac',
|
|
43
|
+
'binom',
|
|
44
|
+
'dbinom',
|
|
45
|
+
'dfrac',
|
|
46
|
+
'tbinom',
|
|
47
|
+
'overset',
|
|
48
|
+
'underset',
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
function unescape(str: string): string {
|
|
53
|
+
const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
|
|
54
|
+
for (const char of chars) {
|
|
55
|
+
str = str.replaceAll('\\' + char, char);
|
|
56
|
+
}
|
|
57
|
+
return str;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
|
|
61
|
+
[
|
|
62
|
+
String.raw`\\(text|operatorname|begin|end|hspace){.+?}`, (s) => {
|
|
63
|
+
const text = s.text()!;
|
|
64
|
+
const command = text.substring(0, text.indexOf('{'));
|
|
65
|
+
const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
|
|
66
|
+
return [
|
|
67
|
+
new TexToken(TexTokenType.COMMAND, command),
|
|
68
|
+
new TexToken(TexTokenType.CONTROL, '{'),
|
|
69
|
+
new TexToken(TexTokenType.TEXT, unescape(text_inside)),
|
|
70
|
+
new TexToken(TexTokenType.CONTROL, '}')
|
|
71
|
+
]
|
|
72
|
+
}
|
|
73
|
+
],
|
|
74
|
+
[String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
|
|
75
|
+
[String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
|
|
76
|
+
[String.raw`\\[\\,:;! ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
|
|
77
|
+
[String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
|
|
78
|
+
[String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
|
|
79
|
+
[String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
80
|
+
// e.g. match `\frac13`, `\frac1 b`, `\frac a b`
|
|
81
|
+
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
|
|
82
|
+
const text = s.text()!;
|
|
83
|
+
const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
|
|
84
|
+
const match = text.match(regex);
|
|
85
|
+
assert(match !== null);
|
|
86
|
+
const command = match![1];
|
|
87
|
+
if (TEX_BINARY_COMMANDS.includes(command.substring(1))) {
|
|
88
|
+
const arg1 = match![2].trimStart();
|
|
89
|
+
const arg2 = match![3];
|
|
90
|
+
return [
|
|
91
|
+
new TexToken(TexTokenType.COMMAND, command),
|
|
92
|
+
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
93
|
+
new TexToken(TexTokenType.ELEMENT, arg2),
|
|
94
|
+
];
|
|
95
|
+
} else {
|
|
96
|
+
s.reject();
|
|
97
|
+
return [];
|
|
98
|
+
}
|
|
99
|
+
}],
|
|
100
|
+
// e.g. match `\sqrt3`, `\sqrt a`
|
|
101
|
+
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
|
|
102
|
+
const text = s.text()!;
|
|
103
|
+
const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
|
|
104
|
+
const match = text.match(regex);
|
|
105
|
+
assert(match !== null);
|
|
106
|
+
const command = match![1];
|
|
107
|
+
if (TEX_UNARY_COMMANDS.includes(command.substring(1))) {
|
|
108
|
+
const arg1 = match![2].trimStart();
|
|
109
|
+
return [
|
|
110
|
+
new TexToken(TexTokenType.COMMAND, command),
|
|
111
|
+
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
112
|
+
];
|
|
113
|
+
} else {
|
|
114
|
+
s.reject();
|
|
115
|
+
return [];
|
|
116
|
+
}
|
|
117
|
+
}],
|
|
118
|
+
[String.raw`\\[a-zA-Z]+`, (s) => {
|
|
119
|
+
const command = s.text()!;
|
|
120
|
+
return [ new TexToken(TexTokenType.COMMAND, command), ];
|
|
121
|
+
}],
|
|
122
|
+
// Numbers like "123", "3.14"
|
|
123
|
+
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
124
|
+
[String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
125
|
+
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
126
|
+
// non-ASCII characters
|
|
127
|
+
[String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
128
|
+
[String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
|
|
129
|
+
]);
|
|
130
|
+
|
|
131
|
+
const spec = {
|
|
132
|
+
"start": rules_map
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
export function tokenize_tex(input: string): TexToken[] {
|
|
136
|
+
const lexer = new JSLex<TexToken>(spec);
|
|
137
|
+
return lexer.collect(input);
|
|
138
|
+
}
|