tex2typst 0.3.18 → 0.3.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +124 -109
- package/dist/tex-parser.d.ts +0 -1
- package/dist/tex-tokenizer.d.ts +4 -0
- package/dist/tex2typst.min.js +13 -13
- package/dist/types.d.ts +1 -2
- package/dist/typst-parser.d.ts +0 -1
- package/dist/typst-tokenizer.d.ts +2 -0
- package/package.json +1 -1
- package/src/convert.ts +20 -18
- package/src/index.ts +0 -1
- package/src/map.ts +3 -0
- package/src/tex-parser.ts +5 -138
- package/src/tex-tokenizer.ts +138 -0
- package/src/types.ts +1 -2
- package/src/typst-parser.ts +1 -74
- package/src/typst-tokenizer.ts +76 -0
- package/src/typst-writer.ts +23 -7
package/src/map.ts
CHANGED
package/src/tex-parser.ts
CHANGED
|
@@ -1,54 +1,8 @@
|
|
|
1
1
|
import { symbolMap } from "./map";
|
|
2
2
|
import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
|
|
3
3
|
import { assert } from "./util";
|
|
4
|
-
import { JSLex, Scanner } from "./jslex";
|
|
5
4
|
import { array_find } from "./generic";
|
|
6
|
-
|
|
7
|
-
const UNARY_COMMANDS = [
|
|
8
|
-
'sqrt',
|
|
9
|
-
'text',
|
|
10
|
-
|
|
11
|
-
'bar',
|
|
12
|
-
'bold',
|
|
13
|
-
'boldsymbol',
|
|
14
|
-
'ddot',
|
|
15
|
-
'dot',
|
|
16
|
-
'hat',
|
|
17
|
-
'mathbb',
|
|
18
|
-
'mathbf',
|
|
19
|
-
'mathcal',
|
|
20
|
-
'mathfrak',
|
|
21
|
-
'mathit',
|
|
22
|
-
'mathrm',
|
|
23
|
-
'mathscr',
|
|
24
|
-
'mathsf',
|
|
25
|
-
'mathtt',
|
|
26
|
-
'operatorname',
|
|
27
|
-
'overbrace',
|
|
28
|
-
'overline',
|
|
29
|
-
'pmb',
|
|
30
|
-
'rm',
|
|
31
|
-
'tilde',
|
|
32
|
-
'underbrace',
|
|
33
|
-
'underline',
|
|
34
|
-
'vec',
|
|
35
|
-
'widehat',
|
|
36
|
-
'widetilde',
|
|
37
|
-
'overleftarrow',
|
|
38
|
-
'overrightarrow',
|
|
39
|
-
'hspace',
|
|
40
|
-
]
|
|
41
|
-
|
|
42
|
-
const BINARY_COMMANDS = [
|
|
43
|
-
'frac',
|
|
44
|
-
'tfrac',
|
|
45
|
-
'binom',
|
|
46
|
-
'dbinom',
|
|
47
|
-
'dfrac',
|
|
48
|
-
'tbinom',
|
|
49
|
-
'overset',
|
|
50
|
-
'underset',
|
|
51
|
-
]
|
|
5
|
+
import { TEX_BINARY_COMMANDS, TEX_UNARY_COMMANDS, tokenize_tex } from "./tex-tokenizer";
|
|
52
6
|
|
|
53
7
|
const IGNORED_COMMANDS = [
|
|
54
8
|
'bigl', 'bigr',
|
|
@@ -60,9 +14,9 @@ const IGNORED_COMMANDS = [
|
|
|
60
14
|
const EMPTY_NODE: TexNode = new TexNode('empty', '');
|
|
61
15
|
|
|
62
16
|
function get_command_param_num(command: string): number {
|
|
63
|
-
if (
|
|
17
|
+
if (TEX_UNARY_COMMANDS.includes(command)) {
|
|
64
18
|
return 1;
|
|
65
|
-
} else if (
|
|
19
|
+
} else if (TEX_BINARY_COMMANDS.includes(command)) {
|
|
66
20
|
return 2;
|
|
67
21
|
} else {
|
|
68
22
|
return 0;
|
|
@@ -143,93 +97,6 @@ function find_closing_end_command(tokens: TexToken[], start: number): number {
|
|
|
143
97
|
}
|
|
144
98
|
|
|
145
99
|
|
|
146
|
-
function unescape(str: string): string {
|
|
147
|
-
const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
|
|
148
|
-
for (const char of chars) {
|
|
149
|
-
str = str.replaceAll('\\' + char, char);
|
|
150
|
-
}
|
|
151
|
-
return str;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
|
|
155
|
-
[
|
|
156
|
-
String.raw`\\(text|operatorname|begin|end|hspace){.+?}`, (s) => {
|
|
157
|
-
const text = s.text()!;
|
|
158
|
-
const command = text.substring(0, text.indexOf('{'));
|
|
159
|
-
const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
|
|
160
|
-
return [
|
|
161
|
-
new TexToken(TexTokenType.COMMAND, command),
|
|
162
|
-
new TexToken(TexTokenType.CONTROL, '{'),
|
|
163
|
-
new TexToken(TexTokenType.TEXT, unescape(text_inside)),
|
|
164
|
-
new TexToken(TexTokenType.CONTROL, '}')
|
|
165
|
-
]
|
|
166
|
-
}
|
|
167
|
-
],
|
|
168
|
-
[String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
|
|
169
|
-
[String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
|
|
170
|
-
[String.raw`\\[\\,:;! ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
|
|
171
|
-
[String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
|
|
172
|
-
[String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
|
|
173
|
-
[String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
174
|
-
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
|
|
175
|
-
const text = s.text()!;
|
|
176
|
-
const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
|
|
177
|
-
const match = text.match(regex);
|
|
178
|
-
assert(match !== null);
|
|
179
|
-
const command = match![1];
|
|
180
|
-
if (BINARY_COMMANDS.includes(command.substring(1))) {
|
|
181
|
-
const arg1 = match![2].trimStart();
|
|
182
|
-
const arg2 = match![3];
|
|
183
|
-
return [
|
|
184
|
-
new TexToken(TexTokenType.COMMAND, command),
|
|
185
|
-
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
186
|
-
new TexToken(TexTokenType.ELEMENT, arg2),
|
|
187
|
-
];
|
|
188
|
-
} else {
|
|
189
|
-
s.reject();
|
|
190
|
-
return [];
|
|
191
|
-
}
|
|
192
|
-
}],
|
|
193
|
-
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
|
|
194
|
-
const text = s.text()!;
|
|
195
|
-
const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
|
|
196
|
-
const match = text.match(regex);
|
|
197
|
-
assert(match !== null);
|
|
198
|
-
const command = match![1];
|
|
199
|
-
if (UNARY_COMMANDS.includes(command.substring(1))) {
|
|
200
|
-
const arg1 = match![2].trimStart();
|
|
201
|
-
return [
|
|
202
|
-
new TexToken(TexTokenType.COMMAND, command),
|
|
203
|
-
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
204
|
-
];
|
|
205
|
-
} else {
|
|
206
|
-
s.reject();
|
|
207
|
-
return [];
|
|
208
|
-
}
|
|
209
|
-
}],
|
|
210
|
-
[String.raw`\\[a-zA-Z]+`, (s) => {
|
|
211
|
-
const command = s.text()!;
|
|
212
|
-
return [ new TexToken(TexTokenType.COMMAND, command), ];
|
|
213
|
-
}],
|
|
214
|
-
// Numbers like "123", "3.14"
|
|
215
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
216
|
-
[String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
217
|
-
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
218
|
-
// non-ASCII characters
|
|
219
|
-
[String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
|
|
220
|
-
[String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
|
|
221
|
-
]);
|
|
222
|
-
|
|
223
|
-
const spec = {
|
|
224
|
-
"start": rules_map
|
|
225
|
-
};
|
|
226
|
-
|
|
227
|
-
export function tokenize_tex(input: string): TexToken[] {
|
|
228
|
-
const lexer = new JSLex<TexToken>(spec);
|
|
229
|
-
return lexer.collect(input);
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
|
|
233
100
|
export class LatexParserError extends Error {
|
|
234
101
|
constructor(message: string) {
|
|
235
102
|
super(message);
|
|
@@ -395,7 +262,7 @@ export class LatexParser {
|
|
|
395
262
|
case '}':
|
|
396
263
|
throw new LatexParserError("Unmatched '}'");
|
|
397
264
|
case '\\\\':
|
|
398
|
-
case '\\!':
|
|
265
|
+
case '\\!':
|
|
399
266
|
case '\\,':
|
|
400
267
|
case '\\:':
|
|
401
268
|
case '\\;':
|
|
@@ -554,7 +421,7 @@ export class LatexParser {
|
|
|
554
421
|
while (pos < tokens.length) {
|
|
555
422
|
const whitespaceCount = eat_whitespaces(tokens, pos).length;
|
|
556
423
|
pos += whitespaceCount;
|
|
557
|
-
|
|
424
|
+
|
|
558
425
|
if (pos >= tokens.length || !tokens[pos].eq(LEFT_CURLY_BRACKET)) {
|
|
559
426
|
break;
|
|
560
427
|
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { TexToken, TexTokenType } from "./types";
|
|
2
|
+
import { assert } from "./util";
|
|
3
|
+
import { JSLex, Scanner } from "./jslex";
|
|
4
|
+
|
|
5
|
+
export const TEX_UNARY_COMMANDS = [
|
|
6
|
+
'sqrt',
|
|
7
|
+
'text',
|
|
8
|
+
|
|
9
|
+
'bar',
|
|
10
|
+
'bold',
|
|
11
|
+
'boldsymbol',
|
|
12
|
+
'ddot',
|
|
13
|
+
'dot',
|
|
14
|
+
'hat',
|
|
15
|
+
'mathbb',
|
|
16
|
+
'mathbf',
|
|
17
|
+
'mathcal',
|
|
18
|
+
'mathfrak',
|
|
19
|
+
'mathit',
|
|
20
|
+
'mathrm',
|
|
21
|
+
'mathscr',
|
|
22
|
+
'mathsf',
|
|
23
|
+
'mathtt',
|
|
24
|
+
'operatorname',
|
|
25
|
+
'overbrace',
|
|
26
|
+
'overline',
|
|
27
|
+
'pmb',
|
|
28
|
+
'rm',
|
|
29
|
+
'tilde',
|
|
30
|
+
'underbrace',
|
|
31
|
+
'underline',
|
|
32
|
+
'vec',
|
|
33
|
+
'widehat',
|
|
34
|
+
'widetilde',
|
|
35
|
+
'overleftarrow',
|
|
36
|
+
'overrightarrow',
|
|
37
|
+
'hspace',
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
export const TEX_BINARY_COMMANDS = [
|
|
41
|
+
'frac',
|
|
42
|
+
'tfrac',
|
|
43
|
+
'binom',
|
|
44
|
+
'dbinom',
|
|
45
|
+
'dfrac',
|
|
46
|
+
'tbinom',
|
|
47
|
+
'overset',
|
|
48
|
+
'underset',
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
/**
 * Remove the escaping backslash in front of TeX special characters.
 *
 * The recognised escapes are `\{`, `\}`, `\\`, `\$`, `\&`, `\#`, `\_` and `\%`;
 * any other backslash sequence (e.g. `\alpha`) is left untouched.
 *
 * The string is processed in a single left-to-right pass so that the backslash
 * produced by unescaping `\\` is never re-read as the start of another escape
 * (a chain of sequential replacements would turn `\\$` into `$` instead of `\$`).
 *
 * @param str text taken from inside the braces of a text-like command
 * @returns the text with the recognised escapes replaced by the bare character
 */
function unescape(str: string): string {
    return str.replace(/\\([{}\\$&#_%])/g, '$1');
}
|
|
59
|
+
|
|
60
|
+
// Regex sources for TeX's brace-less argument forms. Each source is used twice:
// as a key of the lexer rule table below and, compiled once here, by the
// handler that needs the capture groups. Keeping a single copy guarantees the
// rule key and the handler's regex can never drift apart, and avoids
// recompiling a RegExp for every matched token.

// e.g. match `\frac13`, `\frac1 b`, `\frac a b`
const BINARY_SHORT_ARGS_PATTERN = String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`;
// e.g. match `\sqrt3`, `\sqrt a`
const UNARY_SHORT_ARG_PATTERN = String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`;

const BINARY_SHORT_ARGS_REGEX = new RegExp(BINARY_SHORT_ARGS_PATTERN);
const UNARY_SHORT_ARG_REGEX = new RegExp(UNARY_SHORT_ARG_PATTERN);

/**
 * Lexer rule table for TeX math input.
 *
 * Each entry maps a regex source string to a handler that receives the scanner
 * and returns the token (or list of tokens) for the matched text.
 * NOTE(review): how JSLex prioritises overlapping rules is not visible here —
 * keep the entry order unchanged unless that is confirmed.
 */
const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
    [
        // commands whose braced argument is taken verbatim as text
        String.raw`\\(text|operatorname|begin|end|hspace){.+?}`, (s) => {
            const text = s.text()!;
            const command = text.substring(0, text.indexOf('{'));
            const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
            return [
                new TexToken(TexTokenType.COMMAND, command),
                new TexToken(TexTokenType.CONTROL, '{'),
                new TexToken(TexTokenType.TEXT, unescape(text_inside)),
                new TexToken(TexTokenType.CONTROL, '}')
            ]
        }
    ],
    // the leading `%` is dropped from the comment token's value
    [String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
    [String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
    [String.raw`\\[\\,:;! ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
    // `\r\n` and `\n` both yield a NEWLINE token whose value is "\n"
    [String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
    [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
    [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    [BINARY_SHORT_ARGS_PATTERN, (s) => {
        const match = s.text()!.match(BINARY_SHORT_ARGS_REGEX);
        assert(match !== null);
        const command = match![1];
        if (TEX_BINARY_COMMANDS.includes(command.substring(1))) {
            const arg1 = match![2].trimStart();
            const arg2 = match![3];
            return [
                new TexToken(TexTokenType.COMMAND, command),
                new TexToken(TexTokenType.ELEMENT, arg1),
                new TexToken(TexTokenType.ELEMENT, arg2),
            ];
        } else {
            // not a two-argument command: decline this match
            // (presumably lets the lexer fall through to another rule — confirm in jslex)
            s.reject();
            return [];
        }
    }],
    [UNARY_SHORT_ARG_PATTERN, (s) => {
        const match = s.text()!.match(UNARY_SHORT_ARG_REGEX);
        assert(match !== null);
        const command = match![1];
        if (TEX_UNARY_COMMANDS.includes(command.substring(1))) {
            const arg1 = match![2].trimStart();
            return [
                new TexToken(TexTokenType.COMMAND, command),
                new TexToken(TexTokenType.ELEMENT, arg1),
            ];
        } else {
            // not a one-argument command: decline this match (see note above)
            s.reject();
            return [];
        }
    }],
    [String.raw`\\[a-zA-Z]+`, (s) => {
        const command = s.text()!;
        return [ new TexToken(TexTokenType.COMMAND, command), ];
    }],
    // Numbers like "123", "3.14"
    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    // non-ASCII characters
    [String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    // anything else
    [String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
]);

// JSLex specification with a single state, "start", that uses the rule table above.
const spec = {
    "start": rules_map
};

/**
 * Tokenize a TeX math string.
 *
 * @param input TeX source text
 * @returns the tokens collected by a JSLex lexer built from `spec`
 */
export function tokenize_tex(input: string): TexToken[] {
    const lexer = new JSLex<TexToken>(spec);
    return lexer.collect(input);
}
|
package/src/types.ts
CHANGED
|
@@ -393,11 +393,10 @@ export const TYPST_FALSE: TypstPrimitiveValue = false;
|
|
|
393
393
|
* ATTENTION:
|
|
394
394
|
* Don't use any options except those explicitly documented in
|
|
395
395
|
* https://github.com/qwinsi/tex2typst/blob/main/docs/api-reference.md
|
|
396
|
-
* Any undocumented options may break in the future!
|
|
396
|
+
* Any undocumented options may not work at present or may break in the future!
|
|
397
397
|
*/
|
|
398
398
|
export interface Tex2TypstOptions {
|
|
399
399
|
nonStrict?: boolean; /** default is true */
|
|
400
|
-
preferTypstIntrinsic?: boolean; /** default is true */
|
|
401
400
|
preferShorthands?: boolean; /** default is true */
|
|
402
401
|
keepSpaces?: boolean; /** default is false */
|
|
403
402
|
fracToSlash?: boolean; /** default is true */
|
package/src/typst-parser.ts
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
import { array_find } from "./generic";
|
|
3
3
|
import { TYPST_NONE, TypstLrData, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
|
|
4
|
+
import { tokenize_typst } from "./typst-tokenizer";
|
|
4
5
|
import { assert, isalpha } from "./util";
|
|
5
|
-
import { reverseShorthandMap } from "./typst-shorthands";
|
|
6
|
-
import { JSLex, Scanner } from "./jslex";
|
|
7
6
|
|
|
8
|
-
const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
|
|
9
7
|
|
|
10
8
|
// TODO: In Typst, y' ' is not the same as y''.
|
|
11
9
|
// The parser should be able to parse the former correctly.
|
|
@@ -18,77 +16,6 @@ function eat_primes(tokens: TypstToken[], start: number): number {
|
|
|
18
16
|
}
|
|
19
17
|
|
|
20
18
|
|
|
21
|
-
function generate_regex_for_shorthands(): string {
|
|
22
|
-
const regex_list = TYPST_SHORTHANDS.map((s) => {
|
|
23
|
-
s = s.replaceAll('|', '\\|');
|
|
24
|
-
s = s.replaceAll('.', '\\.');
|
|
25
|
-
s = s.replaceAll('[', '\\[');
|
|
26
|
-
s = s.replaceAll(']', '\\]');
|
|
27
|
-
return s;
|
|
28
|
-
});
|
|
29
|
-
return `(${regex_list.join('|')})`;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
const REGEX_SHORTHANDS = generate_regex_for_shorthands();
|
|
34
|
-
|
|
35
|
-
const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
|
|
36
|
-
[String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
|
|
37
|
-
[String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
|
|
38
|
-
[String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
|
|
39
|
-
[String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
|
|
40
|
-
[String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
|
|
41
|
-
[String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
|
|
42
|
-
[String.raw`\\\n`, (s) => {
|
|
43
|
-
return [
|
|
44
|
-
new TypstToken(TypstTokenType.CONTROL, "\\"),
|
|
45
|
-
new TypstToken(TypstTokenType.NEWLINE, "\n"),
|
|
46
|
-
]
|
|
47
|
-
}],
|
|
48
|
-
[String.raw`\\\s`, (s) => {
|
|
49
|
-
return [
|
|
50
|
-
new TypstToken(TypstTokenType.CONTROL, "\\"),
|
|
51
|
-
new TypstToken(TypstTokenType.SPACE, " "),
|
|
52
|
-
]
|
|
53
|
-
}],
|
|
54
|
-
// this backslash is dummy and will be ignored in later stages
|
|
55
|
-
[String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
|
|
56
|
-
[
|
|
57
|
-
String.raw`"([^"]|(\\"))*"`,
|
|
58
|
-
(s) => {
|
|
59
|
-
const text = s.text()!.substring(1, s.text()!.length - 1);
|
|
60
|
-
// replace all escape characters with their actual characters
|
|
61
|
-
text.replaceAll('\\"', '"');
|
|
62
|
-
return new TypstToken(TypstTokenType.TEXT, text);
|
|
63
|
-
}
|
|
64
|
-
],
|
|
65
|
-
[
|
|
66
|
-
REGEX_SHORTHANDS,
|
|
67
|
-
(s) => {
|
|
68
|
-
const shorthand = s.text()!;
|
|
69
|
-
const symbol = reverseShorthandMap.get(shorthand)!;
|
|
70
|
-
return new TypstToken(TypstTokenType.SYMBOL, symbol);
|
|
71
|
-
}
|
|
72
|
-
],
|
|
73
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
|
|
74
|
-
[String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
|
|
75
|
-
[String.raw`[a-zA-Z\.]+`, (s) => {
|
|
76
|
-
return new TypstToken(s.text()!.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text()!);
|
|
77
|
-
}],
|
|
78
|
-
[String.raw`#none`, (s) => new TypstToken(TypstTokenType.NONE, s.text()!)],
|
|
79
|
-
[String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
|
|
80
|
-
]);
|
|
81
|
-
|
|
82
|
-
const spec = {
|
|
83
|
-
"start": rules_map
|
|
84
|
-
};
|
|
85
|
-
|
|
86
|
-
export function tokenize_typst(input: string): TypstToken[] {
|
|
87
|
-
const lexer = new JSLex<TypstToken>(spec);
|
|
88
|
-
return lexer.collect(input);
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
|
|
92
19
|
function _find_closing_match(tokens: TypstToken[], start: number,
|
|
93
20
|
leftBrackets: TypstToken[], rightBrackets: TypstToken[]): number {
|
|
94
21
|
assert(tokens[start].isOneOf(leftBrackets));
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { TypstToken, TypstTokenType } from "./types";
|
|
2
|
+
import { reverseShorthandMap } from "./typst-shorthands";
|
|
3
|
+
import { JSLex, Scanner } from "./jslex";
|
|
4
|
+
|
|
5
|
+
const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
/**
 * Build the source of a regex that matches any one of the given shorthands literally.
 *
 * Every regex metacharacter in a shorthand is backslash-escaped (not only
 * `|`, `.`, `[` and `]`), so a shorthand containing e.g. `*`, `+`, `?` or
 * parentheses cannot corrupt the generated pattern.
 *
 * @param shorthands shorthand strings to match; defaults to `TYPST_SHORTHANDS`
 * @returns a single capture group `(a|b|...)` whose alternatives keep the input order
 */
function generate_regex_for_shorthands(shorthands: string[] = TYPST_SHORTHANDS): string {
    const regex_list = shorthands.map((s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    return `(${regex_list.join('|')})`;
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
const REGEX_SHORTHANDS = generate_regex_for_shorthands();
|
|
21
|
+
|
|
22
|
+
/**
 * Lexer rule table for Typst math input.
 *
 * Each entry maps a regex source string to a handler that receives the scanner
 * and returns the token (or list of tokens) for the matched text.
 * NOTE(review): how JSLex prioritises overlapping rules is not visible here —
 * keep the entry order unchanged unless that is confirmed.
 */
const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
    // the leading `//` is dropped from the comment token's value
    [String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
    [String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    [String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
    // `\r\n` and `\n` both yield a NEWLINE token whose value is "\n"
    [String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
    [String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
    [String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    [String.raw`\\\n`, (s) => {
        return [
            new TypstToken(TypstTokenType.CONTROL, "\\"),
            new TypstToken(TypstTokenType.NEWLINE, "\n"),
        ]
    }],
    [String.raw`\\\s`, (s) => {
        return [
            new TypstToken(TypstTokenType.CONTROL, "\\"),
            new TypstToken(TypstTokenType.SPACE, " "),
        ]
    }],
    // this backslash is dummy and will be ignored in later stages
    [String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
    [
        // String literal. The escaped-quote alternative must come before `[^"]`:
        // otherwise `[^"]` consumes the backslash and the literal ends at the
        // escaped quote.
        String.raw`"((\\")|[^"])*"`,
        (s) => {
            const quoted = s.text()!;
            // Drop the surrounding quotes, then replace escaped quotes with the
            // actual character. Strings are immutable, so the result of
            // replaceAll has to be kept.
            const text = quoted.substring(1, quoted.length - 1).replaceAll('\\"', '"');
            return new TypstToken(TypstTokenType.TEXT, text);
        }
    ],
    [
        REGEX_SHORTHANDS,
        (s) => {
            // a shorthand is emitted as the symbol it maps to in reverseShorthandMap
            const shorthand = s.text()!;
            const symbol = reverseShorthandMap.get(shorthand)!;
            return new TypstToken(TypstTokenType.SYMBOL, symbol);
        }
    ],
    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    // a single character is an ELEMENT; a longer run of letters/dots is a SYMBOL
    [String.raw`[a-zA-Z\.]+`, (s) => {
        return new TypstToken(s.text()!.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text()!);
    }],
    [String.raw`#none`, (s) => new TypstToken(TypstTokenType.NONE, s.text()!)],
    // anything else
    [String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
]);

// JSLex specification with a single state, "start", that uses the rule table above.
const spec = {
    "start": rules_map
};

/**
 * Tokenize a Typst math string.
 *
 * @param input Typst source text
 * @returns the tokens collected by a JSLex lexer built from `spec`
 */
export function tokenize_typst(input: string): TypstToken[] {
    const lexer = new JSLex<TypstToken>(spec);
    return lexer.collect(input);
}
|
package/src/typst-writer.ts
CHANGED
|
@@ -11,6 +11,8 @@ const TYPST_RIGHT_PARENTHESIS: TypstToken = new TypstToken(TypstTokenType.ELEMEN
|
|
|
11
11
|
const TYPST_COMMA: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ',');
|
|
12
12
|
const TYPST_NEWLINE: TypstToken = new TypstToken(TypstTokenType.SYMBOL, '\n');
|
|
13
13
|
|
|
14
|
+
const SOFT_SPACE = new TypstToken(TypstTokenType.CONTROL, ' ');
|
|
15
|
+
|
|
14
16
|
function typst_primitive_to_string(value: TypstPrimitiveValue) {
|
|
15
17
|
switch (typeof value) {
|
|
16
18
|
case 'string':
|
|
@@ -177,7 +179,7 @@ export class TypstWriter {
|
|
|
177
179
|
trailing_space_needed = this.appendWithBracketsIfNeeded(sup);
|
|
178
180
|
}
|
|
179
181
|
if (trailing_space_needed) {
|
|
180
|
-
this.queue.push(
|
|
182
|
+
this.queue.push(SOFT_SPACE);
|
|
181
183
|
}
|
|
182
184
|
break;
|
|
183
185
|
}
|
|
@@ -208,7 +210,16 @@ export class TypstWriter {
|
|
|
208
210
|
}
|
|
209
211
|
case 'fraction': {
|
|
210
212
|
const [numerator, denominator] = node.args!;
|
|
211
|
-
this.
|
|
213
|
+
const pos = this.queue.length;
|
|
214
|
+
const no_wrap = this.appendWithBracketsIfNeeded(numerator);
|
|
215
|
+
|
|
216
|
+
// This is a dirty hack to force `C \frac{xy}{z}` to translate to `C (x y)/z` instead of `C(x y)/z`
|
|
217
|
+
// To solve this properly, we should implement a Typst formatter
|
|
218
|
+
const wrapped = !no_wrap;
|
|
219
|
+
if (wrapped) {
|
|
220
|
+
this.queue.splice(pos, 0, SOFT_SPACE);
|
|
221
|
+
}
|
|
222
|
+
|
|
212
223
|
this.queue.push(new TypstToken(TypstTokenType.ELEMENT, '/'));
|
|
213
224
|
this.appendWithBracketsIfNeeded(denominator);
|
|
214
225
|
break;
|
|
@@ -332,20 +343,25 @@ export class TypstWriter {
|
|
|
332
343
|
}
|
|
333
344
|
|
|
334
345
|
protected flushQueue() {
|
|
335
|
-
const
|
|
346
|
+
const dummy_token = new TypstToken(TypstTokenType.SYMBOL, '');
|
|
336
347
|
|
|
337
348
|
// delete soft spaces if they are not needed
|
|
338
349
|
for(let i = 0; i < this.queue.length; i++) {
|
|
339
350
|
let token = this.queue[i];
|
|
340
351
|
if (token.eq(SOFT_SPACE)) {
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
352
|
+
const to_delete = (i === 0)
|
|
353
|
+
|| (i === this.queue.length - 1)
|
|
354
|
+
|| (this.queue[i - 1].type === TypstTokenType.SPACE)
|
|
355
|
+
|| this.queue[i - 1].isOneOf([TYPST_LEFT_PARENTHESIS, TYPST_NEWLINE])
|
|
356
|
+
|| this.queue[i + 1].isOneOf([TYPST_RIGHT_PARENTHESIS, TYPST_COMMA, TYPST_NEWLINE]);
|
|
357
|
+
if (to_delete) {
|
|
358
|
+
this.queue[i] = dummy_token;
|
|
345
359
|
}
|
|
346
360
|
}
|
|
347
361
|
}
|
|
348
362
|
|
|
363
|
+
this.queue = this.queue.filter((token) => !token.eq(dummy_token));
|
|
364
|
+
|
|
349
365
|
this.queue.forEach((token) => {
|
|
350
366
|
this.writeBuffer(token)
|
|
351
367
|
});
|