tex2typst 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.js +353 -263
- package/dist/jslex.d.ts +105 -0
- package/dist/tex-parser.d.ts +1 -1
- package/dist/tex2typst.min.js +13 -20
- package/dist/typst-parser.d.ts +1 -1
- package/docs/api-reference.md +1 -1
- package/package.json +1 -1
- package/src/convert.ts +16 -0
- package/src/jslex.ts +304 -0
- package/src/tex-parser.ts +44 -137
- package/src/typst-parser.ts +66 -140
package/src/typst-parser.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
import { array_find } from "./generic";
|
|
3
3
|
import { TYPST_NONE, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
|
|
4
|
-
import { assert, isalpha
|
|
4
|
+
import { assert, isalpha } from "./util";
|
|
5
5
|
import { reverseShorthandMap } from "./typst-shorthands";
|
|
6
|
-
|
|
6
|
+
import { JSLex, Scanner } from "./jslex";
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
const TYPST_EMPTY_NODE = new TypstNode('empty', '');
|
|
@@ -21,150 +21,76 @@ function eat_primes(tokens: TypstToken[], start: number): number {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
function
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
24
|
+
/**
 * Builds one regular-expression source string that matches any entry of
 * `TYPST_SHORTHANDS`.
 *
 * Each shorthand is treated as literal text: every regex metacharacter is
 * backslash-escaped (not only `|`, `.`, `[` and `]`), so an entry containing
 * e.g. `*`, `+`, `?` or `(` cannot corrupt the pattern. The alternatives keep
 * the order of `TYPST_SHORTHANDS`, are joined with `|`, and are wrapped in a
 * single group.
 *
 * @returns A pattern of the form `(a|b|c)`.
 */
function generate_regex_for_shorthands(): string {
    // `$&` re-inserts the matched metacharacter right after the added backslash.
    const regex_list = TYPST_SHORTHANDS.map((s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    return `(${regex_list.join('|')})`;
}
|
|
31
34
|
|
|
32
|
-
function try_eat_shorthand(typst: string, start: number): string | null {
|
|
33
|
-
for (const shorthand of TYPST_SHORTHANDS) {
|
|
34
|
-
if (typst.startsWith(shorthand, start)) {
|
|
35
|
-
return shorthand;
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
return null;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
export function tokenize_typst(typst: string): TypstToken[] {
|
|
43
|
-
const tokens: TypstToken[] = [];
|
|
44
|
-
|
|
45
|
-
let pos = 0;
|
|
46
35
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
pos = newPos;
|
|
78
|
-
break;
|
|
79
|
-
}
|
|
80
|
-
case '/': {
|
|
81
|
-
if (pos < typst.length && typst[pos + 1] === '/') {
|
|
82
|
-
let newPos = pos + 2;
|
|
83
|
-
while (newPos < typst.length && typst[newPos] !== '\n') {
|
|
84
|
-
newPos++;
|
|
85
|
-
}
|
|
86
|
-
token = new TypstToken(TypstTokenType.COMMENT, typst.slice(pos + 2, newPos));
|
|
87
|
-
pos = newPos;
|
|
88
|
-
} else {
|
|
89
|
-
token = new TypstToken(TypstTokenType.ELEMENT, '/');
|
|
90
|
-
pos++;
|
|
91
|
-
}
|
|
92
|
-
break;
|
|
93
|
-
}
|
|
94
|
-
case '\\': {
|
|
95
|
-
if (pos + 1 >= typst.length) {
|
|
96
|
-
throw new Error('Expecting a character after \\');
|
|
97
|
-
}
|
|
98
|
-
const firstTwoChars = typst.substring(pos, pos + 2);
|
|
99
|
-
if (['\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
|
|
100
|
-
token = new TypstToken(TypstTokenType.ELEMENT, firstTwoChars);
|
|
101
|
-
pos += 2;
|
|
102
|
-
} else if (['\\\n', '\\ '].includes(firstTwoChars)) {
|
|
103
|
-
token = new TypstToken(TypstTokenType.CONTROL, '\\');
|
|
104
|
-
pos += 1;
|
|
105
|
-
} else {
|
|
106
|
-
// this backslash is dummy and will be ignored in later stages
|
|
107
|
-
token = new TypstToken(TypstTokenType.CONTROL, '');
|
|
108
|
-
pos++;
|
|
109
|
-
}
|
|
110
|
-
break;
|
|
111
|
-
}
|
|
112
|
-
case '"': {
|
|
113
|
-
let newPos = pos + 1;
|
|
114
|
-
while (newPos < typst.length) {
|
|
115
|
-
if (typst[newPos] === '"' && typst[newPos - 1] !== '\\') {
|
|
116
|
-
break;
|
|
117
|
-
}
|
|
118
|
-
newPos++;
|
|
119
|
-
}
|
|
120
|
-
let text = typst.substring(pos + 1, newPos);
|
|
121
|
-
// replace all escape characters with their actual characters
|
|
122
|
-
const chars = ['"', '\\'];
|
|
123
|
-
for (const char of chars) {
|
|
124
|
-
text = text.replaceAll('\\' + char, char);
|
|
125
|
-
}
|
|
126
|
-
token = new TypstToken(TypstTokenType.TEXT, text);
|
|
127
|
-
pos = newPos + 1;
|
|
128
|
-
break;
|
|
129
|
-
}
|
|
130
|
-
default: {
|
|
131
|
-
const shorthand = try_eat_shorthand(typst, pos);
|
|
132
|
-
if (shorthand !== null) {
|
|
133
|
-
token = new TypstToken(TypstTokenType.SYMBOL, reverseShorthandMap.get(shorthand)!);
|
|
134
|
-
pos += shorthand.length;
|
|
135
|
-
break;
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
if (isdigit(firstChar)) {
|
|
139
|
-
let newPos = pos;
|
|
140
|
-
while (newPos < typst.length && isdigit(typst[newPos])) {
|
|
141
|
-
newPos += 1;
|
|
142
|
-
}
|
|
143
|
-
if(newPos < typst.length && typst[newPos] === '.') {
|
|
144
|
-
newPos += 1;
|
|
145
|
-
while (newPos < typst.length && isdigit(typst[newPos])) {
|
|
146
|
-
newPos += 1;
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
token = new TypstToken(TypstTokenType.ELEMENT, typst.slice(pos, newPos));
|
|
150
|
-
} else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
|
|
151
|
-
token = new TypstToken(TypstTokenType.ELEMENT, firstChar)
|
|
152
|
-
} else if (isalpha(firstChar)) {
|
|
153
|
-
const identifier = eat_identifier_name(typst, pos);
|
|
154
|
-
const _type = identifier.length === 1 ? TypstTokenType.ELEMENT : TypstTokenType.SYMBOL;
|
|
155
|
-
token = new TypstToken(_type, identifier);
|
|
156
|
-
} else {
|
|
157
|
-
token = new TypstToken(TypstTokenType.ELEMENT, firstChar);
|
|
158
|
-
}
|
|
159
|
-
pos += token.value.length;
|
|
160
|
-
}
|
|
36
|
+
// Regex source matching any Typst shorthand; computed once when the module loads.
const REGEX_SHORTHANDS = generate_regex_for_shorthands();

/**
 * Tokenizer rule table: each key is a regex source string and each value is
 * the action that turns the matched text (`s.text()`) into one token or a
 * list of tokens.
 *
 * NOTE(review): how overlapping rules are resolved (insertion order vs.
 * longest match) is decided by `JSLex`, which is not visible here — confirm
 * before reordering entries.
 */
const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
    // Line comment: the token value is everything after the leading `//`.
    [String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
    [String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    [String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
    // Both LF and CRLF line endings are normalized to a single "\n" token value.
    [String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
    [String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
    // Backslash-escaped `$`, `&`, `#`, `_` stay together as one element.
    [String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    // Backslash followed by a newline: a `\` control token plus the newline.
    [String.raw`\\\n`, (s) => {
        return [
            new TypstToken(TypstTokenType.CONTROL, "\\"),
            new TypstToken(TypstTokenType.NEWLINE, "\n"),
        ]
    }],
    // Backslash followed by whitespace: a `\` control token plus a single space.
    [String.raw`\\\s`, (s) => {
        return [
            new TypstToken(TypstTokenType.CONTROL, "\\"),
            new TypstToken(TypstTokenType.SPACE, " "),
        ]
    }],
    // this backslash is dummy and will be ignored in later stages
    [String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
    [
        // Double-quoted text. The escaped-quote alternative must come first:
        // otherwise `[^"]` consumes the backslash and the following `"` is
        // taken as the end of the string.
        String.raw`"((\\")|[^"])*"`,
        (s) => {
            const quoted = s.text()!;
            // Drop the surrounding quotes, then replace each escaped quote with
            // the actual character. `replaceAll` returns a new string, so its
            // result has to be kept.
            const text = quoted.substring(1, quoted.length - 1).replaceAll('\\"', '"');
            return new TypstToken(TypstTokenType.TEXT, text);
        }
    ],
    [
        REGEX_SHORTHANDS,
        (s) => {
            // Map the shorthand back to its symbol name via the reverse lookup table.
            const shorthand = s.text()!;
            const symbol = reverseShorthandMap.get(shorthand)!;
            return new TypstToken(TypstTokenType.SYMBOL, symbol);
        }
    ],
    // Integer or decimal number literal.
    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    // Single-character operators and punctuation.
    [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    // Letters and dots: a one-character match is an element, anything longer a symbol.
    [String.raw`[a-zA-Z\.]+`, (s) => {
        return new TypstToken(s.text()!.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text()!);
    }],
    // Fallback: any other single character becomes an element.
    [String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
]);

// Lexer specification passed to JSLex: a single entry keyed "start" holding the rule table.
const spec = {
    "start": rules_map
};
|
|
87
|
+
|
|
88
|
+
/**
 * Splits a Typst source string into tokens.
 *
 * A new `JSLex` instance is built from `spec` on every call, and whatever its
 * `collect` method returns for `input` is handed back unchanged.
 *
 * @param input Typst source text to tokenize.
 * @returns The tokens produced by the lexer for `input`.
 */
export function tokenize_typst(input: string): TypstToken[] {
    const tokens: TypstToken[] = new JSLex<TypstToken>(spec).collect(input);
    return tokens;
}
|
|
167
92
|
|
|
93
|
+
|
|
168
94
|
function find_closing_match(tokens: TypstToken[], start: number): number {
|
|
169
95
|
assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET]));
|
|
170
96
|
let count = 1;
|