tex2typst 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +122 -238
- package/dist/tex2typst.min.js +10 -11
- package/package.json +1 -1
- package/src/convert.ts +7 -3
- package/src/lex.ts +70 -197
- package/src/tex-tokenizer.ts +29 -34
- package/src/typst-tokenizer.ts +43 -39
package/package.json
CHANGED
package/src/convert.ts
CHANGED
|
@@ -87,9 +87,6 @@ function tex_token_to_typst(token: TexToken, options: Tex2TypstOptions): TypstTo
|
|
|
87
87
|
if (token.value === '\\\\') {
|
|
88
88
|
// \\ -> \
|
|
89
89
|
return new TypstToken(TypstTokenType.CONTROL, '\\');
|
|
90
|
-
} else if (token.value === '\\!') {
|
|
91
|
-
// \! -> #h(-math.thin.amount)
|
|
92
|
-
return new TypstToken(TypstTokenType.SYMBOL, '#h(-math.thin.amount)');
|
|
93
90
|
} else if (token.value === '~') {
|
|
94
91
|
// ~ -> space.nobreak
|
|
95
92
|
const typst_symbol = symbolMap.get('~')!;
|
|
@@ -215,6 +212,13 @@ export function convert_tex_node_to_typst(abstractNode: TexNode, options: Tex2Ty
|
|
|
215
212
|
switch (abstractNode.type) {
|
|
216
213
|
case 'terminal': {
|
|
217
214
|
const node = abstractNode as TexTerminal;
|
|
215
|
+
// \! -> #h(-math.thin.amount)
|
|
216
|
+
if (node.head.eq(new TexToken(TexTokenType.CONTROL, '\\!'))) {
|
|
217
|
+
return new TypstFuncCall(
|
|
218
|
+
new TypstToken(TypstTokenType.SYMBOL, '#h'),
|
|
219
|
+
[new TypstToken(TypstTokenType.LITERAL, '-math.thin.amount').toNode()]
|
|
220
|
+
);
|
|
221
|
+
}
|
|
218
222
|
return tex_token_to_typst(node.head, options).toNode();
|
|
219
223
|
}
|
|
220
224
|
case 'text': {
|
package/src/lex.ts
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Last modified: 2026-
|
|
2
|
+
* Last modified: 2026-05-30
|
|
3
3
|
* Adapted from jslex - A lexer in JavaScript. https://github.com/jimbojw/jslex
|
|
4
4
|
* Licensed under MIT license
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
export type ScannerCallback<T> = (a:
|
|
7
|
+
export type ScannerCallback<T> = (a: ScannerState) => ScanResult<T>;
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
|
|
10
|
+
type TypeEOF = null;
|
|
11
|
+
const EOF: TypeEOF = null;
|
|
12
12
|
|
|
13
13
|
interface IRule<T> {
|
|
14
14
|
re: RegExp;
|
|
@@ -21,10 +21,6 @@ interface IMatch<T> {
|
|
|
21
21
|
reMatchArray: RegExpMatchArray;
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
// End of File marker
|
|
26
|
-
const EOF = {};
|
|
27
|
-
|
|
28
24
|
/**
|
|
29
25
|
* Utility function for comparing two matches.
|
|
30
26
|
* @param {object} m1 Left-hand side match.
|
|
@@ -41,146 +37,66 @@ function matchcompare<T>(m1: IMatch<T>, m2: IMatch<T>): number {
|
|
|
41
37
|
}
|
|
42
38
|
}
|
|
43
39
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
private _pos: number = 0;
|
|
50
|
-
|
|
51
|
-
// current line number
|
|
52
|
-
private _line: number = 0;
|
|
53
|
-
|
|
54
|
-
// current column number
|
|
55
|
-
private _col: number = 0;
|
|
56
|
-
|
|
57
|
-
private _offset: number = 0;
|
|
58
|
-
private _less: number | null = null;
|
|
59
|
-
private _go: boolean = false;
|
|
60
|
-
private _newstate: string | null = null;
|
|
61
|
-
private _state: string;
|
|
62
|
-
|
|
63
|
-
private _text: string | null = null;
|
|
64
|
-
private _leng: number | null = null;
|
|
65
|
-
private _reMatchArray: RegExpMatchArray | null = null;
|
|
66
|
-
|
|
67
|
-
constructor(input: string, lexer: JSLex<T>) {
|
|
68
|
-
this._input = input;
|
|
69
|
-
this._lexer = lexer;
|
|
70
|
-
this._state = lexer.states[0];
|
|
71
|
-
}
|
|
40
|
+
enum ScanResultStatus {
|
|
41
|
+
ACCEPTED = 0,
|
|
42
|
+
REJECTED = 1,
|
|
43
|
+
ERROR = 2,
|
|
44
|
+
}
|
|
72
45
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
46
|
+
export class ScanResult<T> {
|
|
47
|
+
public result: T | T[];
|
|
48
|
+
public status: ScanResultStatus;
|
|
49
|
+
constructor(status: ScanResultStatus, result: T | T[]) {
|
|
50
|
+
this.result = result;
|
|
51
|
+
this.status = status;
|
|
78
52
|
}
|
|
79
53
|
|
|
80
|
-
public leng(): number | null {
|
|
81
|
-
return this._leng;
|
|
82
|
-
}
|
|
83
54
|
|
|
84
|
-
public
|
|
85
|
-
return
|
|
55
|
+
public static Accepted<U>(result: U | U[]): ScanResult<U> {
|
|
56
|
+
return new ScanResult(ScanResultStatus.ACCEPTED, result);
|
|
86
57
|
}
|
|
87
58
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
*/
|
|
91
|
-
public pos(): number {
|
|
92
|
-
return this._pos;
|
|
59
|
+
public static Rejected<U>(): ScanResult<U> {
|
|
60
|
+
return new ScanResult(ScanResultStatus.REJECTED, []);
|
|
93
61
|
}
|
|
94
62
|
|
|
95
|
-
public
|
|
96
|
-
return
|
|
63
|
+
public static Error<U>(message: string): ScanResult<U> {
|
|
64
|
+
return new ScanResult(ScanResultStatus.ERROR, []);
|
|
97
65
|
}
|
|
66
|
+
}
|
|
98
67
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
68
|
+
interface ScannerState {
|
|
69
|
+
pos: number;
|
|
70
|
+
text: string;
|
|
71
|
+
reMatchArray: RegExpMatchArray;
|
|
72
|
+
}
|
|
102
73
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
*/
|
|
107
|
-
public input(): string {
|
|
108
|
-
return this._input.charAt(this._pos + this._leng! + this._offset++);
|
|
109
|
-
}
|
|
74
|
+
export class Scanner<T> {
|
|
75
|
+
private readonly _input: string;
|
|
76
|
+
private readonly rules: IRule<T>[];
|
|
110
77
|
|
|
111
|
-
/**
|
|
112
|
-
* Similar to unput() in lex, but does not allow modifying the stream.
|
|
113
|
-
* @return {int} The offset position after the operation.
|
|
114
|
-
*/
|
|
115
|
-
public unput(): number {
|
|
116
|
-
return this._offset = this._offset > 0 ? this._offset-- : 0;
|
|
117
|
-
}
|
|
118
78
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
* the rest to the input stream, such that they will be used in the next pattern-matching operation.
|
|
122
|
-
* @param {int} n Number of characters to retain.
|
|
123
|
-
* @return {int} Length of the stream after the operation has completed.
|
|
124
|
-
*/
|
|
125
|
-
public less(n: number): number {
|
|
126
|
-
this._less = n;
|
|
127
|
-
this._offset = 0;
|
|
128
|
-
this._text = this._text!.substring(0, n);
|
|
129
|
-
return this._leng = this._text.length;
|
|
130
|
-
}
|
|
79
|
+
// position within input stream
|
|
80
|
+
private _pos: number = 0;
|
|
131
81
|
|
|
132
|
-
/**
|
|
133
|
-
* Like less(), but instead of retaining the first n characters, it chops off the last n.
|
|
134
|
-
* @param {int} n Number of characters to chop.
|
|
135
|
-
* @return {int} Length of the stream after the operation has completed.
|
|
136
|
-
*/
|
|
137
|
-
public pushback(n: number): number {
|
|
138
|
-
return this.less(this._leng! - n);
|
|
139
|
-
}
|
|
140
82
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
*/
|
|
145
|
-
public reject(): void {
|
|
146
|
-
this._go = true;
|
|
83
|
+
constructor(input: string, rules: IRule<T>[]) {
|
|
84
|
+
this._input = input;
|
|
85
|
+
this.rules = rules;
|
|
147
86
|
}
|
|
148
87
|
|
|
149
|
-
/**
|
|
150
|
-
* Analogous to BEGIN in lex - sets the named state (start condition).
|
|
151
|
-
* @param {string|int} state Name of state to switch to, or ordinal number (0 is first, etc).
|
|
152
|
-
* @return {string} The new state on successful switch, throws exception on failure.
|
|
153
|
-
*/
|
|
154
|
-
public begin(state: string | number): string {
|
|
155
|
-
if (this._lexer.specification[state]) {
|
|
156
|
-
return this._newstate = state as string;
|
|
157
|
-
}
|
|
158
|
-
const s = this._lexer.states[parseInt(state as string)];
|
|
159
|
-
if (s) {
|
|
160
|
-
return this._newstate = s;
|
|
161
|
-
}
|
|
162
|
-
throw "Unknown state '" + state + "' requested";
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
/**
|
|
166
|
-
* Simple accessor for reading in the current state.
|
|
167
|
-
* @return {string} The current state.
|
|
168
|
-
*/
|
|
169
|
-
public state(): string {
|
|
170
|
-
return this._state;
|
|
171
|
-
}
|
|
172
88
|
|
|
173
89
|
/**
|
|
174
90
|
* Scan method to be returned to caller - grabs the next token and fires appropriate callback.
|
|
175
91
|
* @return {T} The next token extracted from the stream.
|
|
176
92
|
*/
|
|
177
|
-
public scan(): T | T[] {
|
|
93
|
+
public scan(): T | T[] | TypeEOF {
|
|
178
94
|
if(this._pos >= this._input.length) {
|
|
179
|
-
return EOF
|
|
95
|
+
return EOF;
|
|
180
96
|
}
|
|
181
97
|
|
|
182
98
|
const str = this._input.substring(this._pos);
|
|
183
|
-
const rules = this.
|
|
99
|
+
const rules = this.rules;
|
|
184
100
|
const matches: IMatch<T>[] = [];
|
|
185
101
|
for (let i = 0; i < rules.length; i++) {
|
|
186
102
|
const rule = rules[i];
|
|
@@ -197,86 +113,45 @@ export class Scanner<T> {
|
|
|
197
113
|
throw new Error("No match found for input '" + str + "'");
|
|
198
114
|
}
|
|
199
115
|
matches.sort(matchcompare);
|
|
200
|
-
this._go = true;
|
|
201
116
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
this._leng = matched_text.length;
|
|
213
|
-
this._reMatchArray = m.reMatchArray;
|
|
214
|
-
result = m.rule.action(this);
|
|
215
|
-
if (this._newstate && this._newstate != this._state) {
|
|
216
|
-
this._state = this._newstate;
|
|
217
|
-
break;
|
|
117
|
+
for (const m of matches) {
|
|
118
|
+
const matched_text = m.reMatchArray[0];
|
|
119
|
+
const result = m.rule.action({
|
|
120
|
+
pos: this._pos,
|
|
121
|
+
text: matched_text,
|
|
122
|
+
reMatchArray: m.reMatchArray,
|
|
123
|
+
});
|
|
124
|
+
if (result.status === ScanResultStatus.ACCEPTED) {
|
|
125
|
+
this._pos += matched_text.length;
|
|
126
|
+
return result.result;
|
|
218
127
|
}
|
|
219
128
|
}
|
|
220
|
-
|
|
221
|
-
const len = text.length;
|
|
222
|
-
this._pos += len + this._offset;
|
|
223
|
-
|
|
224
|
-
const nlm = text.match(/\n/g);
|
|
225
|
-
if (nlm !== null) {
|
|
226
|
-
this._line += nlm.length;
|
|
227
|
-
this._col = len - text.lastIndexOf("\n") - 1;
|
|
228
|
-
} else {
|
|
229
|
-
this._col += len;
|
|
230
|
-
}
|
|
231
|
-
return result!;
|
|
129
|
+
throw new Error("No match found for input '" + str + "'");
|
|
232
130
|
}
|
|
233
131
|
}
|
|
234
132
|
|
|
235
133
|
export class JSLex<T> {
|
|
236
|
-
public
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
constructor(
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
throw "Duplicate state declaration encountered for state '" + s + "'";
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
this.specification[s] = [] as IRule<T>[];
|
|
253
|
-
|
|
254
|
-
for (const [k,v] of rule_map.entries()) {
|
|
255
|
-
let re: RegExp;
|
|
256
|
-
try {
|
|
257
|
-
// FIXME: e.g. "neg|norm" becomes /^neg|norm/,
|
|
258
|
-
// but what we really want is /^(neg|norm)/ .
|
|
259
|
-
// This will cause error when tokenize input like "...norm..."
|
|
260
|
-
re = new RegExp('^' + k);
|
|
261
|
-
} catch (err) {
|
|
262
|
-
throw "Invalid regexp '" + k + "' in state '" + s + "' (" + (err as Error).message + ")";
|
|
263
|
-
}
|
|
264
|
-
this.specification[s].push({
|
|
265
|
-
re: re,
|
|
266
|
-
action: v
|
|
267
|
-
});
|
|
134
|
+
public readonly rules: IRule<T>[] = [];
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
constructor(ruleMap: Map<string, ScannerCallback<T>>) {
|
|
138
|
+
for (const [k, v] of ruleMap.entries()) {
|
|
139
|
+
let re: RegExp;
|
|
140
|
+
try {
|
|
141
|
+
// FIXME: e.g. "neg|norm" becomes /^neg|norm/,
|
|
142
|
+
// but what we really want is /^(neg|norm)/ .
|
|
143
|
+
// This will cause error when tokenize input like "...norm..."
|
|
144
|
+
re = new RegExp('^' + k);
|
|
145
|
+
} catch (err) {
|
|
146
|
+
throw "Invalid regexp '" + k + "' (" + (err as Error).message + ")";
|
|
268
147
|
}
|
|
148
|
+
this.rules.push({
|
|
149
|
+
re: re,
|
|
150
|
+
action: v
|
|
151
|
+
});
|
|
269
152
|
}
|
|
270
153
|
}
|
|
271
154
|
|
|
272
|
-
/**
|
|
273
|
-
* Scanner function - makes a new scanner object which is used to get tokens one at a time.
|
|
274
|
-
* @param {string} input Input text to tokenize.
|
|
275
|
-
* @return {function} Scanner function.
|
|
276
|
-
*/
|
|
277
|
-
public scanner(input: string): Scanner<T> {
|
|
278
|
-
return new Scanner(input, this);
|
|
279
|
-
}
|
|
280
155
|
|
|
281
156
|
/**
|
|
282
157
|
* Similar to lex's yylex() function, consumes all input, calling callback for each token.
|
|
@@ -284,15 +159,13 @@ export class JSLex<T> {
|
|
|
284
159
|
* @param {function} callback Function to execute for each token.
|
|
285
160
|
*/
|
|
286
161
|
public lex(input: string, callback: (arg0: T | T[]) => void) {
|
|
287
|
-
const scanner = this.
|
|
162
|
+
const scanner = new Scanner(input, this.rules);
|
|
288
163
|
while (true) {
|
|
289
164
|
const token = scanner.scan();
|
|
290
165
|
if (token === EOF) {
|
|
291
|
-
|
|
292
|
-
}
|
|
293
|
-
if (token !== undefined) {
|
|
294
|
-
callback(token);
|
|
166
|
+
break;
|
|
295
167
|
}
|
|
168
|
+
callback(token);
|
|
296
169
|
}
|
|
297
170
|
}
|
|
298
171
|
|
package/src/tex-tokenizer.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { TexToken, TexTokenType } from "./tex-types";
|
|
2
|
-
import { JSLex,
|
|
2
|
+
import { JSLex, ScannerCallback, ScanResult } from "./lex";
|
|
3
3
|
|
|
4
4
|
export const TEX_UNARY_COMMANDS = [
|
|
5
5
|
'sqrt',
|
|
@@ -74,8 +74,8 @@ const rules_map = new Map<string, ScannerCallback<TexToken>>([
|
|
|
74
74
|
// match `\begin{array}{cc}`
|
|
75
75
|
[
|
|
76
76
|
String.raw`\\begin{(array|subarry)}{(.+?)}`, (s) => {
|
|
77
|
-
const match = s.reMatchArray
|
|
78
|
-
return [
|
|
77
|
+
const match = s.reMatchArray;
|
|
78
|
+
return ScanResult.Accepted([
|
|
79
79
|
new TexToken(TexTokenType.COMMAND, '\\begin'),
|
|
80
80
|
new TexToken(TexTokenType.CONTROL, '{'),
|
|
81
81
|
new TexToken(TexTokenType.LITERAL, match[1]),
|
|
@@ -83,74 +83,69 @@ const rules_map = new Map<string, ScannerCallback<TexToken>>([
|
|
|
83
83
|
new TexToken(TexTokenType.CONTROL, '{'),
|
|
84
84
|
new TexToken(TexTokenType.LITERAL, match[2]),
|
|
85
85
|
new TexToken(TexTokenType.CONTROL, '}'),
|
|
86
|
-
]
|
|
86
|
+
]);
|
|
87
87
|
}
|
|
88
88
|
],
|
|
89
89
|
[
|
|
90
90
|
String.raw`\\(text|operatorname\*?|textcolor|begin|end|hspace|array)\s*{(.+?)}`, (s) => {
|
|
91
|
-
const match = s.reMatchArray
|
|
92
|
-
return [
|
|
91
|
+
const match = s.reMatchArray;
|
|
92
|
+
return ScanResult.Accepted([
|
|
93
93
|
new TexToken(TexTokenType.COMMAND, '\\' + match[1]),
|
|
94
94
|
new TexToken(TexTokenType.CONTROL, '{'),
|
|
95
95
|
new TexToken(TexTokenType.LITERAL, unescape(match[2])),
|
|
96
96
|
new TexToken(TexTokenType.CONTROL, '}')
|
|
97
|
-
]
|
|
97
|
+
]);
|
|
98
98
|
}
|
|
99
99
|
],
|
|
100
|
-
[String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text
|
|
101
|
-
[String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text
|
|
102
|
-
[String.raw`\\[\\,:;!> ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text
|
|
103
|
-
[String.raw`~`, (s) => new TexToken(TexTokenType.CONTROL, s.text
|
|
104
|
-
[String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
|
|
105
|
-
[String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text
|
|
106
|
-
[String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
100
|
+
[String.raw`%[^\n]*`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.COMMENT, s.text.substring(1)))],
|
|
101
|
+
[String.raw`[{}_^&]`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.CONTROL, s.text)])],
|
|
102
|
+
[String.raw`\\[\\,:;!> ]`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.CONTROL, s.text)])],
|
|
103
|
+
[String.raw`~`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.CONTROL, s.text)])],
|
|
104
|
+
[String.raw`\r?\n`, (_s) => ScanResult.Accepted([new TexToken(TexTokenType.NEWLINE, "\n")])],
|
|
105
|
+
[String.raw`\s+`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.SPACE, s.text)])],
|
|
106
|
+
[String.raw`\\[{}%$&#_|]`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.ELEMENT, s.text)])],
|
|
107
107
|
// e.g. match `\frac13`, `\frac1 b`, `\frac a b`
|
|
108
108
|
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
|
|
109
|
-
const match = s.reMatchArray
|
|
109
|
+
const match = s.reMatchArray;
|
|
110
110
|
const command = match![1];
|
|
111
111
|
if (TEX_BINARY_COMMANDS.includes(command.substring(1))) {
|
|
112
112
|
const arg1 = match[2].trimStart();
|
|
113
113
|
const arg2 = match[3];
|
|
114
|
-
return [
|
|
114
|
+
return ScanResult.Accepted([
|
|
115
115
|
new TexToken(TexTokenType.COMMAND, command),
|
|
116
116
|
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
117
117
|
new TexToken(TexTokenType.ELEMENT, arg2),
|
|
118
|
-
];
|
|
118
|
+
]);
|
|
119
119
|
} else {
|
|
120
|
-
|
|
121
|
-
return [];
|
|
120
|
+
return ScanResult.Rejected();
|
|
122
121
|
}
|
|
123
122
|
}],
|
|
124
123
|
// e.g. match `\sqrt3`, `\sqrt a`
|
|
125
124
|
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
|
|
126
|
-
const match = s.reMatchArray
|
|
125
|
+
const match = s.reMatchArray;
|
|
127
126
|
const command = match[1];
|
|
128
127
|
if (TEX_UNARY_COMMANDS.includes(command.substring(1))) {
|
|
129
128
|
const arg1 = match[2].trimStart();
|
|
130
|
-
return [
|
|
129
|
+
return ScanResult.Accepted([
|
|
131
130
|
new TexToken(TexTokenType.COMMAND, command),
|
|
132
131
|
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
133
|
-
];
|
|
132
|
+
]);
|
|
134
133
|
} else {
|
|
135
|
-
|
|
136
|
-
return [];
|
|
134
|
+
return ScanResult.Rejected();
|
|
137
135
|
}
|
|
138
136
|
}],
|
|
139
|
-
[String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text
|
|
137
|
+
[String.raw`\\[a-zA-Z]+`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.COMMAND, s.text))],
|
|
140
138
|
// Numbers like "123", "3.14"
|
|
141
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
142
|
-
[String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
143
|
-
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
139
|
+
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
140
|
+
[String.raw`[a-zA-Z]`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
141
|
+
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
144
142
|
// non-ASCII characters
|
|
145
|
-
[String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
146
|
-
[String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text
|
|
143
|
+
[String.raw`[^\x00-\x7F]`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
144
|
+
[String.raw`.`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.UNKNOWN, s.text)])],
|
|
147
145
|
]);
|
|
148
146
|
|
|
149
|
-
const spec = {
|
|
150
|
-
"start": rules_map
|
|
151
|
-
};
|
|
152
147
|
|
|
153
148
|
export function tokenize_tex(input: string): TexToken[] {
|
|
154
|
-
const lexer = new JSLex<TexToken>(
|
|
149
|
+
const lexer = new JSLex<TexToken>(rules_map);
|
|
155
150
|
return lexer.collect(input);
|
|
156
151
|
}
|
package/src/typst-tokenizer.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { TypstToken } from "./typst-types";
|
|
2
2
|
import { TypstTokenType } from "./typst-types";
|
|
3
3
|
import { reverseShorthandMap } from "./typst-shorthands";
|
|
4
|
-
import { JSLex,
|
|
4
|
+
import { JSLex, ScannerCallback, ScanResult } from "./lex";
|
|
5
5
|
|
|
6
6
|
const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
|
|
7
7
|
|
|
@@ -21,66 +21,70 @@ function generate_regex_for_shorthands(): string {
|
|
|
21
21
|
const REGEX_SHORTHANDS = generate_regex_for_shorthands();
|
|
22
22
|
|
|
23
23
|
const rules_map = new Map<string, ScannerCallback<TypstToken>>([
|
|
24
|
-
[String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text
|
|
25
|
-
[String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
26
|
-
[String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text
|
|
27
|
-
[String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
|
|
28
|
-
[String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text
|
|
29
|
-
[String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
30
|
-
[
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
24
|
+
[String.raw`//[^\n]*`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.COMMENT, s.text.substring(2)))],
|
|
25
|
+
[String.raw`/`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text))],
|
|
26
|
+
[String.raw`[_^&]`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.CONTROL, s.text))],
|
|
27
|
+
[String.raw`\r?\n`, (_s) => ScanResult.Accepted(new TypstToken(TypstTokenType.NEWLINE, "\n"))],
|
|
28
|
+
[String.raw`\s+`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.SPACE, s.text))],
|
|
29
|
+
[String.raw`\\[$&#_]`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text))],
|
|
30
|
+
[
|
|
31
|
+
String.raw`\\\n`,
|
|
32
|
+
(s) => ScanResult.Accepted([
|
|
33
|
+
new TypstToken(TypstTokenType.CONTROL, "\\"),
|
|
34
|
+
new TypstToken(TypstTokenType.NEWLINE, "\n")
|
|
35
|
+
])
|
|
36
|
+
],
|
|
36
37
|
[String.raw`\\\s`, (s) => {
|
|
37
|
-
return [
|
|
38
|
+
return ScanResult.Accepted([
|
|
38
39
|
new TypstToken(TypstTokenType.CONTROL, "\\"),
|
|
39
40
|
new TypstToken(TypstTokenType.SPACE, " "),
|
|
40
|
-
]
|
|
41
|
+
])
|
|
41
42
|
}],
|
|
42
43
|
// this backslash is dummy and will be ignored in later stages
|
|
43
|
-
[String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
|
|
44
|
+
[String.raw`\\\S`, (_s) => ScanResult.Accepted(new TypstToken(TypstTokenType.CONTROL, ""))],
|
|
44
45
|
[
|
|
45
46
|
String.raw`"([^"]|(\\"))*"`,
|
|
46
47
|
(s) => {
|
|
47
|
-
const text = s.text
|
|
48
|
+
const text = s.text.substring(1, s.text.length - 1);
|
|
48
49
|
// replace all escape characters with their actual characters
|
|
49
50
|
text.replaceAll('\\"', '"');
|
|
50
|
-
return new TypstToken(TypstTokenType.TEXT, text);
|
|
51
|
+
return ScanResult.Accepted(new TypstToken(TypstTokenType.TEXT, text));
|
|
51
52
|
}
|
|
52
53
|
],
|
|
53
54
|
[
|
|
54
55
|
REGEX_SHORTHANDS,
|
|
55
56
|
(s) => {
|
|
56
|
-
const shorthand = s.text
|
|
57
|
+
const shorthand = s.text;
|
|
57
58
|
const symbol = reverseShorthandMap.get(shorthand)!;
|
|
58
|
-
return new TypstToken(TypstTokenType.SYMBOL, symbol);
|
|
59
|
+
return ScanResult.Accepted(new TypstToken(TypstTokenType.SYMBOL, symbol));
|
|
59
60
|
}
|
|
60
61
|
],
|
|
61
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
62
|
-
[String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
63
|
-
[
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
62
|
+
[ String.raw`[0-9]+(\.[0-9]+)?`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text)) ],
|
|
63
|
+
[ String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text)) ],
|
|
64
|
+
[
|
|
65
|
+
String.raw`#h\((.+?)\)`,
|
|
66
|
+
(s) => {
|
|
67
|
+
const match = s.reMatchArray;
|
|
68
|
+
return ScanResult.Accepted([
|
|
69
|
+
new TypstToken(TypstTokenType.SYMBOL, "#h"),
|
|
70
|
+
new TypstToken(TypstTokenType.ELEMENT, "("),
|
|
71
|
+
new TypstToken(TypstTokenType.LITERAL, match[1]),
|
|
72
|
+
new TypstToken(TypstTokenType.ELEMENT, ")"),
|
|
73
|
+
]);
|
|
74
|
+
}
|
|
75
|
+
],
|
|
76
|
+
[String.raw`#none`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.NONE, s.text))],
|
|
77
|
+
[
|
|
78
|
+
String.raw`#none`,
|
|
79
|
+
(s) => ScanResult.Accepted(new TypstToken(TypstTokenType.NONE, s.text)),
|
|
80
|
+
],
|
|
73
81
|
[String.raw`#?[a-zA-Z\.]+`, (s) => {
|
|
74
|
-
return new TypstToken(s.text
|
|
82
|
+
return ScanResult.Accepted(new TypstToken(s.text.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text));
|
|
75
83
|
}],
|
|
76
|
-
[String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
84
|
+
[String.raw`.`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text))],
|
|
77
85
|
]);
|
|
78
86
|
|
|
79
|
-
const spec = {
|
|
80
|
-
"start": rules_map
|
|
81
|
-
};
|
|
82
|
-
|
|
83
87
|
export function tokenize_typst(input: string): TypstToken[] {
|
|
84
|
-
const lexer = new JSLex<TypstToken>(
|
|
88
|
+
const lexer = new JSLex<TypstToken>(rules_map);
|
|
85
89
|
return lexer.collect(input);
|
|
86
90
|
}
|