@ansi-tools/parser 0.0.0 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,410 +0,0 @@
1
- import assert from "node:assert/strict";
2
- import { test } from "node:test";
3
- import { tokenizer } from "./tokenize.escaped.ts";
4
- import type { TOKEN } from "./types.ts";
5
-
6
- test("empty", () => {
7
- const input = "";
8
- const tokens: TOKEN[] = [...tokenizer(input)];
9
- assert.deepEqual(tokens, []);
10
- });
11
-
12
- test("text", () => {
13
- const input = "Hello, world!";
14
- const tokens: TOKEN[] = [...tokenizer(input)];
15
- assert.deepEqual(tokens, [{ type: "TEXT", pos: 0, raw: "Hello, world!" }]);
16
- });
17
-
18
- test("CSI", () => {
19
- const input = String.raw`\x1b[31m`;
20
- const tokens: TOKEN[] = [...tokenizer(input)];
21
- assert.deepEqual(tokens, [
22
- { type: "INTRODUCER", pos: 0, raw: "\\x1b[", code: "\x9b" },
23
- { type: "DATA", pos: 5, raw: "31" },
24
- { type: "FINAL", pos: 7, raw: "m" },
25
- ]);
26
- });
27
-
28
- test("OSC", () => {
29
- const input = String.raw`\x1b]0;title\x07`;
30
- const tokens: TOKEN[] = [...tokenizer(input)];
31
- assert.deepEqual(tokens, [
32
- { type: "INTRODUCER", pos: 0, raw: "\\x1b]", code: "\x9d" },
33
- { type: "DATA", pos: 5, raw: "0;title" },
34
- { type: "FINAL", pos: 12, raw: "\\x07" },
35
- ]);
36
- });
37
-
38
- test("multiple", () => {
39
- const input = String.raw`\x1b[31m\x1b]0;title\x07`;
40
- const tokens: TOKEN[] = [...tokenizer(input)];
41
- assert.deepEqual(tokens, [
42
- { type: "INTRODUCER", pos: 0, raw: "\\x1b[", code: "\x9b" },
43
- { type: "DATA", pos: 5, raw: "31" },
44
- { type: "FINAL", pos: 7, raw: "m" },
45
- { type: "INTRODUCER", pos: 8, raw: "\\x1b]", code: "\x9d" },
46
- { type: "DATA", pos: 13, raw: "0;title" },
47
- { type: "FINAL", pos: 20, raw: "\\x07" },
48
- ]);
49
- });
50
-
51
- test("mixed", () => {
52
- const input = String.raw`Hello, \x1b[31mworld\x1b]0;title\x07!`;
53
- const tokens: TOKEN[] = [...tokenizer(input)];
54
- assert.deepEqual(tokens, [
55
- { type: "TEXT", pos: 0, raw: "Hello, " },
56
- { type: "INTRODUCER", pos: 7, raw: "\\x1b[", code: "\x9b" },
57
- { type: "DATA", pos: 12, raw: "31" },
58
- { type: "FINAL", pos: 14, raw: "m" },
59
- { type: "TEXT", pos: 15, raw: "world" },
60
- { type: "INTRODUCER", pos: 20, raw: "\\x1b]", code: "\x9d" },
61
- { type: "DATA", pos: 25, raw: "0;title" },
62
- { type: "FINAL", pos: 32, raw: "\\x07" },
63
- { type: "TEXT", pos: 36, raw: "!" },
64
- ]);
65
- });
66
-
67
- test("colors (rgb)", () => {
68
- const input = String.raw`\x1b[38;2;255;255;0mH\x1b[0;1;3;35me\x1b[95ml\x1b[42ml\x1b[0;41mo\x1b[0m`;
69
- const tokens: TOKEN[] = [...tokenizer(input)];
70
- assert.deepEqual(tokens, [
71
- { type: "INTRODUCER", pos: 0, raw: "\\x1b[", code: "\x9b" },
72
- { type: "DATA", pos: 5, raw: "38;2;255;255;0" },
73
- { type: "FINAL", pos: 19, raw: "m" },
74
- { type: "TEXT", pos: 20, raw: "H" },
75
- { type: "INTRODUCER", pos: 21, raw: "\\x1b[", code: "\x9b" },
76
- { type: "DATA", pos: 26, raw: "0;1;3;35" },
77
- { type: "FINAL", pos: 34, raw: "m" },
78
- { type: "TEXT", pos: 35, raw: "e" },
79
- { type: "INTRODUCER", pos: 36, raw: "\\x1b[", code: "\x9b" },
80
- { type: "DATA", pos: 41, raw: "95" },
81
- { type: "FINAL", pos: 43, raw: "m" },
82
- { type: "TEXT", pos: 44, raw: "l" },
83
- { type: "INTRODUCER", pos: 45, raw: "\\x1b[", code: "\x9b" },
84
- { type: "DATA", pos: 50, raw: "42" },
85
- { type: "FINAL", pos: 52, raw: "m" },
86
- { type: "TEXT", pos: 53, raw: "l" },
87
- { type: "INTRODUCER", pos: 54, raw: "\\x1b[", code: "\x9b" },
88
- { type: "DATA", pos: 59, raw: "0;41" },
89
- { type: "FINAL", pos: 63, raw: "m" },
90
- { type: "TEXT", pos: 64, raw: "o" },
91
- { type: "INTRODUCER", pos: 65, raw: "\\x1b[", code: "\x9b" },
92
- { type: "DATA", pos: 70, raw: "0" },
93
- { type: "FINAL", pos: 71, raw: "m" },
94
- ]);
95
- });
96
-
97
- test("colors", () => {
98
- const input = String.raw`\u001b[31mRed\u001b[39m, \u001b[32mgreen\u001b[39m, and \u001b[44mblue background\u001b[49m.`;
99
- const tokens: TOKEN[] = [...tokenizer(input)];
100
- assert.deepEqual(tokens, [
101
- { type: "INTRODUCER", pos: 0, raw: "\\u001b[", code: "\x9b" },
102
- { type: "DATA", pos: 7, raw: "31" },
103
- { type: "FINAL", pos: 9, raw: "m" },
104
- { type: "TEXT", pos: 10, raw: "Red" },
105
- { type: "INTRODUCER", pos: 13, raw: "\\u001b[", code: "\x9b" },
106
- { type: "DATA", pos: 20, raw: "39" },
107
- { type: "FINAL", pos: 22, raw: "m" },
108
- { type: "TEXT", pos: 23, raw: ", " },
109
- { type: "INTRODUCER", pos: 25, raw: "\\u001b[", code: "\x9b" },
110
- { type: "DATA", pos: 32, raw: "32" },
111
- { type: "FINAL", pos: 34, raw: "m" },
112
- { type: "TEXT", pos: 35, raw: "green" },
113
- { type: "INTRODUCER", pos: 40, raw: "\\u001b[", code: "\x9b" },
114
- { type: "DATA", pos: 47, raw: "39" },
115
- { type: "FINAL", pos: 49, raw: "m" },
116
- { type: "TEXT", pos: 50, raw: ", and " },
117
- { type: "INTRODUCER", pos: 56, raw: "\\u001b[", code: "\x9b" },
118
- { type: "DATA", pos: 63, raw: "44" },
119
- { type: "FINAL", pos: 65, raw: "m" },
120
- { type: "TEXT", pos: 66, raw: "blue background" },
121
- { type: "INTRODUCER", pos: 81, raw: "\\u001b[", code: "\x9b" },
122
- { type: "DATA", pos: 88, raw: "49" },
123
- { type: "FINAL", pos: 90, raw: "m" },
124
- { type: "TEXT", pos: 91, raw: "." },
125
- ]);
126
- });
127
-
128
- test("cursor", () => {
129
- const input = String.raw`\x1b[3A\x1b[4D\x1b[shello\x1b[J\x1b[1;3Hworld\x1b[u\x1b[13T`;
130
- const tokens: TOKEN[] = [...tokenizer(input)];
131
- assert.deepEqual(tokens, [
132
- { type: "INTRODUCER", pos: 0, raw: "\\x1b[", code: "\x9b" },
133
- { type: "DATA", pos: 5, raw: "3" },
134
- { type: "FINAL", pos: 6, raw: "A" },
135
- { type: "INTRODUCER", pos: 7, raw: "\\x1b[", code: "\x9b" },
136
- { type: "DATA", pos: 12, raw: "4" },
137
- { type: "FINAL", pos: 13, raw: "D" },
138
- { type: "INTRODUCER", pos: 14, raw: "\\x1b[", code: "\x9b" },
139
- { type: "FINAL", pos: 19, raw: "s" },
140
- { type: "TEXT", pos: 20, raw: "hello" },
141
- { type: "INTRODUCER", pos: 25, raw: "\\x1b[", code: "\x9b" },
142
- { type: "FINAL", pos: 30, raw: "J" },
143
- { type: "INTRODUCER", pos: 31, raw: "\\x1b[", code: "\x9b" },
144
- { type: "DATA", pos: 36, raw: "1;3" },
145
- { type: "FINAL", pos: 39, raw: "H" },
146
- { type: "TEXT", pos: 40, raw: "world" },
147
- { type: "INTRODUCER", pos: 45, raw: "\\x1b[", code: "\x9b" },
148
- { type: "FINAL", pos: 50, raw: "u" },
149
- { type: "INTRODUCER", pos: 51, raw: "\\x1b[", code: "\x9b" },
150
- { type: "DATA", pos: 56, raw: "13" },
151
- { type: "FINAL", pos: 58, raw: "T" },
152
- ]);
153
- });
154
-
155
- test("mixed", () => {
156
- const input = String.raw`\x1b[A\r\x1b[K\x1b[1;32mOpened \x1b[1;4;34m%s\x1b[0;1;32m in your browser.\x1b[0m\n\n⭐ → ✨\n\n這裡有一些中文文字。\n\nThe End.`;
157
- const tokens: TOKEN[] = [...tokenizer(input)];
158
- assert.deepEqual(tokens, [
159
- { type: "INTRODUCER", pos: 0, raw: "\\x1b[", code: "\x9b" },
160
- { type: "FINAL", pos: 5, raw: "A" },
161
- { type: "TEXT", pos: 6, raw: "\\r" },
162
- { type: "INTRODUCER", pos: 8, raw: "\\x1b[", code: "\x9b" },
163
- { type: "FINAL", pos: 13, raw: "K" },
164
- { type: "INTRODUCER", pos: 14, raw: "\\x1b[", code: "\x9b" },
165
- { type: "DATA", pos: 19, raw: "1;32" },
166
- { type: "FINAL", pos: 23, raw: "m" },
167
- { type: "TEXT", pos: 24, raw: "Opened " },
168
- { type: "INTRODUCER", pos: 31, raw: "\\x1b[", code: "\x9b" },
169
- { type: "DATA", pos: 36, raw: "1;4;34" },
170
- { type: "FINAL", pos: 42, raw: "m" },
171
- { type: "TEXT", pos: 43, raw: "%s" },
172
- { type: "INTRODUCER", pos: 45, raw: "\\x1b[", code: "\x9b" },
173
- { type: "DATA", pos: 50, raw: "0;1;32" },
174
- { type: "FINAL", pos: 56, raw: "m" },
175
- { type: "TEXT", pos: 57, raw: " in your browser." },
176
- { type: "INTRODUCER", pos: 74, raw: "\\x1b[", code: "\x9b" },
177
- { type: "DATA", pos: 79, raw: "0" },
178
- { type: "FINAL", pos: 80, raw: "m" },
179
- { type: "TEXT", pos: 81, raw: "\\n\\n⭐ → ✨\\n\\n這裡有一些中文文字。\\n\\nThe End." },
180
- ]);
181
- });
182
-
183
- test("styles", () => {
184
- const input = String.raw`\u001b[1mBold\u001b[22m, \u001b[3mItalic\u001b[23m, \u001b[4mUnderline\u001b[24m, and \u001b[9mStrikethrough\u001b[29m.`;
185
- const tokens: TOKEN[] = [...tokenizer(input)];
186
- assert.deepEqual(tokens, [
187
- { type: "INTRODUCER", pos: 0, raw: "\\u001b[", code: "\x9b" },
188
- { type: "DATA", pos: 7, raw: "1" },
189
- { type: "FINAL", pos: 8, raw: "m" },
190
- { type: "TEXT", pos: 9, raw: "Bold" },
191
- { type: "INTRODUCER", pos: 13, raw: "\\u001b[", code: "\x9b" },
192
- { type: "DATA", pos: 20, raw: "22" },
193
- { type: "FINAL", pos: 22, raw: "m" },
194
- { type: "TEXT", pos: 23, raw: ", " },
195
- { type: "INTRODUCER", pos: 25, raw: "\\u001b[", code: "\x9b" },
196
- { type: "DATA", pos: 32, raw: "3" },
197
- { type: "FINAL", pos: 33, raw: "m" },
198
- { type: "TEXT", pos: 34, raw: "Italic" },
199
- { type: "INTRODUCER", pos: 40, raw: "\\u001b[", code: "\x9b" },
200
- { type: "DATA", pos: 47, raw: "23" },
201
- { type: "FINAL", pos: 49, raw: "m" },
202
- { type: "TEXT", pos: 50, raw: ", " },
203
- { type: "INTRODUCER", pos: 52, raw: "\\u001b[", code: "\x9b" },
204
- { type: "DATA", pos: 59, raw: "4" },
205
- { type: "FINAL", pos: 60, raw: "m" },
206
- { type: "TEXT", pos: 61, raw: "Underline" },
207
- { type: "INTRODUCER", pos: 70, raw: "\\u001b[", code: "\x9b" },
208
- { type: "DATA", pos: 77, raw: "24" },
209
- { type: "FINAL", pos: 79, raw: "m" },
210
- { type: "TEXT", pos: 80, raw: ", and " },
211
- { type: "INTRODUCER", pos: 86, raw: "\\u001b[", code: "\x9b" },
212
- { type: "DATA", pos: 93, raw: "9" },
213
- { type: "FINAL", pos: 94, raw: "m" },
214
- { type: "TEXT", pos: 95, raw: "Strikethrough" },
215
- { type: "INTRODUCER", pos: 108, raw: "\\u001b[", code: "\x9b" },
216
- { type: "DATA", pos: 115, raw: "29" },
217
- { type: "FINAL", pos: 117, raw: "m" },
218
- { type: "TEXT", pos: 118, raw: "." },
219
- ]);
220
- });
221
-
222
- test("commands", () => {
223
- const input = String.raw`\u001bc\u001b[2J\u001b[3J\u001b[?25l\u001b]0;Set Title\u0007An example of terminal commands.`;
224
- const tokens: TOKEN[] = [...tokenizer(input)];
225
- assert.deepEqual(tokens, [
226
- { type: "INTRODUCER", pos: 0, raw: "\\u001b", code: "\x1b" },
227
- { type: "FINAL", pos: 6, raw: "c" },
228
- { type: "INTRODUCER", pos: 7, raw: "\\u001b[", code: "\x9b" },
229
- { type: "DATA", pos: 14, raw: "2" },
230
- { type: "FINAL", pos: 15, raw: "J" },
231
- { type: "INTRODUCER", pos: 16, raw: "\\u001b[", code: "\x9b" },
232
- { type: "DATA", pos: 23, raw: "3" },
233
- { type: "FINAL", pos: 24, raw: "J" },
234
- { type: "INTRODUCER", pos: 25, raw: "\\u001b[", code: "\x9b" },
235
- { type: "DATA", pos: 32, raw: "?25" },
236
- { type: "FINAL", pos: 35, raw: "l" },
237
- { type: "INTRODUCER", pos: 36, raw: "\\u001b]", code: "\x9d" },
238
- { type: "DATA", pos: 43, raw: "0;Set Title" },
239
- { type: "FINAL", pos: 54, raw: "\\u0007" },
240
- { type: "TEXT", pos: 60, raw: "An example of terminal commands." },
241
- ]);
242
- });
243
-
244
- test("8-bit", () => {
245
- const input = String.raw`\u009b32mGreen text\u009b0m.`;
246
- const tokens: TOKEN[] = [...tokenizer(input)];
247
- assert.deepEqual(tokens, [
248
- { type: "INTRODUCER", pos: 0, raw: "\\u009b", code: "\x9b" },
249
- { type: "DATA", pos: 6, raw: "32" },
250
- { type: "FINAL", pos: 8, raw: "m" },
251
- { type: "TEXT", pos: 9, raw: "Green text" },
252
- { type: "INTRODUCER", pos: 19, raw: "\\u009b", code: "\x9b" },
253
- { type: "DATA", pos: 25, raw: "0" },
254
- { type: "FINAL", pos: 26, raw: "m" },
255
- { type: "TEXT", pos: 27, raw: "." },
256
- ]);
257
- });
258
-
259
- test("OSC terminator", () => {
260
- const input = String.raw`\x1b]0;title\x1b\\`;
261
- const tokens: TOKEN[] = [...tokenizer(input)];
262
- assert.deepEqual(tokens, [
263
- { type: "INTRODUCER", pos: 0, raw: "\\x1b]", code: "\x9d" },
264
- { type: "DATA", pos: 5, raw: "0;title" },
265
- { type: "FINAL", pos: 12, raw: "\\x1b\\\\" },
266
- ]);
267
- });
268
-
269
- test("Set G1 Charset to UK", () => {
270
- const input = String.raw`\x1b(Ab`;
271
- const tokens: TOKEN[] = [...tokenizer(input)];
272
- assert.deepEqual(tokens, [
273
- { type: "INTRODUCER", pos: 0, raw: "\\x1b(", code: "\x9b" },
274
- { type: "FINAL", pos: 5, raw: "A" },
275
- { type: "TEXT", pos: 6, raw: "b" },
276
- ]);
277
- });
278
-
279
- test("Set G1 Charset to UK (no intermediate)", () => {
280
- const input = String.raw`\x1b)B`;
281
- const tokens: TOKEN[] = [...tokenizer(input)];
282
- assert.deepEqual(tokens, [
283
- { type: "INTRODUCER", pos: 0, raw: "\\x1b)", code: "\x9b" },
284
- { type: "FINAL", pos: 5, raw: "B" },
285
- ]);
286
- });
287
-
288
- test("Select UTF-8 character set", () => {
289
- const input = String.raw`\x1b%G`;
290
- const tokens: TOKEN[] = [...tokenizer(input)];
291
- assert.deepEqual(tokens, [
292
- { type: "INTRODUCER", pos: 0, raw: "\\x1b%", code: "\x9b" },
293
- { type: "FINAL", pos: 5, raw: "G" },
294
- ]);
295
- });
296
-
297
- test("DEC Private Mode - Hide Cursor", () => {
298
- const input = String.raw`\x1b[?25l`;
299
- const tokens: TOKEN[] = [...tokenizer(input)];
300
- assert.deepEqual(tokens, [
301
- { type: "INTRODUCER", pos: 0, raw: "\\x1b[", code: "\x9b" },
302
- { type: "DATA", pos: 5, raw: "?25" },
303
- { type: "FINAL", pos: 8, raw: "l" },
304
- ]);
305
- });
306
-
307
- test("Simple ESC", () => {
308
- const input = String.raw`\x1bc`;
309
- const tokens: TOKEN[] = [...tokenizer(input)];
310
- assert.deepEqual(tokens, [
311
- { type: "INTRODUCER", pos: 0, raw: "\\x1b", code: "\x1b" },
312
- { type: "FINAL", pos: 4, raw: "c" },
313
- ]);
314
- });
315
-
316
- test("iTerm2 SetUserVar", () => {
317
- const input = String.raw`\x1b]1337;SetUserVar=foo=YmFy\x07`;
318
- const tokens: TOKEN[] = [...tokenizer(input)];
319
- assert.deepEqual(tokens, [
320
- { type: "INTRODUCER", pos: 0, raw: "\\x1b]", code: "\x9d" },
321
- { type: "DATA", pos: 5, raw: "1337;SetUserVar=foo=YmFy" },
322
- { type: "FINAL", pos: 29, raw: "\\x07" },
323
- ]);
324
- });
325
-
326
- test("DCS with \\e\\\\ terminator", () => {
327
- const input = String.raw`\x1bP0;1|name\e\\`;
328
- const tokens: TOKEN[] = [...tokenizer(input)];
329
- assert.deepEqual(tokens, [
330
- { type: "INTRODUCER", pos: 0, raw: "\\x1bP", code: "P" },
331
- { type: "DATA", pos: 5, raw: "0;1|name" },
332
- { type: "FINAL", pos: 13, raw: "\\e\\\\" },
333
- ]);
334
- });
335
-
336
- test("APC with \\e\\\\ terminator", () => {
337
- const input = String.raw`\x1b_some payload\e\\`;
338
- const tokens: TOKEN[] = [...tokenizer(input)];
339
- assert.deepEqual(tokens, [
340
- { type: "INTRODUCER", pos: 0, raw: "\\x1b_", code: "_" },
341
- { type: "DATA", pos: 5, raw: "some payload" },
342
- { type: "FINAL", pos: 17, raw: "\\e\\\\" },
343
- ]);
344
- });
345
-
346
- test("PM with \\e\\\\ terminator", () => {
347
- const input = String.raw`\x1b^privacy data\e\\`;
348
- const tokens: TOKEN[] = [...tokenizer(input)];
349
- assert.deepEqual(tokens, [
350
- { type: "INTRODUCER", pos: 0, raw: "\\x1b^", code: "^" },
351
- { type: "DATA", pos: 5, raw: "privacy data" },
352
- { type: "FINAL", pos: 17, raw: "\\e\\\\" },
353
- ]);
354
- });
355
-
356
- test("SOS with \\e\\\\ terminator", () => {
357
- const input = String.raw`\x1bXstring data\e\\`;
358
- const tokens: TOKEN[] = [...tokenizer(input)];
359
- assert.deepEqual(tokens, [
360
- { type: "INTRODUCER", pos: 0, raw: "\\x1bX", code: "X" },
361
- { type: "DATA", pos: 5, raw: "string data" },
362
- { type: "FINAL", pos: 16, raw: "\\e\\\\" },
363
- ]);
364
- });
365
-
366
- test("DCS with \\x1b\\\\ terminator", () => {
367
- const input = String.raw`\x1bP0;1|name\x1b\\`;
368
- const tokens: TOKEN[] = [...tokenizer(input)];
369
- assert.deepEqual(tokens, [
370
- { type: "INTRODUCER", pos: 0, raw: "\\x1bP", code: "P" },
371
- { type: "DATA", pos: 5, raw: "0;1|name" },
372
- { type: "FINAL", pos: 13, raw: "\\x1b\\\\" },
373
- ]);
374
- });
375
-
376
- test("APC with \\x1b\\\\ terminator", () => {
377
- const input = String.raw`\x1b_app data\x1b\\`;
378
- const tokens: TOKEN[] = [...tokenizer(input)];
379
- assert.deepEqual(tokens, [
380
- { type: "INTRODUCER", pos: 0, raw: "\\x1b_", code: "_" },
381
- { type: "DATA", pos: 5, raw: "app data" },
382
- { type: "FINAL", pos: 13, raw: "\\x1b\\\\" },
383
- ]);
384
- });
385
-
386
- test("OSC with \\e\\\\ terminator", () => {
387
- const input = String.raw`\x1b]0;window title\e\\`;
388
- const tokens: TOKEN[] = [...tokenizer(input)];
389
- assert.deepEqual(tokens, [
390
- { type: "INTRODUCER", pos: 0, raw: "\\x1b]", code: "\x9d" },
391
- { type: "DATA", pos: 5, raw: "0;window title" },
392
- { type: "FINAL", pos: 19, raw: "\\e\\\\" },
393
- ]);
394
- });
395
-
396
- test("Mixed string sequences with different terminators", () => {
397
- const input = String.raw`\x1b_app\e\\\x1bP0|data\x1b\\\x1b]0;title\x07`;
398
- const tokens: TOKEN[] = [...tokenizer(input)];
399
- assert.deepEqual(tokens, [
400
- { type: "INTRODUCER", pos: 0, raw: "\\x1b_", code: "_" },
401
- { type: "DATA", pos: 5, raw: "app" },
402
- { type: "FINAL", pos: 8, raw: "\\e\\\\" },
403
- { type: "INTRODUCER", pos: 12, raw: "\\x1bP", code: "P" },
404
- { type: "DATA", pos: 17, raw: "0|data" },
405
- { type: "FINAL", pos: 23, raw: "\\x1b\\\\" },
406
- { type: "INTRODUCER", pos: 29, raw: "\\x1b]", code: "\x9d" },
407
- { type: "DATA", pos: 34, raw: "0;title" },
408
- { type: "FINAL", pos: 41, raw: "\\x07" },
409
- ]);
410
- });
@@ -1,191 +0,0 @@
1
- import { BACKSLASH, CSI, CSI_OPEN, ESC, OSC, OSC_OPEN, STRING_OPENERS, TOKEN_TYPES } from "./constants.ts";
2
- import type { TOKEN, CODE } from "./types.ts";
3
- import { parser } from "./parse.ts";
4
-
5
- type State = "GROUND" | "SEQUENCE";
6
-
7
- const debug = false;
8
-
9
- const CSI_ESCAPED = "\\u009b";
10
-
11
- const INTRODUCERS = [
12
- ["\\u001b", 6],
13
- [CSI_ESCAPED, 6],
14
- ["\\x1b", 4],
15
- ["\\033", 4],
16
- ["\\e", 2],
17
- ] as const;
18
-
19
- const INTRODUCER_LOOKUP = new Map<string, [string, number][]>();
20
- for (const [sequence, len] of INTRODUCERS) {
21
- const secondChar = sequence[1];
22
- if (!INTRODUCER_LOOKUP.has(secondChar)) INTRODUCER_LOOKUP.set(secondChar, []);
23
- INTRODUCER_LOOKUP.get(secondChar)?.push([sequence, len]);
24
- }
25
-
26
- const STRING_TERMINATORS = new Map([
27
- ["\\x9c", 4],
28
- ["\\e\\\\", 4],
29
- ["\\x1b\\\\", 8],
30
- ]);
31
-
32
- const OSC_ONLY_TERMINATORS = new Map([
33
- ["\\a", 2],
34
- ["\\x07", 4],
35
- ["\\u0007", 6],
36
- ]);
37
-
38
- const ST_MAX_LENGTH = Math.max(...STRING_TERMINATORS.values());
39
- const OSC_TERM_MAX_LENGTH = Math.max(...OSC_ONLY_TERMINATORS.values());
40
- const INTRODUCER_PEEK_AHEAD = new Set(INTRODUCERS.map(entry => entry[0][1]));
41
-
42
- function emit(token: TOKEN) {
43
- if (debug) console.log("token", token);
44
- return token;
45
- }
46
-
47
- export function* tokenizer(input: string): Generator<TOKEN> {
48
- let i = 0;
49
- let state: State = "GROUND";
50
- let currentCode: string | undefined;
51
-
52
- function setState(next: State, code?: string) {
53
- if (debug) console.log(`state ${state} → ${next}`);
54
- state = next;
55
- currentCode = code;
56
- }
57
-
58
- while (i < input.length) {
59
- if (state === "GROUND") {
60
- const textStart = i;
61
- while (i < input.length) {
62
- const backslashIndex = input.indexOf(BACKSLASH, i);
63
-
64
- if (backslashIndex === -1) {
65
- i = input.length;
66
- break;
67
- }
68
-
69
- const nextChar = input[backslashIndex + 1];
70
- if (nextChar && INTRODUCER_PEEK_AHEAD.has(nextChar)) {
71
- i = backslashIndex;
72
- break;
73
- } else {
74
- i = backslashIndex + 1;
75
- }
76
- }
77
-
78
- if (i > textStart) {
79
- yield emit({ type: TOKEN_TYPES.TEXT, pos: textStart, raw: input.substring(textStart, i) });
80
- }
81
-
82
- if (i < input.length) {
83
- const candidates = INTRODUCER_LOOKUP.get(input[i + 1]);
84
- if (candidates) {
85
- for (const [sequence, len] of candidates) {
86
- if (i + len <= input.length && input.substring(i, i + len) === sequence) {
87
- if (sequence === CSI_ESCAPED) {
88
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence, code: CSI });
89
- i += len;
90
- setState("SEQUENCE", CSI);
91
- } else {
92
- const nextChar = input[i + len];
93
- if (nextChar === CSI_OPEN) {
94
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence + nextChar, code: CSI });
95
- i += len + 1;
96
- setState("SEQUENCE", CSI);
97
- } else if (nextChar === OSC_OPEN) {
98
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence + nextChar, code: OSC });
99
- i += len + 1;
100
- setState("SEQUENCE", OSC);
101
- } else if (STRING_OPENERS.has(nextChar)) {
102
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence + nextChar, code: nextChar });
103
- i += len + 1;
104
- setState("SEQUENCE", nextChar);
105
- } else if (nextChar && nextChar.charCodeAt(0) >= 0x20 && nextChar.charCodeAt(0) <= 0x2f) {
106
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence + nextChar, code: CSI });
107
- i += len + 1;
108
- setState("SEQUENCE", CSI);
109
- } else if (nextChar) {
110
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence, code: ESC });
111
- i += len;
112
- yield emit({ type: TOKEN_TYPES.FINAL, pos: i, raw: nextChar });
113
- i++;
114
- } else {
115
- yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence, code: ESC });
116
- i += len;
117
- }
118
- }
119
- break;
120
- }
121
- }
122
- }
123
- }
124
- } else {
125
- let terminator = "";
126
- let terminatorPos = -1;
127
- const pos = i;
128
- const code = currentCode;
129
-
130
- while (!terminator && i < input.length) {
131
- const char = input[i];
132
- if (code === CSI) {
133
- const charCode = input.charCodeAt(i);
134
- if (charCode >= 0x40 && charCode < 0x7e) {
135
- terminator = char;
136
- terminatorPos = i;
137
- i++;
138
- }
139
- } else if (code) {
140
- if (char === BACKSLASH) {
141
- if (code === OSC) {
142
- for (let len = OSC_TERM_MAX_LENGTH; len >= 2; len -= 2) {
143
- if (i + len <= input.length) {
144
- const sequence = input.substring(i, i + len);
145
- if (OSC_ONLY_TERMINATORS.has(sequence)) {
146
- terminator = sequence;
147
- terminatorPos = i;
148
- i += len;
149
- break;
150
- }
151
- }
152
- }
153
- }
154
- if (!terminator) {
155
- for (let len = ST_MAX_LENGTH; len >= 2; len -= 2) {
156
- if (i + len <= input.length) {
157
- const sequence = input.substring(i, i + len);
158
- if (STRING_TERMINATORS.has(sequence)) {
159
- terminator = sequence;
160
- terminatorPos = i;
161
- i += len;
162
- break;
163
- }
164
- }
165
- }
166
- }
167
- }
168
- }
169
-
170
- if (!terminator) {
171
- i++;
172
- }
173
- }
174
-
175
- if (terminatorPos > pos) {
176
- const data = input.substring(pos, terminatorPos);
177
- yield emit({ type: TOKEN_TYPES.DATA, pos, raw: data });
178
- }
179
-
180
- if (terminator) {
181
- yield emit({ type: TOKEN_TYPES.FINAL, pos: terminatorPos, raw: terminator });
182
- }
183
-
184
- setState("GROUND");
185
- }
186
- }
187
- }
188
-
189
- export function tokenize(input: string): TOKEN[] {
190
- return Array.from(tokenizer(input));
191
- }