@ansi-tools/parser 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,191 +0,0 @@
1
- import { BACKSLASH, CSI, CSI_OPEN, ESC, OSC, OSC_OPEN, STRING_OPENERS, TOKEN_TYPES } from "./constants.ts";
2
- import type { TOKEN, CODE } from "./types.ts";
3
- import { parser } from "./parse.ts";
4
-
5
/** Tokenizer modes: `GROUND` scans plain text, `SEQUENCE` scans the body of a control sequence. */
type State = "GROUND" | "SEQUENCE";

/** Set to `true` to log every emitted token and every state transition to the console. */
const debug = false;

/** Escaped spelling of the single-character CSI introducer: the six characters `\u009b`. */
const CSI_ESCAPED = "\\u009b";

/**
 * Escaped spellings that may start a sequence, each paired with its length in characters.
 * Every spelling begins with a literal backslash.
 */
const INTRODUCERS = [
  ["\\u001b", 6],
  [CSI_ESCAPED, 6],
  ["\\x1b", 4],
  ["\\033", 4],
  ["\\e", 2],
] as const;

/** Introducer spellings grouped by the character that follows the leading backslash. */
const INTRODUCER_LOOKUP = new Map<string, [string, number][]>();
for (const [sequence, len] of INTRODUCERS) {
  const secondChar = sequence[1];
  const bucket = INTRODUCER_LOOKUP.get(secondChar);
  if (bucket) {
    bucket.push([sequence, len]);
  } else {
    INTRODUCER_LOOKUP.set(secondChar, [[sequence, len]]);
  }
}

/**
 * Escaped spellings of the string terminator (ST), each paired with its length in characters.
 *
 * The recorded length must equal the key's real length and must be even: the tokenizer
 * probes candidate substrings from `ST_MAX_LENGTH` downward in steps of two.
 * The ESC-backslash forms cover every ESC spelling accepted in `INTRODUCERS`.
 */
const STRING_TERMINATORS = new Map([
  ["\\x9c", 4],
  ["\\u009c", 6],
  ["\\e\\\\", 4],
  ["\\x1b\\\\", 6],
  ["\\033\\\\", 6],
  ["\\u001b\\\\", 8],
]);

/** Escaped spellings of BEL, which terminates OSC sequences only; lengths follow the same rules as above. */
const OSC_ONLY_TERMINATORS = new Map([
  ["\\a", 2],
  ["\\007", 4],
  ["\\x07", 4],
  ["\\u0007", 6],
]);

/** Longest string-terminator spelling; upper bound for the tokenizer's terminator probe. */
const ST_MAX_LENGTH = Math.max(...STRING_TERMINATORS.values());
/** Longest OSC-only terminator spelling; upper bound for the tokenizer's OSC terminator probe. */
const OSC_TERM_MAX_LENGTH = Math.max(...OSC_ONLY_TERMINATORS.values());
/** Characters that can follow a backslash at the start of an introducer spelling. */
const INTRODUCER_PEEK_AHEAD = new Set(INTRODUCERS.map(entry => entry[0][1]));
41
-
42
/**
 * Hands a token back unchanged so it can be yielded inline,
 * logging it to the console first when `debug` is enabled.
 *
 * @param token - The token about to be yielded.
 * @returns The same token object.
 */
function emit(token: TOKEN): TOKEN {
  if (debug) {
    console.log("token", token);
  }
  return token;
}
46
-
47
/**
 * Looks for an escaped introducer spelling that starts at `index`.
 *
 * @param input - The text being tokenized.
 * @param index - Position of a backslash in `input`.
 * @returns The matching `[sequence, length]` entry from `INTRODUCER_LOOKUP`,
 *   or `undefined` when the backslash does not begin a complete introducer.
 */
function matchIntroducer(input: string, index: number): [string, number] | undefined {
  const candidates = INTRODUCER_LOOKUP.get(input[index + 1]);
  if (!candidates) return undefined;
  for (const candidate of candidates) {
    const [sequence, len] = candidate;
    if (index + len <= input.length && input.substring(index, index + len) === sequence) {
      return candidate;
    }
  }
  return undefined;
}

/**
 * Probes for a terminator spelling that starts at `index`, longest candidate first.
 *
 * @param input - The text being tokenized.
 * @param index - Position of a backslash in `input`.
 * @param terminators - Map whose keys are the accepted terminator spellings.
 * @param maxLength - Longest spelling length to try; shorter lengths are tried in steps of two.
 * @returns The matched spelling, or `undefined` when none starts at `index`.
 */
function matchTerminator(
  input: string,
  index: number,
  terminators: Map<string, number>,
  maxLength: number,
): string | undefined {
  for (let len = maxLength; len >= 2; len -= 2) {
    if (index + len > input.length) continue;
    const sequence = input.substring(index, index + len);
    if (terminators.has(sequence)) return sequence;
  }
  return undefined;
}

/**
 * Lazily tokenizes text in which control characters are written as escape
 * spellings (`\x1b`, `\u001b`, `\033`, `\e`, `\u009b`) rather than raw bytes.
 *
 * Token stream:
 * - `TEXT` for runs outside any sequence. A backslash that does not begin a
 *   complete introducer spelling (for example `\x41`) is ordinary text.
 * - `INTRODUCER` for a recognised introducer, with `code` naming the sequence kind.
 * - `DATA` for the bytes between introducer and terminator, when there are any.
 * - `FINAL` for the terminator (the CSI final byte, or an escaped ST / BEL spelling).
 *
 * A sequence that runs to the end of the input without a terminator yields
 * only its `INTRODUCER`; the scanned remainder is not emitted.
 *
 * @param input - Text containing escaped control sequences.
 * @returns A generator of tokens in input order; `pos` is the index of `raw` in `input`.
 */
export function* tokenizer(input: string): Generator<TOKEN> {
  let i = 0;
  let state: State = "GROUND";
  let currentCode: string | undefined;

  function setState(next: State, code?: string) {
    if (debug) console.log(`state ${state} → ${next}`);
    state = next;
    currentCode = code;
  }

  while (i < input.length) {
    if (state === "GROUND") {
      const textStart = i;
      let introducer: [string, number] | undefined;

      // Advance to the next backslash that really starts an introducer. Checking the
      // full spelling here (not just the character after the backslash) guarantees
      // progress: a near miss such as `\x41` is skipped as text instead of stalling.
      while (i < input.length) {
        const backslashIndex = input.indexOf(BACKSLASH, i);
        if (backslashIndex === -1) {
          i = input.length;
          break;
        }
        introducer = INTRODUCER_PEEK_AHEAD.has(input[backslashIndex + 1])
          ? matchIntroducer(input, backslashIndex)
          : undefined;
        if (introducer) {
          i = backslashIndex;
          break;
        }
        i = backslashIndex + 1;
      }

      if (i > textStart) {
        yield emit({ type: TOKEN_TYPES.TEXT, pos: textStart, raw: input.substring(textStart, i) });
      }

      if (introducer) {
        const [sequence, len] = introducer;

        if (sequence === CSI_ESCAPED) {
          // The escaped single-character CSI needs no opener after it.
          yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence, code: CSI });
          i += len;
          setState("SEQUENCE", CSI);
        } else {
          // An ESC spelling: the following character decides what kind of sequence opens.
          const nextChar = input[i + len];
          let code: string | undefined;
          if (nextChar === CSI_OPEN) {
            code = CSI;
          } else if (nextChar === OSC_OPEN) {
            code = OSC;
          } else if (STRING_OPENERS.has(nextChar)) {
            code = nextChar;
          } else if (nextChar && nextChar.charCodeAt(0) >= 0x20 && nextChar.charCodeAt(0) <= 0x2f) {
            // Characters 0x20–0x2f after ESC are scanned like a CSI body.
            code = CSI;
          }

          if (code !== undefined) {
            yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence + nextChar, code });
            i += len + 1;
            setState("SEQUENCE", code);
          } else {
            // Plain ESC: the next character, if any, is its final byte.
            yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: sequence, code: ESC });
            i += len;
            if (nextChar) {
              yield emit({ type: TOKEN_TYPES.FINAL, pos: i, raw: nextChar });
              i++;
            }
          }
        }
      }
    } else {
      const pos = i;
      const code = currentCode;
      let terminator = "";
      let terminatorPos = -1;

      while (!terminator && i < input.length) {
        if (code === CSI) {
          // CSI final bytes span 0x40–0x7e inclusive, so `~` (0x7e) ends a sequence too.
          const charCode = input.charCodeAt(i);
          if (charCode >= 0x40 && charCode <= 0x7e) terminator = input[i];
        } else if (code && input[i] === BACKSLASH) {
          // OSC additionally accepts the BEL spellings; those are tried before ST.
          const oscTerminator =
            code === OSC ? matchTerminator(input, i, OSC_ONLY_TERMINATORS, OSC_TERM_MAX_LENGTH) : undefined;
          terminator = oscTerminator ?? matchTerminator(input, i, STRING_TERMINATORS, ST_MAX_LENGTH) ?? "";
        }

        if (terminator) {
          terminatorPos = i;
          i += terminator.length;
        } else {
          i++;
        }
      }

      if (terminatorPos > pos) {
        yield emit({ type: TOKEN_TYPES.DATA, pos, raw: input.substring(pos, terminatorPos) });
      }

      if (terminator) {
        yield emit({ type: TOKEN_TYPES.FINAL, pos: terminatorPos, raw: terminator });
      }

      setState("GROUND");
    }
  }
}
188
-
189
/**
 * Eager counterpart of `tokenizer`: runs the generator to completion.
 *
 * @param input - Text containing escaped control sequences.
 * @returns Every token produced for `input`, in order.
 */
export function tokenize(input: string): TOKEN[] {
  const tokens = [...tokenizer(input)];
  return tokens;
}
@@ -1,118 +0,0 @@
1
- import assert from "node:assert/strict";
2
- import { test } from "node:test";
3
- import { tokenize } from "./tokenize.ts";
4
-
5
/** Shape of one expected token in the cases below. */
type ExpectedToken = { type: string; pos: number; raw: string; code?: string };

/**
 * Table of inputs containing raw (unescaped) control characters and the exact
 * token list `tokenize` is expected to return for each.
 */
const cases: { name: string; input: string; expected: ExpectedToken[] }[] = [
  {
    name: "unescaped (ESC)",
    input: "\x1b[2q",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x1b[", code: "\x9b" },
      { type: "DATA", pos: 2, raw: "2" },
      { type: "FINAL", pos: 3, raw: "q" },
    ],
  },
  {
    name: "unescaped (ESC/2)",
    input: "\u001b[2q",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\u001b[", code: "\x9b" },
      { type: "DATA", pos: 2, raw: "2" },
      { type: "FINAL", pos: 3, raw: "q" },
    ],
  },
  {
    name: "unescaped (CSI)",
    input: "\u009b32mGreen text\u009b0m.",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\u009b", code: "\x9b" },
      { type: "DATA", pos: 1, raw: "32" },
      { type: "FINAL", pos: 3, raw: "m" },
      { type: "TEXT", pos: 4, raw: "Green text" },
      { type: "INTRODUCER", pos: 14, raw: "\u009b", code: "\x9b" },
      { type: "DATA", pos: 15, raw: "0" },
      { type: "FINAL", pos: 16, raw: "m" },
      { type: "TEXT", pos: 17, raw: "." },
    ],
  },
  {
    name: "unescaped (BEL)",
    input: "\x1b]0;title\x07",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x1b]", code: "\x9d" },
      { type: "DATA", pos: 2, raw: "0;title" },
      { type: "FINAL", pos: 9, raw: "\x07" },
    ],
  },
  {
    name: "unescaped (ST - String Terminator)",
    input: "\x1b]0;title\x9c",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x1b]", code: "\x9d" },
      { type: "DATA", pos: 2, raw: "0;title" },
      { type: "FINAL", pos: 9, raw: "\x9c" },
    ],
  },
  {
    name: "unescaped (OSC - Operating System Command)",
    input: "\x9d0;title\x07",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x9d", code: "\x9d" },
      { type: "DATA", pos: 1, raw: "0;title" },
      { type: "FINAL", pos: 8, raw: "\x07" },
    ],
  },
  {
    name: "unescaped (DCS - Device Control String)",
    input: "\x900;1|data\x9c",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x90", code: "\x90" },
      { type: "DATA", pos: 1, raw: "0;1|data" },
      { type: "FINAL", pos: 9, raw: "\x9c" },
    ],
  },
  {
    name: "unescaped (APC - Application Program Command)",
    input: "\x9fapp data\x9c",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x9f", code: "\x9f" },
      { type: "DATA", pos: 1, raw: "app data" },
      { type: "FINAL", pos: 9, raw: "\x9c" },
    ],
  },
  {
    name: "unescaped (PM - Privacy Message)",
    input: "\x9eprivacy data\x9c",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x9e", code: "\x9e" },
      { type: "DATA", pos: 1, raw: "privacy data" },
      { type: "FINAL", pos: 13, raw: "\x9c" },
    ],
  },
  {
    name: "unescaped (SOS - Start of String)",
    input: "\x98string data\x9c",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x98", code: "\x98" },
      { type: "DATA", pos: 1, raw: "string data" },
      { type: "FINAL", pos: 12, raw: "\x9c" },
    ],
  },
  {
    name: "unescaped (ESC with backslash terminator)",
    input: "\x1b_payload\x1b\\",
    expected: [
      { type: "INTRODUCER", pos: 0, raw: "\x1b_", code: "_" },
      { type: "DATA", pos: 2, raw: "payload" },
      { type: "FINAL", pos: 9, raw: "\x1b\\" },
    ],
  },
];

// Register one test per table row, in table order.
for (const { name, input, expected } of cases) {
  test(name, () => {
    const tokens = tokenize(input);
    assert.deepEqual(tokens, expected);
  });
}
package/src/tokenize.ts DELETED
@@ -1,140 +0,0 @@
1
- import {
2
- APC,
3
- BACKSLASH,
4
- BELL,
5
- CSI,
6
- CSI_OPEN,
7
- DCS,
8
- ESC,
9
- OSC,
10
- OSC_OPEN,
11
- PM,
12
- SOS,
13
- ST,
14
- STRING_OPENERS,
15
- TOKEN_TYPES,
16
- } from "./constants.ts";
17
- import type { TOKEN } from "./types.ts";
18
-
19
/** Tokenizer modes: `GROUND` scans plain text, `SEQUENCE` scans the body of a control sequence. */
type State = "GROUND" | "SEQUENCE";

/** Set to `true` to have `emit` and `setState` log to the console. */
const debug = false;

/** Characters at which the plain-text scan stops because a sequence begins there. */
const INTRODUCERS = new Set([
  ESC,
  CSI,
  OSC,
  DCS,
  APC,
  PM,
  SOS,
]);
24
-
25
/**
 * Returns the given token as-is; when `debug` is on, prints it to the console beforehand.
 *
 * @param token - Token that is about to be yielded by the tokenizer.
 * @returns The very same token.
 */
function emit(token: TOKEN): TOKEN {
  if (debug) {
    console.log("token", token);
  }
  return token;
}
29
-
30
/**
 * Lazily tokenizes text that contains raw (unescaped) control characters.
 *
 * Token stream:
 * - `TEXT` for runs that contain none of the `INTRODUCERS` characters.
 * - `INTRODUCER` for a single-character introducer, or for ESC plus its opener
 *   (`CSI_OPEN`, `OSC_OPEN`, or a member of `STRING_OPENERS`); `code` names the sequence kind.
 * - `DATA` for the characters between introducer and terminator, when there are any.
 * - `FINAL` for the terminator: the CSI final byte, `ST`, ESC + backslash, or
 *   (for OSC only) `BELL`. For a plain ESC, the character right after it.
 *
 * A sequence that runs to the end of the input without a terminator yields
 * only its `INTRODUCER`; the scanned remainder is not emitted.
 *
 * @param input - Text containing raw control sequences.
 * @returns A generator of tokens in input order; `pos` is the index of `raw` in `input`.
 */
export function* tokenizer(input: string): Generator<TOKEN> {
  let i = 0;
  let state: State = "GROUND";
  let currentCode: string | undefined;

  function setState(next: State, code?: string) {
    if (debug) console.log(`state ${state} → ${next}`);
    state = next;
    currentCode = code;
  }

  while (i < input.length) {
    if (state === "GROUND") {
      const textStart = i;
      while (i < input.length && !INTRODUCERS.has(input[i])) {
        i++;
      }

      if (i > textStart) {
        yield emit({ type: TOKEN_TYPES.TEXT, pos: textStart, raw: input.substring(textStart, i) });
      }

      if (i < input.length) {
        const char = input[i];
        if (char === CSI || char === OSC || char === DCS || char === APC || char === PM || char === SOS) {
          // Single-character introducer: it is its own code.
          yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: char, code: char });
          i++;
          setState("SEQUENCE", char);
        } else if (char === ESC) {
          // ESC: the following character decides what kind of sequence opens.
          const next = input[i + 1];
          let code: string | undefined;
          if (next === CSI_OPEN) {
            code = CSI;
          } else if (next === OSC_OPEN) {
            code = OSC;
          } else if (STRING_OPENERS.has(next)) {
            code = next;
          }

          if (code !== undefined) {
            yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: char + next, code });
            i += 2;
            setState("SEQUENCE", code);
          } else {
            // Plain ESC: the next character, if any, is its final byte.
            yield emit({ type: TOKEN_TYPES.INTRODUCER, pos: i, raw: char, code: ESC });
            i++;
            if (next) {
              yield emit({ type: TOKEN_TYPES.FINAL, pos: i, raw: next });
              i++;
            }
          }
        }
      }
    } else {
      const pos = i;
      const code = currentCode;

      if (code === CSI) {
        while (i < input.length) {
          // CSI final bytes span 0x40–0x7e inclusive, so `~` (0x7e) ends a sequence too.
          const charCode = input.charCodeAt(i);
          if (charCode >= 0x40 && charCode <= 0x7e) {
            if (i > pos) yield emit({ type: TOKEN_TYPES.DATA, pos, raw: input.substring(pos, i) });
            yield emit({ type: TOKEN_TYPES.FINAL, pos: i, raw: input[i] });
            i++;
            break;
          }
          i++;
        }
      } else if (code) {
        while (i < input.length) {
          const char = input[i];
          let terminator: string | undefined;

          if (char === ST) {
            terminator = ST;
          } else if (char === BELL && code === OSC) {
            // BEL ends OSC sequences only.
            terminator = BELL;
          } else if (char === ESC && input[i + 1] === BACKSLASH) {
            terminator = ESC + BACKSLASH;
          }

          if (terminator) {
            if (i > pos) yield emit({ type: TOKEN_TYPES.DATA, pos, raw: input.substring(pos, i) });
            yield emit({ type: TOKEN_TYPES.FINAL, pos: i, raw: terminator });
            i += terminator.length;
            break;
          }

          i++;
        }
      }
      setState("GROUND");
    }
  }
}
137
-
138
/**
 * Collects everything `tokenizer` yields for `input` into an array.
 *
 * @param input - Text containing raw control sequences.
 * @returns All tokens for `input`, in order.
 */
export function tokenize(input: string): TOKEN[] {
  const tokens = [...tokenizer(input)];
  return tokens;
}
package/src/types.ts DELETED
@@ -1,24 +0,0 @@
1
- import type { TOKEN_TYPES } from "./constants.ts";
2
-
3
/** A lexical token. */
export type TOKEN = {
  /** Token kind: one of the keys of `TOKEN_TYPES`. */
  type: keyof typeof TOKEN_TYPES;
  /** Position associated with `raw`. */
  pos: number;
  /** The token's text. */
  raw: string;
  /** Optional code attached to the token. */
  code?: string;
};

/** The kinds a `CONTROL_CODE` can have. */
type ControlCodeKind = "CSI" | "DCS" | "DEC" | "ESC" | "OSC" | "SGR" | "STRING" | "PRIVATE";

/**
 * A control code.
 * NOTE(review): presumably built by the parser from a run of tokens — confirm against parse.ts.
 */
export type CONTROL_CODE = {
  /** Which family of control code this is. */
  type: ControlCodeKind;
  command: string;
  raw: string;
  params: string[];
  pos: number;
};

/** A run of plain text, tagged with the `"TEXT"` discriminant. */
export type CONTROL_CODE_TEXT = {
  type: "TEXT";
  raw: string;
  pos: number;
};

/** Either a control code or a plain-text run; discriminate on `type`. */
export type CODE = CONTROL_CODE | CONTROL_CODE_TEXT;
package/tsconfig.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "allowImportingTsExtensions": true,
4
- "esModuleInterop": true,
5
- "forceConsistentCasingInFileNames": true,
6
- "module": "nodenext",
7
- "moduleResolution": "nodenext",
8
- "noEmit": true,
9
- "outDir": "./dist",
10
- "skipLibCheck": true,
11
- "strict": true,
12
- "target": "esnext"
13
- },
14
- "include": ["src"],
15
- "exclude": ["node_modules", "dist"]
16
- }