@ansi-tools/parser 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,187 @@
1
+ # @ansi-tools/parser
2
+
3
+ Parser for ANSI escape sequences.
4
+
5
+ ## Supported sequence types
6
+
7
+ - **CSI** (Control Sequence Introducer): `\x1b[...`
8
+ - **OSC** (Operating System Command): `\x1b]...`
9
+ - **DCS** (Device Control String): `\x1bP...`
10
+ - **ESC** (Escape): `\x1b...`
11
+ - **DEC** (DEC Private Mode): `\x1b[?...`
12
+ - **STRING** (APC/PM/SOS): `\x1b_...`, `\x1b^...`, `\x1bX...`
13
+ - **PRIVATE** (Private sequences): `\x1b[<...`, `\x1b[=...`, `\x1b[>...`
14
+
15
+ ## Features
16
+
17
+ - ✅ Handles 7-bit (`\x1b` or `\u001b`) and 8-bit (`\u009b`) introducers
18
+ - ✅ Handles octal (`\033`) and shorthand `\e` introducers (escaped input only)
19
+ - ✅ Multiple string terminators (`\x1b\\`, `\x07`)
20
+ - ✅ Zero dependencies
21
+ - ✅ Separate optimized modules for raw and escaped input
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ npm install @ansi-tools/parser
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```ts
32
+ import { parse } from "@ansi-tools/parser";
33
+
34
+ const input = "\x1b[31mHello\x1b[0m World";
35
+
36
+ for (const code of parse(input)) {
37
+ console.log(code);
38
+ }
39
+ ```
40
+
41
+ There is a difference between escaped and unescaped input. Only with an escaped
42
+ input string can the raw input and the positions be preserved in the tokens and
43
+ control codes. See the examples below for the default and the `/escaped` import.
44
+
45
+ The default (unescaped) tokenization is roughly 30% faster. Use this default
46
+ if you just need the control codes.
47
+
48
+ ## Examples
49
+
50
+ ### Default (raw/unescaped)
51
+
52
+ ```ts
53
+ import { parse } from "@ansi-tools/parser";
54
+
55
+ parse(`\x1b[31mHello\x1b[0m`);
56
+
57
+ // result:
58
+ [
59
+ {
60
+ type: "CSI",
61
+ pos: 0,
62
+ raw: "\u001b[31m",
63
+ command: "m",
64
+ params: ["31"],
65
+ },
66
+ {
67
+ type: "TEXT",
68
+ pos: 5,
69
+ raw: "Hello",
70
+ },
71
+ {
72
+ type: "CSI",
73
+ pos: 10,
74
+ raw: "\u001b[0m",
75
+ command: "m",
76
+ params: ["0"],
77
+ },
78
+ ];
79
+ ```
80
+
81
+ ### Escaped
82
+
83
+ ```ts
84
+ import { parse } from "@ansi-tools/parser/escaped";
85
+
86
+ parse(String.raw`\x1b[31mHello\x1b[0m`);
87
+
88
+ // result:
89
+ [
90
+ {
91
+ type: "CSI",
92
+ pos: 0,
93
+ raw: "\\x1b[31m",
94
+ command: "m",
95
+ params: ["31"],
96
+ },
97
+ {
98
+ type: "TEXT",
99
+ pos: 8,
100
+ raw: "Hello",
101
+ },
102
+ {
103
+ type: "CSI",
104
+ pos: 13,
105
+ raw: "\\x1b[0m",
106
+ command: "m",
107
+ params: ["0"],
108
+ },
109
+ ];
110
+ ```
111
+
112
+ ## Tokenizer & generators
113
+
114
+ The tokenizer and generators are also available, for both the default and the
115
+ `/escaped` versions.
116
+
117
+ ### tokenize
118
+
119
+ ```ts
120
+ import { tokenize } from "@ansi-tools/parser";
121
+
122
+ const input = "\x1b[31m";
123
+
124
+ for (const token of tokenize(input)) {
125
+ console.log(token);
126
+ }
127
+ ```
128
+
129
+ ### Generators
130
+
131
+ ```ts
132
+ import { tokenizer, parser } from "@ansi-tools/parser";
133
+
134
+ const input = "\x1b[31mHello\x1b[0m";
135
+
136
+ const tokens = tokenizer(input);
137
+
138
+ const codes = parser(tokens);
139
+
140
+ for (const code of codes) {
141
+ console.log(code);
142
+ }
143
+ ```
144
+
145
+ ## Type Definitions
146
+
147
+ ```ts
148
+ function parse(input: string): CODE[];
149
+ function tokenize(input: string): TOKEN[];
150
+ function* parser(tokens: Generator<TOKEN>): Generator<CODE>;
151
+ function* tokenizer(input: string): Generator<TOKEN>;
152
+ ```
153
+
154
+ ### CODE
155
+
156
+ ```ts
157
+ type CONTROL_CODE = {
158
+ type: "CSI" | "DCS" | "DEC" | "ESC" | "OSC" | "SGR" | "STRING" | "PRIVATE";
159
+ command: string;
160
+ raw: string;
161
+ params: string[];
162
+ pos: number;
163
+ };
164
+
165
+ type CONTROL_CODE_TEXT = {
166
+ type: "TEXT";
167
+ raw: string;
168
+ pos: number;
169
+ };
170
+
171
+ type CODE = CONTROL_CODE | CONTROL_CODE_TEXT;
172
+ ```
173
+
174
+ ### TOKEN
175
+
176
+ ```ts
177
+ type TOKEN = {
178
+ type: "INTRODUCER" | "DATA" | "FINAL" | "TEXT";
179
+ pos: number;
180
+ raw: string;
181
+ code?: string;
182
+ };
183
+ ```
184
+
185
+ ## License
186
+
187
+ ISC
@@ -0,0 +1,7 @@
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser } from "./parse-BirjVUvQ.js";
2
+
3
+ //#region src/tokenize.escaped.d.ts
4
/**
 * Escaped-input tokenizer (declared for `src/tokenize.escaped.ts`).
 * Returns a generator, so `TOKEN` objects are produced lazily as the caller iterates.
 */
+ declare function tokenizer(input: string): Generator<TOKEN>;
5
/**
 * Array-returning counterpart of `tokenizer`: the implementation collects the
 * generator's output with `Array.from`.
 */
+ declare function tokenize(input: string): TOKEN[];
6
+ //#endregion
7
+ export { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
@@ -0,0 +1,203 @@
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser } from "./parse-ClmKWMZx.js";
2
+
3
+ //#region src/tokenize.escaped.ts
4
// Escaped (literal backslash) spelling of the 8-bit CSI introducer. Unlike the
// other introducers it needs no following opener character.
const CSI_ESCAPED = "\\u009b";

// Every escaped introducer spelling the tokenizer recognizes, paired with its
// length in characters of the INPUT string (not of this source file).
const INTRODUCERS = [
  ["\\u001b", 6],
  [CSI_ESCAPED, 6],
  ["\\x1b", 4],
  ["\\033", 4],
  ["\\e", 2]
];

// Introducers grouped by the character that follows the backslash, so only the
// spellings that can possibly match at a given position are compared.
const INTRODUCER_LOOKUP = /* @__PURE__ */ new Map();
for (const [sequence, len] of INTRODUCERS) {
  const secondChar = sequence[1];
  const bucket = INTRODUCER_LOOKUP.get(secondChar);
  if (bucket) bucket.push([sequence, len]);
  else INTRODUCER_LOOKUP.set(secondChar, [[sequence, len]]);
}

// Escaped string terminators, mapped to their length in input characters.
// The tokenizer probes candidate lengths from the maximum downwards in steps
// of two, so every length listed here must be even.
// `\x1b\\` is 6 characters long; it was previously recorded as 8, which only
// inflated ST_MAX_LENGTH and caused a probe that could never match.
const STRING_TERMINATORS = new Map([
  ["\\x9c", 4],
  ["\\e\\\\", 4],
  ["\\x1b\\\\", 6]
]);

// Escaped BEL spellings; the tokenizer accepts these as terminators for OSC only.
const OSC_ONLY_TERMINATORS = new Map([
  ["\\a", 2],
  ["\\x07", 4],
  ["\\u0007", 6]
]);

// Longest terminator of each family: the starting length for the probe loops.
const ST_MAX_LENGTH = Math.max(...STRING_TERMINATORS.values());
const OSC_TERM_MAX_LENGTH = Math.max(...OSC_ONLY_TERMINATORS.values());

// Characters that may follow a backslash at the start of an introducer; these
// are exactly the keys of INTRODUCER_LOOKUP.
const INTRODUCER_PEEK_AHEAD = new Set(INTRODUCER_LOOKUP.keys());
31
/**
 * Identity hook: the tokenizer wraps every token it yields in a call to this
 * function, which hands the token back unchanged.
 */
function emit(token) {
  const passthrough = token;
  return passthrough;
}
34
+ function* tokenizer(input) {
35
+ let i = 0;
36
+ let state = "GROUND";
37
+ let currentCode;
38
+ function setState(next, code) {
39
+ state = next;
40
+ currentCode = code;
41
+ }
42
+ while (i < input.length) if (state === "GROUND") {
43
+ const textStart = i;
44
+ while (i < input.length) {
45
+ const backslashIndex = input.indexOf(BACKSLASH, i);
46
+ if (backslashIndex === -1) {
47
+ i = input.length;
48
+ break;
49
+ }
50
+ const nextChar = input[backslashIndex + 1];
51
+ if (nextChar && INTRODUCER_PEEK_AHEAD.has(nextChar)) {
52
+ i = backslashIndex;
53
+ break;
54
+ } else i = backslashIndex + 1;
55
+ }
56
+ if (i > textStart) yield emit({
57
+ type: TOKEN_TYPES.TEXT,
58
+ pos: textStart,
59
+ raw: input.substring(textStart, i)
60
+ });
61
+ if (i < input.length) {
62
+ const candidates = INTRODUCER_LOOKUP.get(input[i + 1]);
63
+ if (candidates) {
64
+ for (const [sequence, len] of candidates) if (i + len <= input.length && input.substring(i, i + len) === sequence) {
65
+ if (sequence === CSI_ESCAPED) {
66
+ yield emit({
67
+ type: TOKEN_TYPES.INTRODUCER,
68
+ pos: i,
69
+ raw: sequence,
70
+ code: CSI
71
+ });
72
+ i += len;
73
+ setState("SEQUENCE", CSI);
74
+ } else {
75
+ const nextChar = input[i + len];
76
+ if (nextChar === CSI_OPEN) {
77
+ yield emit({
78
+ type: TOKEN_TYPES.INTRODUCER,
79
+ pos: i,
80
+ raw: sequence + nextChar,
81
+ code: CSI
82
+ });
83
+ i += len + 1;
84
+ setState("SEQUENCE", CSI);
85
+ } else if (nextChar === OSC_OPEN) {
86
+ yield emit({
87
+ type: TOKEN_TYPES.INTRODUCER,
88
+ pos: i,
89
+ raw: sequence + nextChar,
90
+ code: OSC
91
+ });
92
+ i += len + 1;
93
+ setState("SEQUENCE", OSC);
94
+ } else if (STRING_OPENERS.has(nextChar)) {
95
+ yield emit({
96
+ type: TOKEN_TYPES.INTRODUCER,
97
+ pos: i,
98
+ raw: sequence + nextChar,
99
+ code: nextChar
100
+ });
101
+ i += len + 1;
102
+ setState("SEQUENCE", nextChar);
103
+ } else if (nextChar && nextChar.charCodeAt(0) >= 32 && nextChar.charCodeAt(0) <= 47) {
104
+ yield emit({
105
+ type: TOKEN_TYPES.INTRODUCER,
106
+ pos: i,
107
+ raw: sequence + nextChar,
108
+ code: CSI
109
+ });
110
+ i += len + 1;
111
+ setState("SEQUENCE", CSI);
112
+ } else if (nextChar) {
113
+ yield emit({
114
+ type: TOKEN_TYPES.INTRODUCER,
115
+ pos: i,
116
+ raw: sequence,
117
+ code: ESC
118
+ });
119
+ i += len;
120
+ yield emit({
121
+ type: TOKEN_TYPES.FINAL,
122
+ pos: i,
123
+ raw: nextChar
124
+ });
125
+ i++;
126
+ } else {
127
+ yield emit({
128
+ type: TOKEN_TYPES.INTRODUCER,
129
+ pos: i,
130
+ raw: sequence,
131
+ code: ESC
132
+ });
133
+ i += len;
134
+ }
135
+ }
136
+ break;
137
+ }
138
+ }
139
+ }
140
+ } else {
141
+ let terminator = "";
142
+ let terminatorPos = -1;
143
+ const pos = i;
144
+ const code = currentCode;
145
+ while (!terminator && i < input.length) {
146
+ const char = input[i];
147
+ if (code === CSI) {
148
+ const charCode = input.charCodeAt(i);
149
+ if (charCode >= 64 && charCode < 126) {
150
+ terminator = char;
151
+ terminatorPos = i;
152
+ i++;
153
+ }
154
+ } else if (code) {
155
+ if (char === BACKSLASH) {
156
+ if (code === OSC) {
157
+ for (let len = OSC_TERM_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
158
+ const sequence = input.substring(i, i + len);
159
+ if (OSC_ONLY_TERMINATORS.has(sequence)) {
160
+ terminator = sequence;
161
+ terminatorPos = i;
162
+ i += len;
163
+ break;
164
+ }
165
+ }
166
+ }
167
+ if (!terminator) {
168
+ for (let len = ST_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
169
+ const sequence = input.substring(i, i + len);
170
+ if (STRING_TERMINATORS.has(sequence)) {
171
+ terminator = sequence;
172
+ terminatorPos = i;
173
+ i += len;
174
+ break;
175
+ }
176
+ }
177
+ }
178
+ }
179
+ }
180
+ if (!terminator) i++;
181
+ }
182
+ if (terminatorPos > pos) {
183
+ const data = input.substring(pos, terminatorPos);
184
+ yield emit({
185
+ type: TOKEN_TYPES.DATA,
186
+ pos,
187
+ raw: data
188
+ });
189
+ }
190
+ if (terminator) yield emit({
191
+ type: TOKEN_TYPES.FINAL,
192
+ pos: terminatorPos,
193
+ raw: terminator
194
+ });
195
+ setState("GROUND");
196
+ }
197
+ }
198
+ function tokenize(input) {
199
+ return Array.from(tokenizer(input));
200
+ }
201
+
202
+ //#endregion
203
+ export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
@@ -0,0 +1,7 @@
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser } from "./parse-BirjVUvQ.js";
2
+
3
+ //#region src/tokenize.d.ts
4
/**
 * Tokenizer declared for `src/tokenize.ts` (the default, non-escaped entry point).
 * Returns a generator of `TOKEN` objects for `input`; the implementation is not
 * part of this declaration file.
 */
+ declare function tokenizer(input: string): Generator<TOKEN>;
5
/**
 * Returns the tokens for `input` as an array.
 * NOTE(review): presumably collects the output of `tokenizer`, as the escaped
 * build does — confirm against the implementation.
 */
+ declare function tokenize(input: string): TOKEN[];
6
+ //#endregion
7
+ export { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
package/dist/index.js ADDED
@@ -0,0 +1,3 @@
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-ClmKWMZx.js";
2
+
3
+ export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
@@ -0,0 +1,64 @@
1
+ //#region src/constants.d.ts
2
// Control-character constants re-exported by the package entry points.
// Only their type (`string`) is declared here; the concrete values live in the
// implementation module and are not visible in this declaration file.
// The escaped tokenizer uses `CSI`, `OSC` and `ESC` as the `code` tag on
// INTRODUCER tokens and searches for `BACKSLASH` when scanning text.
+ declare const BELL: string;
3
+ declare const ESC: string;
4
+ declare const BACKSLASH: string;
5
+ declare const DCS: string;
6
+ declare const SOS: string;
7
+ declare const CSI: string;
8
+ declare const ST: string;
9
+ declare const OSC: string;
10
+ declare const PM: string;
11
+ declare const APC: string;
12
// Opener characters: the character that follows the escape introducer and
// selects the kind of sequence. The escaped tokenizer compares the character
// after the introducer against `CSI_OPEN` and `OSC_OPEN` and tests membership
// in `STRING_OPENERS`.
// NOTE(review): ECMA-48 assigns ESC X to SOS and ESC ^ to PM, so
// `SOS_OPEN = "^"` and `PM_OPEN = "X"` look swapped relative to the standard —
// confirm against the implementation before relying on these two names.
// The contents of `PRIVATE_OPENERS` and `STRING_OPENERS` are not visible here;
// presumably they hold the private (`<`, `=`, `>`) and string openers listed in
// the README — verify.
+ declare const CSI_OPEN = "[";
13
+ declare const OSC_OPEN = "]";
14
+ declare const DEC_OPEN = "?";
15
+ declare const PRIVATE_OPENERS: Set<string>;
16
+ declare const DCS_OPEN = "P";
17
+ declare const APC_OPEN = "_";
18
+ declare const SOS_OPEN = "^";
19
+ declare const PM_OPEN = "X";
20
+ declare const STRING_OPENERS: Set<string>;
21
/**
 * Names of the token kinds produced by the tokenizers. Each key maps to the
 * identical string literal; `TOKEN["type"]` is derived from these keys.
 */
+ declare const TOKEN_TYPES: {
22
+ readonly TEXT: "TEXT";
23
+ readonly INTRODUCER: "INTRODUCER";
24
+ readonly DATA: "DATA";
25
+ readonly FINAL: "FINAL";
26
+ };
27
/**
 * Names of the code kinds a parsed result can carry. Each key maps to the
 * identical string literal. `TEXT` is the `type` of `CONTROL_CODE_TEXT`; the
 * remaining keys are the values allowed for `CONTROL_CODE["type"]`.
 */
+ declare const CODE_TYPES: {
28
+ readonly CSI: "CSI";
29
+ readonly DCS: "DCS";
30
+ readonly DEC: "DEC";
31
+ readonly ESC: "ESC";
32
+ readonly OSC: "OSC";
33
+ readonly PRIVATE: "PRIVATE";
34
+ readonly SGR: "SGR";
35
+ readonly STRING: "STRING";
36
+ readonly TEXT: "TEXT";
37
+ };
38
+ //#endregion
39
+ //#region src/types.d.ts
40
/**
 * A single lexical token.
 * - `type`: one of the `TOKEN_TYPES` keys.
 * - `pos`:  index in the input string at which `raw` starts.
 * - `raw`:  the exact slice of the input covered by the token.
 * - `code`: optional; the escaped tokenizer sets it on INTRODUCER tokens only.
 */
+ type TOKEN = {
41
+ type: keyof typeof TOKEN_TYPES;
42
+ pos: number;
43
+ raw: string;
44
+ code?: string;
45
+ };
46
/**
 * A parsed control sequence. In the README example, `\x1b[31m` parses to
 * `{ type: "CSI", pos: 0, raw: "\u001b[31m", command: "m", params: ["31"] }`:
 * `command` is the final character, `params` holds the parameter strings,
 * `raw` is the full source text of the sequence and `pos` its start index.
 */
+ type CONTROL_CODE = {
47
+ type: "CSI" | "DCS" | "DEC" | "ESC" | "OSC" | "SGR" | "STRING" | "PRIVATE";
48
+ command: string;
49
+ raw: string;
50
+ params: string[];
51
+ pos: number;
52
+ };
53
/**
 * A run of plain text between control sequences: `raw` is the text and `pos`
 * the index in the input where it starts.
 */
+ type CONTROL_CODE_TEXT = {
54
+ type: "TEXT";
55
+ raw: string;
56
+ pos: number;
57
+ };
58
/** Any parser result; discriminate on `type` (`"TEXT"` versus the control-code kinds). */
+ type CODE = CONTROL_CODE | CONTROL_CODE_TEXT;
59
+ //#endregion
60
+ //#region src/parse.d.ts
61
/**
 * Consumes a generator of `TOKEN`s and returns a generator of `CODE`s.
 * The README feeds it the result of `tokenizer(input)`.
 */
+ declare function parser(tokens: Generator<TOKEN>): Generator<CODE>;
62
/**
 * Parses `input` and returns the resulting `CODE`s as an array.
 * NOTE(review): presumably tokenizes and then runs `parser` — the
 * implementation is not part of this declaration file; confirm.
 */
+ declare function parse(input: string): CODE[];
63
+ //#endregion
64
+ export { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser };