@ansi-tools/parser 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,18 +2,9 @@
2
2
 
3
3
  Parser for ANSI escape sequences.
4
4
 
5
- ## Supported sequence types
6
-
7
- - **CSI** (Control Sequence Introducer): `\x1b[...`
8
- - **OSC** (Operating System Command): `\x1b]...`
9
- - **DCS** (Device Control String): `\x1bP...`
10
- - **ESC** (Escape): `\x1b...`
11
- - **DEC** (DEC Private Mode): `\x1b[?...`
12
- - **STRING** (APC/PM/SOS): `\x1b_...`, `\x1b^...`, `\x1bX...`
13
- - **PRIVATE** (Private sequences): `\x1b[<...`, `\x1b[=...`, `\x1b[>...`
14
-
15
5
  ## Features
16
6
 
7
+ - ✅ Supports CSI, OSC, DCS, ESC, APC, SOS, PM, etc.
17
8
  - ✅ Handles 7-bit (`\x1b` or `\u001b`) and 8-bit (`\u009b`) introducers
18
9
  - ✅ Handles octal (`\033`) and shorthand `\e` introducers (only escaped)
19
10
  - ✅ Multiple string terminators (`\x1b\\`, `\x07`)
@@ -44,8 +35,8 @@ There is a difference between escaped and unescaped input. Only with an escaped
44
35
  input string the raw input and the positions can be preserved in the tokens and
45
36
  control codes. See the example below for the default and the `/escaped` import.
46
37
 
47
- The default and unescaped tokenization is roughly ~30% faster. Use this default
48
- if you just need the control codes.
38
+ The default and unescaped tokenization is roughly 30% faster. Use this if you
39
+ just need the control codes.
49
40
 
50
41
  ## Examples
51
42
 
package/dist/escaped.js CHANGED
@@ -1,7 +1,8 @@
1
- import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parser } from "./parse-DX-Po36R.js";
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parser } from "./parse-5y5izuPn.js";
2
2
 
3
3
  //#region src/tokenize.escaped.ts
4
4
  const CSI_ESCAPED = "\\u009b";
5
+ const ABANDONED = "ABANDONED";
5
6
  const INTRODUCERS = [
6
7
  ["\\u001b", 6],
7
8
  [CSI_ESCAPED, 6],
@@ -102,26 +103,28 @@ function* tokenizer(input) {
102
103
  });
103
104
  i += len + 1;
104
105
  setState("SEQUENCE", next);
105
- } else if (next && next.charCodeAt(0) >= 32 && next.charCodeAt(0) <= 47) {
106
- yield emit({
107
- type: TOKEN_TYPES.INTRODUCER,
108
- pos: i,
109
- raw: seq + next,
110
- code: ESC,
111
- intermediate: next
112
- });
113
- i += len + 1;
114
- setState("SEQUENCE", ESC);
115
106
  } else if (next) {
116
- yield emit({
117
- type: TOKEN_TYPES.INTRODUCER,
118
- pos: i,
119
- raw: seq,
120
- code: ESC
121
- });
122
- i += len;
123
- setState("SEQUENCE", ESC);
124
- }
107
+ let j = i + len;
108
+ while (j < input.length && input.charCodeAt(j) >= 32 && input.charCodeAt(j) <= 47) j++;
109
+ if (j < input.length) {
110
+ const is = input.slice(i + len, j);
111
+ if (is) yield emit({
112
+ type: TOKEN_TYPES.INTRODUCER,
113
+ pos: i,
114
+ raw: seq + is,
115
+ code: ESC,
116
+ intermediate: is
117
+ });
118
+ else yield emit({
119
+ type: TOKEN_TYPES.INTRODUCER,
120
+ pos: i,
121
+ raw: seq,
122
+ code: ESC
123
+ });
124
+ i = j;
125
+ setState("SEQUENCE", ESC);
126
+ } else i = j;
127
+ } else i += len;
125
128
  }
126
129
  break;
127
130
  }
@@ -137,7 +140,7 @@ function* tokenizer(input) {
137
140
  const char = input[i];
138
141
  if (code === CSI) {
139
142
  const charCode = input.charCodeAt(i);
140
- if (charCode >= 64 && charCode < 126) {
143
+ if (charCode >= 64 && charCode <= 126) {
141
144
  terminator = char;
142
145
  terminatorPos = i;
143
146
  i++;
@@ -172,6 +175,28 @@ function* tokenizer(input) {
172
175
  }
173
176
  }
174
177
  }
178
+ if (!terminator && char === BACKSLASH) {
179
+ const nextChar = input[i + 1];
180
+ if (nextChar) {
181
+ const candidates = INTRODUCER_LOOKUP.get(nextChar);
182
+ if (candidates) {
183
+ for (const [seq, len] of candidates) {
184
+ if (i + len > input.length) continue;
185
+ let matches = true;
186
+ for (let j = 0; j < len; j++) if (input[i + j] !== seq[j]) {
187
+ matches = false;
188
+ break;
189
+ }
190
+ if (matches) {
191
+ terminator = ABANDONED;
192
+ terminatorPos = i;
193
+ break;
194
+ }
195
+ }
196
+ if (terminator === ABANDONED) break;
197
+ }
198
+ }
199
+ }
175
200
  if (!terminator) i++;
176
201
  }
177
202
  if (terminatorPos > pos) {
@@ -182,7 +207,7 @@ function* tokenizer(input) {
182
207
  raw: data
183
208
  });
184
209
  }
185
- if (terminator) yield emit({
210
+ if (terminator && terminator !== ABANDONED) yield emit({
186
211
  type: TOKEN_TYPES.FINAL,
187
212
  pos: terminatorPos,
188
213
  raw: terminator
package/dist/index.js CHANGED
@@ -1,3 +1,3 @@
1
- import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-DX-Po36R.js";
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-5y5izuPn.js";
2
2
 
3
3
  export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
@@ -350,26 +350,28 @@ function* tokenizer(input) {
350
350
  });
351
351
  i += 2;
352
352
  setState("SEQUENCE", next);
353
- } else if (next && next.charCodeAt(0) >= 32 && next.charCodeAt(0) <= 47) {
354
- yield emit$1({
355
- type: TOKEN_TYPES.INTRODUCER,
356
- pos: i,
357
- raw: char + next,
358
- code: ESC,
359
- intermediate: next
360
- });
361
- i += 2;
362
- setState("SEQUENCE", ESC);
363
353
  } else if (next) {
364
- yield emit$1({
365
- type: TOKEN_TYPES.INTRODUCER,
366
- pos: i,
367
- raw: char,
368
- code: ESC
369
- });
370
- i++;
371
- setState("SEQUENCE", ESC);
372
- }
354
+ let j = i + 1;
355
+ while (j < input.length && input.charCodeAt(j) >= 32 && input.charCodeAt(j) <= 47) j++;
356
+ if (j < input.length) {
357
+ const is = input.slice(i + 1, j);
358
+ if (is) yield emit$1({
359
+ type: TOKEN_TYPES.INTRODUCER,
360
+ pos: i,
361
+ raw: char + is,
362
+ code: ESC,
363
+ intermediate: is
364
+ });
365
+ else yield emit$1({
366
+ type: TOKEN_TYPES.INTRODUCER,
367
+ pos: i,
368
+ raw: char,
369
+ code: ESC
370
+ });
371
+ i = j;
372
+ setState("SEQUENCE", ESC);
373
+ } else i = j;
374
+ } else i++;
373
375
  }
374
376
  }
375
377
  } else if (state === "SEQUENCE") {
@@ -379,7 +381,7 @@ function* tokenizer(input) {
379
381
  if (code === CSI) while (i < input.length) {
380
382
  const char = input[i];
381
383
  const charCode = char.charCodeAt(0);
382
- if (charCode >= 64 && charCode < 126) {
384
+ if (charCode >= 64 && charCode <= 126) {
383
385
  if (data) yield emit$1({
384
386
  type: TOKEN_TYPES.DATA,
385
387
  pos,
@@ -426,6 +428,14 @@ function* tokenizer(input) {
426
428
  i += terminator.length;
427
429
  break;
428
430
  }
431
+ if (char === ESC) {
432
+ if (data) yield emit$1({
433
+ type: TOKEN_TYPES.DATA,
434
+ pos,
435
+ raw: data
436
+ });
437
+ break;
438
+ }
429
439
  data += char;
430
440
  i++;
431
441
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ansi-tools/parser",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "Tokenize and parse strings containing ANSI escape sequences and control codes",
5
5
  "main": "./dist/index.js",
6
6
  "type": "module",
@@ -22,6 +22,7 @@
22
22
  "scripts": {
23
23
  "prebuild": "pnpm type-check && pnpm test",
24
24
  "build": "tsdown --dts src/index.ts src/escaped.ts",
25
+ "dev": "tsdown --dts src/index.ts src/escaped.ts --watch",
25
26
  "test": "node --test",
26
27
  "type-check": "tsc",
27
28
  "prepack": "pnpm build"