@ansi-tools/parser 1.0.4 → 1.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/escaped.js CHANGED
@@ -1,4 +1,4 @@
1
- import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parser } from "./parse-BrF7Yirl.js";
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parser } from "./parse-Dtk-XHF4.js";
2
2
 
3
3
  //#region src/tokenize.escaped.ts
4
4
  const CSI_ESCAPED = "\\u009b";
@@ -11,28 +11,19 @@ const INTRODUCERS = [
11
11
  ["\\e", 2]
12
12
  ];
13
13
  const INTRODUCER_LOOKUP = /* @__PURE__ */ new Map();
14
+ const INTRODUCER_FIRST_CHAR_CACHE = /* @__PURE__ */ new Map();
14
15
  for (const [sequence, len] of INTRODUCERS) {
15
16
  const secondChar = sequence[1];
16
17
  if (!INTRODUCER_LOOKUP.has(secondChar)) INTRODUCER_LOOKUP.set(secondChar, []);
17
18
  INTRODUCER_LOOKUP.get(secondChar)?.push([sequence, len]);
19
+ INTRODUCER_FIRST_CHAR_CACHE.set(sequence, true);
18
20
  }
19
- const STRING_TERMINATORS = new Map([
20
- ["\\x9c", 4],
21
- ["\\e\\\\", 4],
22
- ["\\x1b\\\\", 8]
23
- ]);
24
- const OSC_ONLY_TERMINATORS = new Map([
25
- ["\\a", 2],
26
- ["\\x07", 4],
27
- ["\\u0007", 6]
28
- ]);
29
- const ST_MAX_LENGTH = Math.max(...STRING_TERMINATORS.values());
30
- const OSC_TERM_MAX_LENGTH = Math.max(...OSC_ONLY_TERMINATORS.values());
31
21
  const INTRODUCER_PEEK_AHEAD = new Set(INTRODUCERS.map((entry) => entry[0][1]));
32
22
  function emit(token) {
33
23
  return token;
34
24
  }
35
25
  function* tokenizer(input) {
26
+ const l = input.length;
36
27
  let i = 0;
37
28
  let state = "GROUND";
38
29
  let currentCode;
@@ -40,12 +31,12 @@ function* tokenizer(input) {
40
31
  state = next;
41
32
  currentCode = code;
42
33
  }
43
- while (i < input.length) if (state === "GROUND") {
34
+ while (i < l) if (state === "GROUND") {
44
35
  const textStart = i;
45
- while (i < input.length) {
36
+ while (i < l) {
46
37
  const backslashIndex = input.indexOf(BACKSLASH, i);
47
38
  if (backslashIndex === -1) {
48
- i = input.length;
39
+ i = l;
49
40
  break;
50
41
  }
51
42
  const nextChar = input[backslashIndex + 1];
@@ -59,74 +50,82 @@ function* tokenizer(input) {
59
50
  pos: textStart,
60
51
  raw: input.substring(textStart, i)
61
52
  });
62
- if (i < input.length) {
53
+ if (i < l) {
63
54
  const candidates = INTRODUCER_LOOKUP.get(input[i + 1]);
64
55
  if (candidates) {
65
56
  let matched = false;
66
- for (const [seq, len] of candidates) if (i + len <= input.length && input.substring(i, i + len) === seq) {
67
- matched = true;
68
- if (seq === CSI_ESCAPED) {
69
- yield emit({
70
- type: TOKEN_TYPES.INTRODUCER,
71
- pos: i,
72
- raw: seq,
73
- code: CSI
74
- });
75
- i += len;
76
- setState("SEQUENCE", CSI);
77
- } else {
78
- const next = input[i + len];
79
- if (next === CSI_OPEN) {
57
+ for (const [seq, len] of candidates) {
58
+ if (i + len > l) continue;
59
+ let seqMatched = true;
60
+ for (let k = 0; k < len && seqMatched; k += 2) {
61
+ seqMatched = input[i + k] === seq[k];
62
+ if (seqMatched && k + 1 < len) seqMatched = input[i + k + 1] === seq[k + 1];
63
+ }
64
+ if (seqMatched) {
65
+ matched = true;
66
+ if (seq === CSI_ESCAPED) {
80
67
  yield emit({
81
68
  type: TOKEN_TYPES.INTRODUCER,
82
69
  pos: i,
83
- raw: seq + next,
70
+ raw: seq,
84
71
  code: CSI
85
72
  });
86
- i += len + 1;
73
+ i += len;
87
74
  setState("SEQUENCE", CSI);
88
- } else if (next === OSC_OPEN) {
89
- yield emit({
90
- type: TOKEN_TYPES.INTRODUCER,
91
- pos: i,
92
- raw: seq + next,
93
- code: OSC
94
- });
95
- i += len + 1;
96
- setState("SEQUENCE", OSC);
97
- } else if (STRING_OPENERS.has(next)) {
98
- yield emit({
99
- type: TOKEN_TYPES.INTRODUCER,
100
- pos: i,
101
- raw: seq + next,
102
- code: next
103
- });
104
- i += len + 1;
105
- setState("SEQUENCE", next);
106
- } else if (next) {
107
- let j = i + len;
108
- while (j < input.length && input.charCodeAt(j) >= 32 && input.charCodeAt(j) <= 47) j++;
109
- if (j < input.length) {
110
- const is = input.slice(i + len, j);
111
- if (is) yield emit({
75
+ } else {
76
+ const next = input[i + len];
77
+ if (next === CSI_OPEN) {
78
+ yield emit({
79
+ type: TOKEN_TYPES.INTRODUCER,
80
+ pos: i,
81
+ raw: seq + next,
82
+ code: CSI
83
+ });
84
+ i += len + 1;
85
+ setState("SEQUENCE", CSI);
86
+ } else if (next === OSC_OPEN) {
87
+ yield emit({
112
88
  type: TOKEN_TYPES.INTRODUCER,
113
89
  pos: i,
114
- raw: seq + is,
115
- code: ESC,
116
- intermediate: is
90
+ raw: seq + next,
91
+ code: OSC
117
92
  });
118
- else yield emit({
93
+ i += len + 1;
94
+ setState("SEQUENCE", OSC);
95
+ } else if (STRING_OPENERS.has(next)) {
96
+ yield emit({
119
97
  type: TOKEN_TYPES.INTRODUCER,
120
98
  pos: i,
121
- raw: seq,
122
- code: ESC
99
+ raw: seq + next,
100
+ code: next
123
101
  });
124
- i = j;
125
- setState("SEQUENCE", ESC);
126
- } else i = j;
127
- } else i += len;
102
+ i += len + 1;
103
+ setState("SEQUENCE", next);
104
+ } else if (next) {
105
+ let j = i + len;
106
+ while (j < l && input.charCodeAt(j) >= 32 && input.charCodeAt(j) <= 47) j++;
107
+ if (j < l) {
108
+ const is = input.slice(i + len, j);
109
+ if (is) yield emit({
110
+ type: TOKEN_TYPES.INTRODUCER,
111
+ pos: i,
112
+ raw: seq + is,
113
+ code: ESC,
114
+ intermediate: is
115
+ });
116
+ else yield emit({
117
+ type: TOKEN_TYPES.INTRODUCER,
118
+ pos: i,
119
+ raw: seq,
120
+ code: ESC
121
+ });
122
+ i = j;
123
+ setState("SEQUENCE", ESC);
124
+ } else i = j;
125
+ } else i += len;
126
+ }
127
+ break;
128
128
  }
129
- break;
130
129
  }
131
130
  if (!matched) i++;
132
131
  } else i++;
@@ -136,66 +135,79 @@ function* tokenizer(input) {
136
135
  let terminatorPos = -1;
137
136
  const pos = i;
138
137
  const code = currentCode;
139
- while (!terminator && i < input.length) {
138
+ while (!terminator && i < l) {
140
139
  const char = input[i];
141
- if (code === CSI) {
142
- const charCode = input.charCodeAt(i);
143
- if (charCode >= 64 && charCode <= 126) {
144
- terminator = char;
145
- terminatorPos = i;
146
- i++;
147
- }
148
- } else if (code === ESC) {
149
- terminator = char;
150
- terminatorPos = i;
151
- i++;
152
- } else if (code) {
153
- if (char === BACKSLASH) {
154
- if (code === OSC) {
155
- for (let len = OSC_TERM_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
156
- const sequence = input.substring(i, i + len);
157
- if (OSC_ONLY_TERMINATORS.has(sequence)) {
158
- terminator = sequence;
159
- terminatorPos = i;
160
- i += len;
161
- break;
162
- }
140
+ if (char === BACKSLASH) {
141
+ if (code !== CSI && code !== ESC) {
142
+ const next = input[i + 1];
143
+ if (next === "a" && i + 2 <= l) {
144
+ if (code === OSC && input[i + 1] === "a") {
145
+ terminator = "\\a";
146
+ terminatorPos = i;
147
+ i += 2;
163
148
  }
164
- }
165
- if (!terminator) {
166
- for (let len = ST_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
167
- const sequence = input.substring(i, i + len);
168
- if (STRING_TERMINATORS.has(sequence)) {
169
- terminator = sequence;
149
+ } else if (next === "x") {
150
+ if (i + 4 <= l) {
151
+ const char3 = input[i + 2];
152
+ const char4 = input[i + 3];
153
+ if (char3 === "0" && char4 === "7" && code === OSC) {
154
+ terminator = "\\x07";
170
155
  terminatorPos = i;
171
- i += len;
172
- break;
156
+ i += 4;
157
+ } else if (char3 === "9" && char4 === "c") {
158
+ terminator = "\\x9c";
159
+ terminatorPos = i;
160
+ i += 4;
161
+ } else if (char3 === "1" && char4 === "b" && i + 6 <= l && input[i + 4] === BACKSLASH && input[i + 5] === BACKSLASH) {
162
+ terminator = "\\x1b\\\\";
163
+ terminatorPos = i;
164
+ i += 6;
173
165
  }
174
166
  }
167
+ } else if (next === "u" && code === OSC && i + 6 <= l) {
168
+ if (input[i + 2] === "0" && input[i + 3] === "0" && input[i + 4] === "0" && input[i + 5] === "7") {
169
+ terminator = "\\u0007";
170
+ terminatorPos = i;
171
+ i += 6;
172
+ }
173
+ } else if (next === "e" && i + 4 <= l) {
174
+ if (input[i + 2] === BACKSLASH && input[i + 3] === BACKSLASH) {
175
+ terminator = "\\e\\\\";
176
+ terminatorPos = i;
177
+ i += 4;
178
+ }
175
179
  }
176
180
  }
177
- }
178
- if (!terminator && char === BACKSLASH) {
179
- const nextChar = input[i + 1];
180
- if (nextChar) {
181
- const candidates = INTRODUCER_LOOKUP.get(nextChar);
182
- if (candidates) {
183
- for (const [seq, len] of candidates) {
184
- if (i + len > input.length) continue;
185
- let matches = true;
186
- for (let j = 0; j < len; j++) if (input[i + j] !== seq[j]) {
187
- matches = false;
188
- break;
181
+ if (!terminator) {
182
+ const next = input[i + 1];
183
+ if (next) {
184
+ const candidates = INTRODUCER_LOOKUP.get(next);
185
+ if (candidates) for (const [seq, len] of candidates) {
186
+ if (i + len > l) continue;
187
+ let matched = true;
188
+ for (let k = 0; k < len && matched; k += 2) {
189
+ matched = input[i + k] === seq[k];
190
+ if (matched && k + 1 < len) matched = input[i + k + 1] === seq[k + 1];
189
191
  }
190
- if (matches) {
192
+ if (matched) {
191
193
  terminator = ABANDONED;
192
194
  terminatorPos = i;
193
195
  break;
194
196
  }
195
197
  }
196
- if (terminator === ABANDONED) break;
197
198
  }
198
199
  }
200
+ } else if (code === CSI) {
201
+ const charCode = input.charCodeAt(i);
202
+ if (charCode >= 64 && charCode <= 126) {
203
+ terminator = char;
204
+ terminatorPos = i;
205
+ i++;
206
+ }
207
+ } else if (code === ESC) {
208
+ terminator = char;
209
+ terminatorPos = i;
210
+ i++;
199
211
  }
200
212
  if (!terminator) i++;
201
213
  }
package/dist/index.js CHANGED
@@ -1,3 +1,3 @@
1
- import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-BrF7Yirl.js";
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-Dtk-XHF4.js";
2
2
 
3
3
  export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
@@ -380,6 +380,15 @@ function* tokenizer(input) {
380
380
  let data = "";
381
381
  if (code === CSI) while (i < input.length) {
382
382
  const char = input[i];
383
+ if (INTRODUCERS.has(char)) {
384
+ if (data) yield emit$1({
385
+ type: TOKEN_TYPES.DATA,
386
+ pos,
387
+ raw: data
388
+ });
389
+ setState("GROUND");
390
+ break;
391
+ }
383
392
  const charCode = char.charCodeAt(0);
384
393
  if (charCode >= 64 && charCode <= 126) {
385
394
  if (data) yield emit$1({
@@ -393,6 +402,7 @@ function* tokenizer(input) {
393
402
  raw: char
394
403
  });
395
404
  i++;
405
+ setState("GROUND");
396
406
  break;
397
407
  }
398
408
  data += char;
@@ -401,19 +411,23 @@ function* tokenizer(input) {
401
411
  else if (code === ESC) {
402
412
  if (i < input.length) {
403
413
  const char = input[i];
404
- yield emit$1({
405
- type: TOKEN_TYPES.FINAL,
406
- pos: i,
407
- raw: char
408
- });
409
- i++;
414
+ if (INTRODUCERS.has(char)) setState("GROUND");
415
+ else {
416
+ yield emit$1({
417
+ type: TOKEN_TYPES.FINAL,
418
+ pos: i,
419
+ raw: char
420
+ });
421
+ i++;
422
+ setState("GROUND");
423
+ }
410
424
  }
411
425
  } else if (code) while (i < input.length) {
412
426
  const char = input[i];
413
427
  let terminator;
414
- if (char === ST) terminator = ST;
428
+ if (char === ESC && input[i + 1] === BACKSLASH) terminator = ESC + BACKSLASH;
429
+ else if (char === ST) terminator = ST;
415
430
  else if (char === BELL && code === OSC) terminator = BELL;
416
- else if (char === ESC && input[i + 1] === BACKSLASH) terminator = ESC + BACKSLASH;
417
431
  if (terminator) {
418
432
  if (data) yield emit$1({
419
433
  type: TOKEN_TYPES.DATA,
@@ -426,20 +440,22 @@ function* tokenizer(input) {
426
440
  raw: terminator
427
441
  });
428
442
  i += terminator.length;
443
+ setState("GROUND");
429
444
  break;
430
445
  }
431
- if (char === ESC) {
446
+ if (INTRODUCERS.has(char)) {
432
447
  if (data) yield emit$1({
433
448
  type: TOKEN_TYPES.DATA,
434
449
  pos,
435
450
  raw: data
436
451
  });
452
+ setState("GROUND");
437
453
  break;
438
454
  }
439
455
  data += char;
440
456
  i++;
441
457
  }
442
- setState("GROUND");
458
+ if (state === "SEQUENCE") setState("GROUND");
443
459
  }
444
460
  }
445
461
  function tokenize(input) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ansi-tools/parser",
3
- "version": "1.0.4",
3
+ "version": "1.0.5",
4
4
  "description": "Tokenize and parse strings containing ANSI escape sequences and control codes",
5
5
  "main": "./dist/index.js",
6
6
  "type": "module",