@ansi-tools/parser 1.0.4 → 1.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/escaped.js CHANGED
@@ -1,4 +1,4 @@
1
- import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parser } from "./parse-BrF7Yirl.js";
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parser } from "./parse-Dtk-XHF4.js";
2
2
 
3
3
  //#region src/tokenize.escaped.ts
4
4
  const CSI_ESCAPED = "\\u009b";
@@ -11,28 +11,18 @@ const INTRODUCERS = [
11
11
  ["\\e", 2]
12
12
  ];
13
13
  const INTRODUCER_LOOKUP = /* @__PURE__ */ new Map();
14
+ const INTRODUCER_FIRST_CHAR_CACHE = /* @__PURE__ */ new Map();
14
15
  for (const [sequence, len] of INTRODUCERS) {
15
16
  const secondChar = sequence[1];
16
17
  if (!INTRODUCER_LOOKUP.has(secondChar)) INTRODUCER_LOOKUP.set(secondChar, []);
17
18
  INTRODUCER_LOOKUP.get(secondChar)?.push([sequence, len]);
19
+ INTRODUCER_FIRST_CHAR_CACHE.set(sequence, true);
18
20
  }
19
- const STRING_TERMINATORS = new Map([
20
- ["\\x9c", 4],
21
- ["\\e\\\\", 4],
22
- ["\\x1b\\\\", 8]
23
- ]);
24
- const OSC_ONLY_TERMINATORS = new Map([
25
- ["\\a", 2],
26
- ["\\x07", 4],
27
- ["\\u0007", 6]
28
- ]);
29
- const ST_MAX_LENGTH = Math.max(...STRING_TERMINATORS.values());
30
- const OSC_TERM_MAX_LENGTH = Math.max(...OSC_ONLY_TERMINATORS.values());
31
- const INTRODUCER_PEEK_AHEAD = new Set(INTRODUCERS.map((entry) => entry[0][1]));
32
21
  function emit(token) {
33
22
  return token;
34
23
  }
35
24
  function* tokenizer(input) {
25
+ const l = input.length;
36
26
  let i = 0;
37
27
  let state = "GROUND";
38
28
  let currentCode;
@@ -40,16 +30,29 @@ function* tokenizer(input) {
40
30
  state = next;
41
31
  currentCode = code;
42
32
  }
43
- while (i < input.length) if (state === "GROUND") {
33
+ while (i < l) if (state === "GROUND") {
44
34
  const textStart = i;
45
- while (i < input.length) {
35
+ while (i < l) {
46
36
  const backslashIndex = input.indexOf(BACKSLASH, i);
47
37
  if (backslashIndex === -1) {
48
- i = input.length;
38
+ i = l;
49
39
  break;
50
40
  }
51
- const nextChar = input[backslashIndex + 1];
52
- if (nextChar && INTRODUCER_PEEK_AHEAD.has(nextChar)) {
41
+ let isIntroducer = false;
42
+ const candidates = INTRODUCER_LOOKUP.get(input[backslashIndex + 1]);
43
+ if (candidates) for (const [seq, len] of candidates) {
44
+ if (backslashIndex + len > l) continue;
45
+ let matched = true;
46
+ for (let k = 0; k < len && matched; k += 2) {
47
+ matched = input[backslashIndex + k] === seq[k];
48
+ if (matched && k + 1 < len) matched = input[backslashIndex + k + 1] === seq[k + 1];
49
+ }
50
+ if (matched) {
51
+ isIntroducer = true;
52
+ break;
53
+ }
54
+ }
55
+ if (isIntroducer) {
53
56
  i = backslashIndex;
54
57
  break;
55
58
  } else i = backslashIndex + 1;
@@ -59,76 +62,84 @@ function* tokenizer(input) {
59
62
  pos: textStart,
60
63
  raw: input.substring(textStart, i)
61
64
  });
62
- if (i < input.length) {
65
+ if (i < l) {
63
66
  const candidates = INTRODUCER_LOOKUP.get(input[i + 1]);
64
67
  if (candidates) {
65
- let matched = false;
66
- for (const [seq, len] of candidates) if (i + len <= input.length && input.substring(i, i + len) === seq) {
67
- matched = true;
68
- if (seq === CSI_ESCAPED) {
69
- yield emit({
70
- type: TOKEN_TYPES.INTRODUCER,
71
- pos: i,
72
- raw: seq,
73
- code: CSI
74
- });
75
- i += len;
76
- setState("SEQUENCE", CSI);
77
- } else {
78
- const next = input[i + len];
79
- if (next === CSI_OPEN) {
68
+ let isMatch = false;
69
+ for (const [seq, len] of candidates) {
70
+ if (i + len > l) continue;
71
+ let isSeqMatch = true;
72
+ for (let k = 0; k < len && isSeqMatch; k += 2) {
73
+ isSeqMatch = input[i + k] === seq[k];
74
+ if (isSeqMatch && k + 1 < len) isSeqMatch = input[i + k + 1] === seq[k + 1];
75
+ }
76
+ if (isSeqMatch) {
77
+ isMatch = true;
78
+ if (seq === CSI_ESCAPED) {
80
79
  yield emit({
81
80
  type: TOKEN_TYPES.INTRODUCER,
82
81
  pos: i,
83
- raw: seq + next,
82
+ raw: seq,
84
83
  code: CSI
85
84
  });
86
- i += len + 1;
85
+ i += len;
87
86
  setState("SEQUENCE", CSI);
88
- } else if (next === OSC_OPEN) {
89
- yield emit({
90
- type: TOKEN_TYPES.INTRODUCER,
91
- pos: i,
92
- raw: seq + next,
93
- code: OSC
94
- });
95
- i += len + 1;
96
- setState("SEQUENCE", OSC);
97
- } else if (STRING_OPENERS.has(next)) {
98
- yield emit({
99
- type: TOKEN_TYPES.INTRODUCER,
100
- pos: i,
101
- raw: seq + next,
102
- code: next
103
- });
104
- i += len + 1;
105
- setState("SEQUENCE", next);
106
- } else if (next) {
107
- let j = i + len;
108
- while (j < input.length && input.charCodeAt(j) >= 32 && input.charCodeAt(j) <= 47) j++;
109
- if (j < input.length) {
110
- const is = input.slice(i + len, j);
111
- if (is) yield emit({
87
+ } else {
88
+ const next = input[i + len];
89
+ if (next === CSI_OPEN) {
90
+ yield emit({
112
91
  type: TOKEN_TYPES.INTRODUCER,
113
92
  pos: i,
114
- raw: seq + is,
115
- code: ESC,
116
- intermediate: is
93
+ raw: seq + next,
94
+ code: CSI
117
95
  });
118
- else yield emit({
96
+ i += len + 1;
97
+ setState("SEQUENCE", CSI);
98
+ } else if (next === OSC_OPEN) {
99
+ yield emit({
119
100
  type: TOKEN_TYPES.INTRODUCER,
120
101
  pos: i,
121
- raw: seq,
122
- code: ESC
102
+ raw: seq + next,
103
+ code: OSC
123
104
  });
124
- i = j;
125
- setState("SEQUENCE", ESC);
126
- } else i = j;
127
- } else i += len;
105
+ i += len + 1;
106
+ setState("SEQUENCE", OSC);
107
+ } else if (STRING_OPENERS.has(next)) {
108
+ yield emit({
109
+ type: TOKEN_TYPES.INTRODUCER,
110
+ pos: i,
111
+ raw: seq + next,
112
+ code: next
113
+ });
114
+ i += len + 1;
115
+ setState("SEQUENCE", next);
116
+ } else if (next) {
117
+ let j = i + len;
118
+ while (j < l && input.charCodeAt(j) >= 32 && input.charCodeAt(j) <= 47) j++;
119
+ if (j < l) {
120
+ const is = input.slice(i + len, j);
121
+ if (is) yield emit({
122
+ type: TOKEN_TYPES.INTRODUCER,
123
+ pos: i,
124
+ raw: seq + is,
125
+ code: ESC,
126
+ intermediate: is
127
+ });
128
+ else yield emit({
129
+ type: TOKEN_TYPES.INTRODUCER,
130
+ pos: i,
131
+ raw: seq,
132
+ code: ESC
133
+ });
134
+ i = j;
135
+ setState("SEQUENCE", ESC);
136
+ } else i = j;
137
+ } else i += len;
138
+ }
139
+ break;
128
140
  }
129
- break;
130
141
  }
131
- if (!matched) i++;
142
+ if (!isMatch) i++;
132
143
  } else i++;
133
144
  }
134
145
  } else if (state === "SEQUENCE") {
@@ -136,66 +147,79 @@ function* tokenizer(input) {
136
147
  let terminatorPos = -1;
137
148
  const pos = i;
138
149
  const code = currentCode;
139
- while (!terminator && i < input.length) {
150
+ while (!terminator && i < l) {
140
151
  const char = input[i];
141
- if (code === CSI) {
142
- const charCode = input.charCodeAt(i);
143
- if (charCode >= 64 && charCode <= 126) {
144
- terminator = char;
145
- terminatorPos = i;
146
- i++;
147
- }
148
- } else if (code === ESC) {
149
- terminator = char;
150
- terminatorPos = i;
151
- i++;
152
- } else if (code) {
153
- if (char === BACKSLASH) {
154
- if (code === OSC) {
155
- for (let len = OSC_TERM_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
156
- const sequence = input.substring(i, i + len);
157
- if (OSC_ONLY_TERMINATORS.has(sequence)) {
158
- terminator = sequence;
159
- terminatorPos = i;
160
- i += len;
161
- break;
162
- }
152
+ if (char === BACKSLASH) {
153
+ if (code !== CSI && code !== ESC) {
154
+ const next = input[i + 1];
155
+ if (next === "a" && i + 2 <= l) {
156
+ if (code === OSC && input[i + 1] === "a") {
157
+ terminator = "\\a";
158
+ terminatorPos = i;
159
+ i += 2;
163
160
  }
164
- }
165
- if (!terminator) {
166
- for (let len = ST_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
167
- const sequence = input.substring(i, i + len);
168
- if (STRING_TERMINATORS.has(sequence)) {
169
- terminator = sequence;
161
+ } else if (next === "x") {
162
+ if (i + 4 <= l) {
163
+ const char3 = input[i + 2];
164
+ const char4 = input[i + 3];
165
+ if (char3 === "0" && char4 === "7" && code === OSC) {
166
+ terminator = "\\x07";
170
167
  terminatorPos = i;
171
- i += len;
172
- break;
168
+ i += 4;
169
+ } else if (char3 === "9" && char4 === "c") {
170
+ terminator = "\\x9c";
171
+ terminatorPos = i;
172
+ i += 4;
173
+ } else if (char3 === "1" && char4 === "b" && i + 6 <= l && input[i + 4] === BACKSLASH && input[i + 5] === BACKSLASH) {
174
+ terminator = "\\x1b\\\\";
175
+ terminatorPos = i;
176
+ i += 6;
173
177
  }
174
178
  }
179
+ } else if (next === "u" && code === OSC && i + 6 <= l) {
180
+ if (input[i + 2] === "0" && input[i + 3] === "0" && input[i + 4] === "0" && input[i + 5] === "7") {
181
+ terminator = "\\u0007";
182
+ terminatorPos = i;
183
+ i += 6;
184
+ }
185
+ } else if (next === "e" && i + 4 <= l) {
186
+ if (input[i + 2] === BACKSLASH && input[i + 3] === BACKSLASH) {
187
+ terminator = "\\e\\\\";
188
+ terminatorPos = i;
189
+ i += 4;
190
+ }
175
191
  }
176
192
  }
177
- }
178
- if (!terminator && char === BACKSLASH) {
179
- const nextChar = input[i + 1];
180
- if (nextChar) {
181
- const candidates = INTRODUCER_LOOKUP.get(nextChar);
182
- if (candidates) {
183
- for (const [seq, len] of candidates) {
184
- if (i + len > input.length) continue;
185
- let matches = true;
186
- for (let j = 0; j < len; j++) if (input[i + j] !== seq[j]) {
187
- matches = false;
188
- break;
193
+ if (!terminator) {
194
+ const next = input[i + 1];
195
+ if (next) {
196
+ const candidates = INTRODUCER_LOOKUP.get(next);
197
+ if (candidates) for (const [seq, len] of candidates) {
198
+ if (i + len > l) continue;
199
+ let matched = true;
200
+ for (let k = 0; k < len && matched; k += 2) {
201
+ matched = input[i + k] === seq[k];
202
+ if (matched && k + 1 < len) matched = input[i + k + 1] === seq[k + 1];
189
203
  }
190
- if (matches) {
204
+ if (matched) {
191
205
  terminator = ABANDONED;
192
206
  terminatorPos = i;
193
207
  break;
194
208
  }
195
209
  }
196
- if (terminator === ABANDONED) break;
197
210
  }
198
211
  }
212
+ } else if (code === CSI) {
213
+ const charCode = input.charCodeAt(i);
214
+ if (charCode >= 64 && charCode <= 126) {
215
+ terminator = char;
216
+ terminatorPos = i;
217
+ i++;
218
+ }
219
+ } else if (code === ESC) {
220
+ terminator = char;
221
+ terminatorPos = i;
222
+ i++;
199
223
  }
200
224
  if (!terminator) i++;
201
225
  }
package/dist/index.js CHANGED
@@ -1,3 +1,3 @@
1
- import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-BrF7Yirl.js";
1
+ import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-Dtk-XHF4.js";
2
2
 
3
3
  export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };
@@ -380,6 +380,15 @@ function* tokenizer(input) {
380
380
  let data = "";
381
381
  if (code === CSI) while (i < input.length) {
382
382
  const char = input[i];
383
+ if (INTRODUCERS.has(char)) {
384
+ if (data) yield emit$1({
385
+ type: TOKEN_TYPES.DATA,
386
+ pos,
387
+ raw: data
388
+ });
389
+ setState("GROUND");
390
+ break;
391
+ }
383
392
  const charCode = char.charCodeAt(0);
384
393
  if (charCode >= 64 && charCode <= 126) {
385
394
  if (data) yield emit$1({
@@ -393,6 +402,7 @@ function* tokenizer(input) {
393
402
  raw: char
394
403
  });
395
404
  i++;
405
+ setState("GROUND");
396
406
  break;
397
407
  }
398
408
  data += char;
@@ -401,19 +411,23 @@ function* tokenizer(input) {
401
411
  else if (code === ESC) {
402
412
  if (i < input.length) {
403
413
  const char = input[i];
404
- yield emit$1({
405
- type: TOKEN_TYPES.FINAL,
406
- pos: i,
407
- raw: char
408
- });
409
- i++;
414
+ if (INTRODUCERS.has(char)) setState("GROUND");
415
+ else {
416
+ yield emit$1({
417
+ type: TOKEN_TYPES.FINAL,
418
+ pos: i,
419
+ raw: char
420
+ });
421
+ i++;
422
+ setState("GROUND");
423
+ }
410
424
  }
411
425
  } else if (code) while (i < input.length) {
412
426
  const char = input[i];
413
427
  let terminator;
414
- if (char === ST) terminator = ST;
428
+ if (char === ESC && input[i + 1] === BACKSLASH) terminator = ESC + BACKSLASH;
429
+ else if (char === ST) terminator = ST;
415
430
  else if (char === BELL && code === OSC) terminator = BELL;
416
- else if (char === ESC && input[i + 1] === BACKSLASH) terminator = ESC + BACKSLASH;
417
431
  if (terminator) {
418
432
  if (data) yield emit$1({
419
433
  type: TOKEN_TYPES.DATA,
@@ -426,20 +440,22 @@ function* tokenizer(input) {
426
440
  raw: terminator
427
441
  });
428
442
  i += terminator.length;
443
+ setState("GROUND");
429
444
  break;
430
445
  }
431
- if (char === ESC) {
446
+ if (INTRODUCERS.has(char)) {
432
447
  if (data) yield emit$1({
433
448
  type: TOKEN_TYPES.DATA,
434
449
  pos,
435
450
  raw: data
436
451
  });
452
+ setState("GROUND");
437
453
  break;
438
454
  }
439
455
  data += char;
440
456
  i++;
441
457
  }
442
- setState("GROUND");
458
+ if (state === "SEQUENCE") setState("GROUND");
443
459
  }
444
460
  }
445
461
  function tokenize(input) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ansi-tools/parser",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "Tokenize and parse strings containing ANSI escape sequences and control codes",
5
5
  "main": "./dist/index.js",
6
6
  "type": "module",