npm - @ansi-tools/parser - Versions diffs - 0.0.0 - Mend

@ansi-tools/parser 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +187 -0
package/dist/escaped.d.ts +7 -0
package/dist/escaped.js +203 -0
package/dist/index.d.ts +7 -0
package/dist/index.js +3 -0
package/dist/parse-BirjVUvQ.d.ts +64 -0
package/dist/parse-ClmKWMZx.js +485 -0
package/package.json +40 -0
package/src/constants.ts +40 -0
package/src/escaped.ts +4 -0
package/src/index.ts +4 -0
package/src/parse.escaped.test.ts +86 -0
package/src/parse.test.ts +86 -0
package/src/parse.ts +111 -0
package/src/parsers/csi.test.ts +55 -0
package/src/parsers/csi.ts +54 -0
package/src/parsers/dcs.test.ts +47 -0
package/src/parsers/dcs.ts +36 -0
package/src/parsers/dec.test.ts +24 -0
package/src/parsers/dec.ts +30 -0
package/src/parsers/esc.test.ts +19 -0
package/src/parsers/esc.ts +6 -0
package/src/parsers/osc.test.ts +36 -0
package/src/parsers/osc.ts +29 -0
package/src/tokenize.escaped.test.ts +410 -0
package/src/tokenize.escaped.ts +191 -0
package/src/tokenize.test.ts +118 -0
package/src/tokenize.ts +140 -0
package/src/types.ts +24 -0
package/tsconfig.json +16 -0

package/README.md ADDED Viewed

@@ -0,0 +1,187 @@
+# @ansi-tools/parser
+Parser for ANSI escape sequences.
+## Supported sequence types
+- **CSI** (Control Sequence Introducer): `\x1b[...`
+- **OSC** (Operating System Command): `\x1b]...`
+- **DCS** (Device Control String): `\x1bP...`
+- **ESC** (Escape): `\x1b...`
+- **DEC** (DEC Private Mode): `\x1b[?...`
+- **STRING** (APC/PM/SOS): `\x1b_...`, `\x1b^...`, `\x1bX...`
+- **PRIVATE** (Private sequences): `\x1b[<...`, `\x1b[=...`, `\x1b[>...`
+## Features
+- ✅ Handles 7-bit (`\x1b` or `\u001b`) and 8-bit (`\u009b`) introducers
+- ✅ Handles octal (`\033`) and shorthand `\e` introducers (only escaped)
+- ✅ Multiple string terminators (`\x1b\\`, `\x07`)
+- ✅ Zero dependencies
+- ✅ Separate optimized modules for raw and escaped input
+## Installation
+```bash
+npm install @ansi-tools/parser
+```
+## Usage
+```ts
+import { parse } from "@ansi-tools/parser";
+const input = "\x1b[31mHello\x1b[0m World";
+for (const code of parse(input)) {
+  console.log(code);
+}
+```
+Escaped and unescaped input are handled differently. Only an escaped input
+string lets the tokens and control codes preserve the original raw text and
+its positions. See the examples below for the default and the `/escaped` import.
+The default (unescaped) tokenization is roughly 30% faster. Use the default
+if you just need the control codes.
+## Examples
+### Default (raw/unescaped)
+```ts
+import { parse } from "@ansi-tools/parser";
+parse(`\x1b[31mHello\x1b[0m`);
+// result:
+[
+  {
+    type: "CSI",
+    pos: 0,
+    raw: "\u001b[31m",
+    command: "m",
+    params: ["31"],
+  },
+  {
+    type: "TEXT",
+    pos: 5,
+    raw: "Hello",
+  },
+  {
+    type: "CSI",
+    pos: 10,
+    raw: "\u001b[0m",
+    command: "m",
+    params: ["0"],
+  },
+];
+```
+### Escaped
+```ts
+import { parse } from "@ansi-tools/parser/escaped";
+parse(String.raw`\x1b[31mHello\x1b[0m`);
+// result:
+[
+  {
+    type: "CSI",
+    pos: 0,
+    raw: "\\x1b[31m",
+    command: "m",
+    params: ["31"],
+  },
+  {
+    type: "TEXT",
+    pos: 8,
+    raw: "Hello",
+  },
+  {
+    type: "CSI",
+    pos: 13,
+    raw: "\\x1b[0m",
+    command: "m",
+    params: ["0"],
+  },
+];
+```
+## Tokenizer & generators
+The tokenizer and generators are also available, for both the default and the
+`/escaped` versions.
+### tokenize
+```ts
+import { tokenize } from "@ansi-tools/parser";
+const input = "\x1b[31m";
+for (const token of tokenize(input)) {
+  console.log(token);
+}
+```
+### Generators
+```ts
+import { tokenizer, parser } from "@ansi-tools/parser";
+const input = "\x1b[31mHello\x1b[0m";
+const tokens = tokenizer(input);
+const codes = parser(tokens);
+for (const code of codes) {
+  console.log(code);
+}
+```
+## Type Definitions
+```ts
+function parse(input: string): CODE[];
+function tokenize(input: string): TOKEN[];
+function* parser(tokens: Generator<TOKEN>): Generator<CODE>;
+function* tokenizer(input: string): Generator<TOKEN>;
+```
+### CODE
+```ts
+type CONTROL_CODE = {
+  type: "CSI" | "DCS" | "DEC" | "ESC" | "OSC" | "SGR" | "STRING" | "PRIVATE";
+  command: string;
+  raw: string;
+  params: string[];
+  pos: number;
+};
+type CONTROL_CODE_TEXT = {
+  type: "TEXT";
+  raw: string;
+  pos: number;
+};
+type CODE = CONTROL_CODE | CONTROL_CODE_TEXT;
+```
+### TOKEN
+```ts
+type TOKEN = {
+  type: "INTRODUCER" | "DATA" | "FINAL" | "TEXT";
+  pos: number;
+  raw: string;
+  code?: string;
+};
+```
+## License
+ISC

package/dist/escaped.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser } from "./parse-BirjVUvQ.js";
+//#region src/tokenize.escaped.d.ts
+declare function tokenizer(input: string): Generator<TOKEN>; // lazy tokenizer for escaped input (declared from src/tokenize.escaped): yields TOKENs one at a time
+declare function tokenize(input: string): TOKEN[]; // eager variant: the same tokens collected into an array
+//#endregion
+export { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser, tokenize, tokenizer };

package/dist/escaped.js ADDED Viewed

@@ -0,0 +1,203 @@
+import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser } from "./parse-ClmKWMZx.js";
+//#region src/tokenize.escaped.ts
+const CSI_ESCAPED = "\\u009b"; // escaped spelling of the 8-bit CSI introducer; it needs no following "[" in the tokenizer
+const INTRODUCERS = [ // [escaped introducer text, its length in characters]
+	["\\u001b", 6],
+	[CSI_ESCAPED, 6],
+	["\\x1b", 4],
+	["\\033", 4],
+	["\\e", 2]
+];
+const INTRODUCER_LOOKUP = /* @__PURE__ */ new Map(); // character after the backslash -> list of candidate [sequence, len] entries
+for (const [sequence, len] of INTRODUCERS) {
+	const secondChar = sequence[1]; // sequence[0] is always the backslash
+	if (!INTRODUCER_LOOKUP.has(secondChar)) INTRODUCER_LOOKUP.set(secondChar, []);
+	INTRODUCER_LOOKUP.get(secondChar)?.push([sequence, len]); // candidates keep the order of INTRODUCERS
+}
+const STRING_TERMINATORS = new Map([ // escaped string terminators accepted for every non-CSI sequence, mapped to a length
+	["\\x9c", 4],
+	["\\e\\\\", 4],
+	["\\x1b\\\\", 8] // NOTE(review): this literal is 6 characters long, not 8; the number is only consumed via ST_MAX_LENGTH, so the effect is one extra failed lookup per backslash — confirm and correct
+]);
+const OSC_ONLY_TERMINATORS = new Map([ // escaped BEL spellings, checked only while inside an OSC sequence
+	["\\a", 2],
+	["\\x07", 4],
+	["\\u0007", 6]
+]);
+const ST_MAX_LENGTH = Math.max(...STRING_TERMINATORS.values()); // longest window the tokenizer tries when matching a string terminator
+const OSC_TERM_MAX_LENGTH = Math.max(...OSC_ONLY_TERMINATORS.values()); // longest window tried when matching an OSC-only terminator
+const INTRODUCER_PEEK_AHEAD = new Set(INTRODUCERS.map((entry) => entry[0][1])); // characters that may follow a backslash at the start of an introducer
+function emit(token) { // identity helper: every token the tokenizer yields is passed through here unchanged
+	return token;
+}
+function* tokenizer(input) {
+	let i = 0;
+	let state = "GROUND";
+	let currentCode;
+	function setState(next, code) {
+		state = next;
+		currentCode = code;
+	}
+	while (i < input.length) if (state === "GROUND") {
+		const textStart = i;
+		while (i < input.length) {
+			const backslashIndex = input.indexOf(BACKSLASH, i);
+			if (backslashIndex === -1) {
+				i = input.length;
+				break;
+			}
+			const nextChar = input[backslashIndex + 1];
+			if (nextChar && INTRODUCER_PEEK_AHEAD.has(nextChar)) {
+				i = backslashIndex;
+				break;
+			} else i = backslashIndex + 1;
+		}
+		if (i > textStart) yield emit({
+			type: TOKEN_TYPES.TEXT,
+			pos: textStart,
+			raw: input.substring(textStart, i)
+		});
+		if (i < input.length) {
+			const candidates = INTRODUCER_LOOKUP.get(input[i + 1]);
+			if (candidates) {
+				for (const [sequence, len] of candidates) if (i + len <= input.length && input.substring(i, i + len) === sequence) {
+					if (sequence === CSI_ESCAPED) {
+						yield emit({
+							type: TOKEN_TYPES.INTRODUCER,
+							pos: i,
+							raw: sequence,
+							code: CSI
+						});
+						i += len;
+						setState("SEQUENCE", CSI);
+					} else {
+						const nextChar = input[i + len];
+						if (nextChar === CSI_OPEN) {
+							yield emit({
+								type: TOKEN_TYPES.INTRODUCER,
+								pos: i,
+								raw: sequence + nextChar,
+								code: CSI
+							});
+							i += len + 1;
+							setState("SEQUENCE", CSI);
+						} else if (nextChar === OSC_OPEN) {
+							yield emit({
+								type: TOKEN_TYPES.INTRODUCER,
+								pos: i,
+								raw: sequence + nextChar,
+								code: OSC
+							});
+							i += len + 1;
+							setState("SEQUENCE", OSC);
+						} else if (STRING_OPENERS.has(nextChar)) {
+							yield emit({
+								type: TOKEN_TYPES.INTRODUCER,
+								pos: i,
+								raw: sequence + nextChar,
+								code: nextChar
+							});
+							i += len + 1;
+							setState("SEQUENCE", nextChar);
+						} else if (nextChar && nextChar.charCodeAt(0) >= 32 && nextChar.charCodeAt(0) <= 47) {
+							yield emit({
+								type: TOKEN_TYPES.INTRODUCER,
+								pos: i,
+								raw: sequence + nextChar,
+								code: CSI
+							});
+							i += len + 1;
+							setState("SEQUENCE", CSI);
+						} else if (nextChar) {
+							yield emit({
+								type: TOKEN_TYPES.INTRODUCER,
+								pos: i,
+								raw: sequence,
+								code: ESC
+							});
+							i += len;
+							yield emit({
+								type: TOKEN_TYPES.FINAL,
+								pos: i,
+								raw: nextChar
+							});
+							i++;
+						} else {
+							yield emit({
+								type: TOKEN_TYPES.INTRODUCER,
+								pos: i,
+								raw: sequence,
+								code: ESC
+							});
+							i += len;
+						}
+					}
+					break;
+				}
+			}
+		}
+	} else {
+		let terminator = "";
+		let terminatorPos = -1;
+		const pos = i;
+		const code = currentCode;
+		while (!terminator && i < input.length) {
+			const char = input[i];
+			if (code === CSI) {
+				const charCode = input.charCodeAt(i);
+				if (charCode >= 64 && charCode < 126) {
+					terminator = char;
+					terminatorPos = i;
+					i++;
+				}
+			} else if (code) {
+				if (char === BACKSLASH) {
+					if (code === OSC) {
+						for (let len = OSC_TERM_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
+							const sequence = input.substring(i, i + len);
+							if (OSC_ONLY_TERMINATORS.has(sequence)) {
+								terminator = sequence;
+								terminatorPos = i;
+								i += len;
+								break;
+							}
+						}
+					}
+					if (!terminator) {
+						for (let len = ST_MAX_LENGTH; len >= 2; len -= 2) if (i + len <= input.length) {
+							const sequence = input.substring(i, i + len);
+							if (STRING_TERMINATORS.has(sequence)) {
+								terminator = sequence;
+								terminatorPos = i;
+								i += len;
+								break;
+							}
+						}
+					}
+				}
+			}
+			if (!terminator) i++;
+		}
+		if (terminatorPos > pos) {
+			const data = input.substring(pos, terminatorPos);
+			yield emit({
+				type: TOKEN_TYPES.DATA,
+				pos,
+				raw: data
+			});
+		}
+		if (terminator) yield emit({
+			type: TOKEN_TYPES.FINAL,
+			pos: terminatorPos,
+			raw: terminator
+		});
+		setState("GROUND");
+	}
+}
+function tokenize(input) {
+	return Array.from(tokenizer(input));
+}
+//#endregion
+export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser } from "./parse-BirjVUvQ.js";
+//#region src/tokenize.d.ts
+declare function tokenizer(input: string): Generator<TOKEN>; // default tokenizer (declared from src/tokenize; the README describes it as the raw/unescaped variant): yields TOKENs lazily
+declare function tokenize(input: string): TOKEN[]; // array-returning counterpart; presumably collects tokenizer(input) — implementation not visible in this declaration file
+//#endregion
+export { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser, tokenize, tokenizer };

package/dist/index.js ADDED Viewed

@@ -0,0 +1,3 @@
+import { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer } from "./parse-ClmKWMZx.js";
+export { APC, APC_OPEN, BACKSLASH, BELL, CODE_TYPES, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN_TYPES, parse, parser, tokenize, tokenizer };

package/dist/parse-BirjVUvQ.d.ts ADDED Viewed

@@ -0,0 +1,64 @@
+//#region src/constants.d.ts
+declare const BELL: string; // value not visible here; presumably the BEL character (`\x07`) the README lists as a terminator — confirm in src/constants.ts
+declare const ESC: string; // used by the escaped tokenizer as the `code` of INTRODUCER tokens for plain escape sequences
+declare const BACKSLASH: string; // the character the escaped tokenizer scans for to locate introducers and terminators
+declare const DCS: string; // value not visible in this declaration
+declare const SOS: string; // value not visible in this declaration
+declare const CSI: string; // used by the escaped tokenizer as the `code` of INTRODUCER tokens for CSI sequences
+declare const ST: string; // value not visible in this declaration
+declare const OSC: string; // used by the escaped tokenizer as the `code` of INTRODUCER tokens for OSC sequences
+declare const PM: string; // value not visible in this declaration
+declare const APC: string; // value not visible in this declaration
+declare const CSI_OPEN = "["; // character after the escape introducer that opens a CSI sequence
+declare const OSC_OPEN = "]"; // character after the escape introducer that opens an OSC sequence
+declare const DEC_OPEN = "?"; // README: DEC private mode sequences look like `\x1b[?...`
+declare const PRIVATE_OPENERS: Set<string>; // contents not visible here; the README lists `<`, `=` and `>` for PRIVATE sequences — confirm
+declare const DCS_OPEN = "P"; // README: DCS sequences look like `\x1bP...`
+declare const APC_OPEN = "_"; // README: APC sequences look like `\x1b_...`
+declare const SOS_OPEN = "^"; // NOTE(review): ECMA-48 assigns ESC ^ to PM and ESC X to SOS; this and PM_OPEN look swapped — confirm before changing
+declare const PM_OPEN = "X"; // NOTE(review): see SOS_OPEN; ECMA-48 uses ESC X for SOS
+declare const STRING_OPENERS: Set<string>; // contents not visible here; the escaped tokenizer starts a string sequence for any member and uses the opener itself as the token `code`
+declare const TOKEN_TYPES: { // token kinds emitted by the tokenizers; each key maps to the identical string literal
+  readonly TEXT: "TEXT"; // plain text outside any sequence
+  readonly INTRODUCER: "INTRODUCER"; // start of a control sequence
+  readonly DATA: "DATA"; // characters between the introducer and the terminator
+  readonly FINAL: "FINAL"; // final byte or terminator that ends a sequence
+};
+declare const CODE_TYPES: { // kinds of parsed codes; the keys match the `type` members of CONTROL_CODE plus "TEXT" from CONTROL_CODE_TEXT
+  readonly CSI: "CSI";
+  readonly DCS: "DCS";
+  readonly DEC: "DEC";
+  readonly ESC: "ESC";
+  readonly OSC: "OSC";
+  readonly PRIVATE: "PRIVATE";
+  readonly SGR: "SGR";
+  readonly STRING: "STRING";
+  readonly TEXT: "TEXT";
+};
+//#endregion
+//#region src/types.d.ts
+type TOKEN = { // one lexical unit produced by a tokenizer
+  type: keyof typeof TOKEN_TYPES; // "TEXT" | "INTRODUCER" | "DATA" | "FINAL"
+  pos: number; // index in the input string where `raw` starts
+  raw: string; // the exact slice of the input this token covers
+  code?: string; // set on INTRODUCER tokens by the escaped tokenizer; omitted on the other token types there
+};
+type CONTROL_CODE = { // a parsed control sequence
+  type: "CSI" | "DCS" | "DEC" | "ESC" | "OSC" | "SGR" | "STRING" | "PRIVATE";
+  command: string; // e.g. "m" for `\x1b[31m` (README example)
+  raw: string; // text of the whole sequence as it appeared in the input
+  params: string[]; // e.g. ["31"] for `\x1b[31m` (README example); kept as strings
+  pos: number; // index in the input where the sequence starts
+};
+type CONTROL_CODE_TEXT = { // a run of plain text between control sequences
+  type: "TEXT";
+  raw: string; // the text itself
+  pos: number; // index in the input where the text starts
+};
+type CODE = CONTROL_CODE | CONTROL_CODE_TEXT; // discriminated on `type`; "TEXT" selects CONTROL_CODE_TEXT
+//#endregion
+//#region src/parse.d.ts
+declare function parser(tokens: Generator<TOKEN>): Generator<CODE>; // lazily turns a token stream into CODE entries; the README feeds it `tokenizer(input)`
+declare function parse(input: string): CODE[]; // array-returning entry point taking the input string; presumably tokenizes and parses in one step — body not visible in this declaration file
+//#endregion
+export { APC, APC_OPEN, BACKSLASH, BELL, CODE, CODE_TYPES, CONTROL_CODE, CONTROL_CODE_TEXT, CSI, CSI_OPEN, DCS, DCS_OPEN, DEC_OPEN, ESC, OSC, OSC_OPEN, PM, PM_OPEN, PRIVATE_OPENERS, SOS, SOS_OPEN, ST, STRING_OPENERS, TOKEN, TOKEN_TYPES, parse, parser };