@alcalzone/ansi-tokenize 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/tokenize.js +22 -9
- package/build/tokenize.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -231,6 +231,10 @@ This automatically figures out the least amount of escape codes necessary to ach
|
|
|
231
231
|
Placeholder for next release:
|
|
232
232
|
### __WORK IN PROGRESS__
|
|
233
233
|
-->
|
|
234
|
+
### 0.2.5 (2026-02-11)
|
|
235
|
+
|
|
236
|
+
- Fix: preserve grapheme clusters when tokenizing (#51)
|
|
237
|
+
|
|
234
238
|
### 0.2.4 (2026-01-29)
|
|
235
239
|
|
|
236
240
|
- Fix: Support hyperlinks with parameters (#45)
|
package/build/tokenize.js
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
import isFullwidthCodePoint from "is-fullwidth-code-point";
|
|
2
2
|
import { CSI, ESCAPES, getEndCode, linkCodePrefix, linkCodePrefixCharCodes, OSC, } from "./ansiCodes.js";
|
|
3
|
+
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
|
|
4
|
+
function isFullwidthGrapheme(grapheme, baseCodePoint) {
|
|
5
|
+
if (isFullwidthCodePoint(baseCodePoint))
|
|
6
|
+
return true;
|
|
7
|
+
// Variation Selector 16 forces emoji presentation (2 columns wide)
|
|
8
|
+
if (grapheme.includes("\uFE0F"))
|
|
9
|
+
return true;
|
|
10
|
+
// Regional indicator pairs form flag emoji (2 columns wide)
|
|
11
|
+
if (baseCodePoint >= 0x1f1e6 && baseCodePoint <= 0x1f1ff)
|
|
12
|
+
return true;
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
3
15
|
// HOT PATH: Use only basic string/char code operations for maximum performance
|
|
4
16
|
function parseLinkCode(string, offset) {
|
|
5
17
|
string = string.slice(offset);
|
|
@@ -88,10 +100,13 @@ function splitCompoundSGRSequences(code) {
|
|
|
88
100
|
}
|
|
89
101
|
export function tokenize(str, endChar = Number.POSITIVE_INFINITY) {
|
|
90
102
|
const ret = [];
|
|
91
|
-
let index = 0;
|
|
92
103
|
let visible = 0;
|
|
93
|
-
|
|
94
|
-
|
|
104
|
+
let codeEndIndex = 0;
|
|
105
|
+
for (const { segment, index } of segmenter.segment(str)) {
|
|
106
|
+
// Skip segments consumed as part of an ANSI sequence
|
|
107
|
+
if (index < codeEndIndex)
|
|
108
|
+
continue;
|
|
109
|
+
const codePoint = segment.codePointAt(0);
|
|
95
110
|
if (ESCAPES.has(codePoint)) {
|
|
96
111
|
let code;
|
|
97
112
|
// Peek the next code point to determine the type of ANSI sequence
|
|
@@ -123,19 +138,17 @@ export function tokenize(str, endChar = Number.POSITIVE_INFINITY) {
|
|
|
123
138
|
}
|
|
124
139
|
}
|
|
125
140
|
if (code) {
|
|
126
|
-
index += code.length;
|
|
141
|
+
codeEndIndex = index + code.length;
|
|
127
142
|
continue;
|
|
128
143
|
}
|
|
129
144
|
}
|
|
130
|
-
const fullWidth = isFullwidthCodePoint(codePoint);
|
|
131
|
-
const character = String.fromCodePoint(codePoint);
|
|
145
|
+
const fullWidth = isFullwidthGrapheme(segment, codePoint);
|
|
132
146
|
ret.push({
|
|
133
147
|
type: "char",
|
|
134
|
-
value: character,
|
|
148
|
+
value: segment,
|
|
135
149
|
fullWidth,
|
|
136
150
|
});
|
|
137
|
-
|
|
138
|
-
visible += fullWidth ? 2 : character.length;
|
|
151
|
+
visible += fullWidth ? 2 : 1;
|
|
139
152
|
if (visible >= endChar) {
|
|
140
153
|
break;
|
|
141
154
|
}
|
package/build/tokenize.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenize.js","sourceRoot":"","sources":["../src/tokenize.ts"],"names":[],"mappings":"AAAA,OAAO,oBAAoB,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EACN,GAAG,EACH,OAAO,EACP,UAAU,EACV,cAAc,EACd,uBAAuB,EACvB,GAAG,GACH,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"tokenize.js","sourceRoot":"","sources":["../src/tokenize.ts"],"names":[],"mappings":"AAAA,OAAO,oBAAoB,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EACN,GAAG,EACH,OAAO,EACP,UAAU,EACV,cAAc,EACd,uBAAuB,EACvB,GAAG,GACH,MAAM,gBAAgB,CAAC;AAExB,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;AAE7E,SAAS,mBAAmB,CAAC,QAAgB,EAAE,aAAqB;IACnE,IAAI,oBAAoB,CAAC,aAAa,CAAC;QAAE,OAAO,IAAI,CAAC;IACrD,mEAAmE;IACnE,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAC7C,4DAA4D;IAC5D,IAAI,aAAa,IAAI,OAAO,IAAI,aAAa,IAAI,OAAO;QAAE,OAAO,IAAI,CAAC;IACtE,OAAO,KAAK,CAAC;AACd,CAAC;AAgBD,+EAA+E;AAC/E,SAAS,aAAa,CAAC,MAAc,EAAE,MAAc;IACpD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC9B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,uBAAuB,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;QACpE,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,uBAAuB,CAAC,KAAK,CAAC,EAAE;YAChE,OAAO,SAAS,CAAC;SACjB;KACD;IACD,sCAAsC;IACtC,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC;IAClE,IAAI,cAAc,KAAK,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAC5C,0EAA0E;IAC1E,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,cAAc,GAAG,CAAC,CAAC,CAAC;IAC5D,IAAI,QAAQ,KAAK,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAEtC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC;AACtC,CAAC;AAED,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;AAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;AAC/B,MAAM,OAAO,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;AAClC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;AAE/B;;;;GAIG;AACH,SAAS,uBAAuB,CAAC,GAAW;IAC3C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;QAChD,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACvC,sCAAsC;QACtC,IAAI,QAAQ,KAAK,IAAI;YAAE,OAAO,KAAK,CAAC;QACpC,kCAAkC;QAClC,IAAI,QAAQ,KAAK,OAAO;YAAE,SAAS;QACnC,IAAI,QAAQ,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI;YAAE,SAAS;QACnD,6BAA6B;QAC7B,MAAM;KACN;IAED,OAAO,CAAC,CAAC,CAAC;AACX,CAAC;AAED,+EAA+E;AAC/E,SAAS,gBAAgB,CAAC,MAAc,EAAE,MAAc;IACvD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAG,u
BAAuB,CAAC,MAAM,CAAC,CAAC;IACjD,IAAI,QAAQ,KAAK,CAAC,CAAC;QAAE,OAAO;IAE5B,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,SAAS,yBAAyB,CAAC,IAAY;IAC9C,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;QACxB,sBAAsB;QACtB,OAAO,CAAC,IAAI,CAAC,CAAC;KACd;IAED,MAAM,SAAS,GAAG,IAAI;QACrB,6CAA6C;SAC5C,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;SACZ,KAAK,CAAC,GAAG,CAAC,CAAC;IAEb,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC7B,uEAAuE;QACvE,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,IAAI,EAAE;YACzC,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM,IAAI,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE;gBACzD,0CAA0C;gBAC1C,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC9C,CAAC,IAAI,CAAC,CAAC;gBACP,SAAS;aACT;iBAAM,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM,IAAI,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE;gBAChE,0CAA0C;gBAC1C,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC9C,CAAC,IAAI,CAAC,CAAC;gBACP,SAAS;aACT;SACD;QAED,gDAAgD;QAChD,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;KAClB;IAED,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,GAAW,EAAE,UAAkB,MAAM,CAAC,iBAAiB;IAC/E,MAAM,GAAG,GAAY,EAAE,CAAC;IACxB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;QACxD,qDAAqD;QACrD,IAAI,KAAK,GAAG,YAAY;YAAE,SAAS;QAEnC,MAAM,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC;QAE1C,IAAI,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE;YAC3B,IAAI,IAAwB,CAAC;YAE7B,kEAAkE;YAClE,MAAM,aAAa,GAAG,GAAG,CAAC,WAAW,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;YACjD,IAAI,aAAa,KAAK,GAAG,EAAE;gBAC1B,4CAA4C;gBAC5C,IAAI,GAAG,aAAa,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;gBACjC,IAAI,IAAI,EAAE;oBACT,GAAG,CAAC,IAAI,CAAC;wBACR,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI;wBACV,OAAO,EAAE,UAAU,CAAC,IAAI,C
AAC;qBACzB,CAAC,CAAC;iBACH;aACD;iBAAM,IAAI,aAAa,KAAK,GAAG,EAAE;gBACjC,4DAA4D;gBAC5D,IAAI,GAAG,gBAAgB,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;gBACpC,IAAI,IAAI,EAAE;oBACT,8CAA8C;oBAC9C,MAAM,KAAK,GAAG,yBAAyB,CAAC,IAAI,CAAC,CAAC;oBAC9C,KAAK,MAAM,cAAc,IAAI,KAAK,EAAE;wBACnC,GAAG,CAAC,IAAI,CAAC;4BACR,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,cAAc;4BACpB,OAAO,EAAE,UAAU,CAAC,cAAc,CAAC;yBACnC,CAAC,CAAC;qBACH;iBACD;aACD;YAED,IAAI,IAAI,EAAE;gBACT,YAAY,GAAG,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;gBACnC,SAAS;aACT;SACD;QAED,MAAM,SAAS,GAAG,mBAAmB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAE1D,GAAG,CAAC,IAAI,CAAC;YACR,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,OAAO;YACd,SAAS;SACT,CAAC,CAAC;QAEH,OAAO,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAE7B,IAAI,OAAO,IAAI,OAAO,EAAE;YACvB,MAAM;SACN;KACD;IAED,OAAO,GAAG,CAAC;AACZ,CAAC"}
|