@chigichan24/crune 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -123,3 +123,20 @@ describe("tokenize", () => {
|
|
|
123
123
|
expect(tokens).not.toContain("go");
|
|
124
124
|
});
|
|
125
125
|
});
|
|
126
|
+
describe("tokenize - large input regression (Issue #18)", () => {
|
|
127
|
+
// Real Claude Code sessions can contain `ls -R` / `find` / `tree` dumps
|
|
128
|
+
// with hundreds of thousands of path segments. `tokens.push(...arr)` on
|
|
129
|
+
// such an array hits V8's argument count limit and throws
|
|
130
|
+
// `RangeError: Maximum call stack size exceeded` even though it is not
|
|
131
|
+
// recursion. See https://github.com/chigichan24/crune/issues/18.
|
|
132
|
+
it("extractPathTokens does not throw on a huge corpus of file paths", () => {
|
|
133
|
+
const pathSegments = Array.from({ length: 100_000 }, (_, i) => `/dir${i}/file${i}.ts`);
|
|
134
|
+
const text = pathSegments.join(" ");
|
|
135
|
+
expect(() => extractPathTokens(text)).not.toThrow();
|
|
136
|
+
});
|
|
137
|
+
it("tokenize does not throw on a huge corpus of file paths", () => {
|
|
138
|
+
const pathSegments = Array.from({ length: 100_000 }, (_, i) => `/dir${i}/file${i}.ts`);
|
|
139
|
+
const text = pathSegments.join(" ");
|
|
140
|
+
expect(() => tokenize(text)).not.toThrow();
|
|
141
|
+
});
|
|
142
|
+
});
|
|
@@ -18,7 +18,11 @@ export function extractPathTokens(text) {
|
|
|
18
18
|
for (const seg of segments) {
|
|
19
19
|
const name = seg.replace(/\.[^.]+$/, ""); // remove extension
|
|
20
20
|
if (name.length > 2) {
|
|
21
|
-
tokens.push(...splitCamelCase(name));
|
|
21
|
+
// Avoid `tokens.push(...arr)` — large arrays exceed V8's argument
|
|
22
|
+
// count limit and throw RangeError.
|
|
23
|
+
// See https://github.com/chigichan24/crune/issues/18
|
|
24
|
+
for (const t of splitCamelCase(name))
|
|
25
|
+
tokens.push(t);
|
|
22
26
|
}
|
|
23
27
|
}
|
|
24
28
|
}
|
|
@@ -33,8 +37,12 @@ export function isNoiseToken(token) {
|
|
|
33
37
|
}
|
|
34
38
|
export function tokenize(text) {
|
|
35
39
|
const tokens = [];
|
|
36
|
-
// Extract file path tokens first
|
|
37
|
-
tokens.push(...extractPathTokens(text));
|
|
40
|
+
// Extract file path tokens first.
|
|
41
|
+
// Avoid `tokens.push(...arr)` — large arrays exceed V8's argument
|
|
42
|
+
// count limit and throw RangeError.
|
|
43
|
+
// See https://github.com/chigichan24/crune/issues/18
|
|
44
|
+
for (const t of extractPathTokens(text))
|
|
45
|
+
tokens.push(t);
|
|
38
46
|
// Split on whitespace, punctuation, CJK boundaries
|
|
39
47
|
const words = text
|
|
40
48
|
.replace(/[`'"{}()[\]<>;:,!?@#$%^&*=+|\\~]/g, " ")
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chigichan24/crune",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"tsx": "^4.21.0",
|
|
45
45
|
"typescript": "~5.9.3",
|
|
46
46
|
"typescript-eslint": "^8.56.1",
|
|
47
|
-
"vite": "^8.0.
|
|
47
|
+
"vite": "^8.0.10",
|
|
48
48
|
"vitest": "^3.2.4"
|
|
49
49
|
}
|
|
50
50
|
}
|