npm - @chigichan24/crune - Versions diffs - 0.1.5 → 0.1.6 - Mend

@chigichan24/crune 0.1.5 → 0.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist-cli/__tests__/tokenizer.test.js +17 -0
package/dist-cli/knowledge-graph/tokenizer.js +11 -3
package/package.json +1 -1

package/dist-cli/__tests__/tokenizer.test.js CHANGED Viewed

@@ -123,3 +123,20 @@ describe("tokenize", () => {
         expect(tokens).not.toContain("go");
     });
 });
+describe("tokenize - large input regression (Issue #18)", () => {
+    // Real Claude Code sessions can contain `ls -R` / `find` / `tree` dumps
+    // with hundreds of thousands of path segments. `tokens.push(...arr)` on
+    // such an array hits V8's argument count limit and throws
+    // `RangeError: Maximum call stack size exceeded` even though it is not
+    // recursion. See https://github.com/chigichan24/crune/issues/18.
+    it("extractPathTokens does not throw on a huge corpus of file paths", () => {
+        const pathSegments = Array.from({ length: 100_000 }, (_, i) => `/dir${i}/file${i}.ts`);
+        const text = pathSegments.join(" ");
+        expect(() => extractPathTokens(text)).not.toThrow();
+    });
+    it("tokenize does not throw on a huge corpus of file paths", () => {
+        const pathSegments = Array.from({ length: 100_000 }, (_, i) => `/dir${i}/file${i}.ts`);
+        const text = pathSegments.join(" ");
+        expect(() => tokenize(text)).not.toThrow();
+    });
+});

package/dist-cli/knowledge-graph/tokenizer.js CHANGED Viewed

@@ -18,7 +18,11 @@ export function extractPathTokens(text) {
         for (const seg of segments) {
             const name = seg.replace(/\.[^.]+$/, ""); // remove extension
             if (name.length > 2) {
-                tokens.push(...splitCamelCase(name));
+                // Avoid `tokens.push(...arr)` — large arrays exceed V8's argument
+                // count limit and throw RangeError.
+                // See https://github.com/chigichan24/crune/issues/18
+                for (const t of splitCamelCase(name))
+                    tokens.push(t);
             }
         }
     }
@@ -33,8 +37,12 @@ export function isNoiseToken(token) {
 }
 export function tokenize(text) {
     const tokens = [];
-    // Extract file path tokens first
-    tokens.push(...extractPathTokens(text));
+    // Extract file path tokens first.
+    // Avoid `tokens.push(...arr)` — large arrays exceed V8's argument
+    // count limit and throw RangeError.
+    // See https://github.com/chigichan24/crune/issues/18
+    for (const t of extractPathTokens(text))
+        tokens.push(t);
     // Split on whitespace, punctuation, CJK boundaries
     const words = text
         .replace(/[`'"{}()[\]<>;:,!?@#$%^&*=+|\\~]/g, " ")

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@chigichan24/crune",
-  "version": "0.1.5",
+  "version": "0.1.6",
   "type": "module",
   "publishConfig": {
     "access": "public"