npm - @softerist/heuristic-mcp - Versions diffs - 2.1.47 → 3.0.0 - Mend

@softerist/heuristic-mcp 2.1.47 → 3.0.0

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (109)

package/.agent/workflows/code-review.md +60 -0
package/.prettierrc +7 -0
package/ARCHITECTURE.md +105 -170
package/CONTRIBUTING.md +32 -113
package/GEMINI.md +73 -0
package/LICENSE +21 -21
package/README.md +161 -54
package/config.json +876 -75
package/debug-pids.js +27 -0
package/eslint.config.js +36 -0
package/features/ann-config.js +37 -26
package/features/clear-cache.js +28 -19
package/features/find-similar-code.js +142 -66
package/features/hybrid-search.js +253 -93
package/features/index-codebase.js +1455 -394
package/features/lifecycle.js +813 -180
package/features/register.js +58 -52
package/index.js +450 -306
package/lib/cache-ops.js +22 -0
package/lib/cache-utils.js +68 -0
package/lib/cache.js +1392 -587
package/lib/call-graph.js +165 -50
package/lib/cli.js +154 -0
package/lib/config.js +462 -121
package/lib/embedding-process.js +77 -0
package/lib/embedding-worker.js +545 -30
package/lib/ignore-patterns.js +61 -59
package/lib/json-worker.js +14 -0
package/lib/json-writer.js +344 -0
package/lib/logging.js +88 -0
package/lib/memory-logger.js +13 -0
package/lib/project-detector.js +13 -17
package/lib/server-lifecycle.js +38 -0
package/lib/settings-editor.js +645 -0
package/lib/tokenizer.js +207 -104
package/lib/utils.js +273 -198
package/lib/vector-store-binary.js +592 -0
package/mcp_config.example.json +13 -0
package/package.json +13 -2
package/scripts/clear-cache.js +6 -17
package/scripts/download-model.js +14 -9
package/scripts/postinstall.js +5 -5
package/search-configs.js +36 -0
package/test/ann-config.test.js +179 -0
package/test/ann-fallback.test.js +6 -6
package/test/binary-store.test.js +69 -0
package/test/cache-branches.test.js +120 -0
package/test/cache-errors.test.js +264 -0
package/test/cache-extra.test.js +300 -0
package/test/cache-helpers.test.js +205 -0
package/test/cache-hnsw-failure.test.js +40 -0
package/test/cache-json-worker.test.js +190 -0
package/test/cache-worker.test.js +102 -0
package/test/cache.test.js +443 -0
package/test/call-graph.test.js +103 -4
package/test/clear-cache.test.js +69 -68
package/test/code-review-workflow.test.js +50 -0
package/test/config.test.js +418 -0
package/test/coverage-gap.test.js +497 -0
package/test/coverage-maximizer.test.js +236 -0
package/test/debug-analysis.js +107 -0
package/test/embedding-model.test.js +173 -103
package/test/embedding-worker-extra.test.js +272 -0
package/test/embedding-worker.test.js +158 -0
package/test/features.test.js +139 -0
package/test/final-boost.test.js +271 -0
package/test/final-polish.test.js +183 -0
package/test/final.test.js +95 -0
package/test/find-similar-code.test.js +191 -0
package/test/helpers.js +92 -11
package/test/helpers.test.js +46 -0
package/test/hybrid-search-basic.test.js +62 -0
package/test/hybrid-search-branch.test.js +202 -0
package/test/hybrid-search-callgraph.test.js +229 -0
package/test/hybrid-search-extra.test.js +81 -0
package/test/hybrid-search.test.js +484 -71
package/test/index-cli.test.js +520 -0
package/test/index-codebase-batch.test.js +119 -0
package/test/index-codebase-branches.test.js +585 -0
package/test/index-codebase-core.test.js +1032 -0
package/test/index-codebase-edge-cases.test.js +254 -0
package/test/index-codebase-errors.test.js +132 -0
package/test/index-codebase-gap.test.js +239 -0
package/test/index-codebase-lines.test.js +151 -0
package/test/index-codebase-watcher.test.js +259 -0
package/test/index-codebase-zone.test.js +259 -0
package/test/index-codebase.test.js +371 -69
package/test/index-memory.test.js +220 -0
package/test/indexer-detailed.test.js +176 -0
package/test/integration.test.js +148 -92
package/test/json-worker.test.js +50 -0
package/test/lifecycle.test.js +541 -0
package/test/master.test.js +198 -0
package/test/perfection.test.js +349 -0
package/test/project-detector.test.js +65 -0
package/test/register.test.js +262 -0
package/test/tokenizer.test.js +55 -93
package/test/ultra-maximizer.test.js +116 -0
package/test/utils-branches.test.js +161 -0
package/test/utils-extra.test.js +116 -0
package/test/utils.test.js +131 -0
package/test/verify_fixes.js +76 -0
package/test/worker-errors.test.js +96 -0
package/test/worker-init.test.js +102 -0
package/test/worker_throttling.test.js +93 -0
package/tools/scripts/benchmark-search.js +95 -0
package/tools/scripts/cache-stats.js +71 -0
package/tools/scripts/manual-search.js +34 -0
package/vitest.config.js +19 -9

package/lib/utils.js CHANGED

@@ -1,31 +1,35 @@
-import crypto from "crypto";
-import path from "path";
-import { estimateTokens, getChunkingParams, getModelTokenLimit } from "./tokenizer.js";
+import crypto from 'crypto';
+import path from 'path';
+import { estimateTokens, getChunkingParams } from './tokenizer.js';
 // Re-export tokenizer utilities
-export { estimateTokens, getChunkingParams, getModelTokenLimit, MODEL_TOKEN_LIMITS } from "./tokenizer.js";
-/**
- * Calculate cosine similarity between two vectors
- */
-export function cosineSimilarity(a, b) {
-  let dot = 0, normA = 0, normB = 0;
-  for (let i = 0; i < a.length; i++) {
-    dot += a[i] * b[i];
-    normA += a[i] * a[i];
-    normB += b[i] * b[i];
-  }
-  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
-}
+export {
+  estimateTokens,
+  getChunkingParams,
+  getModelTokenLimit,
+  MODEL_TOKEN_LIMITS,
+} from './tokenizer.js';
 /**
  * Fast similarity for normalized vectors (dot product)
  */
 export function dotSimilarity(a, b) {
+  if (a.length !== b.length) return 0;
   let dot = 0;
-  for (let i = 0; i < a.length; i++) {
+  let i = 0;
+  const len = a.length;
+  const m = len % 4;
+  while (i < m) {
     dot += a[i] * b[i];
+    i++;
+  }
+  while (i < len) {
+    dot += a[i] * b[i] + a[i + 1] * b[i + 1] + a[i + 2] * b[i + 2] + a[i + 3] * b[i + 3];
+    i += 4;
   }
   return dot;
 }
@@ -33,9 +37,118 @@ export function dotSimilarity(a, b) {
  * Generate hash for file content to detect changes
  */
 export function hashContent(content) {
-  return crypto.createHash("md5").update(content).digest("hex");
+  return crypto.createHash('md5').update(content).digest('hex');
 }
+// Language-specific patterns for function/class detection
+const patterns = {
+  // JavaScript/TypeScript
+  js: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
+  jsx: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
+  ts: /^(export\s+)?(async\s+)?(function|class|const|let|var|interface|type)\s+\w+/,
+  tsx: /^(export\s+)?(async\s+)?(function|class|const|let|var|interface|type)\s+\w+/,
+  mjs: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
+  cjs: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
+  // Python
+  py: /^(class|def|async\s+def)\s+\w+/,
+  pyw: /^(class|def|async\s+def)\s+\w+/,
+  pyx: /^(cdef|cpdef|def|class)\s+\w+/, // Cython
+  // Java/Kotlin/Scala
+  java: /^(public|private|protected)?\s*(static\s+)?(class|interface|enum|void|int|String|boolean)\s+\w+/,
+  kt: /^(class|interface|object|fun|val|var)\s+\w+/,
+  kts: /^(class|interface|object|fun|val|var)\s+\w+/,
+  scala: /^(class|object|trait|def|val|var)\s+\w+/,
+  // C/C++
+  c: /^(struct|enum|union|void|int|char|float|double)\s+\w+/,
+  cpp: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
+  cc: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
+  cxx: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
+  h: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
+  hpp: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
+  hxx: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
+  // C#
+  cs: /^(public|private|protected)?\s*(static\s+)?(class|interface|struct|enum|void|int|string|bool)\s+\w+/,
+  csx: /^(public|private|protected)?\s*(static\s+)?(class|interface|struct|enum|void|int|string|bool)\s+\w+/,
+  // Go
+  go: /^(func|type|const|var)\s+\w+/,
+  // Rust
+  rs: /^(pub\s+)?(fn|struct|enum|trait|impl|const|static|mod)\s+\w+/,
+  // PHP
+  php: /^(class|interface|trait|function|const)\s+\w+/,
+  phtml: /^(<\?php|class|interface|trait|function)\s*/,
+  // Ruby
+  rb: /^(class|module|def)\s+\w+/,
+  rake: /^(class|module|def|task|namespace)\s+\w+/,
+  // Swift
+  swift: /^(class|struct|enum|protocol|func|var|let|extension)\s+\w+/,
+  // R
+  r: /^(\w+)\s*(<-|=)\s*function/,
+  R: /^(\w+)\s*(<-|=)\s*function/,
+  // Lua
+  lua: /^(function|local\s+function)\s+\w+/,
+  // Shell scripts
+  sh: /^(\w+\s*\(\)|function\s+\w+)/,
+  bash: /^(\w+\s*\(\)|function\s+\w+)/,
+  zsh: /^(\w+\s*\(\)|function\s+\w+)/,
+  fish: /^function\s+\w+/,
+  // CSS/Styles
+  css: /^(\.|#|@media|@keyframes|@font-face|\w+)\s*[{,]/,
+  scss: /^(\$\w+:|@mixin|@function|@include|\.|#|@media)\s*/,
+  sass: /^(\$\w+:|=\w+|\+\w+|\.|#|@media)\s*/,
+  less: /^(@\w+:|\.|#|@media)\s*/,
+  styl: /^(\$\w+\s*=|\w+\(|\.|#)\s*/,
+  // Markup/HTML
+  html: /^(<(div|section|article|header|footer|nav|main|aside|form|table|template|script|style)\b)/i,
+  htm: /^(<(div|section|article|header|footer|nav|main|aside|form|table|template|script|style)\b)/i,
+  xml: /^(<\w+|\s*<!\[CDATA\[)/,
+  svg: /^(<svg|<g|<path|<defs|<symbol)\b/,
+  // Config files
+  json: /^(\s*"[\w-]+"\s*:\s*[[{])/,
+  yaml: /^(\w[\w-]*:\s*[|>]?$|\w[\w-]*:\s*$)/,
+  yml: /^(\w[\w-]*:\s*[|>]?$|\w[\w-]*:\s*$)/,
+  toml: /^(\[\[?\w+\]?\]?|\w+\s*=)/,
+  ini: /^(\[\w+\]|\w+\s*=)/,
+  env: /^[A-Z_][A-Z0-9_]*=/,
+  // Makefile
+  makefile: /^([A-Za-z0-9_./-]+)\s*:(?!=)/,
+  mk: /^([A-Za-z0-9_./-]+)\s*:(?!=)/,
+  // Docker
+  dockerfile: /^(FROM|RUN|CMD|LABEL|EXPOSE|ENV|ADD|COPY|ENTRYPOINT|VOLUME|USER|WORKDIR|ARG|ONBUILD|STOPSIGNAL|HEALTHCHECK|SHELL)\s+/i,
+  // Documentation
+  md: /^(#{1,6}\s+|```|\*{3}|_{3})/,
+  mdx: /^(#{1,6}\s+|```|import\s+|export\s+)/,
+  txt: /^.{50,}/, // Split on long paragraphs
+  rst: /^(={3,}|-{3,}|~{3,}|\.\.\s+\w+::)/,
+  // Database
+  sql: /^(CREATE|ALTER|INSERT|UPDATE|DELETE|SELECT|DROP|GRANT|REVOKE|WITH|DECLARE|BEGIN|END)\s+/i,
+  // Perl
+  pl: /^(sub|package|use|require)\s+\w+/,
+  pm: /^(sub|package|use|require)\s+\w+/,
+  // Vim
+  vim: /^(function|command|autocmd|let\s+g:)\s*/,
+};
 /**
  * Intelligent chunking with token limit awareness
  * Tries to split by function/class boundaries while respecting token limits
@@ -46,118 +159,29 @@ export function hashContent(content) {
  * @returns {Array<{text: string, startLine: number, endLine: number, tokenCount: number}>}
  */
 export function smartChunk(content, file, config) {
-  const lines = content.split("\n");
+  const lines = content.split('\n');
   const chunks = [];
-  const ext = path.extname(file);
-  // Get model-specific chunking parameters
-  const { targetTokens, overlapTokens } = getChunkingParams(config.embeddingModel);
-  // Language-specific patterns for function/class detection
-  const patterns = {
-    // JavaScript/TypeScript
-    js: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
-    jsx: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
-    ts: /^(export\s+)?(async\s+)?(function|class|const|let|var|interface|type)\s+\w+/,
-    tsx: /^(export\s+)?(async\s+)?(function|class|const|let|var|interface|type)\s+\w+/,
-    mjs: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
-    cjs: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
-    // Python
-    py: /^(class|def|async\s+def)\s+\w+/,
-    pyw: /^(class|def|async\s+def)\s+\w+/,
-    pyx: /^(cdef|cpdef|def|class)\s+\w+/, // Cython
-    // Java/Kotlin/Scala
-    java: /^(public|private|protected)?\s*(static\s+)?(class|interface|enum|void|int|String|boolean)\s+\w+/,
-    kt: /^(class|interface|object|fun|val|var)\s+\w+/,
-    kts: /^(class|interface|object|fun|val|var)\s+\w+/,
-    scala: /^(class|object|trait|def|val|var)\s+\w+/,
-    // C/C++
-    c: /^(struct|enum|union|void|int|char|float|double)\s+\w+/,
-    cpp: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-    cc: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-    cxx: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-    h: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-    hpp: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-    hxx: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-    // C#
-    cs: /^(public|private|protected)?\s*(static\s+)?(class|interface|struct|enum|void|int|string|bool)\s+\w+/,
-    csx: /^(public|private|protected)?\s*(static\s+)?(class|interface|struct|enum|void|int|string|bool)\s+\w+/,
-    // Go
-    go: /^(func|type|const|var)\s+\w+/,
-    // Rust
-    rs: /^(pub\s+)?(fn|struct|enum|trait|impl|const|static|mod)\s+\w+/,
-    // PHP
-    php: /^(class|interface|trait|function|const)\s+\w+/,
-    phtml: /^(<\?php|class|interface|trait|function)\s*/,
-    // Ruby
-    rb: /^(class|module|def)\s+\w+/,
-    rake: /^(class|module|def|task|namespace)\s+\w+/,
-    // Swift
-    swift: /^(class|struct|enum|protocol|func|var|let|extension)\s+\w+/,
-    // R
-    r: /^(\w+)\s*(<-|=)\s*function/,
-    R: /^(\w+)\s*(<-|=)\s*function/,
-    // Lua
-    lua: /^(function|local\s+function)\s+\w+/,
-    // Shell scripts
-    sh: /^(\w+\s*\(\)|function\s+\w+)/,
-    bash: /^(\w+\s*\(\)|function\s+\w+)/,
-    zsh: /^(\w+\s*\(\)|function\s+\w+)/,
-    fish: /^function\s+\w+/,
-    // CSS/Styles
-    css: /^(\.|#|@media|@keyframes|@font-face|\w+)\s*[{,]/,
-    scss: /^(\$\w+:|@mixin|@function|@include|\.|#|@media)\s*/,
-    sass: /^(\$\w+:|=\w+|\+\w+|\.|#|@media)\s*/,
-    less: /^(@\w+:|\.|\#|@media)\s*/,
-    styl: /^(\$\w+\s*=|\w+\(|\.|\#)\s*/,
-    // Markup/HTML
-    html: /^(<(div|section|article|header|footer|nav|main|aside|form|table|template|script|style)\b)/i,
-    htm: /^(<(div|section|article|header|footer|nav|main|aside|form|table|template|script|style)\b)/i,
-    xml: /^(<\w+|\s*<!\[CDATA\[)/,
-    svg: /^(<svg|<g|<path|<defs|<symbol)\b/,
-    // Config files
-    json: /^(\s*"[\w-]+"\s*:\s*[\[{])/,
-    yaml: /^(\w[\w-]*:\s*[|>]?$|\w[\w-]*:\s*$)/,
-    yml: /^(\w[\w-]*:\s*[|>]?$|\w[\w-]*:\s*$)/,
-    toml: /^(\[\[?\w+\]?\]?|\w+\s*=)/,
-    ini: /^(\[\w+\]|\w+\s*=)/,
-    env: /^[A-Z_][A-Z0-9_]*=/,
-    // Documentation
-    md: /^(#{1,6}\s+|```|\*{3}|_{3})/,
-    mdx: /^(#{1,6}\s+|```|import\s+|export\s+)/,
-    txt: /^.{50,}/, // Split on long paragraphs
-    rst: /^(={3,}|-{3,}|~{3,}|\.\.\s+\w+::)/,
-    // Database
-    sql: /^(CREATE|ALTER|INSERT|UPDATE|DELETE|SELECT|DROP|GRANT|REVOKE|WITH|DECLARE|BEGIN|END)\s+/i,
-    // Perl
-    pl: /^(sub|package|use|require)\s+\w+/,
-    pm: /^(sub|package|use|require)\s+\w+/,
-    // Vim
-    vim: /^(function|command|autocmd|let\s+g:)\s*/,
-  };
-  const langPattern = patterns[ext.slice(1)] || patterns.js;
+  const ext = path.extname(file).toLowerCase();
+  const base = path.basename(file).toLowerCase();
+  // Get model-specific chunking parameters with optional user overrides
+  let { maxTokens, targetTokens, overlapTokens } = getChunkingParams(config.embeddingModel);
+  if (config.maxTokens) maxTokens = config.maxTokens;
+  if (config.targetTokens) targetTokens = config.targetTokens;
+  if (config.overlapTokens) overlapTokens = config.overlapTokens;
+  let langPattern = patterns[ext.slice(1)];
+  if (!langPattern) {
+    if (base === 'dockerfile') langPattern = patterns.dockerfile;
+    else if (base === 'makefile') langPattern = patterns.makefile;
+    else if (base.startsWith('.env')) langPattern = patterns.env;
+  }
+  if (!langPattern || typeof langPattern.test !== 'function') {
+    langPattern = patterns.js; // Default fallback
+  }
   let currentChunk = [];
   let chunkStartLine = 0;
   let currentTokenCount = 0;
   // Track bracket depth for better boundary detection
@@ -168,97 +192,145 @@ export function smartChunk(content, file, config) {
   let inComment = false;
   let stringChar = null; // ' or " or `
+  const splitOversizedLine = (line, lineTokens) => {
+    const charsPerToken = line.length / Math.max(1, lineTokens);
+    const segmentSize = Math.max(100, Math.floor(charsPerToken * targetTokens)); // Min 100 chars
+    const segments = [];
+    for (let start = 0; start < line.length; start += segmentSize) {
+      segments.push(line.slice(start, start + segmentSize));
+    }
+    return segments;
+  };
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i];
     const lineTokens = estimateTokens(line);
-    const trimmed = line.trim();
+    let j = 0;
     // Simple state tracking for heuristics (not a full parser)
     if (inComment) {
       // Look for end of block comment
-      if (line.includes('*/')) {
-        const parts = line.split('*/');
-        // If there's content after the comment, process it (simplified)
-        if (parts[parts.length - 1].trim().length > 0) {
-           inComment = false;
-           // Recursive call or continue logic would be better, but for heuristic this is fine
-           // We just assume the line is mixed and skip granular checks
-        } else {
-           inComment = false;
-        }
+      const endIdx = line.indexOf('*/');
+      if (endIdx !== -1) {
+        inComment = false;
+        j = endIdx + 2;
+      } else {
+        // Skip whole line
+        j = line.length;
       }
-    } else {
-      for (let j = 0; j < line.length; j++) {
-        const char = line[j];
-        const nextChar = line[j+1];
-        if (inString) {
-          if (char === '\\') {
-            j++; // Skip escaped char
-          } else if (char === stringChar) {
-            inString = false;
-            stringChar = null;
-          }
-        } else {
-          // Check for comment start
-          if (char === '/' && nextChar === '*') {
-            inComment = true;
-            j++;
-            // Check if it ends on same line
-            if (line.indexOf('*/', j) !== -1) {
-              inComment = false;
-              j = line.indexOf('*/', j) + 1;
-            } else {
-              break; // Rest of line is comment
-            }
-          } else if (char === '/' && nextChar === '/') {
-            break; // Skip rest of line (line comment)
-          } else if (char === '\'' || char === '"' || char === '`') {
-            inString = true;
-            stringChar = char;
+    }
+    const scanLine = j < line.length ? line.slice(j) : '';
+    const trimmed = scanLine.trim();
+    for (; j < line.length; j++) {
+      const char = line[j];
+      const nextChar = line[j + 1];
+      if (inString) {
+        if (char === '\\') {
+          j++; // Skip escaped char
+        } else if (char === stringChar) {
+          inString = false;
+          stringChar = null;
+        }
+      } else {
+        // Check for comment start
+        if (char === '/' && nextChar === '*') {
+          inComment = true;
+          j++;
+          // Check if it ends on same line
+          const endIdx = line.indexOf('*/', j);
+          if (endIdx !== -1) {
+            inComment = false;
+            j = endIdx + 1;
           } else {
-            // Only count brackets if not in string or comment
-            if (char === '{') braceDepth++;
-            else if (char === '}') braceDepth = Math.max(0, braceDepth - 1);
-            else if (char === '[') bracketDepth++;
-            else if (char === ']') bracketDepth = Math.max(0, bracketDepth - 1);
-            else if (char === '(') parenDepth++;
-            else if (char === ')') parenDepth = Math.max(0, parenDepth - 1);
+            break; // Rest of line is comment
           }
+        } else if (char === '/' && nextChar === '/') {
+          break; // Skip rest of line (line comment)
+        } else if (char === "'" || char === '"' || char === '`') {
+          inString = true;
+          stringChar = char;
+        } else {
+          // Only count brackets if not in string or comment
+          if (char === '{') braceDepth++;
+          else if (char === '}') braceDepth = Math.max(0, braceDepth - 1);
+          else if (char === '[') bracketDepth++;
+          else if (char === ']') bracketDepth = Math.max(0, bracketDepth - 1);
+          else if (char === '(') parenDepth++;
+          else if (char === ')') parenDepth = Math.max(0, parenDepth - 1);
         }
       }
     }
+    // Split lines that are too large to ever fit in a single chunk
+    if (lineTokens > maxTokens) {
+      if (currentChunk.length > 0) {
+        const chunkText = currentChunk.join('\n');
+        if (chunkText.trim().length > 20) {
+          chunks.push({
+            text: chunkText,
+            startLine: chunkStartLine + 1,
+            endLine: i,
+            tokenCount: currentTokenCount,
+          });
+        }
+      }
+      const parts = splitOversizedLine(line, lineTokens);
+      for (const part of parts) {
+        if (part.trim().length <= 20) continue;
+        chunks.push({
+          text: part,
+          startLine: i + 1,
+          endLine: i + 1,
+          tokenCount: estimateTokens(part),
+        });
+      }
+      currentChunk = [];
+      currentTokenCount = 0;
+      chunkStartLine = i + 1;
+      continue;
+    }
     // Check if adding this line would exceed token limit
-    const wouldExceedLimit = (currentTokenCount + lineTokens) > targetTokens;
+    const wouldExceedLimit = currentTokenCount + lineTokens > targetTokens;
     // Check if this is a good split point using multiple heuristics
     const matchesPattern = langPattern.test(trimmed);
-    const atTopLevel = braceDepth === 0 && bracketDepth === 0 && parenDepth === 0 && !inString && !inComment;
-    const startsAtColumn0 = line.length > 0 && /^\S/.test(line);
+    const atTopLevel =
+      braceDepth === 0 && bracketDepth === 0 && parenDepth === 0 && !inString && !inComment;
+    const startsAtColumn0 = scanLine.length > 0 && /^\S/.test(scanLine);
     const isEmptyLine = trimmed.length === 0;
-    const prevWasEmpty = i > 0 && currentChunk.length > 0 && currentChunk[currentChunk.length - 1].trim().length === 0;
-    const isCommentStart = /^\s*(\/\*\*|\/\/\s*[-=]{3,}|#\s*[-=]{3,})/.test(line);
+    const prevWasEmpty =
+      i > 0 && currentChunk.length > 0 && currentChunk[currentChunk.length - 1].trim().length === 0;
+    const isCommentStart = /^\s*(\/\*\*|\/\/\s*[-=]{3,}|#\s*[-=]{3,})/.test(scanLine);
-    const isGoodSplitPoint = currentChunk.length > 3 && (
-      (matchesPattern && (atTopLevel || braceDepth <= 1)) ||
-      (atTopLevel && startsAtColumn0 && !isEmptyLine) ||
-      (prevWasEmpty && (matchesPattern || isCommentStart))
-    );
+    const isGoodSplitPoint =
+      currentChunk.length > 3 &&
+      ((matchesPattern && (atTopLevel || braceDepth <= 1)) ||
+        (atTopLevel && startsAtColumn0 && !isEmptyLine) ||
+        (prevWasEmpty && (matchesPattern || isCommentStart)));
-    const shouldSplit = wouldExceedLimit || (isGoodSplitPoint && currentTokenCount > targetTokens * 0.6);
+    const shouldSplit =
+      wouldExceedLimit || (isGoodSplitPoint && currentTokenCount > targetTokens * 0.6);
     // Avoid splitting in weird states if possible
     const safeToSplit = (braceDepth <= 1 && !inString) || wouldExceedLimit;
     if (shouldSplit && safeToSplit && currentChunk.length > 0) {
-      const chunkText = currentChunk.join("\n");
+      const chunkText = currentChunk.join('\n');
       if (chunkText.trim().length > 20) {
         chunks.push({
           text: chunkText,
           startLine: chunkStartLine + 1,
           endLine: i,
-          tokenCount: currentTokenCount
+          tokenCount: currentTokenCount,
         });
       }
@@ -282,19 +354,22 @@ export function smartChunk(content, file, config) {
     currentChunk.push(line);
     currentTokenCount += lineTokens;
+    if (chunks.length >= (config.maxChunksPerFile || 1000)) {
+       // Hard limit to prevent memory explosion on minified/data files
+       break;
+    }
   }
   // Add remaining chunk
-  if (currentChunk.length > 0) {
-    const chunkText = currentChunk.join("\n");
-    if (chunkText.trim().length > 20) {
-      chunks.push({
-        text: chunkText,
-        startLine: chunkStartLine + 1,
-        endLine: lines.length,
-        tokenCount: currentTokenCount
-      });
-    }
+  const chunkText = currentChunk.join('\n');
+  if (chunkText.trim().length > 20) {
+    chunks.push({
+      text: chunkText,
+      startLine: chunkStartLine + 1,
+      endLine: lines.length,
+      tokenCount: currentTokenCount,
+    });
   }
   return chunks;