npm - @softerist/heuristic-mcp - Versions diffs - 3.0.17 → 3.1.0 - Mend

@softerist/heuristic-mcp 3.0.17 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/config.jsonc +23 -6
package/features/ann-config.js +7 -14
package/features/clear-cache.js +3 -3
package/features/find-similar-code.js +17 -22
package/features/hybrid-search.js +59 -67
package/features/index-codebase.js +305 -268
package/features/lifecycle.js +370 -176
package/features/package-version.js +15 -26
package/features/register.js +75 -57
package/features/resources.js +21 -47
package/features/set-workspace.js +31 -43
package/index.js +818 -172
package/lib/cache-utils.js +95 -99
package/lib/cache.js +121 -166
package/lib/cli.js +246 -238
package/lib/config.js +232 -62
package/lib/constants.js +22 -2
package/lib/embed-query-process.js +13 -29
package/lib/embedding-process.js +29 -19
package/lib/embedding-worker.js +166 -149
package/lib/ignore-patterns.js +39 -39
package/lib/json-writer.js +7 -34
package/lib/logging.js +11 -42
package/lib/onnx-backend.js +4 -4
package/lib/path-utils.js +4 -21
package/lib/project-detector.js +3 -3
package/lib/server-lifecycle.js +109 -15
package/lib/settings-editor.js +25 -18
package/lib/slice-normalize.js +6 -16
package/lib/tokenizer.js +56 -109
package/lib/utils.js +62 -81
package/lib/vector-store-binary.js +7 -7
package/lib/vector-store-sqlite.js +35 -67
package/lib/workspace-cache-key.js +36 -0
package/lib/workspace-env.js +55 -14
package/package.json +86 -86

package/lib/utils.js (changed)

@@ -2,7 +2,7 @@ import crypto from 'crypto';
 import path from 'path';
 import { estimateTokens, getChunkingParams } from './tokenizer.js';
-// Re-export tokenizer utilities
 export {
   estimateTokens,
   getChunkingParams,
@@ -10,19 +10,10 @@ export {
   MODEL_TOKEN_LIMITS,
 } from './tokenizer.js';
-// Minimum text length for a chunk to be considered valid (avoids tiny fragments)
 import { MIN_CHUNK_TEXT_LENGTH } from './constants.js';
-/**
- * Fast similarity for normalized vectors (dot product).
- * Uses loop unrolling for performance on large vectors.
- * NOTE: For very large codebases (10k+ chunks), consider WebAssembly SIMD
- * for ~2-4x speedup on 768-dim vectors.
- * @param {Float32Array} a - First normalized vector
- * @param {Float32Array} b - Second normalized vector
- * @returns {number} Dot product similarity score (-1 to 1 for normalized vectors)
- * @throws {Error} If vectors are null/undefined or have different dimensions
- */
 export function dotSimilarity(a, b) {
   if (!a || !b) {
     throw new Error(
@@ -54,16 +45,14 @@ export function dotSimilarity(a, b) {
   return dot;
 }
-/**
- * Generate hash for file content to detect changes
- */
 export function hashContent(content) {
   return crypto.createHash('md5').update(content).digest('hex');
 }
-// Language-specific patterns for function/class detection
 const patterns = {
-  // JavaScript/TypeScript
   js: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
   jsx: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
   ts: /^(export\s+)?(async\s+)?(function|class|const|let|var|interface|type)\s+\w+/,
@@ -71,18 +60,18 @@ const patterns = {
   mjs: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
   cjs: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
-  // Python
   py: /^(class|def|async\s+def)\s+\w+/,
   pyw: /^(class|def|async\s+def)\s+\w+/,
-  pyx: /^(cdef|cpdef|def|class)\s+\w+/, // Cython
+  pyx: /^(cdef|cpdef|def|class)\s+\w+/,
-  // Java/Kotlin/Scala
   java: /^(public|private|protected)?\s*(static\s+)?(class|interface|enum|void|int|String|boolean)\s+\w+/,
   kt: /^(class|interface|object|fun|val|var)\s+\w+/,
   kts: /^(class|interface|object|fun|val|var)\s+\w+/,
   scala: /^(class|object|trait|def|val|var)\s+\w+/,
-  // C/C++
   c: /^(struct|enum|union|void|int|char|float|double)\s+\w+/,
   cpp: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
   cc: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
@@ -91,54 +80,54 @@ const patterns = {
   hpp: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
   hxx: /^(class|struct|namespace|template|void|int|bool)\s+\w+/,
-  // C#
   cs: /^(public|private|protected)?\s*(static\s+)?(class|interface|struct|enum|void|int|string|bool)\s+\w+/,
   csx: /^(public|private|protected)?\s*(static\s+)?(class|interface|struct|enum|void|int|string|bool)\s+\w+/,
-  // Go
   go: /^(func|type|const|var)\s+\w+/,
-  // Rust
   rs: /^(pub\s+)?(fn|struct|enum|trait|impl|const|static|mod)\s+\w+/,
-  // PHP
   php: /^(class|interface|trait|function|const)\s+\w+/,
   phtml: /^(<\?php|class|interface|trait|function)\s*/,
-  // Ruby
   rb: /^(class|module|def)\s+\w+/,
   rake: /^(class|module|def|task|namespace)\s+\w+/,
-  // Swift
   swift: /^(class|struct|enum|protocol|func|var|let|extension)\s+\w+/,
-  // R
   r: /^(\w+)\s*(<-|=)\s*function/,
   R: /^(\w+)\s*(<-|=)\s*function/,
-  // Lua
   lua: /^(function|local\s+function)\s+\w+/,
-  // Shell scripts
   sh: /^(\w+\s*\(\)|function\s+\w+)/,
   bash: /^(\w+\s*\(\)|function\s+\w+)/,
   zsh: /^(\w+\s*\(\)|function\s+\w+)/,
   fish: /^function\s+\w+/,
-  // CSS/Styles
   css: /^(\.|#|@media|@keyframes|@font-face|\w+)\s*[{,]/,
   scss: /^(\$\w+:|@mixin|@function|@include|\.|#|@media)\s*/,
   sass: /^(\$\w+:|=\w+|\+\w+|\.|#|@media)\s*/,
   less: /^(@\w+:|\.|#|@media)\s*/,
   styl: /^(\$\w+\s*=|\w+\(|\.|#)\s*/,
-  // Markup/HTML
   html: /^(<(div|section|article|header|footer|nav|main|aside|form|table|template|script|style)\b)/i,
   htm: /^(<(div|section|article|header|footer|nav|main|aside|form|table|template|script|style)\b)/i,
   xml: /^(<\w+|\s*<!\[CDATA\[)/,
   svg: /^(<svg|<g|<path|<defs|<symbol)\b/,
-  // Config files
   json: /^(\s*"[\w-]+"\s*:\s*[[{])/,
   yaml: /^(\w[\w-]*:\s*[|>]?$|\w[\w-]*:\s*$)/,
   yml: /^(\w[\w-]*:\s*[|>]?$|\w[\w-]*:\s*$)/,
@@ -146,48 +135,40 @@ const patterns = {
   ini: /^(\[\w+\]|\w+\s*=)/,
   env: /^[A-Z_][A-Z0-9_]*=/,
-  // Makefile
   makefile: /^([A-Za-z0-9_./-]+)\s*:(?!=)/,
   mk: /^([A-Za-z0-9_./-]+)\s*:(?!=)/,
-  // Docker
   dockerfile:
     /^(FROM|RUN|CMD|LABEL|EXPOSE|ENV|ADD|COPY|ENTRYPOINT|VOLUME|USER|WORKDIR|ARG|ONBUILD|STOPSIGNAL|HEALTHCHECK|SHELL)\s+/i,
-  // Documentation
   md: /^(#{1,6}\s+|```|\*{3}|_{3})/,
   mdx: /^(#{1,6}\s+|```|import\s+|export\s+)/,
-  txt: /^.{50,}/, // Split on long paragraphs
+  txt: /^.{50,}/,
   rst: /^(={3,}|-{3,}|~{3,}|\.\.\s+\w+::)/,
-  // Database
   sql: /^(CREATE|ALTER|INSERT|UPDATE|DELETE|SELECT|DROP|GRANT|REVOKE|WITH|DECLARE|BEGIN|END)\s+/i,
-  // Perl
   pl: /^(sub|package|use|require)\s+\w+/,
   pm: /^(sub|package|use|require)\s+\w+/,
-  // Vim
   vim: /^(function|command|autocmd|let\s+g:)\s*/,
 };
-/**
- * Intelligent chunking with token limit awareness
- * Tries to split by function/class boundaries while respecting token limits
- *
- * @param {string} content - File content to chunk
- * @param {string} file - File path (for language detection)
- * @param {object} config - Configuration object with embeddingModel
- * @returns {Array<{text: string, startLine: number, endLine: number, tokenCount: number}>}
- */
 export function smartChunk(content, file, config) {
   const lines = content.split('\n');
   const chunks = [];
   const ext = path.extname(file).toLowerCase();
   const base = path.basename(file).toLowerCase();
-  const SPECIAL_TOKENS = 2; // [CLS] + [SEP] accounted once per chunk
+  const SPECIAL_TOKENS = 2;
-  // Get model-specific chunking parameters with optional user overrides
   let { maxTokens, targetTokens, overlapTokens } = getChunkingParams(config.embeddingModel);
   if (config.maxTokens) maxTokens = config.maxTokens;
   if (config.targetTokens) targetTokens = config.targetTokens;
@@ -200,25 +181,25 @@ export function smartChunk(content, file, config) {
     else if (base.startsWith('.env')) langPattern = patterns.env;
   }
   if (!langPattern || typeof langPattern.test !== 'function') {
-    langPattern = patterns.js; // Default fallback
+    langPattern = patterns.js;
   }
   let currentChunk = [];
   let chunkStartLine = 0;
-  let lineTokenCounts = []; // Cache token counts for overlap calculation
+  let lineTokenCounts = [];
   let currentTokenCount = 0;
-  // Track bracket depth for better boundary detection
   let bracketDepth = 0;
   let braceDepth = 0;
   let parenDepth = 0;
   let inString = false;
   let inComment = false;
-  let stringChar = null; // ' or " or `
+  let stringChar = null;
   const splitOversizedLine = (line, lineTokens) => {
     const charsPerToken = line.length / Math.max(1, lineTokens);
-    const segmentSize = Math.max(100, Math.floor(charsPerToken * targetTokens)); // Min 100 chars
+    const segmentSize = Math.max(100, Math.floor(charsPerToken * targetTokens));
     const segments = [];
     for (let start = 0; start < line.length; start += segmentSize) {
@@ -234,15 +215,15 @@ export function smartChunk(content, file, config) {
     let j = 0;
-    // Simple state tracking for heuristics (not a full parser)
     if (inComment) {
-      // Look for end of block comment
       const endIdx = line.indexOf('*/');
       if (endIdx !== -1) {
         inComment = false;
         j = endIdx + 2;
       } else {
-        // Skip whole line
         j = line.length;
       }
     }
@@ -256,31 +237,31 @@ export function smartChunk(content, file, config) {
       if (inString) {
         if (char === '\\') {
-          j++; // Skip escaped char
+          j++;
         } else if (char === stringChar) {
           inString = false;
           stringChar = null;
         }
       } else {
-        // Check for comment start
         if (char === '/' && nextChar === '*') {
           inComment = true;
           j++;
-          // Check if it ends on same line
           const endIdx = line.indexOf('*/', j);
           if (endIdx !== -1) {
             inComment = false;
             j = endIdx + 1;
           } else {
-            break; // Rest of line is comment
+            break;
           }
         } else if (char === '/' && nextChar === '/') {
-          break; // Skip rest of line (line comment)
+          break;
         } else if (char === "'" || char === '"' || char === '`') {
           inString = true;
           stringChar = char;
         } else {
-          // Only count brackets if not in string or comment
           if (char === '{') braceDepth++;
           else if (char === '}') braceDepth = Math.max(0, braceDepth - 1);
           else if (char === '[') bracketDepth++;
@@ -291,7 +272,7 @@ export function smartChunk(content, file, config) {
       }
     }
-    // Split lines that are too large to ever fit in a single chunk
     if (lineTokens + SPECIAL_TOKENS > maxTokens) {
       if (currentChunk.length > 0) {
         const chunkText = currentChunk.join('\n');
@@ -324,11 +305,11 @@ export function smartChunk(content, file, config) {
       continue;
     }
-    // Check if adding this line would exceed token limit
     const effectiveTokenCount = currentTokenCount + SPECIAL_TOKENS;
     const wouldExceedLimit = currentTokenCount + lineTokens + SPECIAL_TOKENS > targetTokens;
-    // Check if this is a good split point using multiple heuristics
     const matchesPattern = langPattern.test(trimmed);
     const atTopLevel =
       braceDepth === 0 && bracketDepth === 0 && parenDepth === 0 && !inString && !inComment;
@@ -347,7 +328,7 @@ export function smartChunk(content, file, config) {
     const shouldSplit =
       wouldExceedLimit || (isGoodSplitPoint && effectiveTokenCount > targetTokens * 0.6);
-    // Avoid splitting in weird states if possible
     const safeToSplit = (braceDepth <= 1 && !inString) || wouldExceedLimit;
     if (shouldSplit && safeToSplit && currentChunk.length > 0) {
@@ -363,8 +344,8 @@ export function smartChunk(content, file, config) {
       let overlapLines = [];
       let overlapTokensCount = 0;
-      let overlapStartOffset = 0;  // Track how many lines back we went
-      const MAX_OVERLAP_ITERATIONS = 50; // Absolute limit to prevent unbounded loops
+      let overlapStartOffset = 0;
+      const MAX_OVERLAP_ITERATIONS = 50;
       let overlapIterations = 0;
       for (
         let k = currentChunk.length - 1;
@@ -372,12 +353,12 @@ export function smartChunk(content, file, config) {
         k--
       ) {
         overlapIterations++;
-        // Use cached token count instead of re-estimating
         const lineT = lineTokenCounts[k] ?? 0;
-        // Guard against infinite loops: if lineT is 0, count the line but don't loop forever
         if (lineT <= 0) {
-          // Include zero-token lines (e.g., empty lines) but limit to prevent infinite spin
-          // Also guard with overlapStartOffset < 20 to prevent excessive lines even if under 10 in overlapLines
           if (overlapLines.length < 10 && overlapStartOffset < 20) {
             overlapLines.unshift(currentChunk[k]);
             overlapStartOffset++;
@@ -394,12 +375,12 @@ export function smartChunk(content, file, config) {
       }
       currentChunk = overlapLines;
-      // Rebuild lineTokenCounts for the overlap lines
       lineTokenCounts = overlapLines.map(l => estimateTokens(l, { includeSpecialTokens: false }));
       currentTokenCount = overlapTokensCount;
-      // The new chunk starts from where the overlap begins in the original file
-      // i is the current line we're about to process, overlap lines are from before
-      // Ensure non-negative to handle edge cases where overlapStartOffset > i
       chunkStartLine = Math.max(0, i - overlapStartOffset);
     }
@@ -408,12 +389,12 @@ export function smartChunk(content, file, config) {
     currentTokenCount += lineTokens;
     if (chunks.length >= (config.maxChunksPerFile || 1000)) {
-      // Hard limit to prevent memory explosion on minified/data files
       break;
     }
   }
-  // Add remaining chunk
   const chunkText = currentChunk.join('\n');
     if (chunkText.trim().length > MIN_CHUNK_TEXT_LENGTH) {
       chunks.push({

package/lib/vector-store-binary.js (changed)

@@ -164,7 +164,7 @@ export class BinaryVectorStore {
       try {
         await this.vectorsHandle.close();
       } catch {
-        // ignore close errors
       }
     }
     this.vectorsHandle = null;
@@ -172,7 +172,7 @@ export class BinaryVectorStore {
       try {
         fsSync.closeSync(this.vectorsFd);
       } catch {
-        // ignore close errors
       }
     }
     this.vectorsFd = null;
@@ -180,7 +180,7 @@ export class BinaryVectorStore {
       try {
         await this.contentHandle.close();
       } catch {
-        // ignore close errors
       }
     }
     this.contentHandle = null;
@@ -276,7 +276,7 @@ export class BinaryVectorStore {
         try {
           fsSync.closeSync(vectorsFd);
         } catch {
-          // ignore close errors
         }
       }
       throw err;
@@ -330,8 +330,8 @@ export class BinaryVectorStore {
         this.dim
       );
     } else if (Number.isInteger(this.vectorsFd)) {
-      // Use Buffer.alloc (not allocUnsafe) for safety - prevents potential
-      // information leak if read is partial or fails silently
       const buffer = Buffer.alloc(byteLength);
       const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
       if (bytesRead === byteLength) {
@@ -592,7 +592,7 @@ export class BinaryVectorStore {
         vectorPos += vectorBuffer.length;
         if (entry.contentLength > 0) {
-          // Re-fetch content to avoid holding all strings in memory
           const val = await resolveContent(chunk, sourceIndex);
           const contentBuffer = Buffer.from(val, 'utf-8');
           await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);