npm - @softerist/heuristic-mcp - Versions diffs - 3.0.15 → 3.0.16 - Mend

@softerist/heuristic-mcp 3.0.15 → 3.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/README.md +104 -104
package/config.jsonc +173 -173
package/features/ann-config.js +131 -0
package/features/clear-cache.js +84 -0
package/features/find-similar-code.js +291 -0
package/features/hybrid-search.js +544 -0
package/features/index-codebase.js +3268 -0
package/features/lifecycle.js +1189 -0
package/features/package-version.js +302 -0
package/features/register.js +408 -0
package/features/resources.js +156 -0
package/features/set-workspace.js +265 -0
package/index.js +96 -96
package/lib/cache-ops.js +22 -22
package/lib/cache-utils.js +565 -565
package/lib/cache.js +1870 -1870
package/lib/call-graph.js +396 -396
package/lib/cli.js +1 -1
package/lib/config.js +517 -517
package/lib/constants.js +39 -39
package/lib/embed-query-process.js +7 -7
package/lib/embedding-process.js +7 -7
package/lib/embedding-worker.js +299 -299
package/lib/ignore-patterns.js +316 -316
package/lib/json-worker.js +14 -14
package/lib/json-writer.js +337 -337
package/lib/logging.js +164 -164
package/lib/memory-logger.js +13 -13
package/lib/onnx-backend.js +193 -193
package/lib/project-detector.js +84 -84
package/lib/server-lifecycle.js +165 -165
package/lib/settings-editor.js +754 -754
package/lib/tokenizer.js +256 -256
package/lib/utils.js +428 -428
package/lib/vector-store-binary.js +627 -627
package/lib/vector-store-sqlite.js +95 -95
package/lib/workspace-env.js +28 -28
package/mcp_config.json +9 -9
package/package.json +86 -75
package/scripts/clear-cache.js +20 -0
package/scripts/download-model.js +43 -0
package/scripts/mcp-launcher.js +49 -0
package/scripts/postinstall.js +12 -0
package/search-configs.js +36 -36
package/.prettierrc +0 -7
package/debug-pids.js +0 -30
package/eslint.config.js +0 -36
package/specs/plan.md +0 -23
package/vitest.config.js +0 -39

package/features/ann-config.js ADDED Viewed

@@ -0,0 +1,131 @@
+/**
+ * ANN Config Tool - Runtime tuning of ANN search parameters
+ *
+ * Allows adjusting efSearch on the fly for speed/accuracy tradeoff,
+ * and querying current ANN index statistics.
+ */
+export class AnnConfigTool {
+  /**
+   * @param {object} cache - Object providing getAnnStats(), setEfSearch(),
+   *   invalidateAnnIndex() and ensureAnnIndex().
+   * @param {object} config - Configuration object; stored but not read by this class.
+   */
+  constructor(cache, config) {
+    this.cache = cache;
+    this.config = config;
+  }
+  /**
+   * Run one ANN action: report stats, change efSearch, or rebuild the index.
+   *
+   * @param {{ action?: string, efSearch?: number }} [args] - Tool arguments. A missing
+   *   or empty `action` is treated as 'stats'.
+   * @returns {Promise<object>} The cache's own return value for 'stats' and
+   *   'set_ef_search'; `{ success, message }` for 'rebuild' (with `error` added on
+   *   failure); `{ success: false, error }` for invalid requests.
+   */
+  async execute(args) {
+    // Tolerate a missing argument object so a bare call behaves like 'stats'.
+    const { action: requestedAction, efSearch } = args ?? {};
+    const action = requestedAction || 'stats';
+    switch (action) {
+      case 'stats':
+        return this.cache.getAnnStats();
+      case 'set_ef_search':
+        // `== null` rejects both undefined and an explicit null.
+        if (efSearch == null) {
+          return {
+            success: false,
+            error: 'efSearch parameter is required for set_ef_search action',
+          };
+        }
+        return this.cache.setEfSearch(efSearch);
+      case 'rebuild': {
+        // Force invalidate and rebuild the ANN index
+        this.cache.invalidateAnnIndex();
+        const index = await this.cache.ensureAnnIndex();
+        if (!index) {
+          const reason = 'ANN index rebuild failed or not available';
+          // `error` is what formatResults() prints; `message` is kept for callers
+          // that already read it.
+          return { success: false, error: reason, message: reason };
+        }
+        return { success: true, message: 'ANN index rebuilt successfully' };
+      }
+      default:
+        return {
+          success: false,
+          error: `Unknown action: ${action}. Valid actions: stats, set_ef_search, rebuild`,
+        };
+    }
+  }
+  /**
+   * Render an execute() result as text for the MCP response.
+   *
+   * @param {object} result - Value returned by execute().
+   * @returns {string} "Error: ..." for failures, a Markdown report for stats
+   *   results (recognized by an `enabled` field), pretty-printed JSON otherwise.
+   */
+  formatResults(result) {
+    if (result.success === false) {
+      // Fall back to `message` so a failure without `error` never prints "undefined".
+      return `Error: ${result.error ?? result.message ?? 'Unknown error'}`;
+    }
+    if (result.enabled !== undefined) {
+      // Stats response
+      let output = '## ANN Index Statistics\n\n';
+      output += `- **Enabled**: ${result.enabled}\n`;
+      output += `- **Index Loaded**: ${result.indexLoaded}\n`;
+      output += `- **Dirty (needs rebuild)**: ${result.dirty}\n`;
+      output += `- **Vector Count**: ${result.vectorCount}\n`;
+      output += `- **Min Chunks for ANN**: ${result.minChunksForAnn}\n`;
+      if (result.config) {
+        output += '\n### Current Config\n\n';
+        output += `- **Metric**: ${result.config.metric}\n`;
+        output += `- **Dimensions**: ${result.config.dim}\n`;
+        output += `- **Indexed Vectors**: ${result.config.count}\n`;
+        output += `- **M (connectivity)**: ${result.config.m}\n`;
+        output += `- **efConstruction**: ${result.config.efConstruction}\n`;
+        output += `- **efSearch**: ${result.config.efSearch}\n`;
+      } else {
+        output += '\n*No active ANN index.*\n';
+      }
+      return output;
+    }
+    // Other responses (set_ef_search, rebuild)
+    return JSON.stringify(result, null, 2);
+  }
+}
+// MCP Tool definition
+/**
+ * Build the MCP descriptor for the ANN configuration tool.
+ *
+ * @returns {object} Tool name, description, JSON input schema and annotations.
+ */
+export function getToolDefinition() {
+  const supportedActions = ['stats', 'set_ef_search', 'rebuild'];
+  const actionProperty = {
+    type: 'string',
+    enum: supportedActions,
+    description:
+      "Action to perform. 'stats' shows current config, 'set_ef_search' changes the search parameter, 'rebuild' forces index rebuild.",
+    default: 'stats',
+  };
+  const efSearchProperty = {
+    type: 'number',
+    description:
+      'New efSearch value (only for set_ef_search action). Higher = more accurate but slower. Typical range: 16-512.',
+    minimum: 1,
+    maximum: 1000,
+  };
+  const annotations = {
+    title: 'ANN Index Configuration',
+    readOnlyHint: false,
+    destructiveHint: false,
+    idempotentHint: true,
+    openWorldHint: false,
+  };
+  return {
+    name: 'd_ann_config',
+    description:
+      "Configure and monitor the ANN (Approximate Nearest Neighbor) search index. Actions: 'stats' (view current config), 'set_ef_search' (tune search accuracy/speed), 'rebuild' (force index rebuild).",
+    inputSchema: {
+      type: 'object',
+      properties: { action: actionProperty, efSearch: efSearchProperty },
+    },
+    annotations,
+  };
+}
+// Tool handler
+/**
+ * MCP handler for the ANN configuration tool.
+ *
+ * @param {object} request - Tool-call request; arguments are read from
+ *   `request.params.arguments` and default to `{}` when absent.
+ * @param {object} annConfigTool - Object providing `execute(args)` and
+ *   `formatResults(result)`.
+ * @returns {Promise<object>} A response with one text content item. `isError: true`
+ *   is added when the tool reports `success === false`.
+ */
+export async function handleToolCall(request, annConfigTool) {
+  // Optional chaining: a request without `params` falls back to the default action
+  // instead of throwing a TypeError.
+  const args = request.params?.arguments || {};
+  const result = await annConfigTool.execute(args);
+  const response = {
+    content: [{ type: 'text', text: annConfigTool.formatResults(result) }],
+  };
+  if (result.success === false) {
+    // Flag failures so clients can tell them apart from normal output.
+    response.isError = true;
+  }
+  return response;
+}

package/features/clear-cache.js ADDED Viewed

@@ -0,0 +1,84 @@
+/**
+ * Clears the cache through `cache.clear()`, refusing to start while indexing,
+ * saving, or another clear is under way.
+ */
+export class CacheClearer {
+  /**
+   * @param {*} embedder - Accepted for signature compatibility; not stored.
+   * @param {object} cache - Object exposing `isSaving` and an async `clear()`.
+   * @param {object} config - Object whose `cacheDirectory` is echoed in the result.
+   * @param {object} [indexer] - Optional object exposing `isIndexing`.
+   */
+  constructor(embedder, cache, config, indexer) {
+    Object.assign(this, { cache, config, indexer, isClearing: false });
+  }
+  /**
+   * Clear the cache if nothing else is using it.
+   *
+   * @returns {Promise<{success: boolean, message: string, cacheDirectory: *}>}
+   * @throws {Error} When indexing, saving, or another clear is in progress; any
+   *   rejection from `cache.clear()` also propagates.
+   */
+  async execute() {
+    // Evaluated lazily and in order, so the first busy condition wins.
+    const busyChecks = [
+      [
+        () => this.indexer?.isIndexing,
+        'Cannot clear cache while indexing is in progress. Please wait for indexing to complete.',
+      ],
+      [
+        () => this.cache.isSaving,
+        'Cannot clear cache while cache is being saved. Please try again in a moment.',
+      ],
+      [
+        () => this.isClearing,
+        'Cache clear operation already in progress. Please wait for it to complete.',
+      ],
+    ];
+    const blocking = busyChecks.find(([isBusy]) => isBusy());
+    if (blocking) {
+      throw new Error(blocking[1]);
+    }
+    this.isClearing = true;
+    try {
+      await this.cache.clear();
+    } finally {
+      // Always release the flag, even when clear() rejects.
+      this.isClearing = false;
+    }
+    return {
+      success: true,
+      message: 'Cache cleared successfully. Next indexing will be a full rebuild.',
+      cacheDirectory: this.config.cacheDirectory,
+    };
+  }
+}
+/**
+ * Build the MCP descriptor for the clear-cache tool.
+ *
+ * @returns {object} Tool name, description, an input schema with no properties,
+ *   and annotations.
+ */
+export function getToolDefinition() {
+  const inputSchema = { type: 'object', properties: {} };
+  const annotations = {
+    title: 'Clear Embeddings Cache',
+    readOnlyHint: false,
+    destructiveHint: true,
+    idempotentHint: true,
+    openWorldHint: false,
+  };
+  return {
+    name: 'c_clear_cache',
+    description:
+      'Clears the embeddings cache, forcing a complete reindex on next search or manual index operation. Useful when encountering cache corruption or after major codebase changes.',
+    inputSchema,
+    annotations,
+  };
+}
+/**
+ * MCP handler for the clear-cache tool.
+ *
+ * @param {object} request - Tool-call request; not read by this handler.
+ * @param {object} cacheClearer - Object providing an async `execute()` that resolves
+ *   to `{ message, cacheDirectory }`.
+ * @returns {Promise<object>} A response with one text content item. When `execute()`
+ *   throws, the text describes the failure and `isError: true` is set.
+ */
+export async function handleToolCall(request, cacheClearer) {
+  try {
+    const { message, cacheDirectory } = await cacheClearer.execute();
+    return {
+      content: [
+        {
+          type: 'text',
+          text: `${message}\n\nCache directory: ${cacheDirectory}`,
+        },
+      ],
+    };
+  } catch (error) {
+    // Non-Error throws have no `.message`; stringify them rather than print "undefined".
+    const reason = error?.message ?? String(error);
+    return {
+      content: [
+        {
+          type: 'text',
+          text: `Failed to clear cache: ${reason}`,
+        },
+      ],
+      // Flag the failure so clients do not have to parse the text to detect it.
+      isError: true,
+    };
+  }
+}

package/features/find-similar-code.js ADDED Viewed

@@ -0,0 +1,291 @@
+import path from 'path';
+import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
+/**
+ * FindSimilarCode feature
+ * Given a code snippet, finds similar patterns elsewhere in the codebase
+ *
+ * The snippet is embedded with the injected `embedder`, scored against cached
+ * chunk vectors with `dotSimilarity`, and the highest-scoring chunks at or above
+ * a similarity threshold are returned.
+ */
+export class FindSimilarCode {
+  /**
+   * @param {Function} embedder - Async callable invoked as
+   *   `embedder(text, { pooling, normalize })`; its result must expose `.data`
+   *   and may expose `.dispose()`.
+   * @param {object} cache - Provides getVectorStore(), getChunkContent(),
+   *   getChunkVector() and queryAnn(); ensureLoaded(), startRead() and endRead()
+   *   are used only when present.
+   * @param {object} config - Reads embeddingModel, annEnabled, annMinCandidates,
+   *   annMaxCandidates, annCandidateMultiplier and searchDirectory; the whole
+   *   object is also passed to smartChunk().
+   */
+  constructor(embedder, cache, config) {
+    this.embedder = embedder;
+    this.cache = cache;
+    this.config = config;
+  }
+  /** Fetch a chunk's text by delegating to the cache. */
+  async getChunkContent(chunk) {
+    return this.cache.getChunkContent(chunk);
+  }
+  /** Fetch a chunk's embedding vector by delegating to the cache. */
+  getChunkVector(chunk) {
+    return this.cache.getChunkVector(chunk);
+  }
+  /**
+   * Decide how many ANN candidates to request.
+   *
+   * Starts from `maxResults * annCandidateMultiplier` (rounded up), raises it to
+   * `annMinCandidates`, caps it at `annMaxCandidates`, and finally keeps it within
+   * `[maxResults, totalChunks]`. Unset config values default to 0, `totalChunks`
+   * and 1 respectively.
+   *
+   * @param {number} maxResults - Number of results the caller wants.
+   * @param {number} totalChunks - Size of the vector store.
+   * @returns {number} Candidate count, never more than `totalChunks`.
+   */
+  getAnnCandidateCount(maxResults, totalChunks) {
+    const minCandidates = this.config.annMinCandidates ?? 0;
+    const maxCandidates = this.config.annMaxCandidates ?? totalChunks;
+    const multiplier = this.config.annCandidateMultiplier ?? 1;
+    const desired = Math.max(minCandidates, Math.ceil(maxResults * multiplier));
+    const capped = Math.min(maxCandidates, desired);
+    return Math.min(totalChunks, Math.max(maxResults, capped));
+  }
+  /**
+   * Find indexed chunks similar to `code`.
+   *
+   * @param {object} params
+   * @param {string} params.code - Snippet to search for; must be non-blank.
+   * @param {number} [params.maxResults=5] - Non-finite or non-positive values fall
+   *   back to 5; fractional values are floored.
+   * @param {number} [params.minSimilarity=0.3] - Clamped to [0, 1]; non-finite
+   *   values fall back to 0.3.
+   * @returns {Promise<{results: object[], message: (string|null)}>} Matching chunks
+   *   (each with `similarity` and `content`) in descending similarity order.
+   *   `message` carries an error, a warning, or the "no results" notice, and can be
+   *   non-null while `results` is non-empty.
+   */
+  async execute({ code, maxResults = 5, minSimilarity = 0.3 }) {
+    if (typeof code !== 'string' || code.trim().length === 0) {
+      return {
+        results: [],
+        message: 'Error: A non-empty code string is required.',
+      };
+    }
+    const safeMaxResults =
+      Number.isFinite(maxResults) && maxResults > 0 ? Math.floor(maxResults) : 5;
+    const safeMinSimilarity = Number.isFinite(minSimilarity)
+      ? Math.min(1, Math.max(0, minSimilarity))
+      : 0.3;
+    // ensureLoaded/startRead/endRead are optional on the cache, hence the typeof checks.
+    if (typeof this.cache.ensureLoaded === 'function') {
+      await this.cache.ensureLoaded();
+    }
+    if (typeof this.cache.startRead === 'function') {
+      this.cache.startRead();
+    }
+    // Everything after startRead() runs inside try/finally so endRead() is always called.
+    try {
+      const vectorStore = this.cache.getVectorStore();
+      if (vectorStore.length === 0) {
+        return {
+          results: [],
+          message: 'No code has been indexed yet. Please wait for initial indexing to complete.',
+        };
+      }
+      let codeToEmbed = code;
+      let warningMessage = null;
+      // Check if input is too large and truncate intelligently
+      const estimatedTokens = estimateTokens(code);
+      const limit = getModelTokenLimit(this.config.embeddingModel);
+      // If input is significantly larger than the model limit, we should chunk it
+      if (estimatedTokens > limit) {
+        // Use smartChunk to get a semantically valid first block
+        // We pass a dummy file name to trigger language detection if possible, or default to .txt
+        // Since we don't know the language, we'll try to guess or just use generic chunking
+        const chunks = smartChunk(code, 'input.txt', this.config);
+        if (chunks.length > 0) {
+          // Only the first chunk is searched; the rest of the input is ignored.
+          codeToEmbed = chunks[0].text;
+          warningMessage = `Note: Input code was too long (${estimatedTokens} tokens). Searching using the first chunk (${chunks[0].tokenCount} tokens).`;
+        }
+      }
+      // Generate embedding for the input code
+      const codeEmbed = await this.embedder(codeToEmbed, {
+        pooling: 'mean',
+        normalize: true,
+      });
+      // CRITICAL: Deep copy Float32Array to avoid detachment issues with WASM/Workers
+      // accessing a detached buffer from a reusable ONNX tensor can crash the process.
+      let codeVector;
+      try {
+        codeVector = new Float32Array(codeEmbed.data);
+      } finally {
+        // Release the embedder output whether or not the copy succeeded; a failing
+        // dispose() is deliberately ignored.
+        if (typeof codeEmbed.dispose === 'function') {
+          try {
+            codeEmbed.dispose();
+          } catch {
+            /* ignore */
+          }
+        }
+      }
+      // Default to scoring every chunk; narrowed to ANN candidates below when possible.
+      let candidates = vectorStore;
+      let usedAnn = false;
+      if (this.config.annEnabled) {
+        const candidateCount = this.getAnnCandidateCount(safeMaxResults, vectorStore.length);
+        const annLabels = await this.cache.queryAnn(codeVector, candidateCount);
+        // ANN output is used only when it returned at least as many labels as requested results.
+        if (annLabels && annLabels.length >= safeMaxResults) {
+          usedAnn = true;
+          const seen = new Set();
+          // Labels are treated as indexes into vectorStore; duplicates and labels
+          // with no matching entry are dropped.
+          candidates = annLabels
+            .map((index) => {
+              if (seen.has(index)) return null;
+              seen.add(index);
+              return vectorStore[index];
+            })
+            .filter(Boolean);
+        }
+      }
+      // Whitespace-insensitive form, used to drop chunks identical to the query.
+      const normalizeText = (text) => text.trim().replace(/\s+/g, ' ');
+      const normalizedInput = normalizeText(codeToEmbed);
+      /**
+       * Batch scoring function to prevent blocking the event loop
+       */
+      const scoreAndFilter = async (chunks) => {
+        const BATCH_SIZE = 500;
+        const scored = [];
+        for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
+          const batch = chunks.slice(i, i + BATCH_SIZE);
+          // Yield to event loop between batches
+          if (i > 0) {
+            await new Promise((resolve) => setTimeout(resolve, 0));
+          }
+          for (const chunk of batch) {
+            const vector = this.getChunkVector(chunk);
+            if (!vector) continue;
+            let similarity;
+            try {
+              similarity = dotSimilarity(codeVector, vector);
+            } catch (err) {
+              // The chunk is skipped and only the first failure is reported; an
+              // earlier warning (e.g. the truncation note) is not overwritten.
+              // Presumably dotSimilarity throws on a dimension mismatch - TODO confirm.
+              if (!warningMessage) {
+                warningMessage = err?.message || 'Vector dimension mismatch.';
+              }
+              continue;
+            }
+            if (similarity >= safeMinSimilarity) {
+              scored.push({ ...chunk, similarity });
+            }
+          }
+        }
+        return scored.sort((a, b) => b.similarity - a.similarity);
+      };
+      let filteredResults = await scoreAndFilter(candidates);
+      // Fallback to full scan if ANN didn't provide enough results
+      // Optimization: Skip full scan on large codebases to avoid long pauses
+      const MAX_FULL_SCAN_SIZE = 5000;
+      if (usedAnn && filteredResults.length < safeMaxResults) {
+        if (vectorStore.length <= MAX_FULL_SCAN_SIZE) {
+          filteredResults = await scoreAndFilter(vectorStore);
+        } else {
+          // Just return what we found via ANN
+        }
+      }
+      const results = [];
+      for (const chunk of filteredResults) {
+        // Content is fetched lazily, one chunk at a time, until enough results are collected.
+        const content = chunk.content ?? (await this.getChunkContent(chunk));
+        if (normalizedInput) {
+          // NOTE(review): normalizeText() calls content.trim(), so this throws if the
+          // cache yields non-string content - confirm getChunkContent always returns a string.
+          const normalizedChunk = normalizeText(content);
+          // Skip chunks that equal the query once whitespace is normalized.
+          if (normalizedChunk === normalizedInput) continue;
+        }
+        results.push({ ...chunk, content });
+        if (results.length >= safeMaxResults) break;
+      }
+      return {
+        results,
+        message:
+          warningMessage ||
+          (results.length === 0 ? 'No similar code found above the similarity threshold.' : null),
+      };
+    } finally {
+      if (typeof this.cache.endRead === 'function') {
+        this.cache.endRead();
+      }
+    }
+  }
+  /**
+   * Render results as Markdown, one section per match.
+   *
+   * Each section shows the similarity as a percentage, the file path relative to
+   * `config.searchDirectory`, the line range, and the content in a code fence
+   * tagged with the file extension.
+   *
+   * @param {object[]} results - Items with file, startLine, endLine, similarity and
+   *   optionally content (loaded from the cache when absent).
+   * @returns {Promise<string>} Markdown text, or a fixed notice for an empty list.
+   */
+  async formatResults(results) {
+    if (results.length === 0) {
+      return 'No similar code patterns found in the codebase.';
+    }
+    const formatted = await Promise.all(
+      results.map(async (r, idx) => {
+        const relPath = path.relative(this.config.searchDirectory, r.file);
+        const content = r.content ?? (await this.getChunkContent(r));
+        return (
+          `## Similar Code ${idx + 1} (Similarity: ${(r.similarity * 100).toFixed(1)}%)\n` +
+          `**File:** \`${relPath}\`\n` +
+          `**Lines:** ${r.startLine}-${r.endLine}\n\n` +
+          '```' +
+          path.extname(r.file).slice(1) +
+          '\n' +
+          content +
+          '\n' +
+          '```\n'
+        );
+      })
+    );
+    return formatted.join('\n');
+  }
+}
+// MCP Tool definition
+/**
+ * Build the MCP descriptor for the find-similar-code tool.
+ *
+ * @param {object} [_config] - Accepted but not used.
+ * @returns {object} Tool name, description, JSON input schema (with `code`
+ *   required) and annotations.
+ */
+export function getToolDefinition(_config) {
+  const properties = {
+    code: {
+      type: 'string',
+      description: 'The code snippet to find similar patterns for',
+    },
+    maxResults: {
+      type: 'number',
+      description: 'Maximum number of similar code chunks to return (default: 5)',
+      default: 5,
+    },
+    minSimilarity: {
+      type: 'number',
+      description: 'Minimum similarity threshold 0-1 (default: 0.3 = 30%)',
+      default: 0.3,
+    },
+  };
+  const annotations = {
+    title: 'Find Similar Code',
+    readOnlyHint: true,
+    destructiveHint: false,
+    idempotentHint: true,
+    openWorldHint: false,
+  };
+  return {
+    name: 'd_find_similar_code',
+    description:
+      'Find similar code patterns in the codebase. Given a code snippet, returns other code chunks that are semantically similar. Useful for finding duplicate code, understanding patterns, and refactoring opportunities.',
+    inputSchema: { type: 'object', properties, required: ['code'] },
+    annotations,
+  };
+}
+// Tool handler
+/**
+ * MCP handler for the find-similar-code tool.
+ *
+ * @param {object} request - Tool-call request; arguments are read from
+ *   `request.params.arguments`. `code` must be a non-blank string; `maxResults`
+ *   and `minSimilarity` default to 5 and 0.3 when not numbers.
+ * @param {object} findSimilarCode - Object providing `execute({ code, maxResults,
+ *   minSimilarity })` and `formatResults(results)`.
+ * @returns {Promise<object>} A response with one text content item; `isError: true`
+ *   when `code` is missing or blank.
+ */
+export async function handleToolCall(request, findSimilarCode) {
+  const args = request.params?.arguments || {};
+  const code = args.code;
+  if (typeof code !== 'string' || code.trim().length === 0) {
+    return {
+      content: [{ type: 'text', text: 'Error: A non-empty code string is required.' }],
+      isError: true,
+    };
+  }
+  const maxResults = typeof args.maxResults === 'number' ? args.maxResults : 5;
+  const minSimilarity = typeof args.minSimilarity === 'number' ? args.minSimilarity : 0.3;
+  const { results, message } = await findSimilarCode.execute({
+    code,
+    maxResults,
+    minSimilarity,
+  });
+  const hasResults = Array.isArray(results) && results.length > 0;
+  if (message && !hasResults) {
+    // Nothing to show besides the notice (error, empty index, or no matches).
+    return {
+      content: [{ type: 'text', text: message }],
+    };
+  }
+  const formattedText = await findSimilarCode.formatResults(results);
+  // execute() can return a warning together with matches (e.g. the input was
+  // truncated to its first chunk). Show the warning above the matches instead of
+  // discarding them.
+  const text = message ? `${message}\n\n${formattedText}` : formattedText;
+  return {
+    content: [{ type: 'text', text }],
+  };
+}