npm - smart-coding-mcp - Versions diffs - 1.2.4 → 1.3.1 - Mend

smart-coding-mcp 1.2.4 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +28 -168
package/config.json +4 -3
package/example.png +0 -0
package/features/clear-cache.js +30 -7
package/features/index-codebase.js +507 -37
package/how-its-works.png +0 -0
package/index.js +2 -2
package/lib/cache.js +5 -0
package/lib/config.js +29 -4
package/lib/embedding-worker.js +67 -0
package/lib/tokenizer.js +142 -0
package/lib/utils.js +113 -25
package/package.json +9 -3
package/test/clear-cache.test.js +288 -0
package/test/embedding-model.test.js +230 -0
package/test/helpers.js +128 -0
package/test/hybrid-search.test.js +243 -0
package/test/index-codebase.test.js +246 -0
package/test/integration.test.js +223 -0
package/test/tokenizer.test.js +225 -0
package/vitest.config.js +29 -0

package/features/index-codebase.js CHANGED Viewed

@@ -1,15 +1,244 @@
-import { glob } from "glob";
+import { fdir } from "fdir";
 import fs from "fs/promises";
 import chokidar from "chokidar";
 import path from "path";
+import os from "os";
+import { Worker } from "worker_threads";
+import { fileURLToPath } from "url";
 import { smartChunk, hashContent } from "../lib/utils.js";
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
 export class CodebaseIndexer {
-  constructor(embedder, cache, config) {
+  constructor(embedder, cache, config, server = null) { // server is optional; without it sendProgress() does nothing
     this.embedder = embedder;
     this.cache = cache;
     this.config = config;
+    this.server = server; // receiver of the notifications emitted by sendProgress(); null disables them
     this.watcher = null;
+    this.workers = []; // Worker instances created by initializeWorkers()
+    this.workerReady = []; // one readiness promise per worker pushed into this.workers
+    this.isIndexing = false; // set while indexAll() is running so a second call can bail out
+  }
+  /**
+   * Initialize worker thread pool for parallel embedding
+   */
+  async initializeWorkers() {
+    const numWorkers = this.config.workerThreads === "auto"
+      ? Math.max(1, os.cpus().length - 1)
+      : (this.config.workerThreads || 1);
+    // Only use workers if we have more than 1 CPU
+    if (numWorkers <= 1) {
+      console.error("[Indexer] Single-threaded mode (1 CPU detected)");
+      return;
+    }
+    if (this.config.verbose) {
+      console.error(`[Indexer] Worker config: workerThreads=${this.config.workerThreads}, resolved to ${numWorkers}`);
+    }
+    console.error(`[Indexer] Initializing ${numWorkers} worker threads...`);
+    const workerPath = path.join(__dirname, "../lib/embedding-worker.js");
+    for (let i = 0; i < numWorkers; i++) {
+      try {
+        const worker = new Worker(workerPath, {
+          workerData: {
+            embeddingModel: this.config.embeddingModel,
+            verbose: this.config.verbose
+          }
+        });
+        const readyPromise = new Promise((resolve, reject) => {
+          const timeout = setTimeout(() => reject(new Error("Worker init timeout")), 120000);
+          worker.once("message", (msg) => {
+            clearTimeout(timeout);
+            if (msg.type === "ready") {
+              resolve(worker);
+            } else if (msg.type === "error") {
+              reject(new Error(msg.error));
+            }
+          });
+          worker.once("error", (err) => {
+            clearTimeout(timeout);
+            reject(err);
+          });
+        });
+        this.workers.push(worker);
+        this.workerReady.push(readyPromise);
+      } catch (err) {
+        console.error(`[Indexer] Failed to create worker ${i}: ${err.message}`);
+      }
+    }
+    // Wait for all workers to be ready
+    try {
+      await Promise.all(this.workerReady);
+      console.error(`[Indexer] ${this.workers.length} workers ready`);
+      if (this.config.verbose) {
+        console.error(`[Indexer] Each worker loaded model: ${this.config.embeddingModel}`);
+      }
+    } catch (err) {
+      console.error(`[Indexer] Worker initialization failed: ${err.message}, falling back to single-threaded`);
+      this.terminateWorkers();
+    }
+  }
+  /**
+   * Terminate all worker threads
+   */
+  terminateWorkers() {
+    for (const worker of this.workers) {
+      worker.postMessage({ type: "shutdown" });
+    }
+    this.workers = [];
+    this.workerReady = [];
+  }
+  /**
+   * Send MCP progress notification to connected clients
+   */
+  sendProgress(progress, total, message) {
+    if (this.server) {
+      try {
+        this.server.sendNotification("notifications/progress", {
+          progressToken: "indexing",
+          progress,
+          total,
+          message
+        });
+      } catch (err) {
+        // Silently ignore if client doesn't support progress notifications
+      }
+    }
+  }
+  /**
+   * Process chunks using worker thread pool with timeout and error recovery
+   */
+  async processChunksWithWorkers(allChunks) {
+    if (this.workers.length === 0) {
+      // Fallback to single-threaded processing
+      return this.processChunksSingleThreaded(allChunks);
+    }
+    const results = [];
+    const chunkSize = Math.ceil(allChunks.length / this.workers.length);
+    const workerPromises = [];
+    const WORKER_TIMEOUT = 300000; // 5 minutes per batch
+    if (this.config.verbose) {
+      console.error(`[Indexer] Distributing ${allChunks.length} chunks across ${this.workers.length} workers (~${chunkSize} chunks each)`);
+    }
+    for (let i = 0; i < this.workers.length; i++) {
+      const workerChunks = allChunks.slice(i * chunkSize, (i + 1) * chunkSize);
+      if (workerChunks.length === 0) continue;
+      if (this.config.verbose) {
+        console.error(`[Indexer] Worker ${i}: processing ${workerChunks.length} chunks`);
+      }
+      const promise = new Promise((resolve, reject) => {
+        const worker = this.workers[i];
+        const batchId = `batch-${i}-${Date.now()}`;
+        // Timeout handler
+        const timeout = setTimeout(() => {
+          worker.off("message", handler);
+          console.error(`[Indexer] Worker ${i} timed out, falling back to single-threaded for this batch`);
+          // Return empty and let fallback handle it
+          resolve([]);
+        }, WORKER_TIMEOUT);
+        const handler = (msg) => {
+          if (msg.batchId === batchId) {
+            clearTimeout(timeout);
+            worker.off("message", handler);
+            if (msg.type === "results") {
+              resolve(msg.results);
+            } else if (msg.type === "error") {
+              console.error(`[Indexer] Worker ${i} error: ${msg.error}`);
+              resolve([]); // Return empty, don't reject - let fallback handle
+            }
+          }
+        };
+        // Handle worker crash
+        const errorHandler = (err) => {
+          clearTimeout(timeout);
+          worker.off("message", handler);
+          console.error(`[Indexer] Worker ${i} crashed: ${err.message}`);
+          resolve([]); // Return empty, don't reject
+        };
+        worker.once("error", errorHandler);
+        worker.on("message", handler);
+        worker.postMessage({ type: "process", chunks: workerChunks, batchId });
+      });
+      workerPromises.push({ promise, chunks: workerChunks });
+    }
+    // Wait for all workers with error recovery
+    const workerResults = await Promise.all(workerPromises.map(p => p.promise));
+    // Collect results and identify failed chunks that need retry
+    const failedChunks = [];
+    for (let i = 0; i < workerResults.length; i++) {
+      if (workerResults[i].length > 0) {
+        results.push(...workerResults[i]);
+      } else if (workerPromises[i].chunks.length > 0) {
+        // Worker failed or timed out, need to retry these chunks
+        failedChunks.push(...workerPromises[i].chunks);
+      }
+    }
+    // Retry failed chunks with single-threaded fallback
+    if (failedChunks.length > 0) {
+      console.error(`[Indexer] Retrying ${failedChunks.length} chunks with single-threaded fallback...`);
+      const retryResults = await this.processChunksSingleThreaded(failedChunks);
+      results.push(...retryResults);
+    }
+    return results;
+  }
+  /**
+   * Single-threaded chunk processing (fallback)
+   */
+  async processChunksSingleThreaded(chunks) {
+    const results = [];
+    for (const chunk of chunks) {
+      try {
+        const output = await this.embedder(chunk.text, { pooling: "mean", normalize: true });
+        results.push({
+          file: chunk.file,
+          startLine: chunk.startLine,
+          endLine: chunk.endLine,
+          content: chunk.text,
+          vector: Array.from(output.data),
+          success: true
+        });
+      } catch (error) {
+        results.push({
+          file: chunk.file,
+          startLine: chunk.startLine,
+          endLine: chunk.endLine,
+          error: error.message,
+          success: false
+        });
+      }
+    }
+    return results;
   }
   async indexFile(file) {
@@ -83,47 +312,272 @@ export class CodebaseIndexer {
     }
   }
-  async indexAll() {
-    console.error(`[Indexer] Indexing files in ${this.config.searchDirectory}...`);
+  /**
+   * Discover files using fdir (3-5x faster than glob)
+   * Uses config.excludePatterns which includes smart patterns from ignore-patterns.js
+   *
+   * Exclusion works on bare directory names only: the last segment of each
+   * exclude pattern is extracted and any directory with that name is
+   * skipped, wherever it appears under config.searchDirectory. A file is
+   * kept when its extension is one of config.fileExtensions.
+   *
+   * @returns {Promise<string[]>} paths produced by the fdir crawl
+   */
+  async discoverFiles() {
+    const startTime = Date.now();
-    const pattern = `${this.config.searchDirectory}/**/*.{${this.config.fileExtensions.join(",")}}`;
-    const files = await glob(pattern, {
-      ignore: this.config.excludePatterns,
-      absolute: true
-    });
-    console.error(`[Indexer] Found ${files.length} files to process`);
+    // Build extension filter from config
+    const extensions = new Set(this.config.fileExtensions.map(ext => `.${ext}`)); // path.extname() returns the leading dot, so add it here
-    let totalChunks = 0;
-    let processedFiles = 0;
-    let skippedFiles = 0;
+    // Extract directory names from glob patterns in config.excludePatterns
+    // Patterns like "**/node_modules/**" -> "node_modules"
+    const excludeDirs = new Set();
+    for (const pattern of this.config.excludePatterns) {
+      // Extract directory names from glob patterns
+      const match = pattern.match(/\*\*\/([^/*]+)\/?\*?\*?$/); // NOTE(review): also captures a trailing file name (e.g. a lock file), which then only excludes directories of that name
+      if (match) {
+        excludeDirs.add(match[1]);
+      }
+      // Also handle patterns like "**/dirname/**" (NOTE(review): the pattern above already matches this form; the Set makes the repeat add harmless)
+      const match2 = pattern.match(/\*\*\/([^/*]+)\/\*\*$/);
+      if (match2) {
+        excludeDirs.add(match2[1]);
+      }
+    }
-    // Process files in parallel batches for speed
-    const BATCH_SIZE = this.config.batchSize || 100;
+    // Always exclude cache directory
+    excludeDirs.add(".smart-coding-cache");
+    if (this.config.verbose) {
+      console.error(`[Indexer] Using ${excludeDirs.size} exclude directories from config`);
+    }
+    const api = new fdir()
+      .withFullPaths()
+      .exclude((dirName) => excludeDirs.has(dirName)) // matched by directory name alone, not by path
+      .filter((filePath) => extensions.has(path.extname(filePath))) // exact, case-sensitive extension match
+      .crawl(this.config.searchDirectory);
+    const files = await api.withPromise();
+    console.error(`[Indexer] File discovery: ${files.length} files in ${Date.now() - startTime}ms`);
+    return files;
+  }
+  /**
+   * Pre-filter files by hash (skip unchanged files before processing)
+   *
+   * Each file is stat'ed, read as UTF-8 and hashed with hashContent(). A file
+   * is dropped when it is a directory, is larger than config.maxFileSize,
+   * hashes to the value already stored in the cache, or cannot be read.
+   * Per-reason counts are logged once at the end.
+   *
+   * @param {string[]} files - candidate file paths
+   * @returns {Promise<Array<{file: string, content: string, hash: *}>>} the
+   *   files that still need indexing, with their already-read content and
+   *   the hashContent() result
+   */
+  async preFilterFiles(files) {
+    const startTime = Date.now();
+    const filesToProcess = [];
+    const skippedCount = { unchanged: 0, tooLarge: 0, error: 0 };
+    // Process in parallel batches for speed
+    const BATCH_SIZE = 500; // number of files stat'ed and read concurrently per round
     for (let i = 0; i < files.length; i += BATCH_SIZE) {
       const batch = files.slice(i, i + BATCH_SIZE);
-      // Process batch in parallel
       const results = await Promise.all(
-        batch.map(file => this.indexFile(file))
+        batch.map(async (file) => {
+          try {
+            const stats = await fs.stat(file);
+            if (stats.isDirectory()) {
+              return null; // directories are dropped without being counted
+            }
+            if (stats.size > this.config.maxFileSize) {
+              skippedCount.tooLarge++;
+              return null;
+            }
+            const content = await fs.readFile(file, "utf-8");
+            const hash = hashContent(content);
+            if (this.cache.getFileHash(file) === hash) {
+              skippedCount.unchanged++;
+              return null;
+            }
+            return { file, content, hash }; // content travels with the entry so the caller can chunk it directly
+          } catch (error) {
+            skippedCount.error++; // stat/read failures are counted, not rethrown
+            return null;
+          }
+        })
       );
-      // Aggregate results
-      for (const chunksAdded of results) {
-        totalChunks += chunksAdded;
-        processedFiles++;
-        if (chunksAdded === 0) skippedFiles++;
+      for (const result of results) {
+        if (result) filesToProcess.push(result);
+      }
+    }
+    console.error(`[Indexer] Pre-filter: ${filesToProcess.length} changed, ${skippedCount.unchanged} unchanged, ${skippedCount.tooLarge} too large, ${skippedCount.error} errors (${Date.now() - startTime}ms)`);
+    return filesToProcess;
+  }
+  async indexAll(force = false) {
+    if (this.isIndexing) {
+      console.error("[Indexer] Indexing already in progress, skipping concurrent request");
+      return { skipped: true, reason: "Indexing already in progress" };
+    }
+    this.isIndexing = true;
+    try {
+      if (force) {
+        console.error("[Indexer] Force reindex requested: clearing cache");
+        this.cache.setVectorStore([]);
+        this.cache.fileHashes = new Map();
       }
+      const totalStartTime = Date.now();
+    console.error(`[Indexer] Starting optimized indexing in ${this.config.searchDirectory}...`);
+    // Step 1: Fast file discovery with fdir
+    const files = await this.discoverFiles();
+    if (files.length === 0) {
+      console.error("[Indexer] No files found to index");
+      this.sendProgress(100, 100, "No files found to index");
+      return { skipped: false, filesProcessed: 0, chunksCreated: 0, message: "No files found to index" };
+    }
+    // Send progress: discovery complete
+    this.sendProgress(5, 100, `Discovered ${files.length} files`);
+    // Step 2: Pre-filter unchanged files (early hash check)
+    const filesToProcess = await this.preFilterFiles(files);
+    if (filesToProcess.length === 0) {
+      console.error("[Indexer] All files unchanged, nothing to index");
+      this.sendProgress(100, 100, "All files up to date");
+      await this.cache.save();
+      const vectorStore = this.cache.getVectorStore();
+      return {
+        skipped: false,
+        filesProcessed: 0,
+        chunksCreated: 0,
+        totalFiles: new Set(vectorStore.map(v => v.file)).size,
+        totalChunks: vectorStore.length,
+        message: "All files up to date"
+      };
+    }
+    // Send progress: filtering complete
+    this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
+    // Step 3: Determine batch size based on project size
+    const adaptiveBatchSize = files.length > 10000 ? 500 :
+                              files.length > 1000 ? 200 :
+                              this.config.batchSize || 100;
+    console.error(`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`);
+    // Step 4: Initialize worker threads (always use when multi-core available)
+    const useWorkers = os.cpus().length > 1;
+    if (useWorkers) {
+      await this.initializeWorkers();
+      console.error(`[Indexer] Multi-threaded mode: ${this.workers.length} workers active`);
+    } else {
+      console.error(`[Indexer] Single-threaded mode (single-core system)`);
+    }
+    let totalChunks = 0;
+    let processedFiles = 0;
+    // Step 5: Process files in adaptive batches
+    for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
+      const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
+      // Generate all chunks for this batch
+      const allChunks = [];
-      // Progress indicator every 500 files (less console overhead)
-      if (processedFiles % 500 === 0 || processedFiles === files.length) {
-        console.error(`[Indexer] Progress: ${processedFiles}/${files.length} files processed...`);
+      for (const { file, content, hash } of batch) {
+        // Remove old chunks for this file
+        this.cache.removeFileFromStore(file);
+        const chunks = smartChunk(content, file, this.config);
+        for (const chunk of chunks) {
+          allChunks.push({
+            file,
+            text: chunk.text,
+            startLine: chunk.startLine,
+            endLine: chunk.endLine,
+            hash
+          });
+        }
+      }
+      // Process chunks (with workers if available, otherwise single-threaded)
+      let results;
+      if (useWorkers && this.workers.length > 0) {
+        results = await this.processChunksWithWorkers(allChunks);
+      } else {
+        results = await this.processChunksSingleThreaded(allChunks);
+      }
+      // Store successful results
+      const fileHashes = new Map();
+      for (const result of results) {
+        if (result.success) {
+          this.cache.addToStore({
+            file: result.file,
+            startLine: result.startLine,
+            endLine: result.endLine,
+            content: result.content,
+            vector: result.vector
+          });
+          totalChunks++;
+        }
+        // Track hash for each file
+        const chunkInfo = allChunks.find(c => c.file === result.file);
+        if (chunkInfo) {
+          fileHashes.set(result.file, chunkInfo.hash);
+        }
+      }
+      // Update file hashes
+      for (const [file, hash] of fileHashes) {
+        this.cache.setFileHash(file, hash);
       }
+      processedFiles += batch.length;
+      // Progress indicator every batch
+      if (processedFiles % (adaptiveBatchSize * 2) === 0 || processedFiles === filesToProcess.length) {
+        const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
+        const rate = (processedFiles / parseFloat(elapsed)).toFixed(0);
+        console.error(`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec)`);
+        // Send MCP progress notification (10-95% range for batch processing)
+        const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
+        this.sendProgress(progressPercent, 100, `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`);
+      }
+    }
+    // Cleanup workers
+    if (useWorkers) {
+      this.terminateWorkers();
     }
-    console.error(`[Indexer] Indexed ${totalChunks} code chunks from ${files.length} files (${skippedFiles} unchanged)`);
+    const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
+    console.error(`[Indexer] Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
+    // Send completion progress
+    this.sendProgress(100, 100, `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
     await this.cache.save();
+    const vectorStore = this.cache.getVectorStore();
+    return {
+      skipped: false,
+      filesProcessed: filesToProcess.length,
+      chunksCreated: totalChunks,
+      totalFiles: new Set(vectorStore.map(v => v.file)).size,
+      totalChunks: vectorStore.length,
+      duration: totalTime,
+      message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
+    };
+    } finally {
+      this.isIndexing = false;
+    }
   }
   setupFileWatcher() {
@@ -191,25 +645,41 @@ export function getToolDefinition() {
 // Tool handler: runs indexer.indexAll(force) and returns the outcome as a single text content item
 export async function handleToolCall(request, indexer) {
   const force = request.params.arguments?.force || false;
+  const result = await indexer.indexAll(force); // cache clearing for a forced run now happens inside indexAll()
-  if (force) {
-    // Clear cache to force full reindex
-    indexer.cache.setVectorStore([]);
-    indexer.cache.fileHashes = new Map();
+  // Handle case when indexing was skipped due to concurrent request
+  if (result?.skipped) {
+    return {
+      content: [{
+        type: "text",
+        text: `Indexing skipped: ${result.reason}\n\nPlease wait for the current indexing operation to complete before requesting another reindex.`
+      }]
+    };
   }
-  await indexer.indexAll();
+  // Prefer the totals returned by indexAll(); fall back to counting the cache's vector store
   const vectorStore = indexer.cache.getVectorStore();
   const stats = {
-    totalChunks: vectorStore.length,
-    totalFiles: new Set(vectorStore.map(v => v.file)).size
+    totalChunks: result?.totalChunks ?? vectorStore.length,
+    totalFiles: result?.totalFiles ?? new Set(vectorStore.map(v => v.file)).size,
+    filesProcessed: result?.filesProcessed ?? 0,
+    chunksCreated: result?.chunksCreated ?? 0
   };
+  let message = result?.message
+    ? `Codebase reindexed successfully.\n\n${result.message}`
+    : `Codebase reindexed successfully.`;
+  message += `\n\nStatistics:\n- Total files in index: ${stats.totalFiles}\n- Total code chunks: ${stats.totalChunks}`;
+  if (stats.filesProcessed > 0) { // per-run figures are only shown when this run actually processed files
+    message += `\n- Files processed this run: ${stats.filesProcessed}\n- Chunks created this run: ${stats.chunksCreated}`;
+  }
   return {
     content: [{
       type: "text",
-      text: `Codebase reindexed successfully.\n\nStatistics:\n- Files indexed: ${stats.totalFiles}\n- Code chunks: ${stats.totalChunks}`
+      text: message
     }]
   };
 }

package/how-its-works.png ADDED Viewed

Binary file

package/index.js CHANGED Viewed

@@ -95,9 +95,9 @@ async function initialize() {
   await cache.load();
   // Initialize features
-  indexer = new CodebaseIndexer(embedder, cache, config);
+  indexer = new CodebaseIndexer(embedder, cache, config, server);
   hybridSearch = new HybridSearch(embedder, cache, config);
-  const cacheClearer = new ClearCacheFeature.CacheClearer(embedder, cache, config);
+  const cacheClearer = new ClearCacheFeature.CacheClearer(embedder, cache, config, indexer);
   // Store feature instances (matches features array order)
   features[0].instance = hybridSearch;

package/lib/cache.js CHANGED Viewed

@@ -6,6 +6,7 @@ export class EmbeddingsCache {
     this.config = config;
     this.vectorStore = [];
     this.fileHashes = new Map();
+    this.isSaving = false;
   }
   async load() {
@@ -55,6 +56,8 @@ export class EmbeddingsCache {
   async save() {
     if (!this.config.enableCache) return;
+    this.isSaving = true;
     try {
       await fs.mkdir(this.config.cacheDirectory, { recursive: true });
       const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
@@ -66,6 +69,8 @@ export class EmbeddingsCache {
       ]);
     } catch (error) {
       console.error("[Cache] Failed to save cache:", error.message);
+    } finally {
+      this.isSaving = false;
     }
   }