smart-coding-mcp 2.0.0 → 2.1.0

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
package/README.md CHANGED
@@ -46,6 +46,34 @@ This MCP server solves that by indexing your codebase with AI embeddings. Your A
  - Your code never leaves your system
  - No API calls to external services

+ ## Performance & Resource Management
+
+ **Progressive Indexing**
+
+ - Search works immediately, even while indexing continues (like video buffering)
+ - Incremental saves every 5 batches - no data loss if interrupted
+ - Real-time indexing status shown when searching during indexing
+
+ **Resource Throttling**
+
+ - CPU usage limited to 50% by default (configurable)
+ - Your laptop stays responsive during indexing
+ - Configurable delays between batches
+ - Worker thread limits respect system resources
+
+ **SQLite Cache**
+
+ - 5-10x faster than JSON for large codebases
+ - Write-Ahead Logging (WAL) for better concurrency
+ - Binary blob storage for smaller cache size
+ - Automatic migration from JSON
+
+ **Optimized Defaults**
+
+ - 128d embeddings by default (2x faster than 256d, minimal quality loss)
+ - Smart batch sizing based on project size
+ - Parallel processing with auto-tuned worker threads
+
  ## Installation

  Install globally via npm:
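The README section above describes the behaviour at a high level. As a rough illustration of the throttling idea only (not the package's actual code; `runBatches`, `sleep`, and `processBatch` are hypothetical names), limiting CPU use can be as simple as capping workers and sleeping between batches:

```js
// Sketch of batch throttling in the spirit of maxCpuPercent/batchDelay.
// All names here are illustrative; the package's real implementation is not shown in this diff.
import os from "os";

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function runBatches(batches, processBatch, { maxCpuPercent = 50, batchDelay = 100 } = {}) {
  // Cap parallelism to roughly the requested share of available cores.
  const maxWorkers = Math.max(1, Math.floor(os.cpus().length * (maxCpuPercent / 100)));
  for (const batch of batches) {
    await processBatch(batch, maxWorkers);
    await sleep(batchDelay); // yield the CPU between batches so the machine stays responsive
  }
}
```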
@@ -144,10 +172,13 @@ Override configuration settings via environment variables in your MCP config:
  | `SMART_CODING_SEMANTIC_WEIGHT` | number | `0.7` | Weight for semantic similarity (0-1) |
  | `SMART_CODING_EXACT_MATCH_BOOST` | number | `1.5` | Boost for exact text matches |
  | `SMART_CODING_EMBEDDING_MODEL` | string | `nomic-ai/nomic-embed-text-v1.5` | AI embedding model to use |
- | `SMART_CODING_EMBEDDING_DIMENSION` | number | `256` | MRL dimension (64, 128, 256, 512, 768) |
+ | `SMART_CODING_EMBEDDING_DIMENSION` | number | `128` | MRL dimension (64, 128, 256, 512, 768) |
  | `SMART_CODING_DEVICE` | string | `cpu` | Inference device (`cpu`, `webgpu`, `auto`) |
  | `SMART_CODING_CHUNKING_MODE` | string | `smart` | Code chunking (`smart`, `ast`, `line`) |
  | `SMART_CODING_WORKER_THREADS` | string | `auto` | Worker threads (`auto` or 1-32) |
+ | `SMART_CODING_MAX_CPU_PERCENT` | number | `50` | Max CPU usage during indexing (10-100%) |
+ | `SMART_CODING_BATCH_DELAY` | number | `100` | Delay between batches in ms (0-5000) |
+ | `SMART_CODING_MAX_WORKERS` | string | `auto` | Override max worker threads limit |

  **Example with environment variables:**

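The three new throttling variables slot into the same MCP configuration block as the existing ones. A hypothetical entry (server name and command are placeholders, not taken from the package docs) that lowers the CPU budget from the default 50% to 25% and spaces batches further apart:

```json
{
  "mcpServers": {
    "smart-coding": {
      "command": "npx",
      "args": ["smart-coding-mcp"],
      "env": {
        "SMART_CODING_MAX_CPU_PERCENT": "25",
        "SMART_CODING_BATCH_DELAY": "500",
        "SMART_CODING_MAX_WORKERS": "2"
      }
    }
  }
}
```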
@@ -202,8 +233,9 @@ flowchart TB
  end

  subgraph Storage["Cache"]
- Vectors["Vector Store<br/>embeddings.json"]
+ Vectors["SQLite Database<br/>embeddings.db (WAL mode)"]
  Hashes["File Hashes<br/>Incremental updates"]
+ Progressive["Progressive Indexing<br/>Search works during indexing"]
  end

  Agent <-->|"MCP Protocol"| Protocol
package/config.json CHANGED
@@ -61,10 +61,12 @@
  "watchFiles": false,
  "verbose": false,
  "embeddingModel": "nomic-ai/nomic-embed-text-v1.5",
- "embeddingDimension": 256,
+ "embeddingDimension": 128,
  "device": "auto",
  "chunkingMode": "smart",
  "semanticWeight": 0.7,
  "exactMatchBoost": 1.5,
- "workerThreads": "auto"
+ "workerThreads": "auto",
+ "maxCpuPercent": 50,
+ "batchDelay": 100
  }
@@ -48,20 +48,43 @@ export class StatusReporter {
  // Get unique files from vector store
  const uniqueFiles = new Set(vectorStore.map(v => v.file));

- // Get cache size
+ // Get cache size (check for SQLite database)
  let cacheSizeBytes = 0;
+ let cacheType = 'none';
  try {
- const cachePath = path.join(this.config.cacheDirectory, 'embeddings.json');
- const stats = await fs.stat(cachePath);
+ // Check for SQLite cache first
+ const sqlitePath = path.join(this.config.cacheDirectory, 'embeddings.db');
+ const stats = await fs.stat(sqlitePath);
  cacheSizeBytes = stats.size;
+ cacheType = 'sqlite';
  } catch {
- // Cache file doesn't exist yet
+ // Try old JSON cache as fallback
+ try {
+ const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
+ const stats = await fs.stat(jsonPath);
+ cacheSizeBytes = stats.size;
+ cacheType = 'json';
+ } catch {
+ // No cache file exists
+ cacheType = 'none';
+ }
  }

- // Determine index status
+ // Determine index status and progressive indexing info
  let indexStatus = 'empty';
+ let progressiveIndexing = null;
+
  if (this.indexer?.isIndexing) {
  indexStatus = 'indexing';
+ // Include progressive indexing status
+ if (this.indexer.indexingStatus) {
+ progressiveIndexing = {
+ inProgress: this.indexer.indexingStatus.inProgress,
+ totalFiles: this.indexer.indexingStatus.totalFiles,
+ processedFiles: this.indexer.indexingStatus.processedFiles,
+ percentage: this.indexer.indexingStatus.percentage
+ };
+ }
  } else if (vectorStore.length > 0) {
  indexStatus = 'ready';
  }
@@ -85,11 +108,13 @@ export class StatusReporter {
  status: indexStatus,
  filesIndexed: uniqueFiles.size,
  chunksCount: vectorStore.length,
- chunkingMode: this.config.chunkingMode
+ chunkingMode: this.config.chunkingMode,
+ ...(progressiveIndexing && { progressiveIndexing })
  },

  cache: {
  enabled: this.config.enableCache,
+ type: cacheType,
  path: this.config.cacheDirectory,
  sizeBytes: cacheSizeBytes,
  sizeFormatted: formatBytes(cacheSizeBytes)
@@ -101,6 +126,12 @@ export class StatusReporter {
  semanticWeight: this.config.semanticWeight,
  exactMatchBoost: this.config.exactMatchBoost,
  workerThreads: this.config.workerThreads
+ },
+
+ resourceThrottling: {
+ maxCpuPercent: this.config.maxCpuPercent,
+ batchDelay: this.config.batchDelay,
+ maxWorkers: this.config.maxWorkers
  }
  };
  }
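Taken together, the StatusReporter changes mean a status report captured mid-indexing now carries the cache type, progressive-indexing progress, and the throttling settings. An illustrative payload (field names from the diff above; all values invented):

```json
{
  "index": {
    "status": "indexing",
    "filesIndexed": 4210,
    "chunksCount": 18342,
    "chunkingMode": "smart",
    "progressiveIndexing": { "inProgress": true, "totalFiles": 9800, "processedFiles": 4210, "percentage": 43 }
  },
  "cache": { "enabled": true, "type": "sqlite", "path": ".smart-coding-cache", "sizeBytes": 52428800, "sizeFormatted": "50 MB" },
  "resourceThrottling": { "maxCpuPercent": 50, "batchDelay": 100, "maxWorkers": "auto" }
}
```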
@@ -2,21 +2,35 @@ import path from "path";
  import { cosineSimilarity } from "../lib/utils.js";

  export class HybridSearch {
- constructor(embedder, cache, config) {
+ constructor(embedder, cache, config, indexer = null) {
  this.embedder = embedder;
  this.cache = cache;
  this.config = config;
+ this.indexer = indexer; // Reference to indexer for status checking
  }

  async search(query, maxResults) {
  const vectorStore = this.cache.getVectorStore();

  if (vectorStore.length === 0) {
+ // Check if indexing is in progress
+ if (this.indexer?.indexingStatus?.inProgress) {
+ return {
+ results: [],
+ message: `Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Search available but results may be incomplete. Please wait for indexing to finish for full coverage.`
+ };
+ }
  return {
  results: [],
  message: "No code has been indexed yet. Please wait for initial indexing to complete."
  };
  }
+
+ // Show warning if indexing is still in progress but we have some results
+ let indexingWarning = null;
+ if (this.indexer?.indexingStatus?.inProgress) {
+ indexingWarning = `⚠️ Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Results shown are from partially indexed codebase.\n\n`;
+ }

  // Generate query embedding
  const queryEmbed = await this.embedder(query, { pooling: "mean", normalize: true });
@@ -50,7 +64,7 @@ export class HybridSearch {
  .sort((a, b) => b.score - a.score)
  .slice(0, maxResults);

- return { results, message: null };
+ return { results, message: null, indexingWarning };
  }

  formatResults(results) {
@@ -105,7 +119,7 @@ export async function handleToolCall(request, hybridSearch) {
  const query = request.params.arguments.query;
  const maxResults = request.params.arguments.maxResults || hybridSearch.config.maxResults;

- const { results, message } = await hybridSearch.search(query, maxResults);
+ const { results, message, indexingWarning } = await hybridSearch.search(query, maxResults);

  if (message) {
  return {
@@ -113,7 +127,12 @@ export async function handleToolCall(request, hybridSearch) {
  };
  }

- const formattedText = hybridSearch.formatResults(results);
+ let formattedText = hybridSearch.formatResults(results);
+
+ // Prepend indexing warning if present
+ if (indexingWarning) {
+ formattedText = indexingWarning + formattedText;
+ }

  return {
  content: [{ type: "text", text: formattedText }]
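Because the new constructor parameter defaults to `null`, existing call sites keep working; passing the indexer is what enables the progress checks. A minimal usage sketch (the variable wiring is illustrative; the real server setup is not shown in this diff):

```js
// Wiring the indexer into HybridSearch enables the in-progress warnings above.
const search = new HybridSearch(embedder, cache, config, indexer);
const { results, message, indexingWarning } = await search.search("parse config file", 10);
if (indexingWarning) console.error(indexingWarning.trim()); // surfaced to the user ahead of results
```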
@@ -6,6 +6,7 @@ import os from "os";
  import { Worker } from "worker_threads";
  import { fileURLToPath } from "url";
  import { smartChunk, hashContent } from "../lib/utils.js";
+ import { ResourceThrottle } from "../lib/resource-throttle.js";

  const __dirname = path.dirname(fileURLToPath(import.meta.url));

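The new `../lib/resource-throttle.js` module is imported here but not included in this diff. Its call sites below (`maxWorkers`, `getWorkerCount`, `throttledBatch`) constrain its shape; here is a minimal sketch consistent with those call sites, which may differ from the shipped module:

```js
// Sketch of ResourceThrottle inferred from its call sites in this diff; not the shipped code.
import os from "os";

export class ResourceThrottle {
  constructor(config = {}) {
    this.maxCpuPercent = config.maxCpuPercent ?? 50;
    this.batchDelay = config.batchDelay ?? 100;
    // Cap workers to roughly the configured share of cores, unless explicitly overridden.
    const budget = Math.floor(os.cpus().length * (this.maxCpuPercent / 100));
    this.maxWorkers = config.maxWorkers && config.maxWorkers !== "auto"
      ? Number(config.maxWorkers)
      : Math.max(1, budget);
  }

  // Clamp a requested worker count to the throttled maximum.
  getWorkerCount(requested) {
    return Math.max(1, Math.min(Number(requested) || 1, this.maxWorkers));
  }

  // Pause between batches so indexing doesn't monopolize the CPU.
  async throttledBatch(_batch) {
    if (this.batchDelay > 0) {
      await new Promise((resolve) => setTimeout(resolve, this.batchDelay));
    }
  }
}
```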
@@ -19,6 +20,17 @@ export class CodebaseIndexer {
  this.workers = [];
  this.workerReady = [];
  this.isIndexing = false;
+
+ // Initialize resource throttling
+ this.throttle = new ResourceThrottle(config);
+
+ // Track indexing status for progressive search
+ this.indexingStatus = {
+ inProgress: false,
+ totalFiles: 0,
+ processedFiles: 0,
+ percentage: 0
+ };
  }

  /**
@@ -33,8 +45,8 @@ export class CodebaseIndexer {
  }

  const numWorkers = this.config.workerThreads === "auto"
- ? Math.max(1, os.cpus().length - 1)
- : (this.config.workerThreads || 1);
+ ? this.throttle.maxWorkers // Use throttled worker count
+ : this.throttle.getWorkerCount(this.config.workerThreads);

  // Only use workers if we have more than 1 CPU
  if (numWorkers <= 1) {
@@ -426,6 +438,14 @@ export class CodebaseIndexer {

  this.isIndexing = true;

+ // Initialize indexing status for progressive search
+ this.indexingStatus = {
+ inProgress: true,
+ totalFiles: 0,
+ processedFiles: 0,
+ percentage: 0
+ };
+
  try {
  if (force) {
  console.error("[Indexer] Force reindex requested: clearing cache");
@@ -470,35 +490,15 @@ export class CodebaseIndexer {
  }
  }

- // Step 2: Pre-filter unchanged files (early hash check)
- const filesToProcess = await this.preFilterFiles(files);
-
- if (filesToProcess.length === 0) {
- console.error("[Indexer] All files unchanged, nothing to index");
- this.sendProgress(100, 100, "All files up to date");
- await this.cache.save();
- const vectorStore = this.cache.getVectorStore();
- return {
- skipped: false,
- filesProcessed: 0,
- chunksCreated: 0,
- totalFiles: new Set(vectorStore.map(v => v.file)).size,
- totalChunks: vectorStore.length,
- message: "All files up to date"
- };
- }
-
- // Send progress: filtering complete
- this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
-
- // Step 3: Determine batch size based on project size
+ // Step 2: Process files in adaptive batches with lazy filtering
+ // Instead of pre-filtering all files (expensive), check hashes during processing
  const adaptiveBatchSize = files.length > 10000 ? 500 :
  files.length > 1000 ? 200 :
  this.config.batchSize || 100;

- console.error(`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`);
+ console.error(`[Indexer] Processing ${files.length} files with lazy filtering (batch size: ${adaptiveBatchSize})`);

- // Step 4: Initialize worker threads (always use when multi-core available)
+ // Step 3: Initialize worker threads (always use when multi-core available)
  const useWorkers = os.cpus().length > 1;

  if (useWorkers) {
@@ -510,30 +510,69 @@ export class CodebaseIndexer {

  let totalChunks = 0;
  let processedFiles = 0;
+ let skippedFiles = 0;
+ let batchCounter = 0; // Track batches for incremental saves
+
+ // Update total file count for status tracking (estimated, will adjust as we filter)
+ this.indexingStatus.totalFiles = files.length;

- // Step 5: Process files in adaptive batches
- for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
- const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
+ // Step 4: Process files in adaptive batches with inline lazy filtering
+ for (let i = 0; i < files.length; i += adaptiveBatchSize) {
+ const batch = files.slice(i, i + adaptiveBatchSize);

- // Generate all chunks for this batch
+ // Lazy filter and generate chunks for this batch
  const allChunks = [];
+ const fileHashes = new Map();

- for (const { file, content, hash } of batch) {
- // Remove old chunks for this file
- this.cache.removeFileFromStore(file);
-
- const chunks = smartChunk(content, file, this.config);
-
- for (const chunk of chunks) {
- allChunks.push({
- file,
- text: chunk.text,
- startLine: chunk.startLine,
- endLine: chunk.endLine,
- hash
- });
+ for (const file of batch) {
+ try {
+ const stats = await fs.stat(file);
+
+ // Skip directories and oversized files
+ if (stats.isDirectory()) continue;
+ if (stats.size > this.config.maxFileSize) {
+ skippedFiles++;
+ continue;
+ }
+
+ // Read content and check hash
+ const content = await fs.readFile(file, "utf-8");
+ const hash = hashContent(content);
+
+ // Skip unchanged files inline (lazy check)
+ if (this.cache.getFileHash(file) === hash) {
+ skippedFiles++;
+ continue;
+ }
+
+ // File changed - remove old chunks and prepare new ones
+ this.cache.removeFileFromStore(file);
+ const chunks = smartChunk(content, file, this.config);
+
+ for (const chunk of chunks) {
+ allChunks.push({
+ file,
+ text: chunk.text,
+ startLine: chunk.startLine,
+ endLine: chunk.endLine,
+ hash
+ });
+ }
+
+ fileHashes.set(file, hash);
+ } catch (error) {
+ // Skip files with read errors
+ skippedFiles++;
+ if (this.config.verbose) {
+ console.error(`[Indexer] Error reading ${path.basename(file)}: ${error.message}`);
+ }
  }
  }
+
+ // Skip this batch if no chunks to process
+ if (allChunks.length === 0) {
+ continue;
+ }

  // Process chunks (with workers if available, otherwise single-threaded)
  let results;
@@ -543,11 +582,13 @@ export class CodebaseIndexer {
  results = await this.processChunksSingleThreaded(allChunks);
  }

- // Store successful results
- const fileHashes = new Map();
+ // Collect successful results for batch insert
+ const chunksToInsert = [];
+ const filesProcessedInBatch = new Set();
+
  for (const result of results) {
  if (result.success) {
- this.cache.addToStore({
+ chunksToInsert.push({
  file: result.file,
  startLine: result.startLine,
  endLine: result.endLine,
@@ -555,11 +596,17 @@ export class CodebaseIndexer {
  vector: result.vector
  });
  totalChunks++;
+ filesProcessedInBatch.add(result.file);
  }
- // Track hash for each file
- const chunkInfo = allChunks.find(c => c.file === result.file);
- if (chunkInfo) {
- fileHashes.set(result.file, chunkInfo.hash);
+ }
+
+ // Batch insert to SQLite (much faster than individual inserts)
+ if (chunksToInsert.length > 0 && typeof this.cache.addBatchToStore === 'function') {
+ this.cache.addBatchToStore(chunksToInsert);
+ } else {
+ // Fallback for old cache implementation
+ for (const chunk of chunksToInsert) {
+ this.cache.addToStore(chunk);
  }
  }

@@ -568,17 +615,35 @@ export class CodebaseIndexer {
  this.cache.setFileHash(file, hash);
  }

- processedFiles += batch.length;
+ processedFiles += filesProcessedInBatch.size;
+ batchCounter++;
+
+ // Update indexing status for progressive search
+ const estimatedTotal = files.length - skippedFiles;
+ this.indexingStatus.processedFiles = processedFiles;
+ this.indexingStatus.totalFiles = Math.max(estimatedTotal, processedFiles);
+ this.indexingStatus.percentage = estimatedTotal > 0 ? Math.floor((processedFiles / estimatedTotal) * 100) : 100;
+
+ // Incremental save to SQLite (every N batches)
+ const saveInterval = this.config.incrementalSaveInterval || 5;
+ if (batchCounter % saveInterval === 0) {
+ if (typeof this.cache.saveIncremental === 'function') {
+ await this.cache.saveIncremental();
+ }
+ }
+
+ // Apply CPU throttling (delay between batches)
+ await this.throttle.throttledBatch(null);

  // Progress indicator every batch
- if (processedFiles % (adaptiveBatchSize * 2) === 0 || processedFiles === filesToProcess.length) {
+ if (processedFiles > 0 && (processedFiles % (adaptiveBatchSize * 2) === 0 || i + adaptiveBatchSize >= files.length)) {
  const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
  const rate = (processedFiles / parseFloat(elapsed)).toFixed(0);
- console.error(`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec)`);
+ console.error(`[Indexer] Progress: ${processedFiles} changed, ${skippedFiles} skipped (${rate} files/sec)`);

  // Send MCP progress notification (10-95% range for batch processing)
- const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
- this.sendProgress(progressPercent, 100, `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`);
+ const progressPercent = Math.min(95, Math.floor(10 + (i / files.length) * 85));
+ this.sendProgress(progressPercent, 100, `Indexed ${processedFiles} files, ${skippedFiles} skipped (${rate}/sec)`);
  }
  }

@@ -588,25 +653,37 @@ export class CodebaseIndexer {
  }

  const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
- console.error(`[Indexer] Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
+ const changedFiles = processedFiles;
+ console.error(`[Indexer] Complete: ${totalChunks} chunks from ${changedFiles} changed files (${skippedFiles} unchanged) in ${totalTime}s`);
+
+ // Mark indexing as complete
+ this.indexingStatus.inProgress = false;
+ this.indexingStatus.percentage = 100;

  // Send completion progress
- this.sendProgress(100, 100, `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
+ const summaryMsg = changedFiles > 0
+ ? `Complete: ${totalChunks} chunks from ${changedFiles} changed files (${skippedFiles} unchanged) in ${totalTime}s`
+ : `Complete: No files changed (${skippedFiles} files up to date)`;
+ this.sendProgress(100, 100, summaryMsg);

  await this.cache.save();

  const vectorStore = this.cache.getVectorStore();
  return {
  skipped: false,
- filesProcessed: filesToProcess.length,
+ filesProcessed: changedFiles,
  chunksCreated: totalChunks,
  totalFiles: new Set(vectorStore.map(v => v.file)).size,
  totalChunks: vectorStore.length,
  duration: totalTime,
- message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
+ message: changedFiles > 0
+ ? `Indexed ${changedFiles} files (${totalChunks} chunks, ${skippedFiles} unchanged) in ${totalTime}s`
+ : `All ${skippedFiles} files up to date`
  };
  } finally {
  this.isIndexing = false;
+ // Adjust estimated total after completion
+ this.indexingStatus.totalFiles = processedFiles + skippedFiles;
  }
  }
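The cache methods the indexer now relies on (`addBatchToStore`, `saveIncremental`, `getFileHash`) are likewise not part of this diff. For the batch-insert path, here is a sketch of how a SQLite cache with WAL and blob-stored vectors could implement it; `better-sqlite3` is an assumed dependency, and the package's actual cache module may differ:

```js
// Sketch: SQLite-backed vector cache with WAL and transactional batch inserts.
// Assumes better-sqlite3; not the package's shipped implementation.
import Database from "better-sqlite3";

export class SqliteVectorCache {
  constructor(dbPath) {
    this.db = new Database(dbPath);
    this.db.pragma("journal_mode = WAL"); // Write-Ahead Logging for better concurrency
    this.db.exec(`CREATE TABLE IF NOT EXISTS chunks (
      file TEXT, startLine INTEGER, endLine INTEGER, text TEXT, vector BLOB
    )`);
    this.insert = this.db.prepare(
      "INSERT INTO chunks (file, startLine, endLine, text, vector) VALUES (?, ?, ?, ?, ?)"
    );
  }

  // One transaction per batch is far faster than autocommitting each row.
  addBatchToStore(chunks) {
    const insertMany = this.db.transaction((rows) => {
      for (const c of rows) {
        // Embeddings stored as binary blobs keep the cache smaller than JSON arrays.
        const blob = Buffer.from(new Float32Array(c.vector).buffer);
        this.insert.run(c.file, c.startLine, c.endLine, c.text, blob);
      }
    });
    insertMany(chunks);
  }
}
```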