smart-coding-mcp 2.0.0 → 2.1.0

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
package/README.md CHANGED
@@ -46,6 +46,34 @@ This MCP server solves that by indexing your codebase with AI embeddings. Your A
  - Your code never leaves your system
  - No API calls to external services

+ ## Performance & Resource Management
+
+ **Progressive Indexing**
+
+ - Search works immediately, even while indexing continues (like video buffering)
+ - Incremental saves every 5 batches - no data loss if interrupted
+ - Real-time indexing status shown when searching during indexing
+
+ **Resource Throttling**
+
+ - CPU usage limited to 50% by default (configurable)
+ - Your laptop stays responsive during indexing
+ - Configurable delays between batches
+ - Worker thread limits respect system resources
+
+ **SQLite Cache**
+
+ - 5-10x faster than JSON for large codebases
+ - Write-Ahead Logging (WAL) for better concurrency
+ - Binary blob storage for smaller cache size
+ - Automatic migration from JSON
+
+ **Optimized Defaults**
+
+ - 128d embeddings by default (2x faster than 256d, minimal quality loss)
+ - Smart batch sizing based on project size
+ - Parallel processing with auto-tuned worker threads
+
  ## Installation

  Install globally via npm:
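The README section above describes the behaviour at a high level. As a rough illustration of the throttling idea only (not the package's actual code; `runBatches`, `sleep`, and `processBatch` are hypothetical names), limiting CPU use can be as simple as capping workers and sleeping between batches:

```js
// Sketch of batch throttling in the spirit of maxCpuPercent/batchDelay.
// All names here are illustrative; the package's real implementation is not shown in this diff.
import os from "os";

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function runBatches(batches, processBatch, { maxCpuPercent = 50, batchDelay = 100 } = {}) {
  // Cap parallelism to roughly the requested share of available cores.
  const maxWorkers = Math.max(1, Math.floor(os.cpus().length * (maxCpuPercent / 100)));
  for (const batch of batches) {
    await processBatch(batch, maxWorkers);
    await sleep(batchDelay); // yield the CPU between batches so the machine stays responsive
  }
}
```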
@@ -144,10 +172,13 @@ Override configuration settings via environment variables in your MCP config:
  | `SMART_CODING_SEMANTIC_WEIGHT` | number | `0.7` | Weight for semantic similarity (0-1) |
  | `SMART_CODING_EXACT_MATCH_BOOST` | number | `1.5` | Boost for exact text matches |
  | `SMART_CODING_EMBEDDING_MODEL` | string | `nomic-ai/nomic-embed-text-v1.5` | AI embedding model to use |
- | `SMART_CODING_EMBEDDING_DIMENSION` | number | `256` | MRL dimension (64, 128, 256, 512, 768) |
+ | `SMART_CODING_EMBEDDING_DIMENSION` | number | `128` | MRL dimension (64, 128, 256, 512, 768) |
  | `SMART_CODING_DEVICE` | string | `cpu` | Inference device (`cpu`, `webgpu`, `auto`) |
  | `SMART_CODING_CHUNKING_MODE` | string | `smart` | Code chunking (`smart`, `ast`, `line`) |
  | `SMART_CODING_WORKER_THREADS` | string | `auto` | Worker threads (`auto` or 1-32) |
+ | `SMART_CODING_MAX_CPU_PERCENT` | number | `50` | Max CPU usage during indexing (10-100%) |
+ | `SMART_CODING_BATCH_DELAY` | number | `100` | Delay between batches in ms (0-5000) |
+ | `SMART_CODING_MAX_WORKERS` | string | `auto` | Override max worker threads limit |

  **Example with environment variables:**

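The three new throttling variables slot into the same MCP configuration block as the existing ones. A hypothetical entry (server name and command are placeholders, not taken from the package docs) that lowers the CPU budget from the default 50% to 25% and spaces batches further apart:

```json
{
  "mcpServers": {
    "smart-coding": {
      "command": "npx",
      "args": ["smart-coding-mcp"],
      "env": {
        "SMART_CODING_MAX_CPU_PERCENT": "25",
        "SMART_CODING_BATCH_DELAY": "500",
        "SMART_CODING_MAX_WORKERS": "2"
      }
    }
  }
}
```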
@@ -202,8 +233,9 @@ flowchart TB
  end

  subgraph Storage["Cache"]
- Vectors["Vector Store<br/>embeddings.json"]
+ Vectors["SQLite Database<br/>embeddings.db (WAL mode)"]
  Hashes["File Hashes<br/>Incremental updates"]
+ Progressive["Progressive Indexing<br/>Search works during indexing"]
  end

  Agent <-->|"MCP Protocol"| Protocol
package/config.json CHANGED
@@ -61,10 +61,12 @@
  "watchFiles": false,
  "verbose": false,
  "embeddingModel": "nomic-ai/nomic-embed-text-v1.5",
- "embeddingDimension": 256,
+ "embeddingDimension": 128,
  "device": "auto",
  "chunkingMode": "smart",
  "semanticWeight": 0.7,
  "exactMatchBoost": 1.5,
- "workerThreads": "auto"
+ "workerThreads": "auto",
+ "maxCpuPercent": 50,
+ "batchDelay": 100
  }
@@ -48,20 +48,43 @@ export class StatusReporter {
  // Get unique files from vector store
  const uniqueFiles = new Set(vectorStore.map(v => v.file));

- // Get cache size
+ // Get cache size (check for SQLite database)
  let cacheSizeBytes = 0;
+ let cacheType = 'none';
  try {
- const cachePath = path.join(this.config.cacheDirectory, 'embeddings.json');
- const stats = await fs.stat(cachePath);
+ // Check for SQLite cache first
+ const sqlitePath = path.join(this.config.cacheDirectory, 'embeddings.db');
+ const stats = await fs.stat(sqlitePath);
  cacheSizeBytes = stats.size;
+ cacheType = 'sqlite';
  } catch {
- // Cache file doesn't exist yet
+ // Try old JSON cache as fallback
+ try {
+ const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
+ const stats = await fs.stat(jsonPath);
+ cacheSizeBytes = stats.size;
+ cacheType = 'json';
+ } catch {
+ // No cache file exists
+ cacheType = 'none';
+ }
  }

- // Determine index status
+ // Determine index status and progressive indexing info
  let indexStatus = 'empty';
+ let progressiveIndexing = null;
+
  if (this.indexer?.isIndexing) {
  indexStatus = 'indexing';
+ // Include progressive indexing status
+ if (this.indexer.indexingStatus) {
+ progressiveIndexing = {
+ inProgress: this.indexer.indexingStatus.inProgress,
+ totalFiles: this.indexer.indexingStatus.totalFiles,
+ processedFiles: this.indexer.indexingStatus.processedFiles,
+ percentage: this.indexer.indexingStatus.percentage
+ };
+ }
  } else if (vectorStore.length > 0) {
  indexStatus = 'ready';
  }
@@ -85,11 +108,13 @@ export class StatusReporter {
  status: indexStatus,
  filesIndexed: uniqueFiles.size,
  chunksCount: vectorStore.length,
- chunkingMode: this.config.chunkingMode
+ chunkingMode: this.config.chunkingMode,
+ ...(progressiveIndexing && { progressiveIndexing })
  },

  cache: {
  enabled: this.config.enableCache,
+ type: cacheType,
  path: this.config.cacheDirectory,
  sizeBytes: cacheSizeBytes,
  sizeFormatted: formatBytes(cacheSizeBytes)
@@ -101,6 +126,12 @@ export class StatusReporter {
  semanticWeight: this.config.semanticWeight,
  exactMatchBoost: this.config.exactMatchBoost,
  workerThreads: this.config.workerThreads
+ },
+
+ resourceThrottling: {
+ maxCpuPercent: this.config.maxCpuPercent,
+ batchDelay: this.config.batchDelay,
+ maxWorkers: this.config.maxWorkers
  }
  };
  }
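Taken together, the StatusReporter changes mean a status report captured mid-indexing now carries the cache type, progressive-indexing progress, and the throttling settings. An illustrative payload (field names from the diff above; all values invented):

```json
{
  "index": {
    "status": "indexing",
    "filesIndexed": 4210,
    "chunksCount": 18342,
    "chunkingMode": "smart",
    "progressiveIndexing": { "inProgress": true, "totalFiles": 9800, "processedFiles": 4210, "percentage": 43 }
  },
  "cache": { "enabled": true, "type": "sqlite", "path": ".smart-coding-cache", "sizeBytes": 52428800, "sizeFormatted": "50 MB" },
  "resourceThrottling": { "maxCpuPercent": 50, "batchDelay": 100, "maxWorkers": "auto" }
}
```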
@@ -2,21 +2,35 @@ import path from "path";
  import { cosineSimilarity } from "../lib/utils.js";

  export class HybridSearch {
- constructor(embedder, cache, config) {
+ constructor(embedder, cache, config, indexer = null) {
  this.embedder = embedder;
  this.cache = cache;
  this.config = config;
+ this.indexer = indexer; // Reference to indexer for status checking
  }

  async search(query, maxResults) {
  const vectorStore = this.cache.getVectorStore();

  if (vectorStore.length === 0) {
+ // Check if indexing is in progress
+ if (this.indexer?.indexingStatus?.inProgress) {
+ return {
+ results: [],
+ message: `Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Search available but results may be incomplete. Please wait for indexing to finish for full coverage.`
+ };
+ }
  return {
  results: [],
  message: "No code has been indexed yet. Please wait for initial indexing to complete."
  };
  }
+
+ // Show warning if indexing is still in progress but we have some results
+ let indexingWarning = null;
+ if (this.indexer?.indexingStatus?.inProgress) {
+ indexingWarning = `⚠️ Indexing in progress (${this.indexer.indexingStatus.percentage}% complete). Results shown are from partially indexed codebase.\n\n`;
+ }

  // Generate query embedding
  const queryEmbed = await this.embedder(query, { pooling: "mean", normalize: true });
@@ -50,7 +64,7 @@ export class HybridSearch {
  .sort((a, b) => b.score - a.score)
  .slice(0, maxResults);

- return { results, message: null };
+ return { results, message: null, indexingWarning };
  }

  formatResults(results) {
@@ -105,7 +119,7 @@ export async function handleToolCall(request, hybridSearch) {
  const query = request.params.arguments.query;
  const maxResults = request.params.arguments.maxResults || hybridSearch.config.maxResults;

- const { results, message } = await hybridSearch.search(query, maxResults);
+ const { results, message, indexingWarning } = await hybridSearch.search(query, maxResults);

  if (message) {
  return {
@@ -113,7 +127,12 @@ export async function handleToolCall(request, hybridSearch) {
  };
  }

- const formattedText = hybridSearch.formatResults(results);
+ let formattedText = hybridSearch.formatResults(results);
+
+ // Prepend indexing warning if present
+ if (indexingWarning) {
+ formattedText = indexingWarning + formattedText;
+ }

  return {
  content: [{ type: "text", text: formattedText }]
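Because the new constructor parameter defaults to `null`, existing call sites keep working; passing the indexer is what enables the progress checks. A minimal usage sketch (the variable wiring is illustrative; the real server setup is not shown in this diff):

```js
// Wiring the indexer into HybridSearch enables the in-progress warnings above.
const search = new HybridSearch(embedder, cache, config, indexer);
const { results, message, indexingWarning } = await search.search("parse config file", 10);
if (indexingWarning) console.error(indexingWarning.trim()); // surfaced to the user ahead of results
```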
@@ -6,6 +6,7 @@ import os from "os";
  import { Worker } from "worker_threads";
  import { fileURLToPath } from "url";
  import { smartChunk, hashContent } from "../lib/utils.js";
+ import { ResourceThrottle } from "../lib/resource-throttle.js";

  const __dirname = path.dirname(fileURLToPath(import.meta.url));

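The new `../lib/resource-throttle.js` module is imported here but not included in this diff. Its call sites below (`maxWorkers`, `getWorkerCount`, `throttledBatch`) constrain its shape; here is a minimal sketch consistent with those call sites, which may differ from the shipped module:

```js
// Sketch of ResourceThrottle inferred from its call sites in this diff; not the shipped code.
import os from "os";

export class ResourceThrottle {
  constructor(config = {}) {
    this.maxCpuPercent = config.maxCpuPercent ?? 50;
    this.batchDelay = config.batchDelay ?? 100;
    // Cap workers to roughly the configured share of cores, unless explicitly overridden.
    const budget = Math.floor(os.cpus().length * (this.maxCpuPercent / 100));
    this.maxWorkers = config.maxWorkers && config.maxWorkers !== "auto"
      ? Number(config.maxWorkers)
      : Math.max(1, budget);
  }

  // Clamp a requested worker count to the throttled maximum.
  getWorkerCount(requested) {
    return Math.max(1, Math.min(Number(requested) || 1, this.maxWorkers));
  }

  // Pause between batches so indexing doesn't monopolize the CPU.
  async throttledBatch(_batch) {
    if (this.batchDelay > 0) {
      await new Promise((resolve) => setTimeout(resolve, this.batchDelay));
    }
  }
}
```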
@@ -19,6 +20,17 @@ export class CodebaseIndexer {
  this.workers = [];
  this.workerReady = [];
  this.isIndexing = false;
+
+ // Initialize resource throttling
+ this.throttle = new ResourceThrottle(config);
+
+ // Track indexing status for progressive search
+ this.indexingStatus = {
+ inProgress: false,
+ totalFiles: 0,
+ processedFiles: 0,
+ percentage: 0
+ };
  }

  /**
@@ -33,8 +45,8 @@ export class CodebaseIndexer {
  }

  const numWorkers = this.config.workerThreads === "auto"
- ? Math.max(1, os.cpus().length - 1)
- : (this.config.workerThreads || 1);
+ ? this.throttle.maxWorkers // Use throttled worker count
+ : this.throttle.getWorkerCount(this.config.workerThreads);

  // Only use workers if we have more than 1 CPU
  if (numWorkers <= 1) {
@@ -426,6 +438,14 @@ export class CodebaseIndexer {

  this.isIndexing = true;

+ // Initialize indexing status for progressive search
+ this.indexingStatus = {
+ inProgress: true,
+ totalFiles: 0,
+ processedFiles: 0,
+ percentage: 0
+ };
+
  try {
  if (force) {
  console.error("[Indexer] Force reindex requested: clearing cache");
@@ -470,35 +490,15 @@ export class CodebaseIndexer {
  }
  }

- // Step 2: Pre-filter unchanged files (early hash check)
- const filesToProcess = await this.preFilterFiles(files);
-
- if (filesToProcess.length === 0) {
- console.error("[Indexer] All files unchanged, nothing to index");
- this.sendProgress(100, 100, "All files up to date");
- await this.cache.save();
- const vectorStore = this.cache.getVectorStore();
- return {
- skipped: false,
- filesProcessed: 0,
- chunksCreated: 0,
- totalFiles: new Set(vectorStore.map(v => v.file)).size,
- totalChunks: vectorStore.length,
- message: "All files up to date"
- };
- }
-
- // Send progress: filtering complete
- this.sendProgress(10, 100, `Processing ${filesToProcess.length} changed files`);
-
- // Step 3: Determine batch size based on project size
+ // Step 2: Process files in adaptive batches with lazy filtering
+ // Instead of pre-filtering all files (expensive), check hashes during processing
  const adaptiveBatchSize = files.length > 10000 ? 500 :
  files.length > 1000 ? 200 :
  this.config.batchSize || 100;

- console.error(`[Indexer] Processing ${filesToProcess.length} files (batch size: ${adaptiveBatchSize})`);
+ console.error(`[Indexer] Processing ${files.length} files with lazy filtering (batch size: ${adaptiveBatchSize})`);

- // Step 4: Initialize worker threads (always use when multi-core available)
+ // Step 3: Initialize worker threads (always use when multi-core available)
  const useWorkers = os.cpus().length > 1;

  if (useWorkers) {
@@ -510,30 +510,69 @@ export class CodebaseIndexer {

  let totalChunks = 0;
  let processedFiles = 0;
+ let skippedFiles = 0;
+ let batchCounter = 0; // Track batches for incremental saves
+
+ // Update total file count for status tracking (estimated, will adjust as we filter)
+ this.indexingStatus.totalFiles = files.length;

- // Step 5: Process files in adaptive batches
- for (let i = 0; i < filesToProcess.length; i += adaptiveBatchSize) {
- const batch = filesToProcess.slice(i, i + adaptiveBatchSize);
+ // Step 4: Process files in adaptive batches with inline lazy filtering
+ for (let i = 0; i < files.length; i += adaptiveBatchSize) {
+ const batch = files.slice(i, i + adaptiveBatchSize);

- // Generate all chunks for this batch
+ // Lazy filter and generate chunks for this batch
  const allChunks = [];
+ const fileHashes = new Map();

- for (const { file, content, hash } of batch) {
- // Remove old chunks for this file
- this.cache.removeFileFromStore(file);
-
- const chunks = smartChunk(content, file, this.config);
-
- for (const chunk of chunks) {
- allChunks.push({
- file,
- text: chunk.text,
- startLine: chunk.startLine,
- endLine: chunk.endLine,
- hash
- });
+ for (const file of batch) {
+ try {
+ const stats = await fs.stat(file);
+
+ // Skip directories and oversized files
+ if (stats.isDirectory()) continue;
+ if (stats.size > this.config.maxFileSize) {
+ skippedFiles++;
+ continue;
+ }
+
+ // Read content and check hash
+ const content = await fs.readFile(file, "utf-8");
+ const hash = hashContent(content);
+
+ // Skip unchanged files inline (lazy check)
+ if (this.cache.getFileHash(file) === hash) {
+ skippedFiles++;
+ continue;
+ }
+
+ // File changed - remove old chunks and prepare new ones
+ this.cache.removeFileFromStore(file);
+ const chunks = smartChunk(content, file, this.config);
+
+ for (const chunk of chunks) {
+ allChunks.push({
+ file,
+ text: chunk.text,
+ startLine: chunk.startLine,
+ endLine: chunk.endLine,
+ hash
+ });
+ }
+
+ fileHashes.set(file, hash);
+ } catch (error) {
+ // Skip files with read errors
+ skippedFiles++;
+ if (this.config.verbose) {
+ console.error(`[Indexer] Error reading ${path.basename(file)}: ${error.message}`);
+ }
  }
  }
+
+ // Skip this batch if no chunks to process
+ if (allChunks.length === 0) {
+ continue;
+ }

  // Process chunks (with workers if available, otherwise single-threaded)
  let results;
@@ -543,11 +582,13 @@ export class CodebaseIndexer {
  results = await this.processChunksSingleThreaded(allChunks);
  }

- // Store successful results
- const fileHashes = new Map();
+ // Collect successful results for batch insert
+ const chunksToInsert = [];
+ const filesProcessedInBatch = new Set();
+
  for (const result of results) {
  if (result.success) {
- this.cache.addToStore({
+ chunksToInsert.push({
  file: result.file,
  startLine: result.startLine,
  endLine: result.endLine,
@@ -555,11 +596,17 @@ export class CodebaseIndexer {
  vector: result.vector
  });
  totalChunks++;
+ filesProcessedInBatch.add(result.file);
  }
- // Track hash for each file
- const chunkInfo = allChunks.find(c => c.file === result.file);
- if (chunkInfo) {
- fileHashes.set(result.file, chunkInfo.hash);
+ }
+
+ // Batch insert to SQLite (much faster than individual inserts)
+ if (chunksToInsert.length > 0 && typeof this.cache.addBatchToStore === 'function') {
+ this.cache.addBatchToStore(chunksToInsert);
+ } else {
+ // Fallback for old cache implementation
+ for (const chunk of chunksToInsert) {
+ this.cache.addToStore(chunk);
  }
  }

@@ -568,17 +615,35 @@ export class CodebaseIndexer {
  this.cache.setFileHash(file, hash);
  }

- processedFiles += batch.length;
+ processedFiles += filesProcessedInBatch.size;
+ batchCounter++;
+
+ // Update indexing status for progressive search
+ const estimatedTotal = files.length - skippedFiles;
+ this.indexingStatus.processedFiles = processedFiles;
+ this.indexingStatus.totalFiles = Math.max(estimatedTotal, processedFiles);
+ this.indexingStatus.percentage = estimatedTotal > 0 ? Math.floor((processedFiles / estimatedTotal) * 100) : 100;
+
+ // Incremental save to SQLite (every N batches)
+ const saveInterval = this.config.incrementalSaveInterval || 5;
+ if (batchCounter % saveInterval === 0) {
+ if (typeof this.cache.saveIncremental === 'function') {
+ await this.cache.saveIncremental();
+ }
+ }
+
+ // Apply CPU throttling (delay between batches)
+ await this.throttle.throttledBatch(null);

  // Progress indicator every batch
- if (processedFiles % (adaptiveBatchSize * 2) === 0 || processedFiles === filesToProcess.length) {
+ if (processedFiles > 0 && (processedFiles % (adaptiveBatchSize * 2) === 0 || i + adaptiveBatchSize >= files.length)) {
  const elapsed = ((Date.now() - totalStartTime) / 1000).toFixed(1);
  const rate = (processedFiles / parseFloat(elapsed)).toFixed(0);
- console.error(`[Indexer] Progress: ${processedFiles}/${filesToProcess.length} files (${rate} files/sec)`);
+ console.error(`[Indexer] Progress: ${processedFiles} changed, ${skippedFiles} skipped (${rate} files/sec)`);

  // Send MCP progress notification (10-95% range for batch processing)
- const progressPercent = Math.floor(10 + (processedFiles / filesToProcess.length) * 85);
- this.sendProgress(progressPercent, 100, `Indexed ${processedFiles}/${filesToProcess.length} files (${rate}/sec)`);
+ const progressPercent = Math.min(95, Math.floor(10 + (i / files.length) * 85));
+ this.sendProgress(progressPercent, 100, `Indexed ${processedFiles} files, ${skippedFiles} skipped (${rate}/sec)`);
  }
  }

@@ -588,25 +653,37 @@ export class CodebaseIndexer {
  }

  const totalTime = ((Date.now() - totalStartTime) / 1000).toFixed(1);
- console.error(`[Indexer] Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
+ const changedFiles = processedFiles;
+ console.error(`[Indexer] Complete: ${totalChunks} chunks from ${changedFiles} changed files (${skippedFiles} unchanged) in ${totalTime}s`);
+
+ // Mark indexing as complete
+ this.indexingStatus.inProgress = false;
+ this.indexingStatus.percentage = 100;

  // Send completion progress
- this.sendProgress(100, 100, `Complete: ${totalChunks} chunks from ${filesToProcess.length} files in ${totalTime}s`);
+ const summaryMsg = changedFiles > 0
+ ? `Complete: ${totalChunks} chunks from ${changedFiles} changed files (${skippedFiles} unchanged) in ${totalTime}s`
+ : `Complete: No files changed (${skippedFiles} files up to date)`;
+ this.sendProgress(100, 100, summaryMsg);

  await this.cache.save();

  const vectorStore = this.cache.getVectorStore();
  return {
  skipped: false,
- filesProcessed: filesToProcess.length,
+ filesProcessed: changedFiles,
  chunksCreated: totalChunks,
  totalFiles: new Set(vectorStore.map(v => v.file)).size,
  totalChunks: vectorStore.length,
  duration: totalTime,
- message: `Indexed ${filesToProcess.length} files (${totalChunks} chunks) in ${totalTime}s`
+ message: changedFiles > 0
+ ? `Indexed ${changedFiles} files (${totalChunks} chunks, ${skippedFiles} unchanged) in ${totalTime}s`
+ : `All ${skippedFiles} files up to date`
  };
  } finally {
  this.isIndexing = false;
+ // Adjust estimated total after completion
+ this.indexingStatus.totalFiles = processedFiles + skippedFiles;
  }
  }
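The cache methods the indexer now relies on (`addBatchToStore`, `saveIncremental`, `getFileHash`) are likewise not part of this diff. For the batch-insert path, here is a sketch of how a SQLite cache with WAL and blob-stored vectors could implement it; `better-sqlite3` is an assumed dependency, and the package's actual cache module may differ:

```js
// Sketch: SQLite-backed vector cache with WAL and transactional batch inserts.
// Assumes better-sqlite3; not the package's shipped implementation.
import Database from "better-sqlite3";

export class SqliteVectorCache {
  constructor(dbPath) {
    this.db = new Database(dbPath);
    this.db.pragma("journal_mode = WAL"); // Write-Ahead Logging for better concurrency
    this.db.exec(`CREATE TABLE IF NOT EXISTS chunks (
      file TEXT, startLine INTEGER, endLine INTEGER, text TEXT, vector BLOB
    )`);
    this.insert = this.db.prepare(
      "INSERT INTO chunks (file, startLine, endLine, text, vector) VALUES (?, ?, ?, ?, ?)"
    );
  }

  // One transaction per batch is far faster than autocommitting each row.
  addBatchToStore(chunks) {
    const insertMany = this.db.transaction((rows) => {
      for (const c of rows) {
        // Embeddings stored as binary blobs keep the cache smaller than JSON arrays.
        const blob = Buffer.from(new Float32Array(c.vector).buffer);
        this.insert.run(c.file, c.startLine, c.endLine, c.text, blob);
      }
    });
    insertMany(chunks);
  }
}
```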