semantic-code-mcp 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ import fs from "fs/promises";
2
+ import path from "path";
3
+ import { FILE_TYPE_MAP, IGNORE_PATTERNS } from "./ignore-patterns.js";
4
+
5
/**
 * Detects project types (node, dotnet, python, ...) by scanning a directory
 * for well-known marker files (exact names like "package.json" or wildcard
 * patterns like "*.csproj"), then derives a combined ignore-pattern list for
 * the detected stacks.
 */
export class ProjectDetector {
  /**
   * @param {string} searchDirectory - Root directory to scan for marker files.
   */
  constructor(searchDirectory) {
    this.searchDirectory = searchDirectory;
    // Accumulates detected project types across calls; Set dedupes.
    this.detectedTypes = new Set();
  }

  /**
   * Scan the search directory for every marker file listed in FILE_TYPE_MAP.
   * @returns {Promise<string[]>} Detected project type identifiers.
   */
  async detectProjectTypes() {
    for (const marker of Object.keys(FILE_TYPE_MAP)) {
      if (marker.includes('*')) {
        // Wildcard patterns like *.csproj need a directory listing.
        await this.detectWithWildcard(marker);
      } else {
        await this.detectExactFile(marker);
      }
    }

    return Array.from(this.detectedTypes);
  }

  /**
   * Detect a project type whose marker is an exact file name.
   * A missing file is the expected common case and is silently ignored.
   * @param {string} markerFile - Exact marker file name (e.g. "package.json").
   */
  async detectExactFile(markerFile) {
    const markerPath = path.join(this.searchDirectory, markerFile);
    try {
      await fs.access(markerPath);
      const projectType = FILE_TYPE_MAP[markerFile];
      this.detectedTypes.add(projectType);
      console.error(`[Detector] Detected ${projectType} project (${markerFile})`);
    } catch {
      // File doesn't exist, continue
    }
  }

  /**
   * Detect a project type whose marker is a wildcard pattern (e.g. "*.csproj").
   *
   * The pattern is compiled to a regex by escaping every regex metacharacter
   * and expanding each "*" to ".*". (The previous implementation replaced only
   * the FIRST "*" and left "." unescaped, so "*.csproj" also matched names
   * like "foo_csproj".)
   * @param {string} pattern - Wildcard marker pattern from FILE_TYPE_MAP.
   */
  async detectWithWildcard(pattern) {
    try {
      const files = await fs.readdir(this.searchDirectory);
      const escaped = pattern
        .split('*')
        .map((part) => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('.*');
      const regex = new RegExp(`^${escaped}$`);

      for (const file of files) {
        if (regex.test(file)) {
          const projectType = FILE_TYPE_MAP[pattern];
          this.detectedTypes.add(projectType);
          console.error(`[Detector] Detected ${projectType} project (${file})`);
          break; // One matching file is enough to flag the type.
        }
      }
    } catch {
      // Directory read failed — treat as "no markers found".
    }
  }

  /**
   * Build the ignore-pattern list for the detected project types:
   * common patterns plus the per-type patterns, deduplicated.
   * @returns {string[]}
   */
  getSmartIgnorePatterns() {
    const patterns = [...IGNORE_PATTERNS.common];

    for (const type of this.detectedTypes) {
      if (IGNORE_PATTERNS[type]) {
        patterns.push(...IGNORE_PATTERNS[type]);
      }
    }

    // Remove duplicates
    return [...new Set(patterns)];
  }

  /**
   * Lightweight summary for logging/diagnostics.
   * @returns {{detectedTypes: string[], patternCount: number}}
   */
  getSummary() {
    return {
      detectedTypes: Array.from(this.detectedTypes),
      patternCount: this.getSmartIgnorePatterns().length
    };
  }
}
@@ -0,0 +1,85 @@
1
+ import os from 'os';
2
+
3
/**
 * Resource throttling utility to prevent CPU/memory exhaustion during indexing.
 * Ensures the MCP server doesn't freeze the user's laptop.
 */
export class ResourceThrottle {
  /**
   * @param {object} [config]
   * @param {number} [config.maxCpuPercent=50] - Target CPU ceiling in percent.
   * @param {number} [config.batchDelay=10] - Delay between batches, in ms.
   * @param {number|string} [config.maxWorkers='auto'] - Worker cap; 'auto'
   *   (or omitted) uses 50% of the available cores.
   */
  constructor(config = {}) {
    // `||` (not `??`) is deliberate: a 0% CPU limit is meaningless, so 0
    // falls back to the default along with null/undefined.
    this.maxCpuPercent = config.maxCpuPercent || 50;

    // 0 is a valid (no-op) delay, so nullish coalescing is required here.
    this.batchDelay = config.batchDelay ?? 10;

    // Auto policy: 50% of cores (balanced performance/responsiveness),
    // never below 1. Computed once so both fallback paths agree.
    const autoWorkers = Math.max(1, Math.floor(os.cpus().length * 0.5));

    if (config.maxWorkers === 'auto' || config.maxWorkers === undefined) {
      this.maxWorkers = autoWorkers;
    } else {
      // Accept either a number or a numeric string.
      const parsed = typeof config.maxWorkers === 'number'
        ? config.maxWorkers
        : Number.parseInt(config.maxWorkers, 10);

      if (Number.isNaN(parsed) || parsed < 1) {
        console.error(`[Throttle] Invalid maxWorkers: ${config.maxWorkers}, using auto`);
        this.maxWorkers = autoWorkers;
      } else {
        this.maxWorkers = parsed;
      }
    }

    console.error(`[Throttle] CPU limit: ${this.maxCpuPercent}%, Batch delay: ${this.batchDelay}ms, Max workers: ${this.maxWorkers}`);
  }

  /**
   * Execute one unit of work, then pause for `batchDelay` ms to yield CPU.
   * @param {(() => Promise<void>)|null} work - Work to run (skipped if falsy).
   * @param {AbortSignal|null} [signal] - When aborted, the post-work delay is skipped.
   */
  async throttledBatch(work, signal = null) {
    if (work) {
      await work();
    }

    // Apply the inter-batch delay unless cancelled or disabled (delay of 0).
    if (!signal?.aborted && this.batchDelay > 0) {
      await this.sleep(this.batchDelay);
    }
  }

  /**
   * Resolve after `ms` milliseconds.
   * @param {number} ms
   * @returns {Promise<void>}
   */
  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Clamp a requested worker count to the configured maximum.
   * @param {number|string} requestedWorkers - 'auto' or a (coercible) number.
   * @returns {number} Effective worker count (never above `maxWorkers`).
   */
  getWorkerCount(requestedWorkers) {
    if (requestedWorkers === 'auto') {
      return this.maxWorkers;
    }
    // Coerce (numeric strings were previously accepted via Math.min); a
    // non-numeric request falls back to the cap instead of producing NaN.
    const requested = Number(requestedWorkers);
    if (Number.isNaN(requested)) {
      return this.maxWorkers;
    }
    return Math.min(requested, this.maxWorkers);
  }

  /**
   * Check if we should pause due to high CPU usage.
   * Future enhancement: monitor actual CPU usage and pause if needed.
   * For now we rely on worker limits and batch delays, so this always
   * reports "OK to continue".
   * @returns {Promise<boolean>}
   */
  async checkCpuUsage() {
    return true;
  }
}
79
+
80
/**
 * Module-level sleep helper.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
export function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
@@ -0,0 +1,468 @@
1
+ import Database from 'better-sqlite3';
2
+ import fs from 'fs/promises';
3
+ import path from 'path';
4
+ import { cosineSimilarity } from './utils.js';
5
+
6
/**
 * SQLite-based embeddings cache for fast, efficient storage.
 * Replaces the JSON-based cache for better performance on large codebases;
 * a legacy JSON cache found on disk is migrated automatically by load().
 */
export class SQLiteCache {
  // Prepared statements keyed by SQL text. better-sqlite3 statements are
  // bound to a single Database handle, so the map is cleared on close().
  // Caching avoids re-preparing identical SQL on every hot-path call
  // (the previous implementation re-prepared on each insert/lookup).
  #statements = new Map();

  /**
   * @param {object} config - Must provide `cacheDirectory` (string) and
   *   `enableCache` (boolean). Stored by reference, not copied.
   */
  constructor(config) {
    this.config = config;
    this.db = null;
    this.isSaving = false;
    this.dbPath = path.join(config.cacheDirectory, 'embeddings.db');

    // Track indexing status for progressive indexing (read by callers).
    this.indexingStatus = {
      inProgress: false,
      totalFiles: 0,
      processedFiles: 0,
      percentage: 0
    };
  }

  /**
   * Prepare a statement once and reuse it on subsequent calls.
   * @param {string} sql
   */
  #prepare(sql) {
    let stmt = this.#statements.get(sql);
    if (stmt === undefined) {
      stmt = this.db.prepare(sql);
      this.#statements.set(sql, stmt);
    }
    return stmt;
  }

  /**
   * Initialize the SQLite database, create the schema, and migrate a legacy
   * JSON cache when one exists and the database is still empty.
   * No-op when caching is disabled.
   * @throws When the database cannot be opened or migration fails.
   */
  async load() {
    if (!this.config.enableCache) return;

    try {
      // Ensure cache directory exists
      await fs.mkdir(this.config.cacheDirectory, { recursive: true });

      // Check if we need to migrate from JSON
      const jsonCacheExists = await this.checkJSONCache();

      // Open SQLite database
      this.db = new Database(this.dbPath);

      // Enable performance optimizations
      this.db.pragma('journal_mode = WAL'); // Write-Ahead Logging for better concurrency
      this.db.pragma('synchronous = NORMAL'); // Faster writes, still safe
      this.db.pragma('cache_size = 10000'); // 10MB cache
      this.db.pragma('temp_store = MEMORY'); // Temp tables in memory

      // Create schema if not exists
      this.createSchema();

      // Only migrate into an empty database so repeated loads are idempotent.
      if (jsonCacheExists && this.getVectorCount() === 0) {
        console.error('[Cache] Migrating from JSON to SQLite...');
        await this.migrateFromJSON();
      }

      const count = this.getVectorCount();
      const fileCount = this.getFileCount();
      console.error(`[Cache] Loaded SQLite cache: ${count} embeddings from ${fileCount} files`);
    } catch (error) {
      console.error('[Cache] Failed to initialize SQLite cache:', error.message);
      throw error;
    }
  }

  /**
   * Create the database schema (idempotent) and apply in-place migrations
   * for databases created by older versions.
   */
  createSchema() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS embeddings (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file TEXT NOT NULL,
        start_line INTEGER NOT NULL,
        end_line INTEGER NOT NULL,
        content TEXT NOT NULL,
        vector BLOB NOT NULL,
        indexed_at INTEGER NOT NULL
      );

      CREATE TABLE IF NOT EXISTS file_hashes (
        file TEXT PRIMARY KEY,
        hash TEXT NOT NULL,
        mtime REAL,
        indexed_at INTEGER NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_file ON embeddings(file);
      CREATE INDEX IF NOT EXISTS idx_indexed_at ON embeddings(indexed_at);
    `);

    // Migration: older databases lack the mtime column. SQLite reports
    // "duplicate column name" when the migration already ran — that is the
    // only error we expect, so anything else is surfaced instead of swallowed.
    try {
      this.db.exec('ALTER TABLE file_hashes ADD COLUMN mtime REAL');
    } catch (error) {
      if (!/duplicate column/i.test(error.message)) {
        throw error;
      }
    }
  }

  /**
   * Check whether a legacy JSON cache file exists.
   * @returns {Promise<boolean>}
   */
  async checkJSONCache() {
    try {
      const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
      await fs.access(jsonPath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Migrate a legacy JSON cache (embeddings + file hashes) into SQLite,
   * then rename the JSON files to *.backup so migration never reruns.
   * Requires BOTH JSON files to be present; otherwise it is a no-op.
   * @throws When parsing, inserting, or renaming fails.
   */
  async migrateFromJSON() {
    try {
      const jsonCachePath = path.join(this.config.cacheDirectory, 'embeddings.json');
      const jsonHashPath = path.join(this.config.cacheDirectory, 'file-hashes.json');

      const [cacheData, hashData] = await Promise.all([
        fs.readFile(jsonCachePath, 'utf-8').catch(() => null),
        fs.readFile(jsonHashPath, 'utf-8').catch(() => null)
      ]);

      if (!cacheData || !hashData) {
        console.error('[Cache] No JSON cache found to migrate');
        return;
      }

      const vectorStore = JSON.parse(cacheData);
      const fileHashes = new Map(Object.entries(JSON.parse(hashData)));

      console.error(`[Cache] Migrating ${vectorStore.length} embeddings...`);

      const insertVector = this.#prepare(`
        INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
        VALUES (?, ?, ?, ?, ?, ?)
      `);

      // Legacy hashes carry no mtime; the column stays NULL for them.
      const insertHash = this.#prepare(`
        INSERT OR REPLACE INTO file_hashes (file, hash, indexed_at)
        VALUES (?, ?, ?)
      `);

      // One transaction => one fsync instead of one per row.
      const now = Date.now();
      const runMigration = this.db.transaction(() => {
        for (const chunk of vectorStore) {
          insertVector.run(
            chunk.file,
            chunk.startLine,
            chunk.endLine,
            chunk.content,
            this.vectorToBuffer(chunk.vector),
            now
          );
        }

        for (const [file, hash] of fileHashes) {
          insertHash.run(file, hash, now);
        }
      });

      runMigration();

      console.error('[Cache] Migration complete! Backing up JSON files...');

      // Backup old JSON files
      await fs.rename(jsonCachePath, jsonCachePath + '.backup');
      await fs.rename(jsonHashPath, jsonHashPath + '.backup');

      console.error('[Cache] JSON cache backed up (you can delete .backup files if everything works)');
    } catch (error) {
      console.error('[Cache] Migration failed:', error.message);
      throw error;
    }
  }

  /**
   * Convert a vector (Float32Array or plain number[]) to a Buffer of
   * little-endian float32 values for BLOB storage.
   * @param {ArrayLike<number>} vector
   * @returns {Buffer}
   */
  vectorToBuffer(vector) {
    const float32 = new Float32Array(vector);
    return Buffer.from(float32.buffer);
  }

  /**
   * Convert a stored BLOB back to a plain number[] (float32 precision).
   * Honors byteOffset so Buffers that view a larger pool decode correctly.
   * @param {Buffer} buffer
   * @returns {number[]}
   */
  bufferToVector(buffer) {
    const float32 = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.length / 4);
    return Array.from(float32);
  }

  /**
   * Load every stored chunk, ordered by file then start line.
   * Materializes all vectors in memory — intended for search fallback paths.
   * @returns {Array<{file: string, startLine: number, endLine: number, content: string, vector: number[]}>}
   */
  getVectorStore() {
    if (!this.db) return [];

    const rows = this.#prepare(`
      SELECT file, start_line, end_line, content, vector
      FROM embeddings
      ORDER BY file, start_line
    `).all();

    return rows.map(row => ({
      file: row.file,
      startLine: row.start_line,
      endLine: row.end_line,
      content: row.content,
      vector: this.bufferToVector(row.vector)
    }));
  }

  /**
   * Vector search compatibility API used by the ANN-capable search path.
   * SQLite fallback performs in-process cosine scoring over the full store.
   * @param {number[]} queryVector
   * @param {number} [topK=10] - Invalid values fall back to 10.
   * @returns {Array<object>} Top-K chunks, each with an added `score` field.
   */
  searchByVector(queryVector, topK = 10) {
    const normalizedTopK = Number.isInteger(topK) && topK > 0 ? topK : 10;
    const vectorStore = this.getVectorStore();

    return vectorStore
      .map((chunk) => ({
        ...chunk,
        score: cosineSimilarity(queryVector, chunk.vector)
      }))
      .sort((a, b) => b.score - a.score)
      .slice(0, normalizedTopK);
  }

  /**
   * Total number of stored embedding chunks (0 when the db is closed).
   * @returns {number}
   */
  getVectorCount() {
    if (!this.db) return 0;
    return this.#prepare('SELECT COUNT(*) as count FROM embeddings').get().count;
  }

  /**
   * Number of distinct indexed files (0 when the db is closed).
   * @returns {number}
   */
  getFileCount() {
    if (!this.db) return 0;
    return this.#prepare('SELECT COUNT(DISTINCT file) as count FROM embeddings').get().count;
  }

  /**
   * @returns {{totalChunks: number, totalFiles: number}}
   */
  getStats() {
    return {
      totalChunks: this.getVectorCount(),
      totalFiles: this.getFileCount()
    };
  }

  /**
   * Insert a single chunk. Prefer addBatchToStore() for bulk inserts.
   * @param {{file: string, startLine: number, endLine: number, content: string, vector: ArrayLike<number>}} chunk
   */
  addToStore(chunk) {
    if (!this.db) return;

    this.#prepare(`
      INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `).run(
      chunk.file,
      chunk.startLine,
      chunk.endLine,
      chunk.content,
      this.vectorToBuffer(chunk.vector),
      Date.now()
    );
  }

  /**
   * Insert multiple chunks inside one transaction (much faster than
   * per-chunk inserts: one fsync for the whole batch).
   * @param {Array<object>} chunks
   */
  addBatchToStore(chunks) {
    if (!this.db || chunks.length === 0) return;

    const stmt = this.#prepare(`
      INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);

    const now = Date.now();
    const runBatch = this.db.transaction(() => {
      for (const chunk of chunks) {
        stmt.run(
          chunk.file,
          chunk.startLine,
          chunk.endLine,
          chunk.content,
          this.vectorToBuffer(chunk.vector),
          now
        );
      }
    });

    runBatch();
  }

  /**
   * Remove all chunks for a specific file (e.g. before re-indexing it).
   * @param {string} file
   */
  removeFileFromStore(file) {
    if (!this.db) return;
    this.#prepare('DELETE FROM embeddings WHERE file = ?').run(file);
  }

  /**
   * Content hash recorded for a file, or null when unknown / db closed.
   * @param {string} file
   * @returns {string|null}
   */
  getFileHash(file) {
    if (!this.db) return null;
    const row = this.#prepare('SELECT hash FROM file_hashes WHERE file = ?').get(file);
    return row ? row.hash : null;
  }

  /**
   * Recorded modification time for fast change detection, or null when
   * unknown (including rows migrated from the legacy JSON cache).
   * @param {string} file
   * @returns {number|null}
   */
  getFileMtime(file) {
    if (!this.db) return null;
    const row = this.#prepare('SELECT mtime FROM file_hashes WHERE file = ?').get(file);
    return row ? row.mtime : null;
  }

  /**
   * Record (or replace) a file's hash, optionally with its mtime.
   * @param {string} file
   * @param {string} hash
   * @param {number|null} [mtime]
   */
  setFileHash(file, hash, mtime = null) {
    if (!this.db) return;

    this.#prepare(`
      INSERT OR REPLACE INTO file_hashes (file, hash, mtime, indexed_at)
      VALUES (?, ?, ?, ?)
    `).run(file, hash, mtime, Date.now());
  }

  /**
   * Delete a file's hash record.
   * @param {string} file
   */
  deleteFileHash(file) {
    if (!this.db) return;
    this.#prepare('DELETE FROM file_hashes WHERE file = ?').run(file);
  }

  /**
   * All recorded file hashes as a Map (empty when the db is closed).
   * @returns {Map<string, string>}
   */
  getAllFileHashes() {
    if (!this.db) return new Map();
    const rows = this.#prepare('SELECT file, hash FROM file_hashes').all();
    return new Map(rows.map(row => [row.file, row.hash]));
  }

  /**
   * Save (checkpoint WAL for durability).
   * SQLite writes are already persisted; this only checkpoints the WAL.
   * Checkpoint failures are logged, not thrown (best effort by design).
   */
  async save() {
    if (!this.config.enableCache || !this.db) return;

    this.isSaving = true;

    try {
      this.db.pragma('wal_checkpoint(PASSIVE)');
    } catch (error) {
      console.error('[Cache] Failed to checkpoint WAL:', error.message);
    } finally {
      this.isSaving = false;
    }
  }

  /**
   * Incremental save during indexing — intentionally a no-op: WAL-mode
   * SQLite has already persisted the writes. Kept for API compatibility.
   */
  async saveIncremental() {
    return;
  }

  /**
   * Close the database (if open) and delete the entire cache directory.
   * No-op when caching is disabled.
   * @throws When the directory removal fails.
   */
  async clear() {
    if (!this.config.enableCache) return;

    try {
      this.close();
      await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
      console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
    } catch (error) {
      console.error('[Cache] Failed to clear cache:', error.message);
      throw error;
    }
  }

  /**
   * Close the database connection. Safe to call when already closed.
   */
  close() {
    if (this.db) {
      // Cached statements die with their Database handle.
      this.#statements.clear();
      this.db.close();
      this.db = null;
    }
  }

  /**
   * Clear all file hash records.
   */
  clearAllFileHashes() {
    if (!this.db) return;
    this.db.exec('DELETE FROM file_hashes');
  }

  /**
   * Reset vectors + file hashes for a full reindex.
   */
  async resetForFullReindex() {
    this.setVectorStore([]);
    this.clearAllFileHashes();
  }

  /**
   * Replace the entire vector store (for compatibility with test code).
   * Less efficient than incremental batch operations.
   * @param {Array<object>} store
   */
  setVectorStore(store) {
    if (!this.db) return;

    this.db.exec('DELETE FROM embeddings');

    if (store.length > 0) {
      this.addBatchToStore(store);
    }
  }
}