smart-coding-mcp 1.4.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,408 @@
+import Database from 'better-sqlite3';
+import fs from 'fs/promises';
+import path from 'path';
+
+/**
+ * SQLite-based embeddings cache for fast, efficient storage
+ * Replaces JSON-based cache for better performance on large codebases
+ */
+export class SQLiteCache {
+  constructor(config) {
+    this.config = config;
+    this.db = null;
+    this.isSaving = false;
+    this.dbPath = path.join(config.cacheDirectory, 'embeddings.db');
+
+    // Track indexing status for progressive indexing
+    this.indexingStatus = {
+      inProgress: false,
+      totalFiles: 0,
+      processedFiles: 0,
+      percentage: 0
+    };
+  }
+
+  /**
+   * Initialize SQLite database and create schema
+   */
+  async load() {
+    if (!this.config.enableCache) return;
+
+    try {
+      // Ensure cache directory exists
+      await fs.mkdir(this.config.cacheDirectory, { recursive: true });
+
+      // Check if we need to migrate from JSON
+      const jsonCacheExists = await this.checkJSONCache();
+
+      // Open SQLite database
+      this.db = new Database(this.dbPath);
+
+      // Enable performance optimizations
+      this.db.pragma('journal_mode = WAL'); // Write-Ahead Logging for better concurrency
+      this.db.pragma('synchronous = NORMAL'); // Faster writes, still safe
+      this.db.pragma('cache_size = 10000'); // 10MB cache
+      this.db.pragma('temp_store = MEMORY'); // Temp tables in memory
+
+      // Create schema if not exists
+      this.createSchema();
+
+      // Migrate from JSON if needed
+      if (jsonCacheExists && this.getVectorCount() === 0) {
+        console.error('[Cache] Migrating from JSON to SQLite...');
+        await this.migrateFromJSON();
+      }
+
+      const count = this.getVectorCount();
+      const fileCount = this.getFileCount();
+      console.error(`[Cache] Loaded SQLite cache: ${count} embeddings from ${fileCount} files`);
+    } catch (error) {
+      console.error('[Cache] Failed to initialize SQLite cache:', error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * Create database schema
+   */
+  createSchema() {
+    this.db.exec(`
+      CREATE TABLE IF NOT EXISTS embeddings (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        file TEXT NOT NULL,
+        start_line INTEGER NOT NULL,
+        end_line INTEGER NOT NULL,
+        content TEXT NOT NULL,
+        vector BLOB NOT NULL,
+        indexed_at INTEGER NOT NULL
+      );
+
+      CREATE TABLE IF NOT EXISTS file_hashes (
+        file TEXT PRIMARY KEY,
+        hash TEXT NOT NULL,
+        indexed_at INTEGER NOT NULL
+      );
+
+      CREATE INDEX IF NOT EXISTS idx_file ON embeddings(file);
+      CREATE INDEX IF NOT EXISTS idx_indexed_at ON embeddings(indexed_at);
+    `);
+  }
+
+  /**
+   * Check if JSON cache exists
+   */
+  async checkJSONCache() {
+    try {
+      const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
+      await fs.access(jsonPath);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Migrate from JSON cache to SQLite
+   */
+  async migrateFromJSON() {
+    try {
+      const jsonCachePath = path.join(this.config.cacheDirectory, 'embeddings.json');
+      const jsonHashPath = path.join(this.config.cacheDirectory, 'file-hashes.json');
+
+      const [cacheData, hashData] = await Promise.all([
+        fs.readFile(jsonCachePath, 'utf-8').catch(() => null),
+        fs.readFile(jsonHashPath, 'utf-8').catch(() => null)
+      ]);
+
+      if (!cacheData || !hashData) {
+        console.error('[Cache] No JSON cache found to migrate');
+        return;
+      }
+
+      const vectorStore = JSON.parse(cacheData);
+      const fileHashes = new Map(Object.entries(JSON.parse(hashData)));
+
+      console.error(`[Cache] Migrating ${vectorStore.length} embeddings...`);
+
+      // Use transaction for fast batch insert
+      const insertVector = this.db.prepare(`
+        INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
+        VALUES (?, ?, ?, ?, ?, ?)
+      `);
+
+      const insertHash = this.db.prepare(`
+        INSERT OR REPLACE INTO file_hashes (file, hash, indexed_at)
+        VALUES (?, ?, ?)
+      `);
+
+      const transaction = this.db.transaction(() => {
+        const now = Date.now();
+
+        for (const chunk of vectorStore) {
+          const vectorBuffer = this.vectorToBuffer(chunk.vector);
+          insertVector.run(
+            chunk.file,
+            chunk.startLine,
+            chunk.endLine,
+            chunk.content,
+            vectorBuffer,
+            now
+          );
+        }
+
+        for (const [file, hash] of fileHashes) {
+          insertHash.run(file, hash, now);
+        }
+      });
+
+      transaction();
+
+      console.error('[Cache] Migration complete! Backing up JSON files...');
+
+      // Backup old JSON files
+      await fs.rename(jsonCachePath, jsonCachePath + '.backup');
+      await fs.rename(jsonHashPath, jsonHashPath + '.backup');
+
+      console.error('[Cache] JSON cache backed up (you can delete .backup files if everything works)');
+    } catch (error) {
+      console.error('[Cache] Migration failed:', error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * Convert Float32Array/Array to Buffer for SQLite storage
+   */
+  vectorToBuffer(vector) {
+    const float32 = new Float32Array(vector);
+    return Buffer.from(float32.buffer);
+  }
+
+  /**
+   * Convert Buffer back to Array for compatibility
+   */
+  bufferToVector(buffer) {
+    const float32 = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.length / 4);
+    return Array.from(float32);
+  }
+
+  /**
+   * Get all vectors from store (lazy loaded)
+   */
+  getVectorStore() {
+    if (!this.db) return [];
+
+    const stmt = this.db.prepare(`
+      SELECT file, start_line, end_line, content, vector
+      FROM embeddings
+      ORDER BY file, start_line
+    `);
+
+    const rows = stmt.all();
+    return rows.map(row => ({
+      file: row.file,
+      startLine: row.start_line,
+      endLine: row.end_line,
+      content: row.content,
+      vector: this.bufferToVector(row.vector)
+    }));
+  }
+
+  /**
+   * Get vector count
+   */
+  getVectorCount() {
+    if (!this.db) return 0;
+    const result = this.db.prepare('SELECT COUNT(*) as count FROM embeddings').get();
+    return result.count;
+  }
+
+  /**
+   * Get unique file count
+   */
+  getFileCount() {
+    if (!this.db) return 0;
+    const result = this.db.prepare('SELECT COUNT(DISTINCT file) as count FROM embeddings').get();
+    return result.count;
+  }
+
+  /**
+   * Add chunk to store with batch optimization
+   */
+  addToStore(chunk) {
+    if (!this.db) return;
+
+    const vectorBuffer = this.vectorToBuffer(chunk.vector);
+    const stmt = this.db.prepare(`
+      INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    stmt.run(
+      chunk.file,
+      chunk.startLine,
+      chunk.endLine,
+      chunk.content,
+      vectorBuffer,
+      Date.now()
+    );
+  }
+
+  /**
+   * Add multiple chunks in a transaction (much faster)
+   */
+  addBatchToStore(chunks) {
+    if (!this.db || chunks.length === 0) return;
+
+    const stmt = this.db.prepare(`
+      INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    const transaction = this.db.transaction(() => {
+      const now = Date.now();
+      for (const chunk of chunks) {
+        const vectorBuffer = this.vectorToBuffer(chunk.vector);
+        stmt.run(
+          chunk.file,
+          chunk.startLine,
+          chunk.endLine,
+          chunk.content,
+          vectorBuffer,
+          now
+        );
+      }
+    });
+
+    transaction();
+  }
+
+  /**
+   * Remove all chunks for a specific file
+   */
+  removeFileFromStore(file) {
+    if (!this.db) return;
+
+    const stmt = this.db.prepare('DELETE FROM embeddings WHERE file = ?');
+    stmt.run(file);
+  }
+
+  /**
+   * Get file hash
+   */
+  getFileHash(file) {
+    if (!this.db) return null;
+
+    const stmt = this.db.prepare('SELECT hash FROM file_hashes WHERE file = ?');
+    const row = stmt.get(file);
+    return row ? row.hash : null;
+  }
+
+  /**
+   * Set file hash
+   */
+  setFileHash(file, hash) {
+    if (!this.db) return;
+
+    const stmt = this.db.prepare(`
+      INSERT OR REPLACE INTO file_hashes (file, hash, indexed_at)
+      VALUES (?, ?, ?)
+    `);
+    stmt.run(file, hash, Date.now());
+  }
+
+  /**
+   * Delete file hash
+   */
+  deleteFileHash(file) {
+    if (!this.db) return;
+
+    const stmt = this.db.prepare('DELETE FROM file_hashes WHERE file = ?');
+    stmt.run(file);
+  }
+
+  /**
+   * Get all file hashes as Map
+   */
+  getAllFileHashes() {
+    if (!this.db) return new Map();
+
+    const stmt = this.db.prepare('SELECT file, hash FROM file_hashes');
+    const rows = stmt.all();
+    return new Map(rows.map(row => [row.file, row.hash]));
+  }
+
+  /**
+   * Save (checkpoint WAL for durability)
+   * With SQLite, writes are already persisted, this just checkpoints the WAL
+   */
+  async save() {
+    if (!this.config.enableCache || !this.db) return;
+
+    this.isSaving = true;
+
+    try {
+      // Checkpoint WAL to ensure durability
+      this.db.pragma('wal_checkpoint(PASSIVE)');
+    } catch (error) {
+      console.error('[Cache] Failed to checkpoint WAL:', error.message);
+    } finally {
+      this.isSaving = false;
+    }
+  }
+
+  /**
+   * Incremental save during indexing (no-op for SQLite, already persisted)
+   */
+  async saveIncremental() {
+    // SQLite writes are already persisted due to WAL mode
+    // This is a no-op but kept for API compatibility
+    return;
+  }
+
+  /**
+   * Clear all cache data
+   */
+  async clear() {
+    if (!this.config.enableCache) return;
+
+    try {
+      if (this.db) {
+        this.db.close();
+        this.db = null;
+      }
+
+      await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
+      console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
+    } catch (error) {
+      console.error('[Cache] Failed to clear cache:', error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * Close database connection
+   */
+  close() {
+    if (this.db) {
+      this.db.close();
+      this.db = null;
+    }
+  }
+
+  /**
+   * Set vector store (for compatibility with test code)
+   * This is less efficient than batch operations but kept for compatibility
+   */
+  setVectorStore(store) {
+    if (!this.db) return;
+
+    // Clear existing data
+    this.db.exec('DELETE FROM embeddings');
+
+    // Insert new data
+    if (store.length > 0) {
+      this.addBatchToStore(store);
+    }
+  }
+}
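For orientation, here is a minimal usage sketch of the SQLiteCache class added above. This is not code from the package: the import path is assumed (the diff does not name the new file), and the config fields and chunk shape are inferred from the hunk itself.

// Minimal sketch; the file path below is assumed, not shown in the diff.
import { SQLiteCache } from './lib/sqlite-cache.js';

const cache = new SQLiteCache({
  enableCache: true,                     // load()/save()/clear() short-circuit when false
  cacheDirectory: '.smart-coding-cache'  // embeddings.db is created inside this directory
});

await cache.load();  // opens the DB, creates the schema, migrates embeddings.json if one exists

// Chunk shape inferred from addBatchToStore(): file, startLine, endLine, content, vector
cache.addBatchToStore([
  { file: 'src/a.js', startLine: 1, endLine: 3, content: 'function a() {}', vector: new Array(256).fill(0) }
]);
cache.setFileHash('src/a.js', '<content-hash>');

console.error(cache.getVectorCount());   // 1
const vectors = cache.getVectorStore();  // [{ file, startLine, endLine, content, vector }]

await cache.save();  // checkpoints the WAL; individual writes are already durable
cache.close();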
package/lib/tokenizer.js CHANGED
@@ -10,6 +10,10 @@
  * Each model has its own maximum sequence length
  */
 export const MODEL_TOKEN_LIMITS = {
+  // MRL / Nomic models (longer context)
+  "nomic-ai/nomic-embed-text-v1.5": 8192,
+  "nomic-ai/nomic-embed-text-v1": 2048,
+
   // Sentence Transformers / MiniLM family
   "Xenova/all-MiniLM-L6-v2": 256,
   "Xenova/all-MiniLM-L12-v2": 256,
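The new entries extend the per-model token-limit table in lib/tokenizer.js. A small sketch of how such a limit might be consulted; the helper below is illustrative and the import path is assumed, it is not an API the package is known to export.

import { MODEL_TOKEN_LIMITS } from './lib/tokenizer.js'; // path assumed

// Hypothetical helper: unknown models fall back to a conservative limit.
function tokenLimitFor(modelName, fallback = 512) {
  return MODEL_TOKEN_LIMITS[modelName] ?? fallback;
}

tokenLimitFor('nomic-ai/nomic-embed-text-v1.5'); // 8192
tokenLimitFor('Xenova/all-MiniLM-L6-v2');        // 256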
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-coding-mcp",
3
- "version": "1.4.1",
3
+ "version": "2.1.0",
4
4
  "description": "An extensible MCP server that enhances coding productivity with AI-powered features including semantic code search, intelligent indexing, and more, using local LLMs",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -45,11 +45,14 @@
45
45
  "homepage": "https://github.com/omar-haris/smart-coding-mcp#readme",
46
46
  "license": "MIT",
47
47
  "dependencies": {
48
+ "@huggingface/transformers": "^3.8.1",
48
49
  "@modelcontextprotocol/sdk": "^1.0.4",
49
- "@xenova/transformers": "^2.17.2",
50
+ "better-sqlite3": "^11.7.0",
50
51
  "chokidar": "^3.5.3",
52
+ "fastembed": "^2.1.0",
51
53
  "fdir": "^6.5.0",
52
- "glob": "^10.3.10"
54
+ "glob": "^10.3.10",
55
+ "web-tree-sitter": "^0.24.6"
53
56
  },
54
57
  "engines": {
55
58
  "node": ">=18.0.0"
@@ -0,0 +1,105 @@
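The dependency changes swap @xenova/transformers for its successor package @huggingface/transformers (Transformers.js v3) and add better-sqlite3, fastembed, and web-tree-sitter, which appear to back the new SQLite cache, embedding backends, and AST chunker seen elsewhere in this diff. A hedged sketch of what the import migration typically looks like for embedding code; whether this particular model ships ready-to-load weights is not verified here.

// Before (Transformers.js v2):
// import { pipeline } from '@xenova/transformers';

// After (Transformers.js v3): same pipeline() entry point, now with a device option.
import { pipeline } from '@huggingface/transformers';

const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5', {
  device: 'cpu' // 'webgpu' is also accepted in v3; compare the SMART_CODING_DEVICE tests below
});
const output = await extractor('function add(a, b) { return a + b; }', { pooling: 'mean', normalize: true });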
@@ -0,0 +1,105 @@
+/**
+ * Tests for AST Chunker
+ *
+ * Tests the AST-based code chunking functionality:
+ * - Tree-sitter initialization
+ * - Language detection
+ * - Semantic chunking vs smart chunking fallback
+ */
+
+import { describe, it, expect, beforeAll } from 'vitest';
+import { ASTChunker, getChunker } from '../lib/ast-chunker.js';
+import { loadConfig } from '../lib/config.js';
+
+describe('AST Chunker', () => {
+  let config;
+
+  beforeAll(async () => {
+    config = await loadConfig();
+  });
+
+  describe('Chunker Factory', () => {
+    it('should return AST chunker when mode is ast', () => {
+      const chunker = getChunker({ ...config, chunkingMode: 'ast' });
+      expect(chunker).toBeInstanceOf(ASTChunker);
+    });
+
+    it('should return smart chunker wrapper when mode is smart', () => {
+      const chunker = getChunker({ ...config, chunkingMode: 'smart' });
+      expect(typeof chunker.chunk).toBe('function');
+      expect(chunker).not.toBeInstanceOf(ASTChunker);
+    });
+  });
+
+  describe('Language Detection', () => {
+    it('should detect JavaScript files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.js')).toBe('javascript');
+      expect(chunker.getLanguageForFile('test.mjs')).toBe('javascript');
+      expect(chunker.getLanguageForFile('test.jsx')).toBe('javascript');
+    });
+
+    it('should detect TypeScript files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.ts')).toBe('typescript');
+      expect(chunker.getLanguageForFile('test.tsx')).toBe('typescript');
+    });
+
+    it('should detect Python files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.py')).toBe('python');
+    });
+
+    it('should return null for unsupported files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.sql')).toBeNull();
+      expect(chunker.getLanguageForFile('test.md')).toBeNull();
+    });
+  });
+
+  describe('Fallback Behavior', () => {
+    it('should fall back to smart chunking for unsupported languages', async () => {
+      const chunker = new ASTChunker(config);
+      const sqlContent = 'SELECT * FROM users WHERE id = 1;';
+
+      const chunks = await chunker.chunk(sqlContent, 'query.sql');
+      expect(Array.isArray(chunks)).toBe(true);
+    });
+
+    it('should handle empty content', async () => {
+      const chunker = new ASTChunker(config);
+      const chunks = await chunker.chunk('', 'empty.js');
+      expect(Array.isArray(chunks)).toBe(true);
+    });
+  });
+
+  describe('JavaScript Chunking', () => {
+    it('should chunk JavaScript functions', async () => {
+      const chunker = new ASTChunker(config);
+      const jsCode = `
+function add(a, b) {
+  return a + b;
+}
+
+function multiply(a, b) {
+  return a * b;
+}
+
+class Calculator {
+  constructor() {
+    this.result = 0;
+  }
+
+  add(n) {
+    this.result += n;
+    return this;
+  }
+}
+`;
+
+      const chunks = await chunker.chunk(jsCode, 'calc.js');
+      expect(Array.isArray(chunks)).toBe(true);
+      // Should have found some chunks (exact number depends on Tree-sitter grammar availability)
+    });
+  });
+});
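The interface exercised above (getChunker(config), chunker.chunk(content, filePath), chunker.getLanguageForFile(path)) suggests how chunking feeds the rest of the pipeline. A hedged sketch of that glue, under the assumption that chunk records carry the fields SQLiteCache stores; none of this code is from the package.

// Assumed glue code: chunk a snippet, then persist the chunks.
import { getChunker } from './lib/ast-chunker.js'; // paths assumed; mirror the test imports
import { loadConfig } from './lib/config.js';

const config = await loadConfig();
const chunker = getChunker(config); // ASTChunker when config.chunkingMode === 'ast', smart-chunker wrapper for 'smart'

const source = 'function add(a, b) { return a + b; }';
const chunks = await chunker.chunk(source, 'src/calc.js');
// Assumption: each chunk exposes file/startLine/endLine/content, matching what SQLiteCache stores;
// embedding vectors would be attached before handing the records to cache.addBatchToStore(chunks).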
@@ -0,0 +1,110 @@
+/**
+ * Tests for Device Detection
+ *
+ * Tests device detection and configuration:
+ * - CPU fallback detection
+ * - SMART_CODING_DEVICE env var handling
+ * - Config device option
+ */
+
+import { describe, it, expect, beforeAll, afterEach } from 'vitest';
+import { loadConfig, DEFAULT_CONFIG } from '../lib/config.js';
+
+describe('Device Detection', () => {
+  const originalEnv = process.env;
+
+  afterEach(() => {
+    // Restore original environment
+    process.env = { ...originalEnv };
+  });
+
+  describe('Default Configuration', () => {
+    it('should default to cpu device', () => {
+      expect(DEFAULT_CONFIG.device).toBe('cpu');
+    });
+
+    it('should have valid device options', () => {
+      const validDevices = ['cpu', 'webgpu', 'auto'];
+      expect(validDevices).toContain(DEFAULT_CONFIG.device);
+    });
+  });
+
+  describe('Environment Variable Override', () => {
+    it('should accept cpu device from env', async () => {
+      process.env.SMART_CODING_DEVICE = 'cpu';
+      const config = await loadConfig();
+      expect(config.device).toBe('cpu');
+    });
+
+    it('should accept webgpu device from env', async () => {
+      process.env.SMART_CODING_DEVICE = 'webgpu';
+      const config = await loadConfig();
+      expect(config.device).toBe('webgpu');
+    });
+
+    it('should accept auto device from env', async () => {
+      process.env.SMART_CODING_DEVICE = 'auto';
+      const config = await loadConfig();
+      expect(config.device).toBe('auto');
+    });
+
+    it('should reject invalid device values', async () => {
+      process.env.SMART_CODING_DEVICE = 'invalid';
+      const config = await loadConfig();
+      // Should fall back to default
+      expect(config.device).toBe(DEFAULT_CONFIG.device);
+    });
+
+    it('should be case-insensitive', async () => {
+      process.env.SMART_CODING_DEVICE = 'CPU';
+      const config = await loadConfig();
+      expect(config.device).toBe('cpu');
+    });
+  });
+
+  describe('Embedding Dimension Config', () => {
+    it('should default to 256 dimensions', () => {
+      expect(DEFAULT_CONFIG.embeddingDimension).toBe(256);
+    });
+
+    it('should accept valid dimensions from env', async () => {
+      process.env.SMART_CODING_EMBEDDING_DIMENSION = '512';
+      const config = await loadConfig();
+      expect(config.embeddingDimension).toBe(512);
+    });
+
+    it('should accept all valid dimensions', async () => {
+      for (const dim of [64, 128, 256, 512, 768]) {
+        process.env.SMART_CODING_EMBEDDING_DIMENSION = String(dim);
+        const config = await loadConfig();
+        expect(config.embeddingDimension).toBe(dim);
+      }
+    });
+
+    it('should reject invalid dimensions', async () => {
+      process.env.SMART_CODING_EMBEDDING_DIMENSION = '100';
+      const config = await loadConfig();
+      expect(config.embeddingDimension).toBe(DEFAULT_CONFIG.embeddingDimension);
+    });
+  });
+
+  describe('Chunking Mode Config', () => {
+    it('should default to smart chunking', () => {
+      expect(DEFAULT_CONFIG.chunkingMode).toBe('smart');
+    });
+
+    it('should accept valid modes from env', async () => {
+      for (const mode of ['smart', 'ast', 'line']) {
+        process.env.SMART_CODING_CHUNKING_MODE = mode;
+        const config = await loadConfig();
+        expect(config.chunkingMode).toBe(mode);
+      }
+    });
+
+    it('should reject invalid modes', async () => {
+      process.env.SMART_CODING_CHUNKING_MODE = 'invalid';
+      const config = await loadConfig();
+      expect(config.chunkingMode).toBe(DEFAULT_CONFIG.chunkingMode);
+    });
+  });
+});