smart-coding-mcp 1.4.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,408 @@
+import Database from 'better-sqlite3';
+import fs from 'fs/promises';
+import path from 'path';
+
+/**
+ * SQLite-based embeddings cache for fast, efficient storage
+ * Replaces JSON-based cache for better performance on large codebases
+ */
+export class SQLiteCache {
+  constructor(config) {
+    this.config = config;
+    this.db = null;
+    this.isSaving = false;
+    this.dbPath = path.join(config.cacheDirectory, 'embeddings.db');
+
+    // Track indexing status for progressive indexing
+    this.indexingStatus = {
+      inProgress: false,
+      totalFiles: 0,
+      processedFiles: 0,
+      percentage: 0
+    };
+  }
+
+  /**
+   * Initialize SQLite database and create schema
+   */
+  async load() {
+    if (!this.config.enableCache) return;
+
+    try {
+      // Ensure cache directory exists
+      await fs.mkdir(this.config.cacheDirectory, { recursive: true });
+
+      // Check if we need to migrate from JSON
+      const jsonCacheExists = await this.checkJSONCache();
+
+      // Open SQLite database
+      this.db = new Database(this.dbPath);
+
+      // Enable performance optimizations
+      this.db.pragma('journal_mode = WAL'); // Write-Ahead Logging for better concurrency
+      this.db.pragma('synchronous = NORMAL'); // Faster writes, still safe
+      this.db.pragma('cache_size = 10000'); // 10MB cache
+      this.db.pragma('temp_store = MEMORY'); // Temp tables in memory
+
+      // Create schema if not exists
+      this.createSchema();
+
+      // Migrate from JSON if needed
+      if (jsonCacheExists && this.getVectorCount() === 0) {
+        console.error('[Cache] Migrating from JSON to SQLite...');
+        await this.migrateFromJSON();
+      }
+
+      const count = this.getVectorCount();
+      const fileCount = this.getFileCount();
+      console.error(`[Cache] Loaded SQLite cache: ${count} embeddings from ${fileCount} files`);
+    } catch (error) {
+      console.error('[Cache] Failed to initialize SQLite cache:', error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * Create database schema
+   */
+  createSchema() {
+    this.db.exec(`
+      CREATE TABLE IF NOT EXISTS embeddings (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        file TEXT NOT NULL,
+        start_line INTEGER NOT NULL,
+        end_line INTEGER NOT NULL,
+        content TEXT NOT NULL,
+        vector BLOB NOT NULL,
+        indexed_at INTEGER NOT NULL
+      );
+
+      CREATE TABLE IF NOT EXISTS file_hashes (
+        file TEXT PRIMARY KEY,
+        hash TEXT NOT NULL,
+        indexed_at INTEGER NOT NULL
+      );
+
+      CREATE INDEX IF NOT EXISTS idx_file ON embeddings(file);
+      CREATE INDEX IF NOT EXISTS idx_indexed_at ON embeddings(indexed_at);
+    `);
+  }
+
+  /**
+   * Check if JSON cache exists
+   */
+  async checkJSONCache() {
+    try {
+      const jsonPath = path.join(this.config.cacheDirectory, 'embeddings.json');
+      await fs.access(jsonPath);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Migrate from JSON cache to SQLite
+   */
+  async migrateFromJSON() {
+    try {
+      const jsonCachePath = path.join(this.config.cacheDirectory, 'embeddings.json');
+      const jsonHashPath = path.join(this.config.cacheDirectory, 'file-hashes.json');
+
+      const [cacheData, hashData] = await Promise.all([
+        fs.readFile(jsonCachePath, 'utf-8').catch(() => null),
+        fs.readFile(jsonHashPath, 'utf-8').catch(() => null)
+      ]);
+
+      if (!cacheData || !hashData) {
+        console.error('[Cache] No JSON cache found to migrate');
+        return;
+      }
+
+      const vectorStore = JSON.parse(cacheData);
+      const fileHashes = new Map(Object.entries(JSON.parse(hashData)));
+
+      console.error(`[Cache] Migrating ${vectorStore.length} embeddings...`);
+
+      // Use transaction for fast batch insert
+      const insertVector = this.db.prepare(`
+        INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
+        VALUES (?, ?, ?, ?, ?, ?)
+      `);
+
+      const insertHash = this.db.prepare(`
+        INSERT OR REPLACE INTO file_hashes (file, hash, indexed_at)
+        VALUES (?, ?, ?)
+      `);
+
+      const transaction = this.db.transaction(() => {
+        const now = Date.now();
+
+        for (const chunk of vectorStore) {
+          const vectorBuffer = this.vectorToBuffer(chunk.vector);
+          insertVector.run(
+            chunk.file,
+            chunk.startLine,
+            chunk.endLine,
+            chunk.content,
+            vectorBuffer,
+            now
+          );
+        }
+
+        for (const [file, hash] of fileHashes) {
+          insertHash.run(file, hash, now);
+        }
+      });
+
+      transaction();
+
+      console.error('[Cache] Migration complete! Backing up JSON files...');
+
+      // Backup old JSON files
+      await fs.rename(jsonCachePath, jsonCachePath + '.backup');
+      await fs.rename(jsonHashPath, jsonHashPath + '.backup');
+
+      console.error('[Cache] JSON cache backed up (you can delete .backup files if everything works)');
+    } catch (error) {
+      console.error('[Cache] Migration failed:', error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * Convert Float32Array/Array to Buffer for SQLite storage
+   */
+  vectorToBuffer(vector) {
+    const float32 = new Float32Array(vector);
+    return Buffer.from(float32.buffer);
+  }
+
+  /**
+   * Convert Buffer back to Array for compatibility
+   */
+  bufferToVector(buffer) {
+    const float32 = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.length / 4);
+    return Array.from(float32);
+  }
+
+  /**
+   * Get all vectors from store (lazy loaded)
+   */
+  getVectorStore() {
+    if (!this.db) return [];
+
+    const stmt = this.db.prepare(`
+      SELECT file, start_line, end_line, content, vector
+      FROM embeddings
+      ORDER BY file, start_line
+    `);
+
+    const rows = stmt.all();
+    return rows.map(row => ({
+      file: row.file,
+      startLine: row.start_line,
+      endLine: row.end_line,
+      content: row.content,
+      vector: this.bufferToVector(row.vector)
+    }));
+  }
+
+  /**
+   * Get vector count
+   */
+  getVectorCount() {
+    if (!this.db) return 0;
+    const result = this.db.prepare('SELECT COUNT(*) as count FROM embeddings').get();
+    return result.count;
+  }
+
+  /**
+   * Get unique file count
+   */
+  getFileCount() {
+    if (!this.db) return 0;
+    const result = this.db.prepare('SELECT COUNT(DISTINCT file) as count FROM embeddings').get();
+    return result.count;
+  }
+
+  /**
+   * Add chunk to store with batch optimization
+   */
+  addToStore(chunk) {
+    if (!this.db) return;
+
+    const vectorBuffer = this.vectorToBuffer(chunk.vector);
+    const stmt = this.db.prepare(`
+      INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    stmt.run(
+      chunk.file,
+      chunk.startLine,
+      chunk.endLine,
+      chunk.content,
+      vectorBuffer,
+      Date.now()
+    );
+  }
+
+  /**
+   * Add multiple chunks in a transaction (much faster)
+   */
+  addBatchToStore(chunks) {
+    if (!this.db || chunks.length === 0) return;
+
+    const stmt = this.db.prepare(`
+      INSERT INTO embeddings (file, start_line, end_line, content, vector, indexed_at)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    const transaction = this.db.transaction(() => {
+      const now = Date.now();
+      for (const chunk of chunks) {
+        const vectorBuffer = this.vectorToBuffer(chunk.vector);
+        stmt.run(
+          chunk.file,
+          chunk.startLine,
+          chunk.endLine,
+          chunk.content,
+          vectorBuffer,
+          now
+        );
+      }
+    });
+
+    transaction();
+  }
+
+  /**
+   * Remove all chunks for a specific file
+   */
+  removeFileFromStore(file) {
+    if (!this.db) return;
+
+    const stmt = this.db.prepare('DELETE FROM embeddings WHERE file = ?');
+    stmt.run(file);
+  }
+
+  /**
+   * Get file hash
+   */
+  getFileHash(file) {
+    if (!this.db) return null;
+
+    const stmt = this.db.prepare('SELECT hash FROM file_hashes WHERE file = ?');
+    const row = stmt.get(file);
+    return row ? row.hash : null;
+  }
+
+  /**
+   * Set file hash
+   */
+  setFileHash(file, hash) {
+    if (!this.db) return;
+
+    const stmt = this.db.prepare(`
+      INSERT OR REPLACE INTO file_hashes (file, hash, indexed_at)
+      VALUES (?, ?, ?)
+    `);
+    stmt.run(file, hash, Date.now());
+  }
+
+  /**
+   * Delete file hash
+   */
+  deleteFileHash(file) {
+    if (!this.db) return;
+
+    const stmt = this.db.prepare('DELETE FROM file_hashes WHERE file = ?');
+    stmt.run(file);
+  }
+
+  /**
+   * Get all file hashes as Map
+   */
+  getAllFileHashes() {
+    if (!this.db) return new Map();
+
+    const stmt = this.db.prepare('SELECT file, hash FROM file_hashes');
+    const rows = stmt.all();
+    return new Map(rows.map(row => [row.file, row.hash]));
+  }
+
+  /**
+   * Save (checkpoint WAL for durability)
+   * With SQLite, writes are already persisted, this just checkpoints the WAL
+   */
+  async save() {
+    if (!this.config.enableCache || !this.db) return;
+
+    this.isSaving = true;
+
+    try {
+      // Checkpoint WAL to ensure durability
+      this.db.pragma('wal_checkpoint(PASSIVE)');
+    } catch (error) {
+      console.error('[Cache] Failed to checkpoint WAL:', error.message);
+    } finally {
+      this.isSaving = false;
+    }
+  }
+
+  /**
+   * Incremental save during indexing (no-op for SQLite, already persisted)
+   */
+  async saveIncremental() {
+    // SQLite writes are already persisted due to WAL mode
+    // This is a no-op but kept for API compatibility
+    return;
+  }
+
+  /**
+   * Clear all cache data
+   */
+  async clear() {
+    if (!this.config.enableCache) return;
+
+    try {
+      if (this.db) {
+        this.db.close();
+        this.db = null;
+      }
+
+      await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
+      console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
+    } catch (error) {
+      console.error('[Cache] Failed to clear cache:', error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * Close database connection
+   */
+  close() {
+    if (this.db) {
+      this.db.close();
+      this.db = null;
+    }
+  }
+
+  /**
+   * Set vector store (for compatibility with test code)
+   * This is less efficient than batch operations but kept for compatibility
+   */
+  setVectorStore(store) {
+    if (!this.db) return;
+
+    // Clear existing data
+    this.db.exec('DELETE FROM embeddings');
+
+    // Insert new data
+    if (store.length > 0) {
+      this.addBatchToStore(store);
+    }
+  }
+}
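For orientation, here is a minimal usage sketch of the SQLiteCache class added above. This is not code from the package: the import path is assumed (the diff does not name the new file), and the config fields and chunk shape are inferred from the hunk itself.

// Minimal sketch; the file path below is assumed, not shown in the diff.
import { SQLiteCache } from './lib/sqlite-cache.js';

const cache = new SQLiteCache({
  enableCache: true,                     // load()/save()/clear() short-circuit when false
  cacheDirectory: '.smart-coding-cache'  // embeddings.db is created inside this directory
});

await cache.load();  // opens the DB, creates the schema, migrates embeddings.json if one exists

// Chunk shape inferred from addBatchToStore(): file, startLine, endLine, content, vector
cache.addBatchToStore([
  { file: 'src/a.js', startLine: 1, endLine: 3, content: 'function a() {}', vector: new Array(256).fill(0) }
]);
cache.setFileHash('src/a.js', '<content-hash>');

console.error(cache.getVectorCount());   // 1
const vectors = cache.getVectorStore();  // [{ file, startLine, endLine, content, vector }]

await cache.save();  // checkpoints the WAL; individual writes are already durable
cache.close();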
package/lib/tokenizer.js CHANGED
@@ -10,6 +10,10 @@
  * Each model has its own maximum sequence length
  */
 export const MODEL_TOKEN_LIMITS = {
+  // MRL / Nomic models (longer context)
+  "nomic-ai/nomic-embed-text-v1.5": 8192,
+  "nomic-ai/nomic-embed-text-v1": 2048,
+
   // Sentence Transformers / MiniLM family
   "Xenova/all-MiniLM-L6-v2": 256,
   "Xenova/all-MiniLM-L12-v2": 256,
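The new entries extend the per-model token-limit table in lib/tokenizer.js. A small sketch of how such a limit might be consulted; the helper below is illustrative and the import path is assumed, it is not an API the package is known to export.

import { MODEL_TOKEN_LIMITS } from './lib/tokenizer.js'; // path assumed

// Hypothetical helper: unknown models fall back to a conservative limit.
function tokenLimitFor(modelName, fallback = 512) {
  return MODEL_TOKEN_LIMITS[modelName] ?? fallback;
}

tokenLimitFor('nomic-ai/nomic-embed-text-v1.5'); // 8192
tokenLimitFor('Xenova/all-MiniLM-L6-v2');        // 256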
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-coding-mcp",
3
- "version": "1.4.1",
3
+ "version": "2.1.0",
4
4
  "description": "An extensible MCP server that enhances coding productivity with AI-powered features including semantic code search, intelligent indexing, and more, using local LLMs",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -45,11 +45,14 @@
45
45
  "homepage": "https://github.com/omar-haris/smart-coding-mcp#readme",
46
46
  "license": "MIT",
47
47
  "dependencies": {
48
+ "@huggingface/transformers": "^3.8.1",
48
49
  "@modelcontextprotocol/sdk": "^1.0.4",
49
- "@xenova/transformers": "^2.17.2",
50
+ "better-sqlite3": "^11.7.0",
50
51
  "chokidar": "^3.5.3",
52
+ "fastembed": "^2.1.0",
51
53
  "fdir": "^6.5.0",
52
- "glob": "^10.3.10"
54
+ "glob": "^10.3.10",
55
+ "web-tree-sitter": "^0.24.6"
53
56
  },
54
57
  "engines": {
55
58
  "node": ">=18.0.0"
@@ -0,0 +1,105 @@
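The dependency changes swap @xenova/transformers for its successor package @huggingface/transformers (Transformers.js v3) and add better-sqlite3, fastembed, and web-tree-sitter, which appear to back the new SQLite cache, embedding backends, and AST chunker seen elsewhere in this diff. A hedged sketch of what the import migration typically looks like for embedding code; whether this particular model ships ready-to-load weights is not verified here.

// Before (Transformers.js v2):
// import { pipeline } from '@xenova/transformers';

// After (Transformers.js v3): same pipeline() entry point, now with a device option.
import { pipeline } from '@huggingface/transformers';

const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5', {
  device: 'cpu' // 'webgpu' is also accepted in v3; compare the SMART_CODING_DEVICE tests below
});
const output = await extractor('function add(a, b) { return a + b; }', { pooling: 'mean', normalize: true });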
@@ -0,0 +1,105 @@
+/**
+ * Tests for AST Chunker
+ *
+ * Tests the AST-based code chunking functionality:
+ * - Tree-sitter initialization
+ * - Language detection
+ * - Semantic chunking vs smart chunking fallback
+ */
+
+import { describe, it, expect, beforeAll } from 'vitest';
+import { ASTChunker, getChunker } from '../lib/ast-chunker.js';
+import { loadConfig } from '../lib/config.js';
+
+describe('AST Chunker', () => {
+  let config;
+
+  beforeAll(async () => {
+    config = await loadConfig();
+  });
+
+  describe('Chunker Factory', () => {
+    it('should return AST chunker when mode is ast', () => {
+      const chunker = getChunker({ ...config, chunkingMode: 'ast' });
+      expect(chunker).toBeInstanceOf(ASTChunker);
+    });
+
+    it('should return smart chunker wrapper when mode is smart', () => {
+      const chunker = getChunker({ ...config, chunkingMode: 'smart' });
+      expect(typeof chunker.chunk).toBe('function');
+      expect(chunker).not.toBeInstanceOf(ASTChunker);
+    });
+  });
+
+  describe('Language Detection', () => {
+    it('should detect JavaScript files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.js')).toBe('javascript');
+      expect(chunker.getLanguageForFile('test.mjs')).toBe('javascript');
+      expect(chunker.getLanguageForFile('test.jsx')).toBe('javascript');
+    });
+
+    it('should detect TypeScript files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.ts')).toBe('typescript');
+      expect(chunker.getLanguageForFile('test.tsx')).toBe('typescript');
+    });
+
+    it('should detect Python files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.py')).toBe('python');
+    });
+
+    it('should return null for unsupported files', () => {
+      const chunker = new ASTChunker(config);
+      expect(chunker.getLanguageForFile('test.sql')).toBeNull();
+      expect(chunker.getLanguageForFile('test.md')).toBeNull();
+    });
+  });
+
+  describe('Fallback Behavior', () => {
+    it('should fall back to smart chunking for unsupported languages', async () => {
+      const chunker = new ASTChunker(config);
+      const sqlContent = 'SELECT * FROM users WHERE id = 1;';
+
+      const chunks = await chunker.chunk(sqlContent, 'query.sql');
+      expect(Array.isArray(chunks)).toBe(true);
+    });
+
+    it('should handle empty content', async () => {
+      const chunker = new ASTChunker(config);
+      const chunks = await chunker.chunk('', 'empty.js');
+      expect(Array.isArray(chunks)).toBe(true);
+    });
+  });
+
+  describe('JavaScript Chunking', () => {
+    it('should chunk JavaScript functions', async () => {
+      const chunker = new ASTChunker(config);
+      const jsCode = `
+function add(a, b) {
+  return a + b;
+}
+
+function multiply(a, b) {
+  return a * b;
+}
+
+class Calculator {
+  constructor() {
+    this.result = 0;
+  }
+
+  add(n) {
+    this.result += n;
+    return this;
+  }
+}
+`;
+
+      const chunks = await chunker.chunk(jsCode, 'calc.js');
+      expect(Array.isArray(chunks)).toBe(true);
+      // Should have found some chunks (exact number depends on Tree-sitter grammar availability)
+    });
+  });
+});
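The interface exercised above (getChunker(config), chunker.chunk(content, filePath), chunker.getLanguageForFile(path)) suggests how chunking feeds the rest of the pipeline. A hedged sketch of that glue, under the assumption that chunk records carry the fields SQLiteCache stores; none of this code is from the package.

// Assumed glue code: chunk a snippet, then persist the chunks.
import { getChunker } from './lib/ast-chunker.js'; // paths assumed; mirror the test imports
import { loadConfig } from './lib/config.js';

const config = await loadConfig();
const chunker = getChunker(config); // ASTChunker when config.chunkingMode === 'ast', smart-chunker wrapper for 'smart'

const source = 'function add(a, b) { return a + b; }';
const chunks = await chunker.chunk(source, 'src/calc.js');
// Assumption: each chunk exposes file/startLine/endLine/content, matching what SQLiteCache stores;
// embedding vectors would be attached before handing the records to cache.addBatchToStore(chunks).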
@@ -0,0 +1,110 @@
+/**
+ * Tests for Device Detection
+ *
+ * Tests device detection and configuration:
+ * - CPU fallback detection
+ * - SMART_CODING_DEVICE env var handling
+ * - Config device option
+ */
+
+import { describe, it, expect, beforeAll, afterEach } from 'vitest';
+import { loadConfig, DEFAULT_CONFIG } from '../lib/config.js';
+
+describe('Device Detection', () => {
+  const originalEnv = process.env;
+
+  afterEach(() => {
+    // Restore original environment
+    process.env = { ...originalEnv };
+  });
+
+  describe('Default Configuration', () => {
+    it('should default to cpu device', () => {
+      expect(DEFAULT_CONFIG.device).toBe('cpu');
+    });
+
+    it('should have valid device options', () => {
+      const validDevices = ['cpu', 'webgpu', 'auto'];
+      expect(validDevices).toContain(DEFAULT_CONFIG.device);
+    });
+  });
+
+  describe('Environment Variable Override', () => {
+    it('should accept cpu device from env', async () => {
+      process.env.SMART_CODING_DEVICE = 'cpu';
+      const config = await loadConfig();
+      expect(config.device).toBe('cpu');
+    });
+
+    it('should accept webgpu device from env', async () => {
+      process.env.SMART_CODING_DEVICE = 'webgpu';
+      const config = await loadConfig();
+      expect(config.device).toBe('webgpu');
+    });
+
+    it('should accept auto device from env', async () => {
+      process.env.SMART_CODING_DEVICE = 'auto';
+      const config = await loadConfig();
+      expect(config.device).toBe('auto');
+    });
+
+    it('should reject invalid device values', async () => {
+      process.env.SMART_CODING_DEVICE = 'invalid';
+      const config = await loadConfig();
+      // Should fall back to default
+      expect(config.device).toBe(DEFAULT_CONFIG.device);
+    });
+
+    it('should be case-insensitive', async () => {
+      process.env.SMART_CODING_DEVICE = 'CPU';
+      const config = await loadConfig();
+      expect(config.device).toBe('cpu');
+    });
+  });
+
+  describe('Embedding Dimension Config', () => {
+    it('should default to 256 dimensions', () => {
+      expect(DEFAULT_CONFIG.embeddingDimension).toBe(256);
+    });
+
+    it('should accept valid dimensions from env', async () => {
+      process.env.SMART_CODING_EMBEDDING_DIMENSION = '512';
+      const config = await loadConfig();
+      expect(config.embeddingDimension).toBe(512);
+    });
+
+    it('should accept all valid dimensions', async () => {
+      for (const dim of [64, 128, 256, 512, 768]) {
+        process.env.SMART_CODING_EMBEDDING_DIMENSION = String(dim);
+        const config = await loadConfig();
+        expect(config.embeddingDimension).toBe(dim);
+      }
+    });
+
+    it('should reject invalid dimensions', async () => {
+      process.env.SMART_CODING_EMBEDDING_DIMENSION = '100';
+      const config = await loadConfig();
+      expect(config.embeddingDimension).toBe(DEFAULT_CONFIG.embeddingDimension);
+    });
+  });
+
+  describe('Chunking Mode Config', () => {
+    it('should default to smart chunking', () => {
+      expect(DEFAULT_CONFIG.chunkingMode).toBe('smart');
+    });
+
+    it('should accept valid modes from env', async () => {
+      for (const mode of ['smart', 'ast', 'line']) {
+        process.env.SMART_CODING_CHUNKING_MODE = mode;
+        const config = await loadConfig();
+        expect(config.chunkingMode).toBe(mode);
+      }
+    });
+
+    it('should reject invalid modes', async () => {
+      process.env.SMART_CODING_CHUNKING_MODE = 'invalid';
+      const config = await loadConfig();
+      expect(config.chunkingMode).toBe(DEFAULT_CONFIG.chunkingMode);
+    });
+  });
+});