npm - smart-coding-mcp - Versions diffs - 1.3.0 → 1.3.1 - Mend

smart-coding-mcp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +6 -1
package/features/clear-cache.js +30 -7
package/features/index-codebase.js +66 -13
package/index.js +1 -1
package/lib/cache.js +5 -0
package/lib/config.js +2 -1
package/package.json +6 -1
package/test/clear-cache.test.js +288 -0
package/test/embedding-model.test.js +230 -0
package/test/helpers.js +128 -0
package/test/hybrid-search.test.js +243 -0
package/test/index-codebase.test.js +246 -0
package/test/integration.test.js +223 -0
package/test/tokenizer.test.js +225 -0
package/vitest.config.js +29 -0

package/test/embedding-model.test.js ADDED Viewed

@@ -0,0 +1,230 @@
+/**
+ * Tests for Local LLM (Embedding Model)
+ *
+ * Tests the embedding model functionality including:
+ * - Model loading
+ * - Embedding generation
+ * - Vector properties
+ * - Similarity calculations
+ */
+import { describe, it, expect, beforeAll } from 'vitest';
+import { pipeline } from '@xenova/transformers';
+import { cosineSimilarity } from '../lib/utils.js';
+import { loadConfig } from '../lib/config.js';
+describe('Local Embedding Model', () => {
+  let embedder; // feature-extraction pipeline, assigned in beforeAll
+  let config; // result of loadConfig(), assigned in beforeAll
+  beforeAll(async () => {
+    config = await loadConfig();
+    console.log(`[Test] Loading embedding model: ${config.embeddingModel}`);
+    embedder = await pipeline('feature-extraction', config.embeddingModel); // real model load, no mock
+    console.log('[Test] Embedding model loaded successfully');
+  });
+  describe('Model Loading', () => {
+    it('should load the embedding model', () => {
+      expect(embedder).toBeDefined();
+      expect(typeof embedder).toBe('function');
+    });
+    it('should use the configured model', () => {
+      expect(config.embeddingModel).toBe('Xenova/all-MiniLM-L6-v2'); // pinned model name: fails if loadConfig() selects a different model
+    });
+  });
+  describe('Embedding Generation', () => {
+    it('should generate embeddings for text', async () => {
+      const text = 'Hello, world!';
+      const output = await embedder(text, { pooling: 'mean', normalize: true }); // the same options are used by every embedder call in this suite
+      expect(output).toBeDefined();
+      expect(output.data).toBeDefined();
+    });
+    it('should return vectors of correct dimensions', async () => {
+      const text = 'Test input for embedding';
+      const output = await embedder(text, { pooling: 'mean', normalize: true });
+      const vector = Array.from(output.data);
+      // MiniLM-L6 produces 384-dimensional vectors; the 384 checks in this suite assume that model
+      expect(vector.length).toBe(384);
+    });
+    it('should return normalized vectors', async () => {
+      const text = 'Normalized vector test';
+      const output = await embedder(text, { pooling: 'mean', normalize: true });
+      const vector = Array.from(output.data);
+      // Calculate magnitude (should be ~1 for normalized vectors)
+      const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
+      expect(magnitude).toBeCloseTo(1, 4); // second argument is the number of decimal digits compared
+    });
+    it('should generate different embeddings for different text', async () => {
+      const output1 = await embedder('apple fruit', { pooling: 'mean', normalize: true });
+      const output2 = await embedder('programming code', { pooling: 'mean', normalize: true });
+      const vector1 = Array.from(output1.data);
+      const vector2 = Array.from(output2.data);
+      // Vectors should be different: areSame is true only when every component agrees within 1e-4
+      const areSame = vector1.every((v, i) => Math.abs(v - vector2[i]) < 0.0001);
+      expect(areSame).toBe(false);
+    });
+    it('should handle code snippets', async () => {
+      const code = `
+        function add(a, b) {
+          return a + b;
+        }
+      `;
+      const output = await embedder(code, { pooling: 'mean', normalize: true });
+      const vector = Array.from(output.data);
+      expect(vector.length).toBe(384);
+    });
+    it('should handle multiline text', async () => {
+      const multiline = 'Line one\nLine two\nLine three';
+      const output = await embedder(multiline, { pooling: 'mean', normalize: true });
+      const vector = Array.from(output.data);
+      expect(vector.length).toBe(384);
+    });
+    it('should handle special characters', async () => {
+      const special = '{}[]()<>!@#$%^&*';
+      const output = await embedder(special, { pooling: 'mean', normalize: true });
+      const vector = Array.from(output.data);
+      expect(vector.length).toBe(384);
+    });
+  });
+  describe('Semantic Similarity', () => {
+    it('should give high similarity for semantically similar text', async () => {
+      const output1 = await embedder('user authentication login', { pooling: 'mean', normalize: true });
+      const output2 = await embedder('user login authentication', { pooling: 'mean', normalize: true });
+      const vector1 = Array.from(output1.data);
+      const vector2 = Array.from(output2.data);
+      const similarity = cosineSimilarity(vector1, vector2);
+      // Same words, different order - should be very similar
+      expect(similarity).toBeGreaterThan(0.9);
+    });
+    it('should give lower similarity for different topics', async () => {
+      const output1 = await embedder('database query SQL', { pooling: 'mean', normalize: true });
+      const output2 = await embedder('pizza delivery food', { pooling: 'mean', normalize: true });
+      const vector1 = Array.from(output1.data);
+      const vector2 = Array.from(output2.data);
+      const similarity = cosineSimilarity(vector1, vector2);
+      // Different topics - should have low similarity
+      expect(similarity).toBeLessThan(0.5);
+    });
+    it('should capture code semantic similarity', async () => {
+      const output1 = await embedder('function that handles user login', { pooling: 'mean', normalize: true });
+      const output2 = await embedder('async authenticate(user, password)', { pooling: 'mean', normalize: true });
+      const output3 = await embedder('function to sort array elements', { pooling: 'mean', normalize: true });
+      const v1 = Array.from(output1.data);
+      const v2 = Array.from(output2.data);
+      const v3 = Array.from(output3.data);
+      const sim12 = cosineSimilarity(v1, v2); // login-related
+      const sim13 = cosineSimilarity(v1, v3); // login vs sorting
+      // Login concepts should be more similar to each other than to sorting
+      expect(sim12).toBeGreaterThan(sim13);
+    });
+    it('should recognize programming language constructs', async () => {
+      const output1 = await embedder('import React from "react"', { pooling: 'mean', normalize: true });
+      const output2 = await embedder('import Vue from "vue"', { pooling: 'mean', normalize: true });
+      const output3 = await embedder('The weather is sunny today', { pooling: 'mean', normalize: true });
+      const v1 = Array.from(output1.data);
+      const v2 = Array.from(output2.data);
+      const v3 = Array.from(output3.data);
+      const sim12 = cosineSimilarity(v1, v2); // Both imports
+      const sim13 = cosineSimilarity(v1, v3); // Import vs weather
+      // Import statements should be more similar to each other
+      expect(sim12).toBeGreaterThan(sim13);
+    });
+  });
+  describe('Cosine Similarity Function', () => {
+    it('should return 1 for identical vectors', () => {
+      const vector = [0.1, 0.2, 0.3, 0.4, 0.5];
+      expect(cosineSimilarity(vector, vector)).toBeCloseTo(1, 5);
+    });
+    it('should return -1 for opposite vectors', () => {
+      const vector1 = [1, 0, 0];
+      const vector2 = [-1, 0, 0];
+      expect(cosineSimilarity(vector1, vector2)).toBeCloseTo(-1, 5);
+    });
+    it('should return 0 for orthogonal vectors', () => {
+      const vector1 = [1, 0, 0];
+      const vector2 = [0, 1, 0];
+      expect(cosineSimilarity(vector1, vector2)).toBeCloseTo(0, 5);
+    });
+    it('should handle high-dimensional vectors', () => {
+      const dim = 384;
+      const vector1 = Array(dim).fill(0).map(() => Math.random()); // random components in [0, 1); the test only checks the result's range
+      const vector2 = Array(dim).fill(0).map(() => Math.random());
+      const similarity = cosineSimilarity(vector1, vector2);
+      expect(similarity).toBeGreaterThanOrEqual(-1);
+      expect(similarity).toBeLessThanOrEqual(1);
+    });
+  });
+  describe('Performance', () => {
+    it('should generate embeddings in reasonable time', async () => {
+      const text = 'This is a test sentence for measuring embedding generation speed.';
+      const start = Date.now();
+      await embedder(text, { pooling: 'mean', normalize: true });
+      const duration = Date.now() - start;
+      // Should be fast (under 500ms for single embedding). NOTE(review): wall-clock limit, may be flaky on slow or loaded machines
+      expect(duration).toBeLessThan(500);
+    });
+    it('should handle multiple sequential embeddings', async () => {
+      const texts = [
+        'First test input',
+        'Second test input',
+        'Third test input',
+        'Fourth test input',
+        'Fifth test input'
+      ];
+      const start = Date.now();
+      for (const text of texts) {
+        await embedder(text, { pooling: 'mean', normalize: true });
+      }
+      const duration = Date.now() - start;
+      // 5 embeddings should complete in reasonable time (2500ms total, i.e. the same 500ms-per-embedding budget)
+      expect(duration).toBeLessThan(2500);
+      console.log(`[Test] 5 embeddings generated in ${duration}ms (${(duration/5).toFixed(0)}ms avg)`); // only reached when the assertion above passes
+    });
+  });
+});

package/test/helpers.js ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * Test helper utilities for Smart Coding MCP tests
+ * Provides shared setup, teardown, and mock utilities
+ */
+import { loadConfig } from '../lib/config.js';
+import { EmbeddingsCache } from '../lib/cache.js';
+import { CodebaseIndexer } from '../features/index-codebase.js';
+import { CacheClearer } from '../features/clear-cache.js';
+import { HybridSearch } from '../features/hybrid-search.js';
+import { pipeline } from '@xenova/transformers';
+import fs from 'fs/promises';
+import path from 'path';
+// Module-level cache for the embedder: null until getEmbedder() first resolves, then reused by every later call
+let sharedEmbedder = null;
+/**
+ * Return the shared 'feature-extraction' pipeline, creating it from `config.embeddingModel` on the first call.
+ * Later calls return the cached instance and never read `config`, so the first caller's model is the one reused.
+ */
+export async function getEmbedder(config) {
+  if (!sharedEmbedder) {
+    console.log('[TestHelper] Loading embedding model (first time)...');
+    sharedEmbedder = await pipeline('feature-extraction', config.embeddingModel); // assigned only after the load resolves, so calls that overlap the first load each start their own
+    console.log('[TestHelper] Embedding model loaded');
+  }
+  return sharedEmbedder;
+}
+/**
+ * Build the components the tests share: loaded config, shared embedder, loaded cache, indexer, cache clearer and search
+ * @param {Object} [options] - Optional overrides; only `verbose` and `workerThreads` are read and copied onto the config
+ * @returns {Promise<Object>} Resolves to { config, embedder, cache, indexer, cacheClearer, hybridSearch }
+ */
+export async function createTestFixtures(options = {}) {
+  const config = await loadConfig();
+  // Apply an override only when it was explicitly provided, so falsy values such as `false` or `0` still take effect
+  if (options.verbose !== undefined) config.verbose = options.verbose;
+  if (options.workerThreads !== undefined) config.workerThreads = options.workerThreads;
+  const embedder = await getEmbedder(config);
+  const cache = new EmbeddingsCache(config);
+  await cache.load();
+  const indexer = new CodebaseIndexer(embedder, cache, config, null); // NOTE(review): fourth argument is passed as null — presumably an optional collaborator; confirm against CodebaseIndexer
+  const cacheClearer = new CacheClearer(embedder, cache, config, indexer);
+  const hybridSearch = new HybridSearch(embedder, cache, config);
+  return {
+    config,
+    embedder,
+    cache,
+    indexer,
+    cacheClearer,
+    hybridSearch
+  };
+}
+/**
+ * Release what the fixtures' indexer holds: calls terminateWorkers() and, if a watcher is set, awaits its close()
+ * @param {Object} fixtures - Fixtures to clean up; a missing `indexer` is tolerated, but a null/undefined `fixtures` throws
+ */
+export async function cleanupFixtures(fixtures) {
+  if (fixtures.indexer) {
+    fixtures.indexer.terminateWorkers(); // not awaited — NOTE(review): confirm terminateWorkers() is synchronous
+    if (fixtures.indexer.watcher) {
+      await fixtures.indexer.watcher.close();
+    }
+  }
+}
+/**
+ * Recursively delete `config.cacheDirectory` for a clean test state; best-effort, so the returned promise never rejects
+ * @param {Object} config - Configuration object; only `cacheDirectory` is read
+ */
+export async function clearTestCache(config) {
+  try {
+    await fs.rm(config.cacheDirectory, { recursive: true, force: true });
+  } catch (err) {
+    // Every failure is swallowed here. NOTE(review): `force: true` already ignores a missing path, so this also hides other errors
+  }
+}
+/**
+ * Create a mock MCP request object of the shape { params: { name, arguments } }
+ * @param {string} toolName - Tool name, stored as `params.name`
+ * @param {Object} [args={}] - Tool arguments, stored by reference (not copied) as `params.arguments`
+ * @returns {Object} Mock request object
+ */
+export function createMockRequest(toolName, args = {}) {
+  return {
+    params: {
+      name: toolName,
+      arguments: args
+    }
+  };
+}
+/**
+ * Poll `condition` until it returns a truthy value or `timeout` ms have elapsed
+ * @param {Function} condition - Sync or async function called with no arguments; any truthy result counts as met
+ * @param {number} [timeout=5000] - Max wait time in ms; with 0 or less the condition is never evaluated
+ * @param {number} [interval=100] - Delay between checks in ms
+ * @returns {Promise<boolean>} true as soon as a check passes, false once the timeout has elapsed
+ */
+export async function waitFor(condition, timeout = 5000, interval = 100) {
+  const start = Date.now();
+  while (Date.now() - start < timeout) {
+    if (await condition()) return true;
+    await new Promise(resolve => setTimeout(resolve, interval)); // if the deadline passes during this sleep, the condition is not re-checked
+  }
+  return false;
+}
+/**
+ * Call `fn` with no arguments, await its result, and time the call with Date.now()
+ * @param {Function} fn - Function to measure (sync or async); anything it throws or rejects with propagates to the caller
+ * @returns {Promise<Object>} Resolves to { result, duration } where duration is elapsed wall-clock milliseconds
+ */
+export async function measureTime(fn) {
+  const start = Date.now();
+  const result = await fn();
+  const duration = Date.now() - start;
+  return { result, duration };
+}

package/test/hybrid-search.test.js ADDED Viewed

@@ -0,0 +1,243 @@
+/**
+ * Tests for HybridSearch feature
+ *
+ * Tests the search functionality including:
+ * - Semantic search with embeddings
+ * - Exact match boosting
+ * - Result formatting
+ * - Empty index handling
+ * - Score calculation
+ */
+import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
+import {
+  createTestFixtures,
+  cleanupFixtures,
+  clearTestCache,
+  createMockRequest
+} from './helpers.js';
+import * as HybridSearchFeature from '../features/hybrid-search.js';
+import { HybridSearch } from '../features/hybrid-search.js';
+describe('HybridSearch', () => {
+  let fixtures;
+  beforeAll(async () => {
+    fixtures = await createTestFixtures({ workerThreads: 2 });
+    // Ensure we have indexed content: wipe the on-disk cache, reset the in-memory store and hashes, then index
+    await clearTestCache(fixtures.config);
+    fixtures.cache.setVectorStore([]);
+    fixtures.cache.fileHashes = new Map();
+    await fixtures.indexer.indexAll(true); // NOTE(review): `true` presumably forces a full re-index — confirm against CodebaseIndexer.indexAll
+  });
+  afterAll(async () => {
+    await cleanupFixtures(fixtures);
+  });
+  describe('Search Functionality', () => {
+    it('should find relevant code for semantic queries', async () => {
+      // Search for something that should exist in the codebase
+      const { results, message } = await fixtures.hybridSearch.search('embedding model', 5);
+      expect(message).toBeNull();
+      expect(results.length).toBeGreaterThan(0);
+      // Results should have required properties
+      for (const result of results) {
+        expect(result).toHaveProperty('file');
+        expect(result).toHaveProperty('content');
+        expect(result).toHaveProperty('score');
+        expect(result).toHaveProperty('startLine');
+        expect(result).toHaveProperty('endLine');
+        expect(result).toHaveProperty('vector');
+      }
+    });
+    it('should return results sorted by score (highest first)', async () => {
+      const { results } = await fixtures.hybridSearch.search('function', 10);
+      expect(results.length).toBeGreaterThan(1);
+      // Verify descending order
+      for (let i = 1; i < results.length; i++) {
+        expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
+      }
+    });
+    it('should respect maxResults parameter', async () => {
+      const maxResults = 3;
+      const { results } = await fixtures.hybridSearch.search('const', maxResults);
+      expect(results.length).toBeLessThanOrEqual(maxResults);
+    });
+    it('should boost exact matches', async () => {
+      // Search for an exact term that exists
+      const { results: exactResults } = await fixtures.hybridSearch.search('embedder', 5);
+      // At least one result should contain the exact term (checks presence only, not that the match ranks higher)
+      const hasExactMatch = exactResults.some(r =>
+        r.content.toLowerCase().includes('embedder')
+      );
+      expect(hasExactMatch).toBe(true);
+    });
+    it('should handle natural language queries', async () => {
+      const { results } = await fixtures.hybridSearch.search('where is the configuration loaded', 5);
+      expect(results.length).toBeGreaterThan(0);
+    });
+  });
+  describe('Empty Index Handling', () => {
+    it('should return helpful message when index is empty', async () => {
+      // Create a search instance with empty cache: a plain-object stub exposing only these four methods
+      const emptyCache = {
+        getVectorStore: () => [],
+        setVectorStore: () => {},
+        getFileHash: () => null,
+        setFileHash: () => {}
+      };
+      const emptySearch = new HybridSearch(fixtures.embedder, emptyCache, fixtures.config);
+      const { results, message } = await emptySearch.search('test', 5);
+      expect(results.length).toBe(0);
+      expect(message).toContain('No code has been indexed');
+    });
+  });
+  describe('Result Formatting', () => {
+    it('should format results as markdown', async () => {
+      const { results } = await fixtures.hybridSearch.search('function', 3);
+      const formatted = fixtures.hybridSearch.formatResults(results);
+      // Should contain markdown elements
+      expect(formatted).toContain('## Result');
+      expect(formatted).toContain('**File:**');
+      expect(formatted).toContain('**Lines:**');
+      expect(formatted).toContain('```');
+      expect(formatted).toContain('Relevance:');
+    });
+    it('should return no matches message for empty results', () => {
+      const formatted = fixtures.hybridSearch.formatResults([]);
+      expect(formatted).toContain('No matching code found');
+    });
+    it('should include relative file paths', async () => {
+      const { results } = await fixtures.hybridSearch.search('export', 1);
+      const formatted = fixtures.hybridSearch.formatResults(results);
+      // Should not contain absolute paths in the output (checked by asserting config.searchDirectory is absent)
+      expect(formatted).not.toContain(fixtures.config.searchDirectory);
+    });
+  });
+  describe('Score Calculation', () => {
+    it('should give higher scores to more relevant results', async () => {
+      // Search for a specific term
+      const { results } = await fixtures.hybridSearch.search('CodebaseIndexer', 5);
+      if (results.length > 0) { // NOTE(review): with no results the assertion is skipped and the test still passes
+        // Top result should have high relevance
+        expect(results[0].score).toBeGreaterThan(0.3);
+      }
+    });
+    it('should apply semantic weight from config', async () => {
+      const { results } = await fixtures.hybridSearch.search('async function', 5);
+      // All results should have positive scores (the config's weight is not read here; an empty result list also passes)
+      for (const result of results) {
+        expect(result.score).toBeGreaterThan(0);
+      }
+    });
+  });
+});
+describe('Hybrid Search Tool Handler', () => {
+  let fixtures;
+  beforeAll(async () => {
+    fixtures = await createTestFixtures({ workerThreads: 2 });
+    // Ensure indexed content. NOTE(review): `false` presumably means a non-forced index that may reuse existing cache — confirm
+    await fixtures.indexer.indexAll(false);
+  });
+  afterAll(async () => {
+    await cleanupFixtures(fixtures);
+  });
+  describe('Tool Definition', () => {
+    it('should have correct tool definition', () => {
+      const toolDef = HybridSearchFeature.getToolDefinition(fixtures.config);
+      expect(toolDef.name).toBe('a_semantic_search');
+      expect(toolDef.description).toContain('semantic');
+      expect(toolDef.description).toContain('hybrid');
+      expect(toolDef.inputSchema.properties.query).toBeDefined();
+      expect(toolDef.inputSchema.properties.maxResults).toBeDefined();
+      expect(toolDef.inputSchema.required).toContain('query');
+    });
+    it('should use config default for maxResults', () => {
+      const toolDef = HybridSearchFeature.getToolDefinition(fixtures.config);
+      expect(toolDef.inputSchema.properties.maxResults.default).toBe(fixtures.config.maxResults);
+    });
+  });
+  describe('Tool Handler', () => {
+    it('should return search results for valid query', async () => {
+      const request = createMockRequest('a_semantic_search', {
+        query: 'function that handles indexing'
+      });
+      const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
+      expect(result.content[0].type).toBe('text');
+      expect(result.content[0].text).toContain('Result');
+    });
+    it('should use default maxResults when not provided', async () => {
+      const request = createMockRequest('a_semantic_search', {
+        query: 'import'
+      });
+      const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
+      // Should return results (up to default max); only non-empty text is asserted, not the result count
+      expect(result.content[0].text.length).toBeGreaterThan(0);
+    });
+    it('should respect custom maxResults', async () => {
+      const request = createMockRequest('a_semantic_search', {
+        query: 'const',
+        maxResults: 2
+      });
+      const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
+      // Count result headers ('## Result' occurrences in the returned text; `|| []` covers the no-match case)
+      const resultCount = (result.content[0].text.match(/## Result/g) || []).length;
+      expect(resultCount).toBeLessThanOrEqual(2);
+    });
+    it('should handle queries with no matches gracefully', async () => {
+      const request = createMockRequest('a_semantic_search', {
+        query: 'xyzzy_nonexistent_symbol_12345'
+      });
+      const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
+      // Should return something (either no matches message or low-score results)
+      expect(result.content[0].text.length).toBeGreaterThan(0);
+    });
+  });
+});