smart-coding-mcp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Tests for Local LLM (Embedding Model)
3
+ *
4
+ * Tests the embedding model functionality including:
5
+ * - Model loading
6
+ * - Embedding generation
7
+ * - Vector properties
8
+ * - Similarity calculations
9
+ */
10
+
11
+ import { describe, it, expect, beforeAll } from 'vitest';
12
+ import { pipeline } from '@xenova/transformers';
13
+ import { cosineSimilarity } from '../lib/utils.js';
14
+ import { loadConfig } from '../lib/config.js';
15
+
16
describe('Local Embedding Model', () => {
  // MiniLM-L6 produces 384-dimensional vectors
  const EXPECTED_DIMENSIONS = 384;

  let embedder;
  let config;

  /**
   * Invoke the shared embedder on a single text using mean pooling and
   * normalization. A fresh options object is built for every call.
   */
  const runEmbedder = (text) => embedder(text, { pooling: 'mean', normalize: true });

  /**
   * Embed each text one after another (in argument order) and only then
   * convert every output's `data` into a plain array.
   */
  const embedAll = async (...texts) => {
    const outputs = [];
    for (const text of texts) {
      outputs.push(await runEmbedder(text));
    }
    return outputs.map((output) => Array.from(output.data));
  };

  // Load the configuration and the model once for the whole suite.
  beforeAll(async () => {
    config = await loadConfig();
    console.log(`[Test] Loading embedding model: ${config.embeddingModel}`);
    embedder = await pipeline('feature-extraction', config.embeddingModel);
    console.log('[Test] Embedding model loaded successfully');
  });

  describe('Model Loading', () => {
    it('should load the embedding model', () => {
      expect(embedder).toBeDefined();
      expect(typeof embedder).toBe('function');
    });

    it('should use the configured model', () => {
      const { embeddingModel } = config;
      expect(embeddingModel).toBe('Xenova/all-MiniLM-L6-v2');
    });
  });

  describe('Embedding Generation', () => {
    it('should generate embeddings for text', async () => {
      const output = await runEmbedder('Hello, world!');

      expect(output).toBeDefined();
      expect(output.data).toBeDefined();
    });

    it('should return vectors of correct dimensions', async () => {
      const [vector] = await embedAll('Test input for embedding');

      expect(vector.length).toBe(EXPECTED_DIMENSIONS);
    });

    it('should return normalized vectors', async () => {
      const [vector] = await embedAll('Normalized vector test');

      // Euclidean length: a normalized vector should come out at ~1
      let sumOfSquares = 0;
      for (const component of vector) {
        sumOfSquares = sumOfSquares + component * component;
      }
      expect(Math.sqrt(sumOfSquares)).toBeCloseTo(1, 4);
    });

    it('should generate different embeddings for different text', async () => {
      const [vector1, vector2] = await embedAll('apple fruit', 'programming code');

      // The vectors count as "the same" only if no component is off by 0.0001 or more
      const areSame = !vector1.some((v, i) => !(Math.abs(v - vector2[i]) < 0.0001));
      expect(areSame).toBe(false);
    });

    it('should handle code snippets', async () => {
      const code = `
        function add(a, b) {
          return a + b;
        }
      `;

      const [vector] = await embedAll(code);

      expect(vector.length).toBe(EXPECTED_DIMENSIONS);
    });

    it('should handle multiline text', async () => {
      const [vector] = await embedAll('Line one\nLine two\nLine three');

      expect(vector.length).toBe(EXPECTED_DIMENSIONS);
    });

    it('should handle special characters', async () => {
      const [vector] = await embedAll('{}[]()<>!@#$%^&*');

      expect(vector.length).toBe(EXPECTED_DIMENSIONS);
    });
  });

  describe('Semantic Similarity', () => {
    it('should give high similarity for semantically similar text', async () => {
      const [first, second] = await embedAll(
        'user authentication login',
        'user login authentication'
      );

      // Identical words in a different order: expect a near-identical embedding
      expect(cosineSimilarity(first, second)).toBeGreaterThan(0.9);
    });

    it('should give lower similarity for different topics', async () => {
      const [first, second] = await embedAll('database query SQL', 'pizza delivery food');

      // Unrelated subjects: expect the similarity to stay low
      expect(cosineSimilarity(first, second)).toBeLessThan(0.5);
    });

    it('should capture code semantic similarity', async () => {
      const [loginText, loginCode, sortingText] = await embedAll(
        'function that handles user login',
        'async authenticate(user, password)',
        'function to sort array elements'
      );

      const loginVsLogin = cosineSimilarity(loginText, loginCode);
      const loginVsSorting = cosineSimilarity(loginText, sortingText);

      // The two login-related inputs should sit closer together than login vs sorting
      expect(loginVsLogin).toBeGreaterThan(loginVsSorting);
    });

    it('should recognize programming language constructs', async () => {
      const [reactImport, vueImport, weather] = await embedAll(
        'import React from "react"',
        'import Vue from "vue"',
        'The weather is sunny today'
      );

      const importVsImport = cosineSimilarity(reactImport, vueImport);
      const importVsWeather = cosineSimilarity(reactImport, weather);

      // Two import statements should be closer to each other than to plain prose
      expect(importVsImport).toBeGreaterThan(importVsWeather);
    });
  });

  describe('Cosine Similarity Function', () => {
    it('should return 1 for identical vectors', () => {
      const sample = [0.1, 0.2, 0.3, 0.4, 0.5];
      const similarity = cosineSimilarity(sample, sample);
      expect(similarity).toBeCloseTo(1, 5);
    });

    it('should return -1 for opposite vectors', () => {
      const similarity = cosineSimilarity([1, 0, 0], [-1, 0, 0]);
      expect(similarity).toBeCloseTo(-1, 5);
    });

    it('should return 0 for orthogonal vectors', () => {
      const similarity = cosineSimilarity([1, 0, 0], [0, 1, 0]);
      expect(similarity).toBeCloseTo(0, 5);
    });

    it('should handle high-dimensional vectors', () => {
      const dim = 384;
      const randomVector = () => Array.from({ length: dim }, () => Math.random());
      const vector1 = randomVector();
      const vector2 = randomVector();

      const similarity = cosineSimilarity(vector1, vector2);

      // Whatever the inputs, a cosine similarity must stay within [-1, 1]
      expect(similarity).toBeGreaterThanOrEqual(-1);
      expect(similarity).toBeLessThanOrEqual(1);
    });
  });

  describe('Performance', () => {
    it('should generate embeddings in reasonable time', async () => {
      const text = 'This is a test sentence for measuring embedding generation speed.';

      const startedAt = Date.now();
      await runEmbedder(text);
      const duration = Date.now() - startedAt;

      // Should be fast (under 500ms for single embedding)
      expect(duration).toBeLessThan(500);
    });

    it('should handle multiple sequential embeddings', async () => {
      const texts = ['First', 'Second', 'Third', 'Fourth', 'Fifth'].map(
        (ordinal) => `${ordinal} test input`
      );

      const startedAt = Date.now();
      for (const text of texts) {
        await runEmbedder(text);
      }
      const duration = Date.now() - startedAt;

      // 5 embeddings should complete in reasonable time
      expect(duration).toBeLessThan(2500);
      console.log(`[Test] 5 embeddings generated in ${duration}ms (${(duration/5).toFixed(0)}ms avg)`);
    });
  });
});
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Test helper utilities for Smart Coding MCP tests
3
+ * Provides shared setup, teardown, and mock utilities
4
+ */
5
+
6
+ import { loadConfig } from '../lib/config.js';
7
+ import { EmbeddingsCache } from '../lib/cache.js';
8
+ import { CodebaseIndexer } from '../features/index-codebase.js';
9
+ import { CacheClearer } from '../features/clear-cache.js';
10
+ import { HybridSearch } from '../features/hybrid-search.js';
11
+ import { pipeline } from '@xenova/transformers';
12
+ import fs from 'fs/promises';
13
+ import path from 'path';
14
+
15
// Cached embedder loads, keyed by model name (shared across tests for speed).
// Each value is the promise of the loaded pipeline.
const sharedEmbedders = new Map();

/**
 * Get or initialize the shared embedder for the configured model.
 *
 * The model is loaded at most once per model name and reused afterwards.
 * The cache is keyed by `config.embeddingModel`, so asking for a different
 * model never hands back a previously loaded one. The in-flight promise is
 * what gets cached, so callers that arrive while a load is still running
 * share that load; a failed load is evicted so a later call can retry.
 *
 * @param {Object} config - Configuration object; `embeddingModel` is read
 * @returns {Promise<Function>} The feature-extraction pipeline for that model
 * @throws Rethrows whatever error the pipeline loader fails with
 */
export async function getEmbedder(config) {
  const modelName = config.embeddingModel;

  if (!sharedEmbedders.has(modelName)) {
    console.log('[TestHelper] Loading embedding model (first time)...');

    const loading = (async () => {
      try {
        const embedder = await pipeline('feature-extraction', modelName);
        console.log('[TestHelper] Embedding model loaded');
        return embedder;
      } catch (err) {
        // Do not keep a rejected promise around: let the next caller retry
        sharedEmbedders.delete(modelName);
        throw err;
      }
    })();

    sharedEmbedders.set(modelName, loading);
  }

  return sharedEmbedders.get(modelName);
}
30
+
31
/**
 * Assemble the set of components a test suite works with.
 *
 * Loads the configuration, applies the supplied overrides, obtains the
 * embedder, loads the embeddings cache and wires up the indexer, cache
 * clearer and hybrid search around them.
 *
 * @param {Object} [options] - Optional config overrides
 * @param {*} [options.verbose] - Replaces `config.verbose` when not undefined
 * @param {*} [options.workerThreads] - Replaces `config.workerThreads` when not undefined
 * @returns {Promise<Object>} `{ config, embedder, cache, indexer, cacheClearer, hybridSearch }`
 */
export async function createTestFixtures(options = {}) {
  const config = await loadConfig();

  // Only keys the caller actually supplied overwrite the loaded config
  for (const key of ['verbose', 'workerThreads']) {
    if (options[key] !== undefined) {
      config[key] = options[key];
    }
  }

  const embedder = await getEmbedder(config);

  const cache = new EmbeddingsCache(config);
  await cache.load();

  // The indexer is built first because the cache clearer receives it
  const indexer = new CodebaseIndexer(embedder, cache, config, null);
  const fixtures = {
    config,
    embedder,
    cache,
    indexer,
    cacheClearer: new CacheClearer(embedder, cache, config, indexer),
    hybridSearch: new HybridSearch(embedder, cache, config),
  };

  return fixtures;
}
61
+
62
/**
 * Clean up test resources.
 *
 * Terminates the indexer's workers and closes its file watcher, if any.
 * Safe to call with `undefined`/`null` or with fixtures that have no indexer,
 * so an `afterAll` hook does not fail with a TypeError when setup never
 * completed. The watcher is closed even if terminating the workers throws.
 *
 * @param {Object} [fixtures] - Test fixtures to clean up
 * @returns {Promise<void>}
 */
export async function cleanupFixtures(fixtures) {
  const indexer = fixtures ? fixtures.indexer : undefined;
  if (!indexer) {
    return;
  }

  try {
    indexer.terminateWorkers();
  } finally {
    // Always release the watcher, otherwise it may outlive the test run
    if (indexer.watcher) {
      await indexer.watcher.close();
    }
  }
}
74
+
75
/**
 * Remove the cache directory so a test starts from a clean state.
 *
 * Deletion is recursive and forced. This is best effort: any error raised
 * while reading the directory from `config` or while removing it is
 * swallowed, and the function always resolves.
 *
 * @param {Object} config - Configuration object; `cacheDirectory` is read
 * @returns {Promise<void>}
 */
export async function clearTestCache(config) {
  const removalOptions = { recursive: true, force: true };

  try {
    const { cacheDirectory } = config;
    await fs.rm(cacheDirectory, removalOptions);
  } catch (_ignored) {
    // Nothing to clean up (or nothing we can do about it): carry on
  }
}
86
+
87
/**
 * Build a minimal request object shaped like an MCP tool call.
 *
 * @param {string} toolName - Value placed in `params.name`
 * @param {Object} [args={}] - Value placed (by reference) in `params.arguments`
 * @returns {Object} `{ params: { name, arguments } }`
 */
export function createMockRequest(toolName, args = {}) {
  const params = { name: toolName, arguments: args };
  return { params };
}
101
+
102
/**
 * Wait for a condition with timeout.
 *
 * The condition is always evaluated at least once (even with a zero or
 * negative timeout) and is evaluated one final time once the deadline has
 * been reached, so a condition that turns true during the last pause is not
 * missed. Pauses never extend past the deadline.
 *
 * @param {Function} condition - Sync or async function returning a truthy value when met
 * @param {number} [timeout=5000] - Max wait time in ms
 * @param {number} [interval=100] - Check interval in ms
 * @returns {Promise<boolean>} Whether the condition was met before giving up
 */
export async function waitFor(condition, timeout = 5000, interval = 100) {
  const deadline = Date.now() + timeout;

  for (;;) {
    if (await condition()) return true;

    const remaining = deadline - Date.now();
    if (remaining <= 0) return false;

    // Sleep for one interval, but never beyond the deadline
    const pause = Math.min(interval, remaining);
    await new Promise(resolve => setTimeout(resolve, pause));
  }
}
117
+
118
/**
 * Run a function, await its result, and report how long that took.
 *
 * Timing is taken with `Date.now()`, so `duration` is in whole milliseconds.
 * If `fn` throws or rejects, the error propagates and nothing is returned.
 *
 * @param {Function} fn - Function to measure; its return value is awaited
 * @returns {Promise<Object>} `{ result, duration }`
 */
export async function measureTime(fn) {
  const startedAt = Date.now();
  const result = await fn();
  return { result, duration: Date.now() - startedAt };
}
@@ -0,0 +1,243 @@
1
+ /**
2
+ * Tests for HybridSearch feature
3
+ *
4
+ * Tests the search functionality including:
5
+ * - Semantic search with embeddings
6
+ * - Exact match boosting
7
+ * - Result formatting
8
+ * - Empty index handling
9
+ * - Score calculation
10
+ */
11
+
12
+ import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
13
+ import {
14
+ createTestFixtures,
15
+ cleanupFixtures,
16
+ clearTestCache,
17
+ createMockRequest
18
+ } from './helpers.js';
19
+ import * as HybridSearchFeature from '../features/hybrid-search.js';
20
+ import { HybridSearch } from '../features/hybrid-search.js';
21
+
22
describe('HybridSearch', () => {
  let fixtures;

  /** Run a query through the suite's shared HybridSearch instance. */
  const search = (query, maxResults) => fixtures.hybridSearch.search(query, maxResults);

  beforeAll(async () => {
    fixtures = await createTestFixtures({ workerThreads: 2 });

    // Start from an empty cache and re-index, so every search below runs
    // against freshly indexed content
    await clearTestCache(fixtures.config);
    fixtures.cache.setVectorStore([]);
    fixtures.cache.fileHashes = new Map();
    await fixtures.indexer.indexAll(true);
  });

  afterAll(async () => {
    // If setup failed, `fixtures` was never assigned; skip cleanup so the
    // original setup error is the one that gets reported
    if (fixtures) {
      await cleanupFixtures(fixtures);
    }
  });

  describe('Search Functionality', () => {
    it('should find relevant code for semantic queries', async () => {
      // Search for something that should exist in the codebase
      const { results, message } = await search('embedding model', 5);

      expect(message).toBeNull();
      expect(results.length).toBeGreaterThan(0);

      // Results should have required properties
      const requiredProperties = ['file', 'content', 'score', 'startLine', 'endLine', 'vector'];
      for (const result of results) {
        for (const property of requiredProperties) {
          expect(result).toHaveProperty(property);
        }
      }
    });

    it('should return results sorted by score (highest first)', async () => {
      const { results } = await search('function', 10);

      expect(results.length).toBeGreaterThan(1);

      // Verify descending order
      for (let i = 1; i < results.length; i++) {
        expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
      }
    });

    it('should respect maxResults parameter', async () => {
      const maxResults = 3;
      const { results } = await search('const', maxResults);

      expect(results.length).toBeLessThanOrEqual(maxResults);
    });

    it('should boost exact matches', async () => {
      // Search for an exact term that exists
      const { results: exactResults } = await search('embedder', 5);

      // At least one result should contain the exact term
      const hasExactMatch = exactResults.some(r =>
        r.content.toLowerCase().includes('embedder')
      );

      expect(hasExactMatch).toBe(true);
    });

    it('should handle natural language queries', async () => {
      const { results } = await search('where is the configuration loaded', 5);

      expect(results.length).toBeGreaterThan(0);
    });
  });

  describe('Empty Index Handling', () => {
    it('should return helpful message when index is empty', async () => {
      // Stand-in cache whose vector store is always empty
      const emptyCache = {
        getVectorStore: () => [],
        setVectorStore: () => {},
        getFileHash: () => null,
        setFileHash: () => {}
      };

      const emptySearch = new HybridSearch(fixtures.embedder, emptyCache, fixtures.config);
      const { results, message } = await emptySearch.search('test', 5);

      expect(results.length).toBe(0);
      expect(message).toContain('No code has been indexed');
    });
  });

  describe('Result Formatting', () => {
    it('should format results as markdown', async () => {
      const { results } = await search('function', 3);
      const formatted = fixtures.hybridSearch.formatResults(results);

      // Should contain markdown elements
      expect(formatted).toContain('## Result');
      expect(formatted).toContain('**File:**');
      expect(formatted).toContain('**Lines:**');
      expect(formatted).toContain('```');
      expect(formatted).toContain('Relevance:');
    });

    it('should return no matches message for empty results', () => {
      const formatted = fixtures.hybridSearch.formatResults([]);

      expect(formatted).toContain('No matching code found');
    });

    it('should include relative file paths', async () => {
      const { results } = await search('export', 1);

      // Without a result there is no path in the output, and the negative
      // check below would pass without testing anything
      expect(results.length).toBeGreaterThan(0);

      const formatted = fixtures.hybridSearch.formatResults(results);

      // Should not contain absolute paths in the output
      expect(formatted).not.toContain(fixtures.config.searchDirectory);
    });
  });

  describe('Score Calculation', () => {
    it('should give higher scores to more relevant results', async () => {
      // Search for a specific term
      const { results } = await search('CodebaseIndexer', 5);

      // Fail loudly on an empty result set instead of skipping the check
      expect(results.length).toBeGreaterThan(0);

      // Top result should have high relevance
      expect(results[0].score).toBeGreaterThan(0.3);
    });

    it('should apply semantic weight from config', async () => {
      const { results } = await search('async function', 5);

      // An empty result set would make the loop below pass vacuously
      expect(results.length).toBeGreaterThan(0);

      // All results should have positive scores
      for (const result of results) {
        expect(result.score).toBeGreaterThan(0);
      }
    });
  });
});
162
+
163
describe('Hybrid Search Tool Handler', () => {
  const TOOL_NAME = 'a_semantic_search';

  let fixtures;

  /**
   * Build a mock request for the search tool, pass it to the feature's
   * handler, and return the first content entry of the response.
   */
  const callSearchTool = async (args) => {
    const request = createMockRequest(TOOL_NAME, args);
    const response = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
    return response.content[0];
  };

  /** Fetch the tool definition generated for the suite's config. */
  const readToolDefinition = () => HybridSearchFeature.getToolDefinition(fixtures.config);

  beforeAll(async () => {
    fixtures = await createTestFixtures({ workerThreads: 2 });

    // Make sure there is indexed content to search
    await fixtures.indexer.indexAll(false);
  });

  afterAll(async () => {
    await cleanupFixtures(fixtures);
  });

  describe('Tool Definition', () => {
    it('should have correct tool definition', () => {
      const toolDef = readToolDefinition();

      expect(toolDef.name).toBe('a_semantic_search');

      for (const keyword of ['semantic', 'hybrid']) {
        expect(toolDef.description).toContain(keyword);
      }

      expect(toolDef.inputSchema.properties.query).toBeDefined();
      expect(toolDef.inputSchema.properties.maxResults).toBeDefined();
      expect(toolDef.inputSchema.required).toContain('query');
    });

    it('should use config default for maxResults', () => {
      const toolDef = readToolDefinition();
      const declaredDefault = toolDef.inputSchema.properties.maxResults.default;

      expect(declaredDefault).toBe(fixtures.config.maxResults);
    });
  });

  describe('Tool Handler', () => {
    it('should return search results for valid query', async () => {
      const entry = await callSearchTool({ query: 'function that handles indexing' });

      expect(entry.type).toBe('text');
      expect(entry.text).toContain('Result');
    });

    it('should use default maxResults when not provided', async () => {
      const entry = await callSearchTool({ query: 'import' });

      // No maxResults supplied: the handler should still produce output
      expect(entry.text.length).toBeGreaterThan(0);
    });

    it('should respect custom maxResults', async () => {
      const entry = await callSearchTool({ query: 'const', maxResults: 2 });

      // Each formatted hit carries one "## Result" header, so count those
      const headers = entry.text.match(/## Result/g) || [];
      expect(headers.length).toBeLessThanOrEqual(2);
    });

    it('should handle queries with no matches gracefully', async () => {
      const entry = await callSearchTool({ query: 'xyzzy_nonexistent_symbol_12345' });

      // Either a "no matches" message or low-score results is fine, as long as text comes back
      expect(entry.text.length).toBeGreaterThan(0);
    });
  });
});