smart-coding-mcp 1.4.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -31
- package/config.json +7 -2
- package/features/get-status.js +163 -0
- package/features/hybrid-search.js +23 -4
- package/features/index-codebase.js +145 -60
- package/features/set-workspace.js +155 -0
- package/index.js +152 -64
- package/lib/ast-chunker.js +273 -0
- package/lib/config.js +91 -2
- package/lib/embedding-worker.js +29 -2
- package/lib/mrl-embedder.js +133 -0
- package/lib/resource-throttle.js +85 -0
- package/lib/sqlite-cache.js +408 -0
- package/lib/tokenizer.js +4 -0
- package/package.json +6 -3
- package/test/ast-chunker.test.js +105 -0
- package/test/device-detection.test.js +110 -0
- package/test/embedding-model.test.js +14 -11
- package/test/helpers.js +3 -3
- package/test/mrl-embedder.test.js +108 -0
|
@@ -9,20 +9,22 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
import { describe, it, expect, beforeAll } from 'vitest';
|
|
12
|
-
import {
|
|
12
|
+
import { createEmbedder } from '../lib/mrl-embedder.js';
|
|
13
13
|
import { cosineSimilarity } from '../lib/utils.js';
|
|
14
14
|
import { loadConfig } from '../lib/config.js';
|
|
15
15
|
|
|
16
16
|
describe('Local Embedding Model', () => {
|
|
17
17
|
let embedder;
|
|
18
18
|
let config;
|
|
19
|
+
let expectedDimension;
|
|
19
20
|
|
|
20
21
|
beforeAll(async () => {
|
|
21
22
|
config = await loadConfig();
|
|
22
23
|
console.log(`[Test] Loading embedding model: ${config.embeddingModel}`);
|
|
23
|
-
embedder = await
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
embedder = await createEmbedder(config);
|
|
25
|
+
expectedDimension = embedder.dimension;
|
|
26
|
+
console.log(`[Test] Embedding model loaded (${expectedDimension}d)`);
|
|
27
|
+
}, 120000);
|
|
26
28
|
|
|
27
29
|
describe('Model Loading', () => {
|
|
28
30
|
it('should load the embedding model', () => {
|
|
@@ -30,8 +32,9 @@ describe('Local Embedding Model', () => {
|
|
|
30
32
|
expect(typeof embedder).toBe('function');
|
|
31
33
|
});
|
|
32
34
|
|
|
33
|
-
it('should
|
|
34
|
-
expect(
|
|
35
|
+
it('should have model metadata', () => {
|
|
36
|
+
expect(embedder.modelName).toBeDefined();
|
|
37
|
+
expect(embedder.dimension).toBeGreaterThan(0);
|
|
35
38
|
});
|
|
36
39
|
});
|
|
37
40
|
|
|
@@ -49,8 +52,8 @@ describe('Local Embedding Model', () => {
|
|
|
49
52
|
const output = await embedder(text, { pooling: 'mean', normalize: true });
|
|
50
53
|
const vector = Array.from(output.data);
|
|
51
54
|
|
|
52
|
-
// MiniLM
|
|
53
|
-
expect(vector.length).toBe(
|
|
55
|
+
// Dimension depends on model (MRL: configurable, MiniLM: 384)
|
|
56
|
+
expect(vector.length).toBe(expectedDimension);
|
|
54
57
|
});
|
|
55
58
|
|
|
56
59
|
it('should return normalized vectors', async () => {
|
|
@@ -85,7 +88,7 @@ describe('Local Embedding Model', () => {
|
|
|
85
88
|
const output = await embedder(code, { pooling: 'mean', normalize: true });
|
|
86
89
|
const vector = Array.from(output.data);
|
|
87
90
|
|
|
88
|
-
expect(vector.length).toBe(
|
|
91
|
+
expect(vector.length).toBe(expectedDimension);
|
|
89
92
|
});
|
|
90
93
|
|
|
91
94
|
it('should handle multiline text', async () => {
|
|
@@ -93,7 +96,7 @@ describe('Local Embedding Model', () => {
|
|
|
93
96
|
const output = await embedder(multiline, { pooling: 'mean', normalize: true });
|
|
94
97
|
const vector = Array.from(output.data);
|
|
95
98
|
|
|
96
|
-
expect(vector.length).toBe(
|
|
99
|
+
expect(vector.length).toBe(expectedDimension);
|
|
97
100
|
});
|
|
98
101
|
|
|
99
102
|
it('should handle special characters', async () => {
|
|
@@ -101,7 +104,7 @@ describe('Local Embedding Model', () => {
|
|
|
101
104
|
const output = await embedder(special, { pooling: 'mean', normalize: true });
|
|
102
105
|
const vector = Array.from(output.data);
|
|
103
106
|
|
|
104
|
-
expect(vector.length).toBe(
|
|
107
|
+
expect(vector.length).toBe(expectedDimension);
|
|
105
108
|
});
|
|
106
109
|
});
|
|
107
110
|
|
package/test/helpers.js
CHANGED
|
@@ -8,7 +8,7 @@ import { EmbeddingsCache } from '../lib/cache.js';
|
|
|
8
8
|
import { CodebaseIndexer } from '../features/index-codebase.js';
|
|
9
9
|
import { CacheClearer } from '../features/clear-cache.js';
|
|
10
10
|
import { HybridSearch } from '../features/hybrid-search.js';
|
|
11
|
-
import {
|
|
11
|
+
import { createEmbedder } from '../lib/mrl-embedder.js';
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
13
|
import path from 'path';
|
|
14
14
|
|
|
@@ -22,8 +22,8 @@ let sharedEmbedder = null;
|
|
|
22
22
|
export async function getEmbedder(config) {
|
|
23
23
|
if (!sharedEmbedder) {
|
|
24
24
|
console.log('[TestHelper] Loading embedding model (first time)...');
|
|
25
|
-
sharedEmbedder = await
|
|
26
|
-
console.log(
|
|
25
|
+
sharedEmbedder = await createEmbedder(config);
|
|
26
|
+
console.log(`[TestHelper] Embedding model loaded (${sharedEmbedder.dimension}d)`);
|
|
27
27
|
}
|
|
28
28
|
return sharedEmbedder;
|
|
29
29
|
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for MRL Embedder
|
|
3
|
+
*
|
|
4
|
+
* Tests the Matryoshka Representation Learning embedder:
|
|
5
|
+
* - Configurable dimensions (64, 128, 256, 512, 768)
|
|
6
|
+
* - Layer normalization and slicing
|
|
7
|
+
* - Semantic similarity at different dimensions
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect, beforeAll } from 'vitest';
|
|
11
|
+
import { createMRLEmbedder, createLegacyEmbedder, createEmbedder, VALID_DIMENSIONS } from '../lib/mrl-embedder.js';
|
|
12
|
+
import { cosineSimilarity } from '../lib/utils.js';
|
|
13
|
+
|
|
14
|
+
describe('MRL Embedder', () => {
|
|
15
|
+
let embedder256;
|
|
16
|
+
|
|
17
|
+
beforeAll(async () => {
|
|
18
|
+
// Load embedder with 256d (default)
|
|
19
|
+
console.log('[Test] Loading MRL embedder (256d)...');
|
|
20
|
+
embedder256 = await createMRLEmbedder('nomic-ai/nomic-embed-text-v1.5', { dimension: 256 });
|
|
21
|
+
console.log('[Test] MRL embedder loaded');
|
|
22
|
+
}, 120000); // 2 min timeout for model download
|
|
23
|
+
|
|
24
|
+
describe('Dimension Configuration', () => {
|
|
25
|
+
it('should export valid dimensions', () => {
|
|
26
|
+
expect(VALID_DIMENSIONS).toEqual([64, 128, 256, 512, 768]);
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it('should produce 256d embeddings by default', async () => {
|
|
30
|
+
const output = await embedder256('test text');
|
|
31
|
+
const vector = Array.from(output.data);
|
|
32
|
+
expect(vector.length).toBe(256);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it('should attach dimension metadata', () => {
|
|
36
|
+
expect(embedder256.dimension).toBe(256);
|
|
37
|
+
expect(embedder256.modelName).toBe('nomic-ai/nomic-embed-text-v1.5');
|
|
38
|
+
});
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
describe('Embedding Quality', () => {
|
|
42
|
+
it('should produce normalized vectors', async () => {
|
|
43
|
+
const output = await embedder256('normalized vector test');
|
|
44
|
+
const vector = Array.from(output.data);
|
|
45
|
+
const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
|
|
46
|
+
expect(magnitude).toBeCloseTo(1, 3);
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('should generate different embeddings for different text', async () => {
|
|
50
|
+
const output1 = await embedder256('apple fruit');
|
|
51
|
+
const output2 = await embedder256('programming code');
|
|
52
|
+
|
|
53
|
+
const vector1 = Array.from(output1.data);
|
|
54
|
+
const vector2 = Array.from(output2.data);
|
|
55
|
+
|
|
56
|
+
const areSame = vector1.every((v, i) => Math.abs(v - vector2[i]) < 0.0001);
|
|
57
|
+
expect(areSame).toBe(false);
|
|
58
|
+
});
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
describe('Semantic Similarity', () => {
|
|
62
|
+
it('should give high similarity for semantically similar text', async () => {
|
|
63
|
+
const output1 = await embedder256('user authentication login');
|
|
64
|
+
const output2 = await embedder256('user login authentication');
|
|
65
|
+
|
|
66
|
+
const vector1 = Array.from(output1.data);
|
|
67
|
+
const vector2 = Array.from(output2.data);
|
|
68
|
+
|
|
69
|
+
const similarity = cosineSimilarity(vector1, vector2);
|
|
70
|
+
expect(similarity).toBeGreaterThan(0.85);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it('should give lower similarity for different topics', async () => {
|
|
74
|
+
const output1 = await embedder256('database query SQL');
|
|
75
|
+
const output2 = await embedder256('pizza delivery food');
|
|
76
|
+
|
|
77
|
+
const vector1 = Array.from(output1.data);
|
|
78
|
+
const vector2 = Array.from(output2.data);
|
|
79
|
+
|
|
80
|
+
const similarity = cosineSimilarity(vector1, vector2);
|
|
81
|
+
expect(similarity).toBeLessThan(0.5);
|
|
82
|
+
});
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
describe('createEmbedder Factory', () => {
|
|
87
|
+
it('should create MRL embedder for nomic models', async () => {
|
|
88
|
+
const config = {
|
|
89
|
+
embeddingModel: 'nomic-ai/nomic-embed-text-v1.5',
|
|
90
|
+
embeddingDimension: 128,
|
|
91
|
+
device: 'cpu'
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
const embedder = await createEmbedder(config);
|
|
95
|
+
expect(embedder.modelName).toContain('nomic');
|
|
96
|
+
expect(embedder.dimension).toBe(128);
|
|
97
|
+
}, 120000);
|
|
98
|
+
|
|
99
|
+
it('should fall back to legacy for MiniLM', async () => {
|
|
100
|
+
const config = {
|
|
101
|
+
embeddingModel: 'Xenova/all-MiniLM-L6-v2',
|
|
102
|
+
device: 'cpu'
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
const embedder = await createEmbedder(config);
|
|
106
|
+
expect(embedder.dimension).toBe(384);
|
|
107
|
+
}, 120000);
|
|
108
|
+
});
|