@softerist/heuristic-mcp 2.1.46 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -76
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tests for HybridSearch feature
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
4
|
* Tests the search functionality including:
|
|
5
5
|
* - Semantic search with embeddings
|
|
6
6
|
* - Exact match boosting
|
|
@@ -10,28 +10,29 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
|
13
|
-
import {
|
|
14
|
-
createTestFixtures,
|
|
15
|
-
cleanupFixtures,
|
|
13
|
+
import {
|
|
14
|
+
createTestFixtures,
|
|
15
|
+
cleanupFixtures,
|
|
16
16
|
clearTestCache,
|
|
17
|
-
createMockRequest
|
|
17
|
+
createMockRequest,
|
|
18
|
+
createHybridSearchCacheStub,
|
|
18
19
|
} from './helpers.js';
|
|
19
20
|
import * as HybridSearchFeature from '../features/hybrid-search.js';
|
|
20
21
|
import { HybridSearch } from '../features/hybrid-search.js';
|
|
21
22
|
|
|
22
23
|
describe('HybridSearch', () => {
|
|
23
24
|
let fixtures;
|
|
24
|
-
|
|
25
|
+
|
|
25
26
|
beforeAll(async () => {
|
|
26
|
-
fixtures = await createTestFixtures({ workerThreads: 1 });
|
|
27
|
-
|
|
27
|
+
fixtures = await createTestFixtures({ workerThreads: 1, verbose: true });
|
|
28
|
+
|
|
28
29
|
// Ensure we have indexed content
|
|
29
30
|
await clearTestCache(fixtures.config);
|
|
30
31
|
fixtures.cache.setVectorStore([]);
|
|
31
|
-
fixtures.cache.
|
|
32
|
+
fixtures.cache.clearFileHashes();
|
|
32
33
|
await fixtures.indexer.indexAll(true);
|
|
33
34
|
});
|
|
34
|
-
|
|
35
|
+
|
|
35
36
|
afterAll(async () => {
|
|
36
37
|
await cleanupFixtures(fixtures);
|
|
37
38
|
});
|
|
@@ -40,10 +41,10 @@ describe('HybridSearch', () => {
|
|
|
40
41
|
it('should find relevant code for semantic queries', async () => {
|
|
41
42
|
// Search for something that should exist in the codebase
|
|
42
43
|
const { results, message } = await fixtures.hybridSearch.search('embedding model', 5);
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
expect(message).toBeNull();
|
|
45
46
|
expect(results.length).toBeGreaterThan(0);
|
|
46
|
-
|
|
47
|
+
|
|
47
48
|
// Results should have required properties
|
|
48
49
|
for (const result of results) {
|
|
49
50
|
expect(result).toHaveProperty('file');
|
|
@@ -54,40 +55,41 @@ describe('HybridSearch', () => {
|
|
|
54
55
|
expect(result).toHaveProperty('vector');
|
|
55
56
|
}
|
|
56
57
|
});
|
|
57
|
-
|
|
58
|
+
|
|
58
59
|
it('should return results sorted by score (highest first)', async () => {
|
|
59
60
|
const { results } = await fixtures.hybridSearch.search('function', 10);
|
|
60
|
-
|
|
61
|
+
|
|
61
62
|
expect(results.length).toBeGreaterThan(1);
|
|
62
|
-
|
|
63
|
+
|
|
63
64
|
// Verify descending order
|
|
64
65
|
for (let i = 1; i < results.length; i++) {
|
|
65
66
|
expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
|
|
66
67
|
}
|
|
67
68
|
});
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
it('should respect maxResults parameter', async () => {
|
|
70
71
|
const maxResults = 3;
|
|
71
72
|
const { results } = await fixtures.hybridSearch.search('const', maxResults);
|
|
72
|
-
|
|
73
|
+
|
|
73
74
|
expect(results.length).toBeLessThanOrEqual(maxResults);
|
|
74
75
|
});
|
|
75
|
-
|
|
76
|
+
|
|
76
77
|
it('should boost exact matches', async () => {
|
|
77
78
|
// Search for an exact term that exists
|
|
78
79
|
const { results: exactResults } = await fixtures.hybridSearch.search('embedder', 5);
|
|
79
|
-
|
|
80
|
+
|
|
80
81
|
// At least one result should contain the exact term
|
|
81
|
-
const hasExactMatch = exactResults.some(r =>
|
|
82
|
-
|
|
83
|
-
);
|
|
84
|
-
|
|
82
|
+
const hasExactMatch = exactResults.some((r) => r.content.toLowerCase().includes('embedder'));
|
|
83
|
+
|
|
85
84
|
expect(hasExactMatch).toBe(true);
|
|
86
85
|
});
|
|
87
|
-
|
|
86
|
+
|
|
88
87
|
it('should handle natural language queries', async () => {
|
|
89
|
-
const { results } = await fixtures.hybridSearch.search(
|
|
90
|
-
|
|
88
|
+
const { results } = await fixtures.hybridSearch.search(
|
|
89
|
+
'where is the configuration loaded',
|
|
90
|
+
5
|
|
91
|
+
);
|
|
92
|
+
|
|
91
93
|
expect(results.length).toBeGreaterThan(0);
|
|
92
94
|
});
|
|
93
95
|
});
|
|
@@ -95,16 +97,15 @@ describe('HybridSearch', () => {
|
|
|
95
97
|
describe('Empty Index Handling', () => {
|
|
96
98
|
it('should return helpful message when index is empty', async () => {
|
|
97
99
|
// Create a search instance with empty cache
|
|
98
|
-
const emptyCache = {
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
100
|
+
const emptyCache = createHybridSearchCacheStub({
|
|
101
|
+
vectorStore: [],
|
|
102
|
+
getVector: () => null,
|
|
103
|
+
getChunk: () => null,
|
|
104
|
+
});
|
|
105
|
+
|
|
105
106
|
const emptySearch = new HybridSearch(fixtures.embedder, emptyCache, fixtures.config);
|
|
106
107
|
const { results, message } = await emptySearch.search('test', 5);
|
|
107
|
-
|
|
108
|
+
|
|
108
109
|
expect(results.length).toBe(0);
|
|
109
110
|
expect(message).toContain('No code has been indexed');
|
|
110
111
|
});
|
|
@@ -113,8 +114,8 @@ describe('HybridSearch', () => {
|
|
|
113
114
|
describe('Result Formatting', () => {
|
|
114
115
|
it('should format results as markdown', async () => {
|
|
115
116
|
const { results } = await fixtures.hybridSearch.search('function', 3);
|
|
116
|
-
const formatted = fixtures.hybridSearch.formatResults(results);
|
|
117
|
-
|
|
117
|
+
const formatted = await fixtures.hybridSearch.formatResults(results);
|
|
118
|
+
|
|
118
119
|
// Should contain markdown elements
|
|
119
120
|
expect(formatted).toContain('## Result');
|
|
120
121
|
expect(formatted).toContain('**File:**');
|
|
@@ -122,17 +123,17 @@ describe('HybridSearch', () => {
|
|
|
122
123
|
expect(formatted).toContain('```');
|
|
123
124
|
expect(formatted).toContain('Relevance:');
|
|
124
125
|
});
|
|
125
|
-
|
|
126
|
-
it('should return no matches message for empty results', () => {
|
|
127
|
-
const formatted = fixtures.hybridSearch.formatResults([]);
|
|
128
|
-
|
|
126
|
+
|
|
127
|
+
it('should return no matches message for empty results', async () => {
|
|
128
|
+
const formatted = await fixtures.hybridSearch.formatResults([]);
|
|
129
|
+
|
|
129
130
|
expect(formatted).toContain('No matching code found');
|
|
130
131
|
});
|
|
131
|
-
|
|
132
|
+
|
|
132
133
|
it('should include relative file paths', async () => {
|
|
133
134
|
const { results } = await fixtures.hybridSearch.search('export', 1);
|
|
134
|
-
const formatted = fixtures.hybridSearch.formatResults(results);
|
|
135
|
-
|
|
135
|
+
const formatted = await fixtures.hybridSearch.formatResults(results);
|
|
136
|
+
|
|
136
137
|
// Should not contain absolute paths in the output
|
|
137
138
|
expect(formatted).not.toContain(fixtures.config.searchDirectory);
|
|
138
139
|
});
|
|
@@ -142,34 +143,430 @@ describe('HybridSearch', () => {
|
|
|
142
143
|
it('should give higher scores to more relevant results', async () => {
|
|
143
144
|
// Search for a specific term
|
|
144
145
|
const { results } = await fixtures.hybridSearch.search('CodebaseIndexer', 5);
|
|
145
|
-
|
|
146
|
+
|
|
146
147
|
if (results.length > 0) {
|
|
147
148
|
// Top result should have high relevance
|
|
148
149
|
expect(results[0].score).toBeGreaterThan(0.3);
|
|
149
150
|
}
|
|
150
151
|
});
|
|
151
|
-
|
|
152
|
+
|
|
152
153
|
it('should apply semantic weight from config', async () => {
|
|
153
154
|
const { results } = await fixtures.hybridSearch.search('async function', 5);
|
|
154
|
-
|
|
155
|
+
|
|
155
156
|
// All results should have positive scores
|
|
156
157
|
for (const result of results) {
|
|
157
158
|
expect(result.score).toBeGreaterThan(0);
|
|
158
159
|
}
|
|
159
160
|
});
|
|
160
161
|
});
|
|
162
|
+
|
|
163
|
+
describe('ANN Candidate Handling', () => {
|
|
164
|
+
it('should honor ANN min/max candidate settings', () => {
|
|
165
|
+
const cache = createHybridSearchCacheStub();
|
|
166
|
+
const config = {
|
|
167
|
+
annEnabled: true,
|
|
168
|
+
annMinCandidates: 4,
|
|
169
|
+
annMaxCandidates: 6,
|
|
170
|
+
annCandidateMultiplier: 2,
|
|
171
|
+
semanticWeight: 1,
|
|
172
|
+
exactMatchBoost: 0,
|
|
173
|
+
recencyBoost: 0,
|
|
174
|
+
callGraphEnabled: false,
|
|
175
|
+
callGraphBoost: 0,
|
|
176
|
+
searchDirectory: process.cwd(),
|
|
177
|
+
};
|
|
178
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
179
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
180
|
+
|
|
181
|
+
expect(hybrid.getAnnCandidateCount(2, 10)).toBe(4);
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
it('should use default ANN candidate settings when unset', () => {
|
|
185
|
+
const cache = createHybridSearchCacheStub();
|
|
186
|
+
const config = {
|
|
187
|
+
annEnabled: true,
|
|
188
|
+
semanticWeight: 1,
|
|
189
|
+
exactMatchBoost: 0,
|
|
190
|
+
recencyBoost: 0,
|
|
191
|
+
callGraphEnabled: false,
|
|
192
|
+
callGraphBoost: 0,
|
|
193
|
+
searchDirectory: process.cwd(),
|
|
194
|
+
};
|
|
195
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
196
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
197
|
+
|
|
198
|
+
expect(hybrid.getAnnCandidateCount(5, 2)).toBe(2);
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
it('should dedupe ANN candidates and keep unique chunks', async () => {
|
|
202
|
+
const vectorStore = [
|
|
203
|
+
{
|
|
204
|
+
file: 'a.js',
|
|
205
|
+
content: 'alpha',
|
|
206
|
+
vector: [1, 0],
|
|
207
|
+
startLine: 1,
|
|
208
|
+
endLine: 1,
|
|
209
|
+
},
|
|
210
|
+
{
|
|
211
|
+
file: 'b.js',
|
|
212
|
+
content: 'beta',
|
|
213
|
+
vector: [0, 1],
|
|
214
|
+
startLine: 1,
|
|
215
|
+
endLine: 1,
|
|
216
|
+
},
|
|
217
|
+
];
|
|
218
|
+
const cache = createHybridSearchCacheStub({
|
|
219
|
+
vectorStore,
|
|
220
|
+
queryAnn: async () => [0, 0, 1],
|
|
221
|
+
});
|
|
222
|
+
const config = {
|
|
223
|
+
annEnabled: true,
|
|
224
|
+
annMinCandidates: 0,
|
|
225
|
+
annMaxCandidates: 10,
|
|
226
|
+
annCandidateMultiplier: 1,
|
|
227
|
+
semanticWeight: 1,
|
|
228
|
+
exactMatchBoost: 1,
|
|
229
|
+
recencyBoost: 0,
|
|
230
|
+
callGraphEnabled: false,
|
|
231
|
+
callGraphBoost: 0,
|
|
232
|
+
searchDirectory: process.cwd(),
|
|
233
|
+
};
|
|
234
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
235
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
236
|
+
|
|
237
|
+
const { results } = await hybrid.search('alpha', 2);
|
|
238
|
+
|
|
239
|
+
const files = results.map((result) => result.file);
|
|
240
|
+
expect(files).toContain('a.js');
|
|
241
|
+
expect(files).toContain('b.js');
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
it('should fall back to full candidates when ANN returns too few', async () => {
|
|
245
|
+
const vectorStore = [
|
|
246
|
+
{
|
|
247
|
+
file: 'a.js',
|
|
248
|
+
content: 'alpha',
|
|
249
|
+
vector: [1, 0],
|
|
250
|
+
startLine: 1,
|
|
251
|
+
endLine: 1,
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
file: 'b.js',
|
|
255
|
+
content: 'beta',
|
|
256
|
+
vector: [0, 1],
|
|
257
|
+
startLine: 1,
|
|
258
|
+
endLine: 1,
|
|
259
|
+
},
|
|
260
|
+
];
|
|
261
|
+
const cache = createHybridSearchCacheStub({
|
|
262
|
+
vectorStore,
|
|
263
|
+
queryAnn: async () => [0],
|
|
264
|
+
});
|
|
265
|
+
const config = {
|
|
266
|
+
annEnabled: true,
|
|
267
|
+
annMinCandidates: 0,
|
|
268
|
+
annMaxCandidates: 10,
|
|
269
|
+
annCandidateMultiplier: 1,
|
|
270
|
+
semanticWeight: 1,
|
|
271
|
+
exactMatchBoost: 0,
|
|
272
|
+
recencyBoost: 0,
|
|
273
|
+
callGraphEnabled: false,
|
|
274
|
+
callGraphBoost: 0,
|
|
275
|
+
searchDirectory: process.cwd(),
|
|
276
|
+
};
|
|
277
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
278
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
279
|
+
|
|
280
|
+
const { results } = await hybrid.search('beta', 2);
|
|
281
|
+
|
|
282
|
+
const files = results.map((result) => result.file);
|
|
283
|
+
expect(files).toContain('b.js');
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
it('should fall back when ANN dedupe leaves too few results', async () => {
|
|
287
|
+
const vectorStore = [
|
|
288
|
+
{
|
|
289
|
+
file: 'a.js',
|
|
290
|
+
content: 'alpha',
|
|
291
|
+
vector: [1, 0, 0],
|
|
292
|
+
startLine: 1,
|
|
293
|
+
endLine: 1,
|
|
294
|
+
},
|
|
295
|
+
{
|
|
296
|
+
file: 'b.js',
|
|
297
|
+
content: 'beta',
|
|
298
|
+
vector: [0, 1, 0],
|
|
299
|
+
startLine: 1,
|
|
300
|
+
endLine: 1,
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
file: 'c.js',
|
|
304
|
+
content: 'gamma',
|
|
305
|
+
vector: [0, 0, 1],
|
|
306
|
+
startLine: 1,
|
|
307
|
+
endLine: 1,
|
|
308
|
+
},
|
|
309
|
+
];
|
|
310
|
+
const cache = createHybridSearchCacheStub({
|
|
311
|
+
vectorStore,
|
|
312
|
+
queryAnn: async () => [0, 0],
|
|
313
|
+
});
|
|
314
|
+
const config = {
|
|
315
|
+
annEnabled: true,
|
|
316
|
+
annMinCandidates: 0,
|
|
317
|
+
annMaxCandidates: 10,
|
|
318
|
+
annCandidateMultiplier: 1,
|
|
319
|
+
semanticWeight: 1,
|
|
320
|
+
exactMatchBoost: 0,
|
|
321
|
+
recencyBoost: 0,
|
|
322
|
+
callGraphEnabled: false,
|
|
323
|
+
callGraphBoost: 0,
|
|
324
|
+
searchDirectory: process.cwd(),
|
|
325
|
+
};
|
|
326
|
+
const embedder = async () => ({ data: new Float32Array([0, 0, 1]) });
|
|
327
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
328
|
+
|
|
329
|
+
const { results } = await hybrid.search('gamma', 2);
|
|
330
|
+
const files = results.map((result) => result.file);
|
|
331
|
+
|
|
332
|
+
expect(files).toContain('c.js');
|
|
333
|
+
});
|
|
334
|
+
|
|
335
|
+
it('should add exact matches missed by ANN and avoid duplicates (lines 110, 113 coverage)', async () => {
|
|
336
|
+
// Setup:
|
|
337
|
+
// - 2 chunks in store, both are exact matches.
|
|
338
|
+
// - ANN returns only the first one.
|
|
339
|
+
// - maxResults = 2.
|
|
340
|
+
//
|
|
341
|
+
// Expected flow:
|
|
342
|
+
// 1. ANN returns chunk 0. candidates = [chunk0].
|
|
343
|
+
// 2. exactMatchCount = 1.
|
|
344
|
+
// 3. exactMatchCount (1) < maxResults (2), so we enter the fallback block (line 110).
|
|
345
|
+
// 4. We iterate over vectorStore.
|
|
346
|
+
// - Chunk 0 is already in 'seen', so we skip it (line 113 coverage).
|
|
347
|
+
// - Chunk 1 is not in 'seen', so we add it.
|
|
348
|
+
const vectorStore = [
|
|
349
|
+
{
|
|
350
|
+
file: 'a.js',
|
|
351
|
+
content: 'target match',
|
|
352
|
+
vector: [1, 0],
|
|
353
|
+
startLine: 1,
|
|
354
|
+
endLine: 1,
|
|
355
|
+
},
|
|
356
|
+
{
|
|
357
|
+
file: 'b.js',
|
|
358
|
+
content: 'target match',
|
|
359
|
+
vector: [0, 1],
|
|
360
|
+
startLine: 1,
|
|
361
|
+
endLine: 1,
|
|
362
|
+
},
|
|
363
|
+
];
|
|
364
|
+
const cache = createHybridSearchCacheStub({
|
|
365
|
+
vectorStore,
|
|
366
|
+
queryAnn: async () => [0],
|
|
367
|
+
});
|
|
368
|
+
const config = {
|
|
369
|
+
annEnabled: true,
|
|
370
|
+
annMinCandidates: 0,
|
|
371
|
+
annMaxCandidates: 10,
|
|
372
|
+
annCandidateMultiplier: 1,
|
|
373
|
+
semanticWeight: 1,
|
|
374
|
+
exactMatchBoost: 1,
|
|
375
|
+
recencyBoost: 0,
|
|
376
|
+
callGraphEnabled: false,
|
|
377
|
+
callGraphBoost: 0,
|
|
378
|
+
searchDirectory: process.cwd(),
|
|
379
|
+
};
|
|
380
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
381
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
382
|
+
|
|
383
|
+
const { results } = await hybrid.search('target', 2);
|
|
384
|
+
|
|
385
|
+
expect(results).toHaveLength(2);
|
|
386
|
+
const files = results.map((r) => r.file).sort();
|
|
387
|
+
expect(files).toEqual(['a.js', 'b.js']);
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
it('should add exact-match chunks when ANN misses them', async () => {
|
|
391
|
+
const vectorStore = [
|
|
392
|
+
{
|
|
393
|
+
file: 'a.js',
|
|
394
|
+
content: 'alpha content',
|
|
395
|
+
vector: [1, 0],
|
|
396
|
+
startLine: 1,
|
|
397
|
+
endLine: 1,
|
|
398
|
+
},
|
|
399
|
+
{
|
|
400
|
+
file: 'b.js',
|
|
401
|
+
content: 'exact match term',
|
|
402
|
+
vector: [0, 1],
|
|
403
|
+
startLine: 1,
|
|
404
|
+
endLine: 1,
|
|
405
|
+
},
|
|
406
|
+
];
|
|
407
|
+
const cache = createHybridSearchCacheStub({
|
|
408
|
+
vectorStore,
|
|
409
|
+
queryAnn: async () => [0],
|
|
410
|
+
});
|
|
411
|
+
const config = {
|
|
412
|
+
annEnabled: true,
|
|
413
|
+
annMinCandidates: 0,
|
|
414
|
+
annMaxCandidates: 10,
|
|
415
|
+
annCandidateMultiplier: 1,
|
|
416
|
+
semanticWeight: 1,
|
|
417
|
+
exactMatchBoost: 1,
|
|
418
|
+
recencyBoost: 0,
|
|
419
|
+
callGraphEnabled: false,
|
|
420
|
+
callGraphBoost: 0,
|
|
421
|
+
searchDirectory: process.cwd(),
|
|
422
|
+
};
|
|
423
|
+
const embedder = async () => ({ data: new Float32Array([0, 1]) });
|
|
424
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
425
|
+
|
|
426
|
+
const { results } = await hybrid.search('exact', 1);
|
|
427
|
+
|
|
428
|
+
expect(results[0].file).toBe('b.js');
|
|
429
|
+
});
|
|
430
|
+
|
|
431
|
+
it('should skip empty content and duplicate keys when adding exact matches', async () => {
|
|
432
|
+
const vectorStore = [
|
|
433
|
+
{
|
|
434
|
+
file: 'a.js',
|
|
435
|
+
content: 'no match here',
|
|
436
|
+
vector: [1, 0],
|
|
437
|
+
startLine: 1,
|
|
438
|
+
endLine: 1,
|
|
439
|
+
},
|
|
440
|
+
{
|
|
441
|
+
file: 'b.js',
|
|
442
|
+
content: null,
|
|
443
|
+
vector: [0, 1],
|
|
444
|
+
startLine: 1,
|
|
445
|
+
endLine: 1,
|
|
446
|
+
},
|
|
447
|
+
{
|
|
448
|
+
file: 'a.js',
|
|
449
|
+
content: 'match term',
|
|
450
|
+
vector: [1, 0],
|
|
451
|
+
startLine: 1,
|
|
452
|
+
endLine: 1,
|
|
453
|
+
},
|
|
454
|
+
];
|
|
455
|
+
const cache = createHybridSearchCacheStub({
|
|
456
|
+
vectorStore,
|
|
457
|
+
queryAnn: async () => [0],
|
|
458
|
+
});
|
|
459
|
+
const config = {
|
|
460
|
+
annEnabled: true,
|
|
461
|
+
annMinCandidates: 0,
|
|
462
|
+
annMaxCandidates: 10,
|
|
463
|
+
annCandidateMultiplier: 1,
|
|
464
|
+
semanticWeight: 1,
|
|
465
|
+
exactMatchBoost: 1,
|
|
466
|
+
recencyBoost: 0,
|
|
467
|
+
callGraphEnabled: false,
|
|
468
|
+
callGraphBoost: 0,
|
|
469
|
+
searchDirectory: process.cwd(),
|
|
470
|
+
};
|
|
471
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
472
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
473
|
+
|
|
474
|
+
const { results } = await hybrid.search('match', 1);
|
|
475
|
+
|
|
476
|
+
expect(results).toHaveLength(1);
|
|
477
|
+
expect(results[0].content).toBe('no match here');
|
|
478
|
+
});
|
|
479
|
+
});
|
|
480
|
+
|
|
481
|
+
describe('Cache Invalidation', () => {
|
|
482
|
+
it('should clear file modification times', () => {
|
|
483
|
+
fixtures.hybridSearch.fileModTimes.set('a.js', 123);
|
|
484
|
+
fixtures.hybridSearch.clearFileModTime('a.js');
|
|
485
|
+
expect(fixtures.hybridSearch.fileModTimes.has('a.js')).toBe(false);
|
|
486
|
+
});
|
|
487
|
+
});
|
|
488
|
+
|
|
489
|
+
describe('Recency Boost', () => {
|
|
490
|
+
it('should apply recency boost using default decay days', async () => {
|
|
491
|
+
const vectorStore = [
|
|
492
|
+
{
|
|
493
|
+
file: 'recent.js',
|
|
494
|
+
content: 'recent',
|
|
495
|
+
vector: [1, 0],
|
|
496
|
+
startLine: 1,
|
|
497
|
+
endLine: 1,
|
|
498
|
+
},
|
|
499
|
+
];
|
|
500
|
+
const cache = createHybridSearchCacheStub({
|
|
501
|
+
vectorStore,
|
|
502
|
+
queryAnn: async () => null,
|
|
503
|
+
getFileMeta: () => null,
|
|
504
|
+
});
|
|
505
|
+
const config = {
|
|
506
|
+
annEnabled: false,
|
|
507
|
+
semanticWeight: 1,
|
|
508
|
+
exactMatchBoost: 0,
|
|
509
|
+
recencyBoost: 0.5,
|
|
510
|
+
recencyDecayDays: 0,
|
|
511
|
+
callGraphEnabled: false,
|
|
512
|
+
callGraphBoost: 0,
|
|
513
|
+
searchDirectory: process.cwd(),
|
|
514
|
+
};
|
|
515
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
516
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
517
|
+
hybrid.fileModTimes.set('recent.js', Date.now());
|
|
518
|
+
|
|
519
|
+
const { results } = await hybrid.search('recent', 1);
|
|
520
|
+
|
|
521
|
+
expect(results[0].score).toBeCloseTo(1.5, 3);
|
|
522
|
+
});
|
|
523
|
+
|
|
524
|
+
it('should apply recency boost with custom decay days', async () => {
|
|
525
|
+
const vectorStore = [
|
|
526
|
+
{
|
|
527
|
+
file: 'older.js',
|
|
528
|
+
content: 'older',
|
|
529
|
+
vector: [1, 0],
|
|
530
|
+
startLine: 1,
|
|
531
|
+
endLine: 1,
|
|
532
|
+
},
|
|
533
|
+
];
|
|
534
|
+
const cache = createHybridSearchCacheStub({
|
|
535
|
+
vectorStore,
|
|
536
|
+
queryAnn: async () => null,
|
|
537
|
+
getFileMeta: () => null,
|
|
538
|
+
});
|
|
539
|
+
const config = {
|
|
540
|
+
annEnabled: false,
|
|
541
|
+
semanticWeight: 1,
|
|
542
|
+
exactMatchBoost: 0,
|
|
543
|
+
recencyBoost: 0.5,
|
|
544
|
+
recencyDecayDays: 10,
|
|
545
|
+
callGraphEnabled: false,
|
|
546
|
+
callGraphBoost: 0,
|
|
547
|
+
searchDirectory: process.cwd(),
|
|
548
|
+
};
|
|
549
|
+
const embedder = async () => ({ data: new Float32Array([1, 0]) });
|
|
550
|
+
const hybrid = new HybridSearch(embedder, cache, config);
|
|
551
|
+
hybrid.fileModTimes.set('older.js', Date.now() - 5 * 24 * 60 * 60 * 1000);
|
|
552
|
+
|
|
553
|
+
const { results } = await hybrid.search('older', 1);
|
|
554
|
+
|
|
555
|
+
expect(results[0].score).toBeGreaterThan(1);
|
|
556
|
+
});
|
|
557
|
+
});
|
|
161
558
|
});
|
|
162
559
|
|
|
163
560
|
describe('Hybrid Search Tool Handler', () => {
|
|
164
561
|
let fixtures;
|
|
165
|
-
|
|
562
|
+
|
|
166
563
|
beforeAll(async () => {
|
|
167
564
|
fixtures = await createTestFixtures({ workerThreads: 1 });
|
|
168
|
-
|
|
565
|
+
|
|
169
566
|
// Ensure indexed content
|
|
170
567
|
await fixtures.indexer.indexAll(false);
|
|
171
568
|
});
|
|
172
|
-
|
|
569
|
+
|
|
173
570
|
afterAll(async () => {
|
|
174
571
|
await cleanupFixtures(fixtures);
|
|
175
572
|
});
|
|
@@ -177,7 +574,7 @@ describe('Hybrid Search Tool Handler', () => {
|
|
|
177
574
|
describe('Tool Definition', () => {
|
|
178
575
|
it('should have correct tool definition', () => {
|
|
179
576
|
const toolDef = HybridSearchFeature.getToolDefinition(fixtures.config);
|
|
180
|
-
|
|
577
|
+
|
|
181
578
|
expect(toolDef.name).toBe('a_semantic_search');
|
|
182
579
|
expect(toolDef.description).toContain('semantic');
|
|
183
580
|
expect(toolDef.description).toContain('hybrid');
|
|
@@ -185,59 +582,75 @@ describe('Hybrid Search Tool Handler', () => {
|
|
|
185
582
|
expect(toolDef.inputSchema.properties.maxResults).toBeDefined();
|
|
186
583
|
expect(toolDef.inputSchema.required).toContain('query');
|
|
187
584
|
});
|
|
188
|
-
|
|
585
|
+
|
|
189
586
|
it('should use config default for maxResults', () => {
|
|
190
587
|
const toolDef = HybridSearchFeature.getToolDefinition(fixtures.config);
|
|
191
|
-
|
|
588
|
+
|
|
192
589
|
expect(toolDef.inputSchema.properties.maxResults.default).toBe(fixtures.config.maxResults);
|
|
193
590
|
});
|
|
194
591
|
});
|
|
195
592
|
|
|
196
593
|
describe('Tool Handler', () => {
|
|
197
594
|
it('should return search results for valid query', async () => {
|
|
198
|
-
const request = createMockRequest('a_semantic_search', {
|
|
199
|
-
query: 'function that handles indexing'
|
|
595
|
+
const request = createMockRequest('a_semantic_search', {
|
|
596
|
+
query: 'function that handles indexing',
|
|
200
597
|
});
|
|
201
|
-
|
|
598
|
+
|
|
202
599
|
const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
|
|
203
|
-
|
|
600
|
+
|
|
204
601
|
expect(result.content[0].type).toBe('text');
|
|
205
602
|
expect(result.content[0].text).toContain('Result');
|
|
206
603
|
});
|
|
207
|
-
|
|
604
|
+
|
|
208
605
|
it('should use default maxResults when not provided', async () => {
|
|
209
|
-
const request = createMockRequest('a_semantic_search', {
|
|
210
|
-
query: 'import'
|
|
606
|
+
const request = createMockRequest('a_semantic_search', {
|
|
607
|
+
query: 'import',
|
|
211
608
|
});
|
|
212
|
-
|
|
609
|
+
|
|
213
610
|
const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
|
|
214
|
-
|
|
611
|
+
|
|
215
612
|
// Should return results (up to default max)
|
|
216
613
|
expect(result.content[0].text.length).toBeGreaterThan(0);
|
|
217
614
|
});
|
|
218
|
-
|
|
615
|
+
|
|
219
616
|
it('should respect custom maxResults', async () => {
|
|
220
|
-
const request = createMockRequest('a_semantic_search', {
|
|
617
|
+
const request = createMockRequest('a_semantic_search', {
|
|
221
618
|
query: 'const',
|
|
222
|
-
maxResults: 2
|
|
619
|
+
maxResults: 2,
|
|
223
620
|
});
|
|
224
|
-
|
|
621
|
+
|
|
225
622
|
const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
|
|
226
|
-
|
|
623
|
+
|
|
227
624
|
// Count result headers
|
|
228
625
|
const resultCount = (result.content[0].text.match(/## Result/g) || []).length;
|
|
229
626
|
expect(resultCount).toBeLessThanOrEqual(2);
|
|
230
627
|
});
|
|
231
|
-
|
|
628
|
+
|
|
232
629
|
it('should handle queries with no matches gracefully', async () => {
|
|
233
|
-
const request = createMockRequest('a_semantic_search', {
|
|
234
|
-
query: 'xyzzy_nonexistent_symbol_12345'
|
|
630
|
+
const request = createMockRequest('a_semantic_search', {
|
|
631
|
+
query: 'xyzzy_nonexistent_symbol_12345',
|
|
235
632
|
});
|
|
236
|
-
|
|
633
|
+
|
|
237
634
|
const result = await HybridSearchFeature.handleToolCall(request, fixtures.hybridSearch);
|
|
238
|
-
|
|
635
|
+
|
|
239
636
|
// Should return something (either no matches message or low-score results)
|
|
240
637
|
expect(result.content[0].text.length).toBeGreaterThan(0);
|
|
241
638
|
});
|
|
639
|
+
|
|
640
|
+
it('should return message when no indexed data exists', async () => {
|
|
641
|
+
const emptyCache = createHybridSearchCacheStub({
|
|
642
|
+
vectorStore: [],
|
|
643
|
+
getVector: () => null,
|
|
644
|
+
getChunk: () => null,
|
|
645
|
+
});
|
|
646
|
+
const emptySearch = new HybridSearch(fixtures.embedder, emptyCache, fixtures.config);
|
|
647
|
+
const request = createMockRequest('a_semantic_search', {
|
|
648
|
+
query: 'anything',
|
|
649
|
+
});
|
|
650
|
+
|
|
651
|
+
const result = await HybridSearchFeature.handleToolCall(request, emptySearch);
|
|
652
|
+
|
|
653
|
+
expect(result.content[0].text).toContain('No code has been indexed');
|
|
654
|
+
});
|
|
242
655
|
});
|
|
243
656
|
});
|