@softerist/heuristic-mcp 3.0.17 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.jsonc +23 -6
- package/features/ann-config.js +7 -14
- package/features/clear-cache.js +3 -3
- package/features/find-similar-code.js +17 -22
- package/features/hybrid-search.js +59 -67
- package/features/index-codebase.js +305 -268
- package/features/lifecycle.js +370 -176
- package/features/package-version.js +15 -26
- package/features/register.js +75 -57
- package/features/resources.js +21 -47
- package/features/set-workspace.js +31 -43
- package/index.js +818 -172
- package/lib/cache-utils.js +95 -99
- package/lib/cache.js +121 -166
- package/lib/cli.js +246 -238
- package/lib/config.js +232 -62
- package/lib/constants.js +22 -2
- package/lib/embed-query-process.js +13 -29
- package/lib/embedding-process.js +29 -19
- package/lib/embedding-worker.js +166 -149
- package/lib/ignore-patterns.js +39 -39
- package/lib/json-writer.js +7 -34
- package/lib/logging.js +11 -42
- package/lib/onnx-backend.js +4 -4
- package/lib/path-utils.js +4 -21
- package/lib/project-detector.js +3 -3
- package/lib/server-lifecycle.js +109 -15
- package/lib/settings-editor.js +25 -18
- package/lib/slice-normalize.js +6 -16
- package/lib/tokenizer.js +56 -109
- package/lib/utils.js +62 -81
- package/lib/vector-store-binary.js +7 -7
- package/lib/vector-store-sqlite.js +35 -67
- package/lib/workspace-cache-key.js +36 -0
- package/lib/workspace-env.js +55 -14
- package/package.json +86 -86
package/config.jsonc
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
// Root directory to index (relative or absolute).
|
|
3
|
+
// Keep "." for dynamic workspace behavior so MCP roots / IDE workspace selection works.
|
|
3
4
|
"searchDirectory": ".",
|
|
4
5
|
// File extensions to include (without dot).
|
|
5
6
|
"fileExtensions": [
|
|
@@ -627,7 +628,7 @@
|
|
|
627
628
|
"**/.smart-coding-cache/**",
|
|
628
629
|
],
|
|
629
630
|
// Indexing controls.
|
|
630
|
-
"indexing": {
|
|
631
|
+
"indexing": {
|
|
631
632
|
// Enable project-type detection + smart ignore patterns.
|
|
632
633
|
"smartIndexing": true,
|
|
633
634
|
// Lines per chunk.
|
|
@@ -642,14 +643,27 @@
|
|
|
642
643
|
"maxResults": 5,
|
|
643
644
|
// Enable file watcher for incremental indexing.
|
|
644
645
|
"watchFiles": true,
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
646
|
+
// Save incremental index checkpoints every 2s so interrupted runs can resume with minimal rework.
|
|
647
|
+
// Increase to 5000-10000 on slower disks if checkpoint writes feel too frequent.
|
|
648
|
+
"indexCheckpointIntervalMs": 2000,
|
|
649
|
+
},
|
|
650
|
+
// Server lifecycle controls.
|
|
651
|
+
// When true, a newly started server instance will terminate older heuristic-mcp server processes.
|
|
652
|
+
// This helps prevent stale workspace binding after IDE reloads/window switches.
|
|
653
|
+
"autoStopOtherServersOnStartup": true,
|
|
654
|
+
// Safety guard for IDE sessions:
|
|
655
|
+
// when true, semantic/index tools require a current trusted workspace signal
|
|
656
|
+
// (MCP roots or explicit workspace env), otherwise the call fails fast.
|
|
657
|
+
"requireTrustedWorkspaceSignalForTools": true,
|
|
658
|
+
// Logging and diagnostics.
|
|
659
|
+
"logging": {
|
|
648
660
|
// Enable verbose logging.
|
|
649
661
|
"verbose": true,
|
|
650
662
|
},
|
|
651
663
|
// Cache engine and serialization controls.
|
|
652
664
|
"cache": {
|
|
665
|
+
// NOTE: leave top-level "cacheDirectory" unset to keep per-workspace global cache hashing.
|
|
666
|
+
// This preserves resume/incremental indexing across IDE restarts.
|
|
653
667
|
// Persist embeddings between sessions.
|
|
654
668
|
"enableCache": true,
|
|
655
669
|
// Assume vectors are finite (skip validation).
|
|
@@ -680,7 +694,10 @@
|
|
|
680
694
|
"worker": {
|
|
681
695
|
// Number of embedding workers (0 disables).
|
|
682
696
|
// Windows + heavy Jina models are more stable with child-process embedding than worker pools.
|
|
683
|
-
"workerThreads":
|
|
697
|
+
"workerThreads": 1,
|
|
698
|
+
// Safety guard for heavy models on Windows.
|
|
699
|
+
// Recommended: true. This config ships with false, which opts in to experimental heavy-model workers on Windows.
|
|
700
|
+
"workerDisableHeavyModelOnWindows": false,
|
|
684
701
|
// Worker batch timeout in milliseconds.
|
|
685
702
|
"workerBatchTimeoutMs": 120000,
|
|
686
703
|
// Failures before worker circuit opens.
|
|
@@ -707,7 +724,7 @@
|
|
|
707
724
|
// Override embedding batch size (null = auto).
|
|
708
725
|
"embeddingBatchSize": null,
|
|
709
726
|
// ONNX threads used by embedding child process.
|
|
710
|
-
//
|
|
727
|
+
// 8 is a practical balance on 24-thread desktop CPUs for this workspace size.
|
|
711
728
|
"embeddingProcessNumThreads": 8,
|
|
712
729
|
// Embedding-child adaptive GC RSS threshold in MB (higher = less frequent GC).
|
|
713
730
|
"embeddingProcessGcRssThresholdMb": 2048,
|
package/features/ann-config.js
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
* ANN Config Tool - Runtime tuning of ANN search parameters
|
|
3
|
-
*
|
|
4
|
-
* Allows adjusting efSearch on the fly for speed/accuracy tradeoff,
|
|
5
|
-
* and querying current ANN index statistics.
|
|
6
|
-
*/
|
|
1
|
+
|
|
7
2
|
|
|
8
3
|
export class AnnConfigTool {
|
|
9
4
|
constructor(cache, config) {
|
|
@@ -11,9 +6,7 @@ export class AnnConfigTool {
|
|
|
11
6
|
this.config = config;
|
|
12
7
|
}
|
|
13
8
|
|
|
14
|
-
|
|
15
|
-
* Adjust efSearch and optionally trigger index rebuild
|
|
16
|
-
*/
|
|
9
|
+
|
|
17
10
|
async execute(args) {
|
|
18
11
|
const action = args.action || 'stats';
|
|
19
12
|
|
|
@@ -33,7 +26,7 @@ export class AnnConfigTool {
|
|
|
33
26
|
}
|
|
34
27
|
|
|
35
28
|
if (action === 'rebuild') {
|
|
36
|
-
|
|
29
|
+
|
|
37
30
|
this.cache.invalidateAnnIndex();
|
|
38
31
|
const index = await this.cache.ensureAnnIndex();
|
|
39
32
|
return {
|
|
@@ -56,7 +49,7 @@ export class AnnConfigTool {
|
|
|
56
49
|
}
|
|
57
50
|
|
|
58
51
|
if (result.enabled !== undefined) {
|
|
59
|
-
|
|
52
|
+
|
|
60
53
|
let output = '## ANN Index Statistics\n\n';
|
|
61
54
|
output += `- **Enabled**: ${result.enabled}\n`;
|
|
62
55
|
output += `- **Index Loaded**: ${result.indexLoaded}\n`;
|
|
@@ -79,12 +72,12 @@ export class AnnConfigTool {
|
|
|
79
72
|
return output;
|
|
80
73
|
}
|
|
81
74
|
|
|
82
|
-
|
|
75
|
+
|
|
83
76
|
return JSON.stringify(result, null, 2);
|
|
84
77
|
}
|
|
85
78
|
}
|
|
86
79
|
|
|
87
|
-
|
|
80
|
+
|
|
88
81
|
export function getToolDefinition() {
|
|
89
82
|
return {
|
|
90
83
|
name: 'd_ann_config',
|
|
@@ -119,7 +112,7 @@ export function getToolDefinition() {
|
|
|
119
112
|
};
|
|
120
113
|
}
|
|
121
114
|
|
|
122
|
-
|
|
115
|
+
|
|
123
116
|
export async function handleToolCall(request, annConfigTool) {
|
|
124
117
|
const args = request.params.arguments || {};
|
|
125
118
|
const result = await annConfigTool.execute(args);
|
package/features/clear-cache.js
CHANGED
|
@@ -7,21 +7,21 @@ export class CacheClearer {
|
|
|
7
7
|
}
|
|
8
8
|
|
|
9
9
|
async execute() {
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
if (this.indexer && this.indexer.isIndexing) {
|
|
12
12
|
throw new Error(
|
|
13
13
|
'Cannot clear cache while indexing is in progress. Please wait for indexing to complete.'
|
|
14
14
|
);
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
if (this.cache.isSaving) {
|
|
19
19
|
throw new Error(
|
|
20
20
|
'Cannot clear cache while cache is being saved. Please try again in a moment.'
|
|
21
21
|
);
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
if (this.isClearing) {
|
|
26
26
|
throw new Error('Cache clear operation already in progress. Please wait for it to complete.');
|
|
27
27
|
}
|
package/features/find-similar-code.js
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
2
|
import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
* FindSimilarCode feature
|
|
6
|
-
* Given a code snippet, finds similar patterns elsewhere in the codebase
|
|
7
|
-
*/
|
|
4
|
+
|
|
8
5
|
export class FindSimilarCode {
|
|
9
6
|
constructor(embedder, cache, config) {
|
|
10
7
|
this.embedder = embedder;
|
|
@@ -62,15 +59,15 @@ export class FindSimilarCode {
|
|
|
62
59
|
let codeToEmbed = code;
|
|
63
60
|
let warningMessage = null;
|
|
64
61
|
|
|
65
|
-
|
|
62
|
+
|
|
66
63
|
const estimatedTokens = estimateTokens(code);
|
|
67
64
|
const limit = getModelTokenLimit(this.config.embeddingModel);
|
|
68
65
|
|
|
69
|
-
|
|
66
|
+
|
|
70
67
|
if (estimatedTokens > limit) {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
|
|
74
71
|
const chunks = smartChunk(code, 'input.txt', this.config);
|
|
75
72
|
if (chunks.length > 0) {
|
|
76
73
|
codeToEmbed = chunks[0].text;
|
|
@@ -78,14 +75,14 @@ export class FindSimilarCode {
|
|
|
78
75
|
}
|
|
79
76
|
}
|
|
80
77
|
|
|
81
|
-
|
|
78
|
+
|
|
82
79
|
const codeEmbed = await this.embedder(codeToEmbed, {
|
|
83
80
|
pooling: 'mean',
|
|
84
81
|
normalize: true,
|
|
85
82
|
});
|
|
86
83
|
|
|
87
|
-
|
|
88
|
-
|
|
84
|
+
|
|
85
|
+
|
|
89
86
|
let codeVector;
|
|
90
87
|
try {
|
|
91
88
|
codeVector = new Float32Array(codeEmbed.data);
|
|
@@ -94,7 +91,7 @@ export class FindSimilarCode {
|
|
|
94
91
|
try {
|
|
95
92
|
codeEmbed.dispose();
|
|
96
93
|
} catch {
|
|
97
|
-
|
|
94
|
+
|
|
98
95
|
}
|
|
99
96
|
}
|
|
100
97
|
}
|
|
@@ -120,9 +117,7 @@ export class FindSimilarCode {
|
|
|
120
117
|
const normalizeText = (text) => text.trim().replace(/\s+/g, ' ');
|
|
121
118
|
const normalizedInput = normalizeText(codeToEmbed);
|
|
122
119
|
|
|
123
|
-
|
|
124
|
-
* Batch scoring function to prevent blocking the event loop
|
|
125
|
-
*/
|
|
120
|
+
|
|
126
121
|
const scoreAndFilter = async (chunks) => {
|
|
127
122
|
const BATCH_SIZE = 500;
|
|
128
123
|
const scored = [];
|
|
@@ -130,7 +125,7 @@ export class FindSimilarCode {
|
|
|
130
125
|
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
131
126
|
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
132
127
|
|
|
133
|
-
|
|
128
|
+
|
|
134
129
|
if (i > 0) {
|
|
135
130
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
136
131
|
}
|
|
@@ -159,14 +154,14 @@ export class FindSimilarCode {
|
|
|
159
154
|
|
|
160
155
|
let filteredResults = await scoreAndFilter(candidates);
|
|
161
156
|
|
|
162
|
-
|
|
163
|
-
|
|
157
|
+
|
|
158
|
+
|
|
164
159
|
const MAX_FULL_SCAN_SIZE = 5000;
|
|
165
160
|
if (usedAnn && filteredResults.length < safeMaxResults) {
|
|
166
161
|
if (vectorStore.length <= MAX_FULL_SCAN_SIZE) {
|
|
167
162
|
filteredResults = await scoreAndFilter(vectorStore);
|
|
168
163
|
} else {
|
|
169
|
-
|
|
164
|
+
|
|
170
165
|
}
|
|
171
166
|
}
|
|
172
167
|
const results = [];
|
|
@@ -220,7 +215,7 @@ export class FindSimilarCode {
|
|
|
220
215
|
}
|
|
221
216
|
}
|
|
222
217
|
|
|
223
|
-
|
|
218
|
+
|
|
224
219
|
export function getToolDefinition(_config) {
|
|
225
220
|
return {
|
|
226
221
|
name: 'd_find_similar_code',
|
|
@@ -256,7 +251,7 @@ export function getToolDefinition(_config) {
|
|
|
256
251
|
};
|
|
257
252
|
}
|
|
258
253
|
|
|
259
|
-
|
|
254
|
+
|
|
260
255
|
export async function handleToolCall(request, findSimilarCode) {
|
|
261
256
|
const args = request.params?.arguments || {};
|
|
262
257
|
const code = args.code;
|
package/features/hybrid-search.js
CHANGED
|
@@ -14,8 +14,8 @@ export class HybridSearch {
|
|
|
14
14
|
this.embedder = embedder;
|
|
15
15
|
this.cache = cache;
|
|
16
16
|
this.config = config;
|
|
17
|
-
this.fileModTimes = new Map();
|
|
18
|
-
this._lastAccess = new Map();
|
|
17
|
+
this.fileModTimes = new Map();
|
|
18
|
+
this._lastAccess = new Map();
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
async getChunkContent(chunkOrIndex) {
|
|
@@ -41,16 +41,16 @@ export class HybridSearch {
|
|
|
41
41
|
|
|
42
42
|
for (const file of uniqueFiles) {
|
|
43
43
|
if (!this.fileModTimes.has(file)) {
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
const meta = this.cache.getFileMeta(file);
|
|
46
46
|
if (meta && typeof meta.mtimeMs === 'number') {
|
|
47
47
|
this.fileModTimes.set(file, meta.mtimeMs);
|
|
48
|
-
this._lastAccess.set(file, Date.now());
|
|
48
|
+
this._lastAccess.set(file, Date.now());
|
|
49
49
|
} else {
|
|
50
50
|
missing.push(file);
|
|
51
51
|
}
|
|
52
52
|
} else {
|
|
53
|
-
this._lastAccess.set(file, Date.now());
|
|
53
|
+
this._lastAccess.set(file, Date.now());
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
56
|
|
|
@@ -58,8 +58,8 @@ export class HybridSearch {
|
|
|
58
58
|
return;
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
|
|
62
|
+
|
|
63
63
|
const workerCount = Math.min(STAT_CONCURRENCY_LIMIT, missing.length);
|
|
64
64
|
|
|
65
65
|
const worker = async (startIdx) => {
|
|
@@ -77,16 +77,16 @@ export class HybridSearch {
|
|
|
77
77
|
|
|
78
78
|
await Promise.all(Array.from({ length: workerCount }, (_, i) => worker(i)));
|
|
79
79
|
|
|
80
|
-
|
|
80
|
+
|
|
81
81
|
const lruMaxEntries = this.config.lruMaxEntries ?? 5000;
|
|
82
82
|
const lruTargetEntries = this.config.lruTargetEntries ?? 4000;
|
|
83
83
|
if (this.fileModTimes.size > lruMaxEntries) {
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
const entries = [...this.fileModTimes.keys()].map((k) => ({
|
|
86
86
|
key: k,
|
|
87
87
|
lastAccess: this._lastAccess?.get(k) ?? 0,
|
|
88
88
|
}));
|
|
89
|
-
entries.sort((a, b) => a.lastAccess - b.lastAccess);
|
|
89
|
+
entries.sort((a, b) => a.lastAccess - b.lastAccess);
|
|
90
90
|
const toEvict = entries.slice(0, entries.length - lruTargetEntries);
|
|
91
91
|
for (const { key } of toEvict) {
|
|
92
92
|
this.fileModTimes.delete(key);
|
|
@@ -95,20 +95,12 @@ export class HybridSearch {
|
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
|
|
98
|
+
|
|
99
99
|
clearFileModTime(file) {
|
|
100
100
|
this.fileModTimes.delete(file);
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
* Search the indexed codebase for relevant code snippets.
|
|
105
|
-
* Uses a hybrid approach combining semantic similarity (via embeddings) with
|
|
106
|
-
* keyword matching for optimal results.
|
|
107
|
-
* @param {string} query - Natural language or keyword search query
|
|
108
|
-
* @param {number} maxResults - Maximum number of results to return (default: 15)
|
|
109
|
-
* @returns {Promise<{results: Array<{file: string, startLine: number, endLine: number, content: string, score: number}>, message?: string}>}
|
|
110
|
-
* @throws {Error} If embedder is not initialized
|
|
111
|
-
*/
|
|
103
|
+
|
|
112
104
|
async search(query, maxResults) {
|
|
113
105
|
try {
|
|
114
106
|
if (typeof this.cache.ensureLoaded === 'function') {
|
|
@@ -125,19 +117,19 @@ export class HybridSearch {
|
|
|
125
117
|
};
|
|
126
118
|
}
|
|
127
119
|
|
|
128
|
-
|
|
120
|
+
|
|
129
121
|
if (this.config.verbose) {
|
|
130
122
|
console.info(`[Search] Query: "${query}"`);
|
|
131
123
|
}
|
|
132
124
|
|
|
133
125
|
let queryVector;
|
|
134
126
|
|
|
135
|
-
|
|
136
|
-
|
|
127
|
+
|
|
128
|
+
|
|
137
129
|
if (this.config.unloadModelAfterSearch) {
|
|
138
130
|
queryVector = await embedQueryInChildProcess(query, this.config);
|
|
139
131
|
} else {
|
|
140
|
-
|
|
132
|
+
|
|
141
133
|
const queryEmbed = await this.embedder(query, {
|
|
142
134
|
pooling: 'mean',
|
|
143
135
|
normalize: true,
|
|
@@ -150,13 +142,13 @@ export class HybridSearch {
|
|
|
150
142
|
try {
|
|
151
143
|
queryEmbed.dispose();
|
|
152
144
|
} catch {
|
|
153
|
-
|
|
145
|
+
|
|
154
146
|
}
|
|
155
147
|
}
|
|
156
148
|
}
|
|
157
149
|
}
|
|
158
150
|
|
|
159
|
-
let candidateIndices = null;
|
|
151
|
+
let candidateIndices = null;
|
|
160
152
|
let usedAnn = false;
|
|
161
153
|
|
|
162
154
|
if (this.config.annEnabled) {
|
|
@@ -167,7 +159,7 @@ export class HybridSearch {
|
|
|
167
159
|
if (this.config.verbose) {
|
|
168
160
|
console.info(`[Search] Using ANN index (${annLabels.length} candidates)`);
|
|
169
161
|
}
|
|
170
|
-
candidateIndices = Array.from(new Set(annLabels));
|
|
162
|
+
candidateIndices = Array.from(new Set(annLabels));
|
|
171
163
|
}
|
|
172
164
|
}
|
|
173
165
|
|
|
@@ -183,7 +175,7 @@ export class HybridSearch {
|
|
|
183
175
|
`[Search] ANN returned fewer results (${candidateIndices.length}) than requested (${maxResults}), augmenting with full scan...`
|
|
184
176
|
);
|
|
185
177
|
}
|
|
186
|
-
candidateIndices = null;
|
|
178
|
+
candidateIndices = null;
|
|
187
179
|
usedAnn = false;
|
|
188
180
|
}
|
|
189
181
|
|
|
@@ -202,38 +194,38 @@ export class HybridSearch {
|
|
|
202
194
|
}
|
|
203
195
|
|
|
204
196
|
if (exactMatchCount < maxResults) {
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|
|
208
200
|
const MAX_FULL_SCAN_SIZE = this.config.fullScanThreshold ?? 2000;
|
|
209
201
|
|
|
210
202
|
if (storeSize <= MAX_FULL_SCAN_SIZE) {
|
|
211
203
|
const seen = new Set(candidateIndices);
|
|
212
204
|
|
|
213
|
-
|
|
214
|
-
|
|
205
|
+
|
|
206
|
+
|
|
215
207
|
const FALLBACK_BATCH = 100;
|
|
216
208
|
let additionalMatches = 0;
|
|
217
209
|
const targetMatches = maxResults - exactMatchCount;
|
|
218
210
|
|
|
219
211
|
outerLoop:
|
|
220
212
|
for (let i = 0; i < storeSize; i += FALLBACK_BATCH) {
|
|
221
|
-
if (i > 0) await new Promise((r) => setTimeout(r, 0));
|
|
213
|
+
if (i > 0) await new Promise((r) => setTimeout(r, 0));
|
|
222
214
|
|
|
223
215
|
const limit = Math.min(storeSize, i + FALLBACK_BATCH);
|
|
224
216
|
|
|
225
|
-
|
|
217
|
+
|
|
226
218
|
const batchIndices = [];
|
|
227
219
|
for (let j = i; j < limit; j++) {
|
|
228
220
|
if (!seen.has(j)) batchIndices.push(j);
|
|
229
221
|
}
|
|
230
222
|
|
|
231
|
-
|
|
223
|
+
|
|
232
224
|
const contents = await Promise.all(
|
|
233
225
|
batchIndices.map(idx => this.getChunkContent(idx))
|
|
234
226
|
);
|
|
235
227
|
|
|
236
|
-
|
|
228
|
+
|
|
237
229
|
for (let k = 0; k < batchIndices.length; k++) {
|
|
238
230
|
const content = contents[k];
|
|
239
231
|
if (content && content.toLowerCase().includes(lowerQuery)) {
|
|
@@ -241,7 +233,7 @@ export class HybridSearch {
|
|
|
241
233
|
seen.add(idx);
|
|
242
234
|
candidateIndices.push(idx);
|
|
243
235
|
additionalMatches++;
|
|
244
|
-
|
|
236
|
+
|
|
245
237
|
if (additionalMatches >= targetMatches) break outerLoop;
|
|
246
238
|
}
|
|
247
239
|
}
|
|
@@ -254,7 +246,7 @@ export class HybridSearch {
|
|
|
254
246
|
}
|
|
255
247
|
}
|
|
256
248
|
|
|
257
|
-
|
|
249
|
+
|
|
258
250
|
let recencyBoostEnabled = this.config.recencyBoost > 0;
|
|
259
251
|
let now = Date.now();
|
|
260
252
|
let recencyDecayMs = (this.config.recencyDecayDays || 30) * 24 * 60 * 60 * 1000;
|
|
@@ -266,13 +258,13 @@ export class HybridSearch {
|
|
|
266
258
|
const candidates = candidateIndices
|
|
267
259
|
? candidateIndices.map((idx) => this.cache.getChunk(idx)).filter(Boolean)
|
|
268
260
|
: Array.from({ length: storeSize }, (_, i) => this.cache.getChunk(i)).filter(Boolean);
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
|
|
272
264
|
if (candidates.length <= 1000) {
|
|
273
265
|
await this.populateFileModTimes(candidates.map((chunk) => chunk.file));
|
|
274
266
|
} else {
|
|
275
|
-
|
|
267
|
+
|
|
276
268
|
for (const chunk of candidates) {
|
|
277
269
|
if (!this.fileModTimes.has(chunk.file)) {
|
|
278
270
|
const meta = this.cache.getFileMeta(chunk.file);
|
|
@@ -284,11 +276,11 @@ export class HybridSearch {
|
|
|
284
276
|
}
|
|
285
277
|
}
|
|
286
278
|
|
|
287
|
-
|
|
279
|
+
|
|
288
280
|
const scoredChunks = [];
|
|
289
281
|
|
|
290
|
-
|
|
291
|
-
|
|
282
|
+
|
|
283
|
+
|
|
292
284
|
const totalCandidates = candidateIndices ? candidateIndices.length : storeSize;
|
|
293
285
|
const textMatchMaxCandidates = Number.isInteger(this.config.textMatchMaxCandidates)
|
|
294
286
|
? this.config.textMatchMaxCandidates
|
|
@@ -297,7 +289,7 @@ export class HybridSearch {
|
|
|
297
289
|
const deferTextMatch = shouldApplyTextMatch && totalCandidates > textMatchMaxCandidates;
|
|
298
290
|
|
|
299
291
|
for (let i = 0; i < totalCandidates; i += SEARCH_BATCH_SIZE) {
|
|
300
|
-
|
|
292
|
+
|
|
301
293
|
if (i > 0) {
|
|
302
294
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
303
295
|
}
|
|
@@ -307,25 +299,25 @@ export class HybridSearch {
|
|
|
307
299
|
for (let j = i; j < limit; j++) {
|
|
308
300
|
const idx = candidateIndices ? candidateIndices[j] : j;
|
|
309
301
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
|
|
313
305
|
const chunkInfo = this.cache.getChunk(idx);
|
|
314
306
|
if (!chunkInfo) {
|
|
315
|
-
|
|
307
|
+
|
|
316
308
|
continue;
|
|
317
309
|
}
|
|
318
310
|
|
|
319
|
-
|
|
311
|
+
|
|
320
312
|
const vector = this.cache.getChunkVector(chunkInfo, idx);
|
|
321
313
|
if (!vector) continue;
|
|
322
314
|
|
|
323
|
-
|
|
315
|
+
|
|
324
316
|
let score;
|
|
325
317
|
try {
|
|
326
318
|
score = dotSimilarity(queryVector, vector) * semanticWeight;
|
|
327
319
|
} catch (err) {
|
|
328
|
-
|
|
320
|
+
|
|
329
321
|
if (this.config.verbose) {
|
|
330
322
|
console.warn(`[Search] ${err.message} at index ${idx}`);
|
|
331
323
|
}
|
|
@@ -340,7 +332,7 @@ export class HybridSearch {
|
|
|
340
332
|
if (lowerContent && lowerContent.includes(lowerQuery)) {
|
|
341
333
|
score += exactMatchBoost;
|
|
342
334
|
} else if (lowerContent && queryWordCount > 0) {
|
|
343
|
-
|
|
335
|
+
|
|
344
336
|
let matchedWords = 0;
|
|
345
337
|
for (let k = 0; k < queryWordCount; k++) {
|
|
346
338
|
if (lowerContent.includes(queryWords[k])) matchedWords++;
|
|
@@ -349,7 +341,7 @@ export class HybridSearch {
|
|
|
349
341
|
}
|
|
350
342
|
}
|
|
351
343
|
|
|
352
|
-
|
|
344
|
+
|
|
353
345
|
if (recencyBoostEnabled) {
|
|
354
346
|
const mtime = this.fileModTimes.get(chunkInfo.file);
|
|
355
347
|
if (typeof mtime === 'number') {
|
|
@@ -367,10 +359,10 @@ export class HybridSearch {
|
|
|
367
359
|
}
|
|
368
360
|
}
|
|
369
361
|
|
|
370
|
-
|
|
362
|
+
|
|
371
363
|
scoredChunks.sort((a, b) => b.score - a.score);
|
|
372
364
|
|
|
373
|
-
|
|
365
|
+
|
|
374
366
|
if (deferTextMatch) {
|
|
375
367
|
const textMatchCount = Math.min(textMatchMaxCandidates, scoredChunks.length);
|
|
376
368
|
for (let i = 0; i < textMatchCount; i++) {
|
|
@@ -395,9 +387,9 @@ export class HybridSearch {
|
|
|
395
387
|
scoredChunks.sort((a, b) => b.score - a.score);
|
|
396
388
|
}
|
|
397
389
|
|
|
398
|
-
|
|
390
|
+
|
|
399
391
|
if (this.config.callGraphEnabled && this.config.callGraphBoost > 0) {
|
|
400
|
-
|
|
392
|
+
|
|
401
393
|
const topN = Math.min(5, scoredChunks.length);
|
|
402
394
|
const symbolsFromTop = new Set();
|
|
403
395
|
for (let i = 0; i < topN; i++) {
|
|
@@ -409,22 +401,22 @@ export class HybridSearch {
|
|
|
409
401
|
}
|
|
410
402
|
|
|
411
403
|
if (symbolsFromTop.size > 0) {
|
|
412
|
-
|
|
404
|
+
|
|
413
405
|
const relatedFiles = await this.cache.getRelatedFiles(Array.from(symbolsFromTop));
|
|
414
406
|
|
|
415
|
-
|
|
407
|
+
|
|
416
408
|
for (const chunk of scoredChunks) {
|
|
417
409
|
const proximity = relatedFiles.get(chunk.file);
|
|
418
410
|
if (proximity) {
|
|
419
411
|
chunk.score += proximity * this.config.callGraphBoost;
|
|
420
412
|
}
|
|
421
413
|
}
|
|
422
|
-
|
|
414
|
+
|
|
423
415
|
scoredChunks.sort((a, b) => b.score - a.score);
|
|
424
416
|
}
|
|
425
417
|
}
|
|
426
418
|
|
|
427
|
-
|
|
419
|
+
|
|
428
420
|
const results = await Promise.all(
|
|
429
421
|
scoredChunks.slice(0, maxResults).map(async (chunk) => {
|
|
430
422
|
if (chunk.content === undefined || chunk.content === null) {
|
|
@@ -478,7 +470,7 @@ export class HybridSearch {
|
|
|
478
470
|
}
|
|
479
471
|
}
|
|
480
472
|
|
|
481
|
-
|
|
473
|
+
|
|
482
474
|
export function getToolDefinition(config) {
|
|
483
475
|
return {
|
|
484
476
|
name: 'a_semantic_search',
|
|
@@ -510,12 +502,12 @@ export function getToolDefinition(config) {
|
|
|
510
502
|
};
|
|
511
503
|
}
|
|
512
504
|
|
|
513
|
-
|
|
505
|
+
|
|
514
506
|
export async function handleToolCall(request, hybridSearch) {
|
|
515
507
|
const args = request.params?.arguments || {};
|
|
516
508
|
const query = args.query;
|
|
517
509
|
|
|
518
|
-
|
|
510
|
+
|
|
519
511
|
if (typeof query !== 'string' || query.trim().length === 0) {
|
|
520
512
|
return {
|
|
521
513
|
content: [{ type: 'text', text: 'Error: A non-empty query string is required.' }],
|