@softerist/heuristic-mcp 3.2.2 → 3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +387 -376
- package/config.jsonc +800 -800
- package/features/ann-config.js +102 -110
- package/features/clear-cache.js +81 -84
- package/features/find-similar-code.js +265 -286
- package/features/hybrid-search.js +487 -536
- package/features/index-codebase.js +3139 -3270
- package/features/lifecycle.js +1041 -1063
- package/features/package-version.js +277 -291
- package/features/register.js +351 -370
- package/features/resources.js +115 -130
- package/features/set-workspace.js +214 -240
- package/index.js +742 -762
- package/lib/cache-ops.js +22 -22
- package/lib/cache-utils.js +465 -519
- package/lib/cache.js +1699 -1767
- package/lib/call-graph.js +396 -396
- package/lib/cli.js +232 -226
- package/lib/config.js +1483 -1495
- package/lib/constants.js +511 -492
- package/lib/embed-query-process.js +206 -212
- package/lib/embedding-process.js +434 -451
- package/lib/embedding-worker.js +862 -934
- package/lib/ignore-patterns.js +276 -316
- package/lib/json-worker.js +14 -14
- package/lib/json-writer.js +302 -310
- package/lib/logging.js +116 -127
- package/lib/memory-logger.js +13 -13
- package/lib/onnx-backend.js +188 -193
- package/lib/path-utils.js +18 -23
- package/lib/project-detector.js +82 -84
- package/lib/server-lifecycle.js +133 -145
- package/lib/settings-editor.js +738 -739
- package/lib/slice-normalize.js +25 -31
- package/lib/tokenizer.js +168 -203
- package/lib/utils.js +364 -409
- package/lib/vector-store-binary.js +811 -591
- package/lib/vector-store-sqlite.js +377 -414
- package/lib/workspace-env.js +32 -34
- package/mcp_config.json +9 -9
- package/package.json +86 -86
- package/scripts/clear-cache.js +20 -20
- package/scripts/download-model.js +43 -43
- package/scripts/mcp-launcher.js +49 -49
- package/scripts/postinstall.js +12 -12
- package/search-configs.js +36 -36
|
@@ -1,286 +1,265 @@
|
|
|
1
|
-
import path from 'path';
|
|
2
|
-
import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
this.
|
|
8
|
-
this.
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
let
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
},
|
|
242
|
-
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
const minSimilarity =
|
|
267
|
-
typeof args.minSimilarity === 'number' ? args.minSimilarity : 0.3;
|
|
268
|
-
|
|
269
|
-
const { results, message } = await findSimilarCode.execute({
|
|
270
|
-
code,
|
|
271
|
-
maxResults,
|
|
272
|
-
minSimilarity,
|
|
273
|
-
});
|
|
274
|
-
|
|
275
|
-
if (message) {
|
|
276
|
-
return {
|
|
277
|
-
content: [{ type: 'text', text: message }],
|
|
278
|
-
};
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
const formattedText = await findSimilarCode.formatResults(results);
|
|
282
|
-
|
|
283
|
-
return {
|
|
284
|
-
content: [{ type: 'text', text: formattedText }],
|
|
285
|
-
};
|
|
286
|
-
}
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Semantic "find similar code" search over the indexed chunk store.
 *
 * Collaborators (only the members actually used by this class are listed):
 *  - `embedder(text, { pooling, normalize })` -> object exposing `data` and,
 *    optionally, `dispose()`.
 *  - `cache`: `getVectorStore()`, `getChunkVector(chunk)`,
 *    `getChunkContent(chunk)`, `queryAnn(vector, k)` and the optional hooks
 *    `ensureLoaded()`, `startRead()`, `endRead()`.
 *  - `config`: `embeddingModel`, `searchDirectory`, `annEnabled`,
 *    `annMinCandidates`, `annMaxCandidates`, `annCandidateMultiplier`.
 */
export class FindSimilarCode {
  constructor(embedder, cache, config) {
    this.embedder = embedder;
    this.cache = cache;
    this.config = config;
  }

  /** Delegates to the cache to load the text of a chunk. */
  async getChunkContent(chunk) {
    return this.cache.getChunkContent(chunk);
  }

  /** Delegates to the cache to obtain the embedding vector of a chunk. */
  getChunkVector(chunk) {
    return this.cache.getChunkVector(chunk);
  }

  /**
   * Computes how many candidates to request from the ANN index.
   *
   * @param {number} maxResults - number of results the caller wants.
   * @param {number} totalChunks - size of the vector store.
   * @returns {number} a count that is at least `maxResults` and at most `totalChunks`.
   */
  getAnnCandidateCount(maxResults, totalChunks) {
    const minCandidates = this.config.annMinCandidates ?? 0;
    const maxCandidates = this.config.annMaxCandidates ?? totalChunks;
    const multiplier = this.config.annCandidateMultiplier ?? 1;
    const desired = Math.max(minCandidates, Math.ceil(maxResults * multiplier));
    const capped = Math.min(maxCandidates, desired);
    return Math.min(totalChunks, Math.max(maxResults, capped));
  }

  /**
   * Finds indexed chunks whose embedding is similar to `code`.
   *
   * @param {object} params
   * @param {string} params.code - snippet to search for; must be non-blank.
   * @param {number} [params.maxResults=5] - floored; non-positive or non-finite values fall back to 5.
   * @param {number} [params.minSimilarity=0.3] - clamped to [0, 1]; non-finite values fall back to 0.3.
   * @returns {Promise<{results: object[], message: (string|null)}>} `message` is
   *   an error/empty-state text, or a non-fatal warning that may accompany
   *   non-empty `results`.
   */
  async execute({ code, maxResults = 5, minSimilarity = 0.3 }) {
    if (typeof code !== 'string' || code.trim().length === 0) {
      return {
        results: [],
        message: 'Error: A non-empty code string is required.',
      };
    }
    const safeMaxResults =
      Number.isFinite(maxResults) && maxResults > 0 ? Math.floor(maxResults) : 5;
    const safeMinSimilarity = Number.isFinite(minSimilarity)
      ? Math.min(1, Math.max(0, minSimilarity))
      : 0.3;

    if (typeof this.cache.ensureLoaded === 'function') {
      await this.cache.ensureLoaded();
    }
    if (typeof this.cache.startRead === 'function') {
      this.cache.startRead();
    }

    try {
      const vectorStore = this.cache.getVectorStore();

      // A missing store is treated like an empty one instead of crashing on `.length`.
      if (!vectorStore || vectorStore.length === 0) {
        return {
          results: [],
          message: 'No code has been indexed yet. Please wait for initial indexing to complete.',
        };
      }

      let codeToEmbed = code;
      let warningMessage = null;

      const estimatedTokens = estimateTokens(code);
      const limit = getModelTokenLimit(this.config.embeddingModel);

      // Over-long input: search with the first chunk only and tell the caller.
      if (estimatedTokens > limit) {
        const chunks = smartChunk(code, 'input.txt', this.config);
        if (chunks.length > 0) {
          codeToEmbed = chunks[0].text;
          warningMessage = `Note: Input code was too long (${estimatedTokens} tokens). Searching using the first chunk (${chunks[0].tokenCount} tokens).`;
        }
      }

      const codeEmbed = await this.embedder(codeToEmbed, {
        pooling: 'mean',
        normalize: true,
      });

      // Copy the embedding out before releasing the embedder's output object.
      let codeVector;
      try {
        codeVector = new Float32Array(codeEmbed.data);
      } finally {
        if (typeof codeEmbed.dispose === 'function') {
          try {
            codeEmbed.dispose();
          } catch {
            // Best-effort cleanup: a failing dispose must not fail the search.
          }
        }
      }

      let candidates = vectorStore;
      let usedAnn = false;
      if (this.config.annEnabled) {
        const candidateCount = this.getAnnCandidateCount(safeMaxResults, vectorStore.length);
        const annLabels = await this.cache.queryAnn(codeVector, candidateCount);
        // Only trust the ANN answer when it can fill the requested result count.
        if (annLabels && annLabels.length >= safeMaxResults) {
          usedAnn = true;
          const seen = new Set();
          candidates = annLabels
            .map((index) => {
              if (seen.has(index)) return null;
              seen.add(index);
              return vectorStore[index];
            })
            .filter(Boolean);
        }
      }

      const normalizeText = (text) => text.trim().replace(/\s+/g, ' ');
      const normalizedInput = normalizeText(codeToEmbed);

      // Scores chunks in batches, yielding to the event loop between batches.
      const scoreAndFilter = async (chunks) => {
        const BATCH_SIZE = 500;
        const scored = [];

        for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
          const batch = chunks.slice(i, i + BATCH_SIZE);

          if (i > 0) {
            await new Promise((resolve) => setTimeout(resolve, 0));
          }

          for (const chunk of batch) {
            const vector = this.getChunkVector(chunk);
            if (!vector) continue;
            let similarity;
            try {
              similarity = dotSimilarity(codeVector, vector);
            } catch (err) {
              // Keep the first warning; skip chunks that cannot be scored.
              if (!warningMessage) {
                warningMessage = err?.message || 'Vector dimension mismatch.';
              }
              continue;
            }

            if (similarity >= safeMinSimilarity) {
              scored.push({ ...chunk, similarity });
            }
          }
        }

        return scored.sort((a, b) => b.similarity - a.similarity);
      };

      let filteredResults = await scoreAndFilter(candidates);

      // If ANN under-delivered, fall back to an exact scan on small stores only.
      const MAX_FULL_SCAN_SIZE = 5000;
      if (usedAnn && filteredResults.length < safeMaxResults) {
        if (vectorStore.length <= MAX_FULL_SCAN_SIZE) {
          filteredResults = await scoreAndFilter(vectorStore);
        }
      }

      const results = [];
      for (const chunk of filteredResults) {
        const content = chunk.content ?? (await this.getChunkContent(chunk));
        // Drop chunks that are textually identical to the query. Content may be
        // unavailable (null/undefined); such chunks cannot be compared and are kept.
        if (normalizedInput && typeof content === 'string') {
          if (normalizeText(content) === normalizedInput) continue;
        }
        results.push({ ...chunk, content });
        if (results.length >= safeMaxResults) break;
      }

      return {
        results,
        message:
          warningMessage ||
          (results.length === 0 ? 'No similar code found above the similarity threshold.' : null),
      };
    } finally {
      if (typeof this.cache.endRead === 'function') {
        this.cache.endRead();
      }
    }
  }

  /**
   * Renders results as Markdown sections, one fenced code block per result.
   *
   * @param {object[]} results - items carrying `file`, `startLine`, `endLine`,
   *   `similarity` and optionally `content`.
   * @returns {Promise<string>} Markdown text, or a fixed sentence when there is nothing to show.
   */
  async formatResults(results) {
    if (!Array.isArray(results) || results.length === 0) {
      return 'No similar code patterns found in the codebase.';
    }

    const formatted = await Promise.all(
      results.map(async (r, idx) => {
        const relPath = path.relative(this.config.searchDirectory, r.file);
        // Fall back to an empty body so a missing chunk never renders as "undefined".
        const content = (r.content ?? (await this.getChunkContent(r))) ?? '';
        return (
          `## Similar Code ${idx + 1} (Similarity: ${(r.similarity * 100).toFixed(1)}%)\n` +
          `**File:** \`${relPath}\`\n` +
          `**Lines:** ${r.startLine}-${r.endLine}\n\n` +
          '```' +
          path.extname(r.file).slice(1) +
          '\n' +
          content +
          '\n' +
          '```\n'
        );
      })
    );

    return formatted.join('\n');
  }
}
|
|
200
|
+
|
|
201
|
+
/**
 * Builds the MCP tool descriptor for the similar-code search tool.
 *
 * @param {object} _config - accepted for signature parity; not read here.
 * @returns {object} descriptor with `name`, `description`, `inputSchema` and `annotations`.
 */
export function getToolDefinition(_config) {
  // Arguments the tool accepts; only `code` is mandatory.
  const properties = {
    code: {
      type: 'string',
      description: 'The code snippet to find similar patterns for',
    },
    maxResults: {
      type: 'number',
      description: 'Maximum number of similar code chunks to return (default: 5)',
      default: 5,
    },
    minSimilarity: {
      type: 'number',
      description: 'Minimum similarity threshold 0-1 (default: 0.3 = 30%)',
      default: 0.3,
    },
  };

  // Behavioural hints advertised to the client.
  const annotations = {
    title: 'Find Similar Code',
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false,
  };

  const definition = {
    name: 'd_find_similar_code',
    description:
      'Find similar code patterns in the codebase. Given a code snippet, returns other code chunks that are semantically similar. Useful for finding duplicate code, understanding patterns, and refactoring opportunities.',
    inputSchema: {
      type: 'object',
      properties,
      required: ['code'],
    },
    annotations,
  };
  return definition;
}
|
|
235
|
+
|
|
236
|
+
/**
 * MCP entry point for the similar-code tool: validates arguments, runs the
 * search and wraps the outcome as a text content response.
 *
 * @param {object} request - tool call request; arguments are read from `request.params.arguments`.
 * @param {object} findSimilarCode - object exposing `execute({ code, maxResults, minSimilarity })`
 *   and `formatResults(results)`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 */
export async function handleToolCall(request, findSimilarCode) {
  const args = request.params?.arguments || {};
  const code = args.code;
  if (typeof code !== 'string' || code.trim().length === 0) {
    return {
      content: [{ type: 'text', text: 'Error: A non-empty code string is required.' }],
      isError: true,
    };
  }
  // Non-numeric options fall back to the documented defaults.
  const maxResults = typeof args.maxResults === 'number' ? args.maxResults : 5;
  const minSimilarity = typeof args.minSimilarity === 'number' ? args.minSimilarity : 0.3;

  const { results, message } = await findSimilarCode.execute({
    code,
    maxResults,
    minSimilarity,
  });

  const hasResults = Array.isArray(results) && results.length > 0;

  // A message with no results is the whole answer (error or empty state).
  if (message && !hasResults) {
    return {
      content: [{ type: 'text', text: message }],
    };
  }

  const formattedText = await findSimilarCode.formatResults(results);

  // A message alongside results is a non-fatal warning (e.g. the input was
  // truncated): show it above the results instead of discarding them.
  const text = message ? `${message}\n\n${formattedText}` : formattedText;

  return {
    content: [{ type: 'text', text }],
  };
}
|