@softerist/heuristic-mcp 3.2.2 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. package/README.md +387 -376
  2. package/config.jsonc +800 -800
  3. package/features/ann-config.js +102 -110
  4. package/features/clear-cache.js +81 -84
  5. package/features/find-similar-code.js +265 -286
  6. package/features/hybrid-search.js +487 -536
  7. package/features/index-codebase.js +3139 -3270
  8. package/features/lifecycle.js +1041 -1063
  9. package/features/package-version.js +277 -291
  10. package/features/register.js +351 -370
  11. package/features/resources.js +115 -130
  12. package/features/set-workspace.js +214 -240
  13. package/index.js +742 -762
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +465 -519
  16. package/lib/cache.js +1699 -1767
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +232 -226
  19. package/lib/config.js +1483 -1495
  20. package/lib/constants.js +511 -492
  21. package/lib/embed-query-process.js +206 -212
  22. package/lib/embedding-process.js +434 -451
  23. package/lib/embedding-worker.js +862 -934
  24. package/lib/ignore-patterns.js +276 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +302 -310
  27. package/lib/logging.js +116 -127
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +188 -193
  30. package/lib/path-utils.js +18 -23
  31. package/lib/project-detector.js +82 -84
  32. package/lib/server-lifecycle.js +133 -145
  33. package/lib/settings-editor.js +738 -739
  34. package/lib/slice-normalize.js +25 -31
  35. package/lib/tokenizer.js +168 -203
  36. package/lib/utils.js +364 -409
  37. package/lib/vector-store-binary.js +811 -591
  38. package/lib/vector-store-sqlite.js +377 -414
  39. package/lib/workspace-env.js +32 -34
  40. package/mcp_config.json +9 -9
  41. package/package.json +86 -86
  42. package/scripts/clear-cache.js +20 -20
  43. package/scripts/download-model.js +43 -43
  44. package/scripts/mcp-launcher.js +49 -49
  45. package/scripts/postinstall.js +12 -12
  46. package/search-configs.js +36 -36
@@ -1,286 +1,265 @@
1
- import path from 'path';
2
- import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
3
-
4
-
5
- export class FindSimilarCode {
6
- constructor(embedder, cache, config) {
7
- this.embedder = embedder;
8
- this.cache = cache;
9
- this.config = config;
10
- }
11
-
12
- async getChunkContent(chunk) {
13
- return this.cache.getChunkContent(chunk);
14
- }
15
-
16
- getChunkVector(chunk) {
17
- return this.cache.getChunkVector(chunk);
18
- }
19
-
20
- getAnnCandidateCount(maxResults, totalChunks) {
21
- const minCandidates = this.config.annMinCandidates ?? 0;
22
- const maxCandidates = this.config.annMaxCandidates ?? totalChunks;
23
- const multiplier = this.config.annCandidateMultiplier ?? 1;
24
- const desired = Math.max(minCandidates, Math.ceil(maxResults * multiplier));
25
- const capped = Math.min(maxCandidates, desired);
26
- return Math.min(totalChunks, Math.max(maxResults, capped));
27
- }
28
-
29
- async execute({ code, maxResults = 5, minSimilarity = 0.3 }) {
30
- if (typeof code !== 'string' || code.trim().length === 0) {
31
- return {
32
- results: [],
33
- message: 'Error: A non-empty code string is required.',
34
- };
35
- }
36
- const safeMaxResults =
37
- Number.isFinite(maxResults) && maxResults > 0 ? Math.floor(maxResults) : 5;
38
- const safeMinSimilarity = Number.isFinite(minSimilarity)
39
- ? Math.min(1, Math.max(0, minSimilarity))
40
- : 0.3;
41
-
42
- if (typeof this.cache.ensureLoaded === 'function') {
43
- await this.cache.ensureLoaded();
44
- }
45
- if (typeof this.cache.startRead === 'function') {
46
- this.cache.startRead();
47
- }
48
-
49
- try {
50
- const vectorStore = this.cache.getVectorStore();
51
-
52
- if (vectorStore.length === 0) {
53
- return {
54
- results: [],
55
- message: 'No code has been indexed yet. Please wait for initial indexing to complete.',
56
- };
57
- }
58
-
59
- let codeToEmbed = code;
60
- let warningMessage = null;
61
-
62
-
63
- const estimatedTokens = estimateTokens(code);
64
- const limit = getModelTokenLimit(this.config.embeddingModel);
65
-
66
-
67
- if (estimatedTokens > limit) {
68
-
69
-
70
-
71
- const chunks = smartChunk(code, 'input.txt', this.config);
72
- if (chunks.length > 0) {
73
- codeToEmbed = chunks[0].text;
74
- warningMessage = `Note: Input code was too long (${estimatedTokens} tokens). Searching using the first chunk (${chunks[0].tokenCount} tokens).`;
75
- }
76
- }
77
-
78
-
79
- const codeEmbed = await this.embedder(codeToEmbed, {
80
- pooling: 'mean',
81
- normalize: true,
82
- });
83
-
84
-
85
-
86
- let codeVector;
87
- try {
88
- codeVector = new Float32Array(codeEmbed.data);
89
- } finally {
90
- if (typeof codeEmbed.dispose === 'function') {
91
- try {
92
- codeEmbed.dispose();
93
- } catch {
94
-
95
- }
96
- }
97
- }
98
-
99
- let candidates = vectorStore;
100
- let usedAnn = false;
101
- if (this.config.annEnabled) {
102
- const candidateCount = this.getAnnCandidateCount(safeMaxResults, vectorStore.length);
103
- const annLabels = await this.cache.queryAnn(codeVector, candidateCount);
104
- if (annLabels && annLabels.length >= safeMaxResults) {
105
- usedAnn = true;
106
- const seen = new Set();
107
- candidates = annLabels
108
- .map((index) => {
109
- if (seen.has(index)) return null;
110
- seen.add(index);
111
- return vectorStore[index];
112
- })
113
- .filter(Boolean);
114
- }
115
- }
116
-
117
- const normalizeText = (text) => text.trim().replace(/\s+/g, ' ');
118
- const normalizedInput = normalizeText(codeToEmbed);
119
-
120
-
121
- const scoreAndFilter = async (chunks) => {
122
- const BATCH_SIZE = 500;
123
- const scored = [];
124
-
125
- for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
126
- const batch = chunks.slice(i, i + BATCH_SIZE);
127
-
128
-
129
- if (i > 0) {
130
- await new Promise((resolve) => setTimeout(resolve, 0));
131
- }
132
-
133
- for (const chunk of batch) {
134
- const vector = this.getChunkVector(chunk);
135
- if (!vector) continue;
136
- let similarity;
137
- try {
138
- similarity = dotSimilarity(codeVector, vector);
139
- } catch (err) {
140
- if (!warningMessage) {
141
- warningMessage = err?.message || 'Vector dimension mismatch.';
142
- }
143
- continue;
144
- }
145
-
146
- if (similarity >= safeMinSimilarity) {
147
- scored.push({ ...chunk, similarity });
148
- }
149
- }
150
- }
151
-
152
- return scored.sort((a, b) => b.similarity - a.similarity);
153
- };
154
-
155
- let filteredResults = await scoreAndFilter(candidates);
156
-
157
-
158
-
159
- const MAX_FULL_SCAN_SIZE = 5000;
160
- if (usedAnn && filteredResults.length < safeMaxResults) {
161
- if (vectorStore.length <= MAX_FULL_SCAN_SIZE) {
162
- filteredResults = await scoreAndFilter(vectorStore);
163
- } else {
164
-
165
- }
166
- }
167
- const results = [];
168
- for (const chunk of filteredResults) {
169
- const content = chunk.content ?? (await this.getChunkContent(chunk));
170
- if (normalizedInput) {
171
- const normalizedChunk = normalizeText(content);
172
- if (normalizedChunk === normalizedInput) continue;
173
- }
174
- results.push({ ...chunk, content });
175
- if (results.length >= safeMaxResults) break;
176
- }
177
-
178
- return {
179
- results,
180
- message:
181
- warningMessage ||
182
- (results.length === 0 ? 'No similar code found above the similarity threshold.' : null),
183
- };
184
- } finally {
185
- if (typeof this.cache.endRead === 'function') {
186
- this.cache.endRead();
187
- }
188
- }
189
- }
190
-
191
- async formatResults(results) {
192
- if (results.length === 0) {
193
- return 'No similar code patterns found in the codebase.';
194
- }
195
-
196
- const formatted = await Promise.all(
197
- results.map(async (r, idx) => {
198
- const relPath = path.relative(this.config.searchDirectory, r.file);
199
- const content = r.content ?? (await this.getChunkContent(r));
200
- return (
201
- `## Similar Code ${idx + 1} (Similarity: ${(r.similarity * 100).toFixed(1)}%)\n` +
202
- `**File:** \`${relPath}\`\n` +
203
- `**Lines:** ${r.startLine}-${r.endLine}\n\n` +
204
- '```' +
205
- path.extname(r.file).slice(1) +
206
- '\n' +
207
- content +
208
- '\n' +
209
- '```\n'
210
- );
211
- })
212
- );
213
-
214
- return formatted.join('\n');
215
- }
216
- }
217
-
218
-
219
- export function getToolDefinition(_config) {
220
- return {
221
- name: 'd_find_similar_code',
222
- description:
223
- 'Find similar code patterns in the codebase. Given a code snippet, returns other code chunks that are semantically similar. Useful for finding duplicate code, understanding patterns, and refactoring opportunities.',
224
- inputSchema: {
225
- type: 'object',
226
- properties: {
227
- code: {
228
- type: 'string',
229
- description: 'The code snippet to find similar patterns for',
230
- },
231
- maxResults: {
232
- type: 'number',
233
- description: 'Maximum number of similar code chunks to return (default: 5)',
234
- default: 5,
235
- },
236
- minSimilarity: {
237
- type: 'number',
238
- description: 'Minimum similarity threshold 0-1 (default: 0.3 = 30%)',
239
- default: 0.3,
240
- },
241
- },
242
- required: ['code'],
243
- },
244
- annotations: {
245
- title: 'Find Similar Code',
246
- readOnlyHint: true,
247
- destructiveHint: false,
248
- idempotentHint: true,
249
- openWorldHint: false,
250
- },
251
- };
252
- }
253
-
254
-
255
- export async function handleToolCall(request, findSimilarCode) {
256
- const args = request.params?.arguments || {};
257
- const code = args.code;
258
- if (typeof code !== 'string' || code.trim().length === 0) {
259
- return {
260
- content: [{ type: 'text', text: 'Error: A non-empty code string is required.' }],
261
- isError: true,
262
- };
263
- }
264
- const maxResults =
265
- typeof args.maxResults === 'number' ? args.maxResults : 5;
266
- const minSimilarity =
267
- typeof args.minSimilarity === 'number' ? args.minSimilarity : 0.3;
268
-
269
- const { results, message } = await findSimilarCode.execute({
270
- code,
271
- maxResults,
272
- minSimilarity,
273
- });
274
-
275
- if (message) {
276
- return {
277
- content: [{ type: 'text', text: message }],
278
- };
279
- }
280
-
281
- const formattedText = await findSimilarCode.formatResults(results);
282
-
283
- return {
284
- content: [{ type: 'text', text: formattedText }],
285
- };
286
- }
1
+ import path from 'path';
2
+ import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
3
+
4
/**
 * Finds indexed code chunks whose embeddings are similar to a given snippet.
 *
 * Collaborators are injected through the constructor:
 * - `embedder(text, options)` is awaited and must resolve to an object exposing
 *   `data` (copied into a Float32Array) and, optionally, `dispose()`.
 * - `cache` supplies `getVectorStore()`, `getChunkVector()`, `getChunkContent()`
 *   and `queryAnn()`; `ensureLoaded()`, `startRead()` and `endRead()` are called
 *   only when present.
 * - `config` is read for `embeddingModel`, `searchDirectory`, `annEnabled`,
 *   `annMinCandidates`, `annMaxCandidates` and `annCandidateMultiplier`, and is
 *   passed through to `smartChunk`.
 */
export class FindSimilarCode {
  constructor(embedder, cache, config) {
    this.embedder = embedder;
    this.cache = cache;
    this.config = config;
  }

  /** Resolves the text of a chunk by delegating to the cache. */
  async getChunkContent(chunk) {
    return this.cache.getChunkContent(chunk);
  }

  /** Returns the embedding vector of a chunk by delegating to the cache. */
  getChunkVector(chunk) {
    return this.cache.getChunkVector(chunk);
  }

  /**
   * Computes how many candidates to request from the ANN index.
   *
   * @param {number} maxResults - Number of results the caller wants.
   * @param {number} totalChunks - Size of the vector store.
   * @returns {number} At least `maxResults`, never more than `totalChunks`.
   */
  getAnnCandidateCount(maxResults, totalChunks) {
    const minCandidates = this.config.annMinCandidates ?? 0;
    const maxCandidates = this.config.annMaxCandidates ?? totalChunks;
    const multiplier = this.config.annCandidateMultiplier ?? 1;
    const desired = Math.max(minCandidates, Math.ceil(maxResults * multiplier));
    const capped = Math.min(maxCandidates, desired);
    return Math.min(totalChunks, Math.max(maxResults, capped));
  }

  /**
   * Searches the index for chunks similar to `code`.
   *
   * @param {object} params
   * @param {string} params.code - Snippet to search for; must be non-blank.
   * @param {number} [params.maxResults=5] - Result limit; non-finite or
   *   non-positive values fall back to 5, fractions are floored (minimum 1).
   * @param {number} [params.minSimilarity=0.3] - Threshold, clamped to [0, 1];
   *   non-finite values fall back to 0.3.
   * @returns {Promise<{results: object[], message: (string|null)}>} Matching
   *   chunks (each with `similarity` and `content`) ordered by descending
   *   similarity, plus an optional note. A note can accompany non-empty results.
   */
  async execute({ code, maxResults = 5, minSimilarity = 0.3 }) {
    if (typeof code !== 'string' || code.trim().length === 0) {
      return {
        results: [],
        message: 'Error: A non-empty code string is required.',
      };
    }

    // Clamp after flooring: a fractional limit such as 0.5 would otherwise
    // become 0 and drive the ANN and result-limit logic with a limit of zero.
    const safeMaxResults =
      Number.isFinite(maxResults) && maxResults > 0 ? Math.max(1, Math.floor(maxResults)) : 5;
    const safeMinSimilarity = Number.isFinite(minSimilarity)
      ? Math.min(1, Math.max(0, minSimilarity))
      : 0.3;

    if (typeof this.cache.ensureLoaded === 'function') {
      await this.cache.ensureLoaded();
    }
    if (typeof this.cache.startRead === 'function') {
      this.cache.startRead();
    }

    try {
      const vectorStore = this.cache.getVectorStore();

      if (vectorStore.length === 0) {
        return {
          results: [],
          message: 'No code has been indexed yet. Please wait for initial indexing to complete.',
        };
      }

      let codeToEmbed = code;
      let warningMessage = null;

      // Inputs estimated to exceed the model's token limit are reduced to
      // their first chunk, and the caller is told so through the message.
      const estimatedTokens = estimateTokens(code);
      const limit = getModelTokenLimit(this.config.embeddingModel);
      if (estimatedTokens > limit) {
        const chunks = smartChunk(code, 'input.txt', this.config);
        if (chunks.length > 0) {
          codeToEmbed = chunks[0].text;
          warningMessage = `Note: Input code was too long (${estimatedTokens} tokens). Searching using the first chunk (${chunks[0].tokenCount} tokens).`;
        }
      }

      const codeEmbed = await this.embedder(codeToEmbed, {
        pooling: 'mean',
        normalize: true,
      });

      // Copy the embedding before releasing the embedder's output object.
      let codeVector;
      try {
        codeVector = new Float32Array(codeEmbed.data);
      } finally {
        if (typeof codeEmbed.dispose === 'function') {
          try {
            codeEmbed.dispose();
          } catch {
            // Best-effort cleanup: a failing dispose must not fail the search.
          }
        }
      }

      let candidates = vectorStore;
      let usedAnn = false;
      if (this.config.annEnabled) {
        const candidateCount = this.getAnnCandidateCount(safeMaxResults, vectorStore.length);
        const annLabels = await this.cache.queryAnn(codeVector, candidateCount);
        // Only trust the ANN answer when it can fill the requested page.
        if (annLabels && annLabels.length >= safeMaxResults) {
          usedAnn = true;
          const seen = new Set();
          candidates = annLabels
            .map((index) => {
              if (seen.has(index)) return null;
              seen.add(index);
              return vectorStore[index];
            })
            .filter(Boolean);
        }
      }

      const normalizeText = (text) => text.trim().replace(/\s+/g, ' ');
      const normalizedInput = normalizeText(codeToEmbed);

      // Scores chunks against the query vector, keeps those at or above the
      // threshold, and returns them sorted by descending similarity.
      const scoreAndFilter = async (chunks) => {
        const BATCH_SIZE = 500;
        const scored = [];

        for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
          const batch = chunks.slice(i, i + BATCH_SIZE);

          // Yield to the event loop between batches.
          if (i > 0) {
            await new Promise((resolve) => setTimeout(resolve, 0));
          }

          for (const chunk of batch) {
            const vector = this.getChunkVector(chunk);
            if (!vector) continue;
            let similarity;
            try {
              similarity = dotSimilarity(codeVector, vector);
            } catch (err) {
              // Record the first scoring failure as the note and skip the chunk.
              if (!warningMessage) {
                warningMessage = err?.message || 'Vector dimension mismatch.';
              }
              continue;
            }

            if (similarity >= safeMinSimilarity) {
              scored.push({ ...chunk, similarity });
            }
          }
        }

        return scored.sort((a, b) => b.similarity - a.similarity);
      };

      let filteredResults = await scoreAndFilter(candidates);

      // If the ANN candidates could not fill the page, rescan the whole store,
      // but only when it is small enough for an exhaustive scan.
      const MAX_FULL_SCAN_SIZE = 5000;
      if (
        usedAnn &&
        filteredResults.length < safeMaxResults &&
        vectorStore.length <= MAX_FULL_SCAN_SIZE
      ) {
        filteredResults = await scoreAndFilter(vectorStore);
      }

      const results = [];
      for (const chunk of filteredResults) {
        const content = chunk.content ?? (await this.getChunkContent(chunk));
        // Drop chunks that are the query itself (ignoring whitespace). Content
        // that is not a string cannot be compared, so it is never a duplicate.
        if (
          normalizedInput &&
          typeof content === 'string' &&
          normalizeText(content) === normalizedInput
        ) {
          continue;
        }
        results.push({ ...chunk, content });
        if (results.length >= safeMaxResults) break;
      }

      return {
        results,
        message:
          warningMessage ||
          (results.length === 0 ? 'No similar code found above the similarity threshold.' : null),
      };
    } finally {
      if (typeof this.cache.endRead === 'function') {
        this.cache.endRead();
      }
    }
  }

  /**
   * Renders results as Markdown sections joined by a newline.
   *
   * @param {object[]} results - Items with `file`, `startLine`, `endLine`,
   *   `similarity` and optionally `content` (fetched from the cache if absent).
   * @returns {Promise<string>} Markdown text, or a fixed sentence when empty.
   */
  async formatResults(results) {
    if (results.length === 0) {
      return 'No similar code patterns found in the codebase.';
    }

    const fence = '```';
    const formatted = await Promise.all(
      results.map(async (r, idx) => {
        const relPath = path.relative(this.config.searchDirectory, r.file);
        // Missing content renders as an empty block rather than "null".
        const content = (r.content ?? (await this.getChunkContent(r))) ?? '';
        const percent = (r.similarity * 100).toFixed(1);
        const language = path.extname(r.file).slice(1);
        return (
          `## Similar Code ${idx + 1} (Similarity: ${percent}%)\n` +
          `**File:** \`${relPath}\`\n` +
          `**Lines:** ${r.startLine}-${r.endLine}\n\n` +
          `${fence}${language}\n${content}\n${fence}\n`
        );
      })
    );

    return formatted.join('\n');
  }
}
200
+
201
/**
 * Builds the static tool descriptor for the similar-code search tool.
 *
 * @param {object} [_config] - Accepted for signature parity; not read.
 * @returns {object} Descriptor with `name`, `description`, `inputSchema`
 *   (only `code` is required) and read-only `annotations`.
 */
export function getToolDefinition(_config) {
  const codeProperty = {
    type: 'string',
    description: 'The code snippet to find similar patterns for',
  };
  const maxResultsProperty = {
    type: 'number',
    description: 'Maximum number of similar code chunks to return (default: 5)',
    default: 5,
  };
  const minSimilarityProperty = {
    type: 'number',
    description: 'Minimum similarity threshold 0-1 (default: 0.3 = 30%)',
    default: 0.3,
  };

  const inputSchema = {
    type: 'object',
    properties: {
      code: codeProperty,
      maxResults: maxResultsProperty,
      minSimilarity: minSimilarityProperty,
    },
    required: ['code'],
  };

  const annotations = {
    title: 'Find Similar Code',
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false,
  };

  return {
    name: 'd_find_similar_code',
    description:
      'Find similar code patterns in the codebase. Given a code snippet, returns other code chunks that are semantically similar. Useful for finding duplicate code, understanding patterns, and refactoring opportunities.',
    inputSchema,
    annotations,
  };
}
235
+
236
/**
 * Handles a tool invocation for the similar-code search.
 *
 * Reads `code`, `maxResults` and `minSimilarity` from
 * `request.params.arguments`, runs the search and renders a text response.
 *
 * @param {object} request - Tool call request; arguments are optional except `code`.
 * @param {object} findSimilarCode - Object exposing `execute()` and `formatResults()`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   An error response when `code` is missing or blank; otherwise the formatted
 *   results, preceded by the search's note when it returned one.
 */
export async function handleToolCall(request, findSimilarCode) {
  const args = request.params?.arguments || {};
  const { code } = args;
  if (typeof code !== 'string' || code.trim().length === 0) {
    return {
      content: [{ type: 'text', text: 'Error: A non-empty code string is required.' }],
      isError: true,
    };
  }

  // Non-numeric values fall back to the defaults advertised in the schema.
  const maxResults = typeof args.maxResults === 'number' ? args.maxResults : 5;
  const minSimilarity = typeof args.minSimilarity === 'number' ? args.minSimilarity : 0.3;

  const { results, message } = await findSimilarCode.execute({
    code,
    maxResults,
    minSimilarity,
  });

  const hasResults = Array.isArray(results) && results.length > 0;

  // With nothing to show, the message is the whole answer.
  if (message && !hasResults) {
    return {
      content: [{ type: 'text', text: message }],
    };
  }

  // A note can accompany real results (for example when the input was
  // truncated); returning only the note would silently drop those results.
  const formattedText = await findSimilarCode.formatResults(results);
  const text = message ? `${message}\n\n${formattedText}` : formattedText;

  return {
    content: [{ type: 'text', text }],
  };
}