@softerist/heuristic-mcp 3.0.15 → 3.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +104 -104
  2. package/config.jsonc +173 -173
  3. package/features/ann-config.js +131 -0
  4. package/features/clear-cache.js +84 -0
  5. package/features/find-similar-code.js +291 -0
  6. package/features/hybrid-search.js +544 -0
  7. package/features/index-codebase.js +3268 -0
  8. package/features/lifecycle.js +1189 -0
  9. package/features/package-version.js +302 -0
  10. package/features/register.js +408 -0
  11. package/features/resources.js +156 -0
  12. package/features/set-workspace.js +265 -0
  13. package/index.js +96 -96
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +565 -565
  16. package/lib/cache.js +1870 -1870
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +1 -1
  19. package/lib/config.js +517 -517
  20. package/lib/constants.js +39 -39
  21. package/lib/embed-query-process.js +7 -7
  22. package/lib/embedding-process.js +7 -7
  23. package/lib/embedding-worker.js +299 -299
  24. package/lib/ignore-patterns.js +316 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +337 -337
  27. package/lib/logging.js +164 -164
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +193 -193
  30. package/lib/project-detector.js +84 -84
  31. package/lib/server-lifecycle.js +165 -165
  32. package/lib/settings-editor.js +754 -754
  33. package/lib/tokenizer.js +256 -256
  34. package/lib/utils.js +428 -428
  35. package/lib/vector-store-binary.js +627 -627
  36. package/lib/vector-store-sqlite.js +95 -95
  37. package/lib/workspace-env.js +28 -28
  38. package/mcp_config.json +9 -9
  39. package/package.json +86 -75
  40. package/scripts/clear-cache.js +20 -0
  41. package/scripts/download-model.js +43 -0
  42. package/scripts/mcp-launcher.js +49 -0
  43. package/scripts/postinstall.js +12 -0
  44. package/search-configs.js +36 -36
  45. package/.prettierrc +0 -7
  46. package/debug-pids.js +0 -30
  47. package/eslint.config.js +0 -36
  48. package/specs/plan.md +0 -23
  49. package/vitest.config.js +0 -39
@@ -0,0 +1,131 @@
1
/**
 * ANN Config Tool - runtime tuning of ANN search parameters.
 *
 * Allows adjusting efSearch on the fly for the speed/accuracy tradeoff,
 * querying current ANN index statistics, and forcing an index rebuild.
 */
export class AnnConfigTool {
  /**
   * @param {object} cache - Object providing getAnnStats(), setEfSearch(),
   *   invalidateAnnIndex() and ensureAnnIndex().
   * @param {object} config - Configuration object; stored on the instance but
   *   not read by this class.
   */
  constructor(cache, config) {
    this.cache = cache;
    this.config = config;
  }

  /**
   * Run one ANN action.
   *
   * - `stats` (default): returns whatever `cache.getAnnStats()` returns.
   * - `set_ef_search`: returns whatever `cache.setEfSearch(efSearch)` returns.
   * - `rebuild`: invalidates the index, rebuilds it and reports the outcome.
   *
   * @param {{ action?: string, efSearch?: number }} [args] - Tool arguments.
   * @returns {Promise<object>} Action result; failures detected here have the
   *   shape `{ success: false, error }`.
   */
  async execute(args = {}) {
    const action = args?.action || 'stats';

    if (action === 'stats') {
      return this.cache.getAnnStats();
    }

    if (action === 'set_ef_search') {
      const efSearch = args.efSearch;
      // `== null` treats an explicit null like a missing value.
      if (efSearch == null) {
        return {
          success: false,
          error: 'efSearch parameter is required for set_ef_search action',
        };
      }
      return this.cache.setEfSearch(efSearch);
    }

    if (action === 'rebuild') {
      // Force invalidate and rebuild the ANN index
      this.cache.invalidateAnnIndex();
      const index = await this.cache.ensureAnnIndex();
      // Derive the flag and the message from the same check so they can never
      // disagree (an `undefined` index used to yield success with a failure message).
      const rebuilt = Boolean(index);
      return {
        success: rebuilt,
        message: rebuilt
          ? 'ANN index rebuilt successfully'
          : 'ANN index rebuild failed or not available',
      };
    }

    return {
      success: false,
      error: `Unknown action: ${action}. Valid actions: stats, set_ef_search, rebuild`,
    };
  }

  /**
   * Turn an `execute()` result into display text.
   *
   * @param {object} result - Value returned by `execute()`.
   * @returns {string} `Error: ...` for failures, a Markdown report when the
   *   result carries an `enabled` field (stats), otherwise pretty-printed JSON.
   */
  formatResults(result) {
    if (result.success === false) {
      return `Error: ${result.error}`;
    }

    if (result.enabled !== undefined) {
      // Stats response
      const lines = [
        '## ANN Index Statistics',
        '',
        `- **Enabled**: ${result.enabled}`,
        `- **Index Loaded**: ${result.indexLoaded}`,
        `- **Dirty (needs rebuild)**: ${result.dirty}`,
        `- **Vector Count**: ${result.vectorCount}`,
        `- **Min Chunks for ANN**: ${result.minChunksForAnn}`,
        '',
      ];

      if (result.config) {
        lines.push(
          '### Current Config',
          '',
          `- **Metric**: ${result.config.metric}`,
          `- **Dimensions**: ${result.config.dim}`,
          `- **Indexed Vectors**: ${result.config.count}`,
          `- **M (connectivity)**: ${result.config.m}`,
          `- **efConstruction**: ${result.config.efConstruction}`,
          `- **efSearch**: ${result.config.efSearch}`
        );
      } else {
        lines.push('*No active ANN index.*');
      }

      return `${lines.join('\n')}\n`;
    }

    // Other responses (set_ef_search, rebuild)
    return JSON.stringify(result, null, 2);
  }
}
86
+
87
/**
 * MCP tool definition for the ANN configuration tool.
 *
 * @returns {object} A freshly built descriptor with name, description,
 *   input schema and annotations.
 */
export function getToolDefinition() {
  const actionProperty = {
    type: 'string',
    enum: ['stats', 'set_ef_search', 'rebuild'],
    description:
      "Action to perform. 'stats' shows current config, 'set_ef_search' changes the search parameter, 'rebuild' forces index rebuild.",
    default: 'stats',
  };

  const efSearchProperty = {
    type: 'number',
    description:
      'New efSearch value (only for set_ef_search action). Higher = more accurate but slower. Typical range: 16-512.',
    minimum: 1,
    maximum: 1000,
  };

  const inputSchema = {
    type: 'object',
    properties: {
      action: actionProperty,
      efSearch: efSearchProperty,
    },
  };

  const annotations = {
    title: 'ANN Index Configuration',
    readOnlyHint: false,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false,
  };

  return {
    name: 'd_ann_config',
    description:
      "Configure and monitor the ANN (Approximate Nearest Neighbor) search index. Actions: 'stats' (view current config), 'set_ef_search' (tune search accuracy/speed), 'rebuild' (force index rebuild).",
    inputSchema,
    annotations,
  };
}
121
+
122
/**
 * Tool handler: runs the ANN config tool and wraps its output as MCP text content.
 *
 * @param {object} request - Tool call request; arguments are read from
 *   `request.params.arguments` and default to `{}` when absent.
 * @param {object} annConfigTool - Object exposing `execute(args)` and
 *   `formatResults(result)`.
 * @returns {Promise<object>} `{ content: [{ type: 'text', text }] }`, with
 *   `isError: true` added when the tool reported `success: false`.
 */
export async function handleToolCall(request, annConfigTool) {
  // Optional chaining keeps a request without `params` from throwing.
  const args = request?.params?.arguments || {};
  const result = await annConfigTool.execute(args);
  const formattedText = annConfigTool.formatResults(result);

  const response = {
    content: [{ type: 'text', text: formattedText }],
  };

  // Flag failures so clients can tell them apart from successful output.
  if (result?.success === false) {
    response.isError = true;
  }

  return response;
}
@@ -0,0 +1,84 @@
1
/**
 * Clears the embeddings cache, refusing to run while indexing, saving or
 * another clear is in flight.
 */
export class CacheClearer {
  /**
   * @param {*} embedder - Accepted for signature compatibility; not stored.
   * @param {object} cache - Object exposing `clear()` and an `isSaving` flag.
   * @param {object} config - Configuration; `cacheDirectory` is echoed in the result.
   * @param {object} [indexer] - Optional object exposing an `isIndexing` flag.
   */
  constructor(embedder, cache, config, indexer) {
    this.cache = cache;
    this.config = config;
    this.indexer = indexer;
    this.isClearing = false;
  }

  /**
   * Clear the cache.
   *
   * @returns {Promise<{success: boolean, message: string, cacheDirectory: *}>}
   * @throws {Error} When indexing is running, the cache is being saved, or a
   *   clear is already in progress; errors from `cache.clear()` propagate too.
   */
  async execute() {
    // Indexing in progress blocks the clear.
    const indexingActive = this.indexer && this.indexer.isIndexing;
    if (indexingActive) {
      throw new Error(
        'Cannot clear cache while indexing is in progress. Please wait for indexing to complete.'
      );
    }

    // A save in flight blocks it as well (race condition prevention).
    if (this.cache.isSaving) {
      throw new Error(
        'Cannot clear cache while cache is being saved. Please try again in a moment.'
      );
    }

    // Only one clear at a time.
    if (this.isClearing) {
      throw new Error('Cache clear operation already in progress. Please wait for it to complete.');
    }

    this.isClearing = true;

    try {
      await this.cache.clear();
      const outcome = {
        success: true,
        message: `Cache cleared successfully. Next indexing will be a full rebuild.`,
        cacheDirectory: this.config.cacheDirectory,
      };
      return outcome;
    } finally {
      // Always release the guard, whether the clear succeeded or threw.
      this.isClearing = false;
    }
  }
}
43
+
44
/**
 * MCP tool definition for the clear-cache tool.
 *
 * @returns {object} A freshly built descriptor; the tool takes no arguments.
 */
export function getToolDefinition() {
  const inputSchema = {
    type: 'object',
    properties: {},
  };

  const annotations = {
    title: 'Clear Embeddings Cache',
    readOnlyHint: false,
    destructiveHint: true,
    idempotentHint: true,
    openWorldHint: false,
  };

  return {
    name: 'c_clear_cache',
    description:
      'Clears the embeddings cache, forcing a complete reindex on next search or manual index operation. Useful when encountering cache corruption or after major codebase changes.',
    inputSchema,
    annotations,
  };
}
62
+
63
/**
 * Tool handler: clears the cache and reports the outcome as MCP text content.
 *
 * @param {object} request - Tool call request; not read by this handler.
 * @param {object} cacheClearer - Object exposing `execute()`, which resolves
 *   to `{ message, cacheDirectory }` or throws.
 * @returns {Promise<object>} `{ content: [{ type: 'text', text }] }`; when
 *   `execute()` throws, the failure text is returned with `isError: true`.
 */
export async function handleToolCall(request, cacheClearer) {
  try {
    const { message, cacheDirectory } = await cacheClearer.execute();
    return {
      content: [
        {
          type: 'text',
          text: `${message}\n\nCache directory: ${cacheDirectory}`,
        },
      ],
    };
  } catch (error) {
    // Thrown values are not guaranteed to be Error instances.
    const reason = error?.message ?? String(error);
    return {
      content: [
        {
          type: 'text',
          text: `Failed to clear cache: ${reason}`,
        },
      ],
      // Mark the result as an error so clients do not treat it as success.
      isError: true,
    };
  }
}
@@ -0,0 +1,291 @@
1
+ import path from 'path';
2
+ import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
3
+
4
/**
 * FindSimilarCode feature.
 * Takes a code snippet and looks for similar chunks elsewhere in the indexed codebase.
 */
export class FindSimilarCode {
  /**
   * @param {Function} embedder - Called as `embedder(text, { pooling, normalize })`;
   *   its resolved value must expose `data` and may expose `dispose()`.
   * @param {object} cache - Vector store / chunk access object.
   * @param {object} config - Configuration (ANN settings, embedding model, search directory).
   */
  constructor(embedder, cache, config) {
    this.embedder = embedder;
    this.cache = cache;
    this.config = config;
  }

  /** Delegates to the cache to obtain a chunk's text. */
  async getChunkContent(chunk) {
    return this.cache.getChunkContent(chunk);
  }

  /** Delegates to the cache to obtain a chunk's embedding vector. */
  getChunkVector(chunk) {
    return this.cache.getChunkVector(chunk);
  }

  /**
   * Number of candidates to request from the ANN index.
   *
   * @param {number} maxResults - Results the caller wants.
   * @param {number} totalChunks - Size of the vector store.
   * @returns {number} At least `maxResults` (unless the store is smaller),
   *   never more than `totalChunks`.
   */
  getAnnCandidateCount(maxResults, totalChunks) {
    const lowerBound = this.config.annMinCandidates ?? 0;
    const upperBound = this.config.annMaxCandidates ?? totalChunks;
    const factor = this.config.annCandidateMultiplier ?? 1;

    const wanted = Math.max(lowerBound, Math.ceil(maxResults * factor));
    const bounded = Math.min(upperBound, wanted);
    return Math.min(totalChunks, Math.max(maxResults, bounded));
  }

  /**
   * Search the index for chunks similar to `code`.
   *
   * @param {{ code: string, maxResults?: number, minSimilarity?: number }} params
   * @returns {Promise<{ results: object[], message: (string|null) }>} Matching
   *   chunks (each with `similarity` and `content`) plus an optional note.
   */
  async execute({ code, maxResults = 5, minSimilarity = 0.3 }) {
    const codeIsBlank = typeof code !== 'string' || code.trim().length === 0;
    if (codeIsBlank) {
      return {
        results: [],
        message: 'Error: A non-empty code string is required.',
      };
    }

    // Sanitize numeric options; unusable values fall back to the defaults.
    const limitIsUsable = Number.isFinite(maxResults) && maxResults > 0;
    const resultLimit = limitIsUsable ? Math.floor(maxResults) : 5;

    let similarityFloor = 0.3;
    if (Number.isFinite(minSimilarity)) {
      similarityFloor = Math.min(1, Math.max(0, minSimilarity));
    }

    if (typeof this.cache.ensureLoaded === 'function') {
      await this.cache.ensureLoaded();
    }
    if (typeof this.cache.startRead === 'function') {
      this.cache.startRead();
    }

    try {
      const store = this.cache.getVectorStore();

      if (store.length === 0) {
        return {
          results: [],
          message: 'No code has been indexed yet. Please wait for initial indexing to complete.',
        };
      }

      let snippet = code;
      let notice = null;

      // Oversized input: fall back to the first chunk produced by smartChunk.
      const tokenEstimate = estimateTokens(code);
      const tokenLimit = getModelTokenLimit(this.config.embeddingModel);

      if (tokenEstimate > tokenLimit) {
        // The language is unknown, so a placeholder .txt file name is passed.
        const pieces = smartChunk(code, 'input.txt', this.config);
        if (pieces.length > 0) {
          const head = pieces[0];
          snippet = head.text;
          notice = `Note: Input code was too long (${tokenEstimate} tokens). Searching using the first chunk (${head.tokenCount} tokens).`;
        }
      }

      // Embed the (possibly shortened) snippet.
      const embedding = await this.embedder(snippet, {
        pooling: 'mean',
        normalize: true,
      });

      // CRITICAL: copy the data into a fresh Float32Array before disposing, so
      // later reads never touch a buffer owned by the embedding result.
      let queryVector;
      try {
        queryVector = new Float32Array(embedding.data);
      } finally {
        if (typeof embedding.dispose === 'function') {
          try {
            embedding.dispose();
          } catch {
            /* ignore */
          }
        }
      }

      let pool = store;
      let annUsed = false;
      if (this.config.annEnabled) {
        const wanted = this.getAnnCandidateCount(resultLimit, store.length);
        const labels = await this.cache.queryAnn(queryVector, wanted);
        if (labels && labels.length >= resultLimit) {
          annUsed = true;
          const visited = new Set();
          // Drop repeated labels, resolve them to chunks, discard misses.
          pool = labels
            .filter((label) => {
              if (visited.has(label)) return false;
              visited.add(label);
              return true;
            })
            .map((label) => store[label])
            .filter(Boolean);
        }
      }

      const collapseWhitespace = (text) => text.trim().replace(/\s+/g, ' ');
      const normalizedSnippet = collapseWhitespace(snippet);

      /**
       * Scores chunks in batches, yielding to the event loop between batches.
       */
      const rankCandidates = async (chunks) => {
        const BATCH_SIZE = 500;
        const hits = [];

        for (let offset = 0; offset < chunks.length; offset += BATCH_SIZE) {
          const batch = chunks.slice(offset, offset + BATCH_SIZE);

          // Let other work run before every batch except the first.
          if (offset > 0) {
            await new Promise((resolve) => setTimeout(resolve, 0));
          }

          for (const chunk of batch) {
            const vector = this.getChunkVector(chunk);
            if (!vector) continue;

            let similarity;
            try {
              similarity = dotSimilarity(queryVector, vector);
            } catch (err) {
              // Keep only the first scoring failure as the user-facing note.
              if (!notice) {
                notice = err?.message || 'Vector dimension mismatch.';
              }
              continue;
            }

            if (similarity >= similarityFloor) {
              hits.push({ ...chunk, similarity });
            }
          }
        }

        return hits.sort((left, right) => right.similarity - left.similarity);
      };

      let ranked = await rankCandidates(pool);

      // ANN came up short: rescan everything, but only for stores small enough
      // that a full scan will not cause a long pause.
      const MAX_FULL_SCAN_SIZE = 5000;
      const annCameUpShort = annUsed && ranked.length < resultLimit;
      if (annCameUpShort && store.length <= MAX_FULL_SCAN_SIZE) {
        ranked = await rankCandidates(store);
      }

      const results = [];
      for (const hit of ranked) {
        const content = hit.content ?? (await this.getChunkContent(hit));
        // Skip chunks whose text equals the query once whitespace is collapsed.
        const sameAsInput =
          Boolean(normalizedSnippet) && collapseWhitespace(content) === normalizedSnippet;
        if (sameAsInput) continue;

        results.push({ ...hit, content });
        if (results.length >= resultLimit) break;
      }

      const emptyNote =
        results.length === 0 ? 'No similar code found above the similarity threshold.' : null;

      return {
        results,
        message: notice || emptyNote,
      };
    } finally {
      if (typeof this.cache.endRead === 'function') {
        this.cache.endRead();
      }
    }
  }

  /**
   * Render results as Markdown sections with fenced code.
   *
   * @param {object[]} results - Entries with `file`, `similarity`, `startLine`,
   *   `endLine` and optionally `content` (fetched from the cache when absent).
   * @returns {Promise<string>} Markdown text, or a fixed sentence when empty.
   */
  async formatResults(results) {
    if (results.length === 0) {
      return 'No similar code patterns found in the codebase.';
    }

    const fence = '```';

    const renderEntry = async (entry, position) => {
      const relPath = path.relative(this.config.searchDirectory, entry.file);
      const body = entry.content ?? (await this.getChunkContent(entry));
      const percent = (entry.similarity * 100).toFixed(1);
      const lineRange = `${entry.startLine}-${entry.endLine}`;
      const language = path.extname(entry.file).slice(1);

      return (
        `## Similar Code ${position + 1} (Similarity: ${percent}%)\n` +
        `**File:** \`${relPath}\`\n` +
        `**Lines:** ${lineRange}\n\n` +
        `${fence}${language}\n${body}\n${fence}\n`
      );
    };

    const sections = await Promise.all(
      results.map((entry, position) => renderEntry(entry, position))
    );

    return sections.join('\n');
  }
}
222
+
223
/**
 * MCP tool definition for the find-similar-code tool.
 *
 * @param {object} [_config] - Accepted but not used.
 * @returns {object} A freshly built descriptor with name, description,
 *   input schema and annotations.
 */
export function getToolDefinition(_config) {
  const properties = {
    code: {
      type: 'string',
      description: 'The code snippet to find similar patterns for',
    },
    maxResults: {
      type: 'number',
      description: 'Maximum number of similar code chunks to return (default: 5)',
      default: 5,
    },
    minSimilarity: {
      type: 'number',
      description: 'Minimum similarity threshold 0-1 (default: 0.3 = 30%)',
      default: 0.3,
    },
  };

  const inputSchema = {
    type: 'object',
    properties,
    required: ['code'],
  };

  const annotations = {
    title: 'Find Similar Code',
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: true,
    openWorldHint: false,
  };

  return {
    name: 'd_find_similar_code',
    description:
      'Find similar code patterns in the codebase. Given a code snippet, returns other code chunks that are semantically similar. Useful for finding duplicate code, understanding patterns, and refactoring opportunities.',
    inputSchema,
    annotations,
  };
}
258
+
259
/**
 * Tool handler: validates arguments, runs the similarity search and wraps the
 * outcome as MCP text content.
 *
 * @param {object} request - Tool call request; arguments are read from
 *   `request.params.arguments`.
 * @param {object} findSimilarCode - Object exposing `execute({ code, maxResults,
 *   minSimilarity })` and `formatResults(results)`.
 * @returns {Promise<object>} `{ content: [{ type: 'text', text }] }`; carries
 *   `isError: true` when `code` is missing or blank.
 */
export async function handleToolCall(request, findSimilarCode) {
  const args = request?.params?.arguments || {};
  const { code } = args;
  if (typeof code !== 'string' || code.trim().length === 0) {
    return {
      content: [{ type: 'text', text: 'Error: A non-empty code string is required.' }],
      isError: true,
    };
  }

  // Non-numeric options fall back to the documented defaults.
  const maxResults = typeof args.maxResults === 'number' ? args.maxResults : 5;
  const minSimilarity = typeof args.minSimilarity === 'number' ? args.minSimilarity : 0.3;

  const { results, message } = await findSimilarCode.execute({
    code,
    maxResults,
    minSimilarity,
  });

  const hasResults = Array.isArray(results) && results.length > 0;

  // Nothing to show: the message (if any) is the whole answer.
  if (message && !hasResults) {
    return {
      content: [{ type: 'text', text: message }],
    };
  }

  const formattedText = await findSimilarCode.formatResults(results);

  // A message alongside results is a note (e.g. the input was shortened);
  // show it above the matches instead of discarding the matches.
  const text = message ? `${message}\n\n${formattedText}` : formattedText;

  return {
    content: [{ type: 'text', text }],
  };
}