@softerist/heuristic-mcp 2.1.46 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -76
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
package/debug-pids.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { exec } from 'child_process';
|
|
2
|
+
import util from 'util';
|
|
3
|
+
|
|
4
|
+
const execPromise = util.promisify(exec);
|
|
5
|
+
|
|
6
|
+
/**
 * Debug helper: lists running `node.exe` processes and prints the ones whose
 * command line mentions 'heuristic-mcp' or 'index.js'.
 *
 * Shells out to PowerShell (`Get-CimInstance Win32_Process`), so it presumably
 * only works on Windows hosts with `powershell` on PATH. Any failure (spawn
 * error, unparsable output) is logged via console.error and not rethrown.
 *
 * @returns {Promise<void>} Resolves once the matching processes, or the error, have been logged.
 */
async function check() {
|
|
7
|
+
console.info('Checking processes...');
|
|
8
|
+
try {
|
|
9
|
+
const { stdout } = await execPromise(
|
|
10
|
+
`powershell -NoProfile -Command "Get-CimInstance Win32_Process | Where-Object { $_.Name -eq 'node.exe' } | Select-Object ProcessId, CommandLine, ParentProcessId | ConvertTo-Json"`
|
|
11
|
+
);
|
|
12
|
+
// NOTE(review): JSON.parse throws on empty stdout; that case lands in the catch below.
const processes = JSON.parse(stdout);
|
|
13
|
+
// Normalize to an array so the loop also handles a single (non-array) JSON object.
const list = Array.isArray(processes) ? processes : [processes];
|
|
14
|
+
|
|
15
|
+
for (const p of list) {
|
|
16
|
+
// Plain substring match: any node process with 'index.js' in its command line is reported.
if (p.CommandLine && (p.CommandLine.includes('heuristic-mcp') || p.CommandLine.includes('index.js'))) {
|
|
17
|
+
console.info(`PID: ${p.ProcessId}, Parent: ${p.ParentProcessId}`);
|
|
18
|
+
console.info(`CMD: ${p.CommandLine}`);
|
|
19
|
+
console.info('---');
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
} catch (err) {
|
|
23
|
+
// Best-effort diagnostic: report the message and swallow the error.
console.error('Error:', err.message);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
check();
|
package/eslint.config.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import js from '@eslint/js';
|
|
2
|
+
import globals from 'globals';
|
|
3
|
+
|
|
4
|
+
// ESLint flat-config array. Entries are, in order: ignored paths, ESLint's
// recommended rule set, project-wide settings for all .js files, then
// overrides for scripts and tests (later entries take precedence for the
// files they match).
export default [
|
|
5
|
+
// Paths excluded from linting.
{
|
|
6
|
+
ignores: ['node_modules/**', 'coverage/**', '.vitest-coverage/**'],
|
|
7
|
+
},
|
|
8
|
+
// Baseline: the recommended rules shipped with @eslint/js.
js.configs.recommended,
|
|
9
|
+
// Settings for every .js file: ES modules with Node and ES2021 globals.
{
|
|
10
|
+
files: ['**/*.js'],
|
|
11
|
+
languageOptions: {
|
|
12
|
+
// NOTE(review): syntax is parsed as ES2022 while the global set below is es2021 - confirm this is intentional.
ecmaVersion: 2022,
|
|
13
|
+
sourceType: 'module',
|
|
14
|
+
globals: {
|
|
15
|
+
...globals.node,
|
|
16
|
+
...globals.es2021,
|
|
17
|
+
},
|
|
18
|
+
},
|
|
19
|
+
rules: {
|
|
20
|
+
// Unused variables only warn; arguments and caught errors whose names start with "_" are exempt.
'no-unused-vars': ['warn', { argsIgnorePattern: '^_', caughtErrorsIgnorePattern: '^_' }],
|
|
21
|
+
// console.warn / console.error / console.info are allowed; other console methods warn.
'no-console': ['warn', { allow: ['warn', 'error', 'info'] }],
|
|
22
|
+
},
|
|
23
|
+
},
|
|
24
|
+
// Scripts and tests may use any console method.
{
|
|
25
|
+
files: ['scripts/**/*.js', 'test/**/*.js'],
|
|
26
|
+
rules: {
|
|
27
|
+
'no-console': 'off',
|
|
28
|
+
},
|
|
29
|
+
},
|
|
30
|
+
// Tests may leave variables unused.
{
|
|
31
|
+
files: ['test/**/*.js'],
|
|
32
|
+
rules: {
|
|
33
|
+
'no-unused-vars': 'off',
|
|
34
|
+
},
|
|
35
|
+
},
|
|
36
|
+
];
|
package/features/ann-config.js
CHANGED
|
@@ -15,31 +15,39 @@ export class AnnConfigTool {
|
|
|
15
15
|
* Adjust efSearch and optionally trigger index rebuild
|
|
16
16
|
*/
|
|
17
17
|
// Dispatches on `args.action` (falls back to 'stats' when it is falsy):
//   'stats'         -> returns this.cache.getAnnStats()
//   'set_ef_search' -> returns this.cache.setEfSearch(args.efSearch); args.efSearch must be defined
//   'rebuild'       -> invalidates the ANN index, awaits ensureAnnIndex(), returns { success, message }
// Any other action returns { success: false, error } rather than throwing.
async execute(args) {
|
|
18
|
-
const action = args.action ||
|
|
18
|
+
const action = args.action || 'stats';
|
|
19
19
|
|
|
20
|
-
if (action ===
|
|
20
|
+
if (action === 'stats') {
|
|
21
21
|
return this.cache.getAnnStats();
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
if (action ===
|
|
24
|
+
if (action === 'set_ef_search') {
|
|
25
25
|
const efSearch = args.efSearch;
|
|
26
26
|
// NOTE(review): only `undefined` is rejected here; null or non-numeric values are handed to setEfSearch - confirm it validates them.
if (efSearch === undefined) {
|
|
27
|
-
return {
|
|
27
|
+
return {
|
|
28
|
+
success: false,
|
|
29
|
+
error: 'efSearch parameter is required for set_ef_search action',
|
|
30
|
+
};
|
|
28
31
|
}
|
|
29
32
|
return this.cache.setEfSearch(efSearch);
|
|
30
33
|
}
|
|
31
34
|
|
|
32
|
-
if (action ===
|
|
35
|
+
if (action === 'rebuild') {
|
|
33
36
|
// Force invalidate and rebuild the ANN index
|
|
34
37
|
this.cache.invalidateAnnIndex();
|
|
35
38
|
const index = await this.cache.ensureAnnIndex();
|
|
36
39
|
return {
|
|
37
40
|
// NOTE(review): `success` uses a strict null check while `message` uses truthiness; they disagree if ensureAnnIndex ever resolves to undefined.
success: index !== null,
|
|
38
|
-
message: index
|
|
41
|
+
message: index
|
|
42
|
+
? 'ANN index rebuilt successfully'
|
|
43
|
+
: 'ANN index rebuild failed or not available',
|
|
39
44
|
};
|
|
40
45
|
}
|
|
41
46
|
|
|
42
|
-
return {
|
|
47
|
+
return {
|
|
48
|
+
success: false,
|
|
49
|
+
error: `Unknown action: ${action}. Valid actions: stats, set_ef_search, rebuild`,
|
|
50
|
+
};
|
|
43
51
|
}
|
|
44
52
|
|
|
45
53
|
formatResults(result) {
|
|
@@ -49,7 +57,7 @@ export class AnnConfigTool {
|
|
|
49
57
|
|
|
50
58
|
if (result.enabled !== undefined) {
|
|
51
59
|
// Stats response
|
|
52
|
-
let output =
|
|
60
|
+
let output = '## ANN Index Statistics\n\n';
|
|
53
61
|
output += `- **Enabled**: ${result.enabled}\n`;
|
|
54
62
|
output += `- **Index Loaded**: ${result.indexLoaded}\n`;
|
|
55
63
|
output += `- **Dirty (needs rebuild)**: ${result.dirty}\n`;
|
|
@@ -57,7 +65,7 @@ export class AnnConfigTool {
|
|
|
57
65
|
output += `- **Min Chunks for ANN**: ${result.minChunksForAnn}\n`;
|
|
58
66
|
|
|
59
67
|
if (result.config) {
|
|
60
|
-
output +=
|
|
68
|
+
output += '\n### Current Config\n\n';
|
|
61
69
|
output += `- **Metric**: ${result.config.metric}\n`;
|
|
62
70
|
output += `- **Dimensions**: ${result.config.dim}\n`;
|
|
63
71
|
output += `- **Indexed Vectors**: ${result.config.count}\n`;
|
|
@@ -65,7 +73,7 @@ export class AnnConfigTool {
|
|
|
65
73
|
output += `- **efConstruction**: ${result.config.efConstruction}\n`;
|
|
66
74
|
output += `- **efSearch**: ${result.config.efSearch}\n`;
|
|
67
75
|
} else {
|
|
68
|
-
output +=
|
|
76
|
+
output += '\n*No active ANN index.*\n';
|
|
69
77
|
}
|
|
70
78
|
|
|
71
79
|
return output;
|
|
@@ -79,32 +87,35 @@ export class AnnConfigTool {
|
|
|
79
87
|
// MCP Tool definition
|
|
80
88
|
/**
 * Builds the MCP tool definition for the ANN configuration tool.
 *
 * @returns {object} Definition with `name` ('d_ann_config'), `description`,
 *   a JSON-Schema style `inputSchema` (`action` with default 'stats', and
 *   `efSearch` bounded to 1-1000) and `annotations`.
 */
export function getToolDefinition() {
|
|
81
89
|
return {
|
|
82
|
-
name:
|
|
83
|
-
description:
|
|
90
|
+
name: 'd_ann_config',
|
|
91
|
+
description:
|
|
92
|
+
"Configure and monitor the ANN (Approximate Nearest Neighbor) search index. Actions: 'stats' (view current config), 'set_ef_search' (tune search accuracy/speed), 'rebuild' (force index rebuild).",
|
|
84
93
|
inputSchema: {
|
|
85
|
-
type:
|
|
94
|
+
type: 'object',
|
|
86
95
|
properties: {
|
|
87
96
|
action: {
|
|
88
|
-
type:
|
|
89
|
-
enum: [
|
|
90
|
-
description:
|
|
91
|
-
|
|
97
|
+
type: 'string',
|
|
98
|
+
enum: ['stats', 'set_ef_search', 'rebuild'],
|
|
99
|
+
description:
|
|
100
|
+
"Action to perform. 'stats' shows current config, 'set_ef_search' changes the search parameter, 'rebuild' forces index rebuild.",
|
|
101
|
+
default: 'stats',
|
|
92
102
|
},
|
|
93
103
|
efSearch: {
|
|
94
|
-
type:
|
|
95
|
-
description:
|
|
104
|
+
type: 'number',
|
|
105
|
+
description:
|
|
106
|
+
'New efSearch value (only for set_ef_search action). Higher = more accurate but slower. Typical range: 16-512.',
|
|
96
107
|
minimum: 1,
|
|
97
|
-
maximum: 1000
|
|
98
|
-
}
|
|
99
|
-
}
|
|
108
|
+
maximum: 1000,
|
|
109
|
+
},
|
|
110
|
+
},
|
|
100
111
|
},
|
|
101
112
|
annotations: {
|
|
102
|
-
title:
|
|
113
|
+
title: 'ANN Index Configuration',
|
|
103
114
|
readOnlyHint: false,
|
|
104
115
|
destructiveHint: false,
|
|
105
116
|
idempotentHint: true,
|
|
106
|
-
openWorldHint: false
|
|
107
|
-
}
|
|
117
|
+
openWorldHint: false,
|
|
118
|
+
},
|
|
108
119
|
};
|
|
109
120
|
}
|
|
110
121
|
|
|
@@ -115,6 +126,6 @@ export async function handleToolCall(request, annConfigTool) {
|
|
|
115
126
|
const formattedText = annConfigTool.formatResults(result);
|
|
116
127
|
|
|
117
128
|
return {
|
|
118
|
-
content: [{ type:
|
|
129
|
+
content: [{ type: 'text', text: formattedText }],
|
|
119
130
|
};
|
|
120
131
|
}
|
package/features/clear-cache.js
CHANGED
|
@@ -9,17 +9,21 @@ export class CacheClearer {
|
|
|
9
9
|
async execute() {
|
|
10
10
|
// Check if indexing is in progress
|
|
11
11
|
if (this.indexer && this.indexer.isIndexing) {
|
|
12
|
-
throw new Error(
|
|
12
|
+
throw new Error(
|
|
13
|
+
'Cannot clear cache while indexing is in progress. Please wait for indexing to complete.'
|
|
14
|
+
);
|
|
13
15
|
}
|
|
14
16
|
|
|
15
17
|
// Check if cache is currently being saved (race condition prevention)
|
|
16
18
|
if (this.cache.isSaving) {
|
|
17
|
-
throw new Error(
|
|
19
|
+
throw new Error(
|
|
20
|
+
'Cannot clear cache while cache is being saved. Please try again in a moment.'
|
|
21
|
+
);
|
|
18
22
|
}
|
|
19
23
|
|
|
20
24
|
// Check if a clear operation is already in progress (prevent concurrent clears)
|
|
21
25
|
if (this.isClearing) {
|
|
22
|
-
throw new Error(
|
|
26
|
+
throw new Error('Cache clear operation already in progress. Please wait for it to complete.');
|
|
23
27
|
}
|
|
24
28
|
|
|
25
29
|
this.isClearing = true;
|
|
@@ -29,7 +33,7 @@ export class CacheClearer {
|
|
|
29
33
|
return {
|
|
30
34
|
success: true,
|
|
31
35
|
message: `Cache cleared successfully. Next indexing will be a full rebuild.`,
|
|
32
|
-
cacheDirectory: this.config.cacheDirectory
|
|
36
|
+
cacheDirectory: this.config.cacheDirectory,
|
|
33
37
|
};
|
|
34
38
|
} finally {
|
|
35
39
|
this.isClearing = false;
|
|
@@ -39,19 +43,20 @@ export class CacheClearer {
|
|
|
39
43
|
|
|
40
44
|
/**
 * Builds the MCP tool definition for the cache-clearing tool.
 *
 * @returns {object} Definition with `name` ('c_clear_cache'), `description`,
 *   an `inputSchema` that declares no properties, and `annotations`
 *   (flagged destructive and idempotent).
 */
export function getToolDefinition() {
|
|
41
45
|
return {
|
|
42
|
-
name:
|
|
43
|
-
description:
|
|
46
|
+
name: 'c_clear_cache',
|
|
47
|
+
description:
|
|
48
|
+
'Clears the embeddings cache, forcing a complete reindex on next search or manual index operation. Useful when encountering cache corruption or after major codebase changes.',
|
|
44
49
|
inputSchema: {
|
|
45
|
-
type:
|
|
46
|
-
properties: {}
|
|
50
|
+
type: 'object',
|
|
51
|
+
properties: {},
|
|
47
52
|
},
|
|
48
53
|
annotations: {
|
|
49
|
-
title:
|
|
54
|
+
title: 'Clear Embeddings Cache',
|
|
50
55
|
readOnlyHint: false,
|
|
51
56
|
destructiveHint: true,
|
|
52
57
|
idempotentHint: true,
|
|
53
|
-
openWorldHint: false
|
|
54
|
-
}
|
|
58
|
+
openWorldHint: false,
|
|
59
|
+
},
|
|
55
60
|
};
|
|
56
61
|
}
|
|
57
62
|
|
|
@@ -59,17 +64,21 @@ export async function handleToolCall(request, cacheClearer) {
|
|
|
59
64
|
try {
|
|
60
65
|
const result = await cacheClearer.execute();
|
|
61
66
|
return {
|
|
62
|
-
content: [
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
67
|
+
content: [
|
|
68
|
+
{
|
|
69
|
+
type: 'text',
|
|
70
|
+
text: `${result.message}\n\nCache directory: ${result.cacheDirectory}`,
|
|
71
|
+
},
|
|
72
|
+
],
|
|
66
73
|
};
|
|
67
74
|
} catch (error) {
|
|
68
75
|
return {
|
|
69
|
-
content: [
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
76
|
+
content: [
|
|
77
|
+
{
|
|
78
|
+
type: 'text',
|
|
79
|
+
text: `Failed to clear cache: ${error.message}`,
|
|
80
|
+
},
|
|
81
|
+
],
|
|
73
82
|
};
|
|
74
83
|
}
|
|
75
84
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import path from
|
|
2
|
-
import { dotSimilarity } from
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { dotSimilarity, smartChunk, estimateTokens, getModelTokenLimit } from '../lib/utils.js';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* FindSimilarCode feature
|
|
@@ -12,6 +12,14 @@ export class FindSimilarCode {
|
|
|
12
12
|
this.config = config;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
/**
 * Thin delegate to `this.cache.getChunkContent`.
 *
 * @param {object} chunk - Chunk record, forwarded unchanged to the cache.
 * @returns {Promise<*>} Whatever the cache returns for this chunk
 *   (presumably its source text - confirm against the cache implementation).
 */
async getChunkContent(chunk) {
|
|
16
|
+
return this.cache.getChunkContent(chunk);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
 * Thin delegate to `this.cache.getChunkVector`.
 *
 * @param {object} chunk - Chunk record, forwarded unchanged to the cache.
 * @returns {*} Whatever the cache returns for this chunk
 *   (presumably its embedding vector - confirm against the cache implementation).
 */
getChunkVector(chunk) {
|
|
20
|
+
return this.cache.getChunkVector(chunk);
|
|
21
|
+
}
|
|
22
|
+
|
|
15
23
|
getAnnCandidateCount(maxResults, totalChunks) {
|
|
16
24
|
const minCandidates = this.config.annMinCandidates ?? 0;
|
|
17
25
|
const maxCandidates = this.config.annMaxCandidates ?? totalChunks;
|
|
@@ -22,19 +30,44 @@ export class FindSimilarCode {
|
|
|
22
30
|
}
|
|
23
31
|
|
|
24
32
|
async execute({ code, maxResults = 5, minSimilarity = 0.3 }) {
|
|
33
|
+
if (typeof this.cache.ensureLoaded === 'function') {
|
|
34
|
+
await this.cache.ensureLoaded();
|
|
35
|
+
}
|
|
25
36
|
const vectorStore = this.cache.getVectorStore();
|
|
26
37
|
|
|
27
38
|
if (vectorStore.length === 0) {
|
|
28
39
|
return {
|
|
29
40
|
results: [],
|
|
30
|
-
message:
|
|
41
|
+
message: 'No code has been indexed yet. Please wait for initial indexing to complete.',
|
|
31
42
|
};
|
|
32
43
|
}
|
|
33
44
|
|
|
45
|
+
let codeToEmbed = code;
|
|
46
|
+
let warningMessage = null;
|
|
47
|
+
|
|
48
|
+
// Check if input is too large and truncate intelligently
|
|
49
|
+
const estimatedTokens = estimateTokens(code);
|
|
50
|
+
const limit = getModelTokenLimit(this.config.embeddingModel);
|
|
51
|
+
|
|
52
|
+
// If input is significantly larger than the model limit, we should chunk it
|
|
53
|
+
if (estimatedTokens > limit) {
|
|
54
|
+
// Use smartChunk to get a semantically valid first block
|
|
55
|
+
// We pass a dummy file name to trigger language detection if possible, or default to .txt
|
|
56
|
+
// Since we don't know the language, we'll try to guess or just use generic chunking
|
|
57
|
+
const chunks = smartChunk(code, 'input.txt', this.config);
|
|
58
|
+
if (chunks.length > 0) {
|
|
59
|
+
codeToEmbed = chunks[0].text;
|
|
60
|
+
warningMessage = `Note: Input code was too long (${estimatedTokens} tokens). Searching using the first chunk (${chunks[0].tokenCount} tokens).`;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
34
64
|
// Generate embedding for the input code
|
|
35
|
-
const codeEmbed = await this.embedder(
|
|
36
|
-
|
|
37
|
-
|
|
65
|
+
const codeEmbed = await this.embedder(codeToEmbed, {
|
|
66
|
+
pooling: 'mean',
|
|
67
|
+
normalize: true,
|
|
68
|
+
});
|
|
69
|
+
const codeVector = codeEmbed.data; // Keep as Float32Array for performance
|
|
70
|
+
const codeVectorTyped = codeVector;
|
|
38
71
|
|
|
39
72
|
let candidates = vectorStore;
|
|
40
73
|
let usedAnn = false;
|
|
@@ -54,92 +87,131 @@ export class FindSimilarCode {
|
|
|
54
87
|
}
|
|
55
88
|
}
|
|
56
89
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
90
|
+
const normalizedInput = codeToEmbed.trim().replace(/\s+/g, ' ');
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Batch scoring function to prevent blocking the event loop
|
|
94
|
+
*/
|
|
95
|
+
const scoreAndFilter = async (chunks) => {
|
|
96
|
+
const BATCH_SIZE = 500;
|
|
97
|
+
const scored = [];
|
|
98
|
+
|
|
99
|
+
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
100
|
+
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
101
|
+
|
|
102
|
+
// Yield to event loop between batches
|
|
103
|
+
if (i > 0) {
|
|
104
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
for (const chunk of batch) {
|
|
108
|
+
const vector = this.getChunkVector(chunk);
|
|
109
|
+
if (!vector) continue;
|
|
110
|
+
const similarity = dotSimilarity(codeVector, vector);
|
|
111
|
+
|
|
112
|
+
if (similarity >= minSimilarity) {
|
|
113
|
+
// Deduplicate against input
|
|
114
|
+
if (normalizedInput) {
|
|
115
|
+
const content = await this.getChunkContent(chunk);
|
|
116
|
+
const normalizedChunk = content.trim().replace(/\s+/g, ' ');
|
|
117
|
+
if (normalizedChunk === normalizedInput) continue;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
scored.push({ ...chunk, similarity });
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return scored.sort((a, b) => b.similarity - a.similarity);
|
|
126
|
+
};
|
|
67
127
|
|
|
128
|
+
let filteredResults = await scoreAndFilter(candidates);
|
|
129
|
+
|
|
130
|
+
// Fallback to full scan if ANN didn't provide enough results
|
|
131
|
+
// Optimization: Skip full scan on large codebases to avoid long pauses
|
|
132
|
+
const MAX_FULL_SCAN_SIZE = 5000;
|
|
68
133
|
if (usedAnn && filteredResults.length < maxResults) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
.filter(chunk => chunk.similarity >= minSimilarity)
|
|
75
|
-
.sort((a, b) => b.similarity - a.similarity);
|
|
134
|
+
if (vectorStore.length <= MAX_FULL_SCAN_SIZE) {
|
|
135
|
+
filteredResults = await scoreAndFilter(vectorStore);
|
|
136
|
+
} else {
|
|
137
|
+
// Just return what we found via ANN
|
|
138
|
+
}
|
|
76
139
|
}
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
})
|
|
86
|
-
.slice(0, maxResults);
|
|
140
|
+
const results = await Promise.all(
|
|
141
|
+
filteredResults.slice(0, maxResults).map(async (chunk) => {
|
|
142
|
+
if (chunk.content === undefined || chunk.content === null) {
|
|
143
|
+
return { ...chunk, content: await this.getChunkContent(chunk) };
|
|
144
|
+
}
|
|
145
|
+
return chunk;
|
|
146
|
+
}),
|
|
147
|
+
);
|
|
87
148
|
|
|
88
149
|
return {
|
|
89
150
|
results,
|
|
90
|
-
message: results.length === 0 ?
|
|
151
|
+
message: warningMessage || (results.length === 0 ? 'No similar code found above the similarity threshold.' : null),
|
|
91
152
|
};
|
|
92
153
|
}
|
|
93
154
|
|
|
94
|
-
formatResults(results) {
|
|
155
|
+
/**
 * Renders similar-code results as Markdown, one section per result.
 *
 * Each section shows the 1-based rank, the similarity as a percentage with one
 * decimal, the file path relative to `this.config.searchDirectory`, the line
 * range, and the chunk content in a code fence tagged with the file extension.
 *
 * @param {Array<object>} results - Result chunks; reads `file`, `similarity`,
 *   `startLine`, `endLine` and, when present, `content`.
 * @returns {Promise<string>} The sections joined by newlines, or a fixed
 *   "no results" sentence when `results` is empty.
 */
async formatResults(results) {
|
|
95
156
|
if (results.length === 0) {
|
|
96
|
-
return
|
|
157
|
+
return 'No similar code patterns found in the codebase.';
|
|
97
158
|
}
|
|
98
159
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
160
|
+
const formatted = await Promise.all(
|
|
161
|
+
results.map(async (r, idx) => {
|
|
162
|
+
const relPath = path.relative(this.config.searchDirectory, r.file);
|
|
163
|
+
// Use the content carried on the result when present; otherwise fetch it via getChunkContent.
const content = r.content ?? await this.getChunkContent(r);
|
|
164
|
+
return (
|
|
165
|
+
`## Similar Code ${idx + 1} (Similarity: ${(r.similarity * 100).toFixed(1)}%)\n` +
|
|
166
|
+
`**File:** \`${relPath}\`\n` +
|
|
167
|
+
`**Lines:** ${r.startLine}-${r.endLine}\n\n` +
|
|
168
|
+
'```' +
|
|
169
|
+
path.extname(r.file).slice(1) +
|
|
170
|
+
'\n' +
|
|
171
|
+
content +
|
|
172
|
+
'\n' +
|
|
173
|
+
'```\n'
|
|
174
|
+
);
|
|
175
|
+
}),
|
|
176
|
+
);
|
|
177
|
+
|
|
178
|
+
return formatted.join('\n');
|
|
108
179
|
}
|
|
109
180
|
}
|
|
110
181
|
|
|
111
182
|
// MCP Tool definition
|
|
112
|
-
export function getToolDefinition(
|
|
183
|
+
/**
 * Builds the MCP tool definition for the find-similar-code tool.
 *
 * @param {*} _config - Not read by this function.
 * @returns {object} Definition with `name` ('d_find_similar_code'),
 *   `description`, an `inputSchema` (`code` is required; `maxResults`
 *   defaults to 5 and `minSimilarity` to 0.3) and read-only `annotations`.
 */
export function getToolDefinition(_config) {
|
|
113
184
|
return {
|
|
114
|
-
name:
|
|
115
|
-
description:
|
|
185
|
+
name: 'd_find_similar_code',
|
|
186
|
+
description:
|
|
187
|
+
'Find similar code patterns in the codebase. Given a code snippet, returns other code chunks that are semantically similar. Useful for finding duplicate code, understanding patterns, and refactoring opportunities.',
|
|
116
188
|
inputSchema: {
|
|
117
|
-
type:
|
|
189
|
+
type: 'object',
|
|
118
190
|
properties: {
|
|
119
191
|
code: {
|
|
120
|
-
type:
|
|
121
|
-
description:
|
|
192
|
+
type: 'string',
|
|
193
|
+
description: 'The code snippet to find similar patterns for',
|
|
122
194
|
},
|
|
123
195
|
maxResults: {
|
|
124
|
-
type:
|
|
125
|
-
description:
|
|
126
|
-
default: 5
|
|
196
|
+
type: 'number',
|
|
197
|
+
description: 'Maximum number of similar code chunks to return (default: 5)',
|
|
198
|
+
default: 5,
|
|
127
199
|
},
|
|
128
200
|
minSimilarity: {
|
|
129
|
-
type:
|
|
130
|
-
description:
|
|
131
|
-
default: 0.3
|
|
132
|
-
}
|
|
201
|
+
type: 'number',
|
|
202
|
+
description: 'Minimum similarity threshold 0-1 (default: 0.3 = 30%)',
|
|
203
|
+
default: 0.3,
|
|
204
|
+
},
|
|
133
205
|
},
|
|
134
|
-
required: [
|
|
206
|
+
required: ['code'],
|
|
135
207
|
},
|
|
136
208
|
annotations: {
|
|
137
|
-
title:
|
|
209
|
+
title: 'Find Similar Code',
|
|
138
210
|
readOnlyHint: true,
|
|
139
211
|
destructiveHint: false,
|
|
140
212
|
idempotentHint: true,
|
|
141
|
-
openWorldHint: false
|
|
142
|
-
}
|
|
213
|
+
openWorldHint: false,
|
|
214
|
+
},
|
|
143
215
|
};
|
|
144
216
|
}
|
|
145
217
|
|
|
@@ -149,17 +221,21 @@ export async function handleToolCall(request, findSimilarCode) {
|
|
|
149
221
|
const maxResults = request.params.arguments.maxResults || 5;
|
|
150
222
|
const minSimilarity = request.params.arguments.minSimilarity || 0.3;
|
|
151
223
|
|
|
152
|
-
const { results, message } = await findSimilarCode.execute({
|
|
224
|
+
const { results, message } = await findSimilarCode.execute({
|
|
225
|
+
code,
|
|
226
|
+
maxResults,
|
|
227
|
+
minSimilarity,
|
|
228
|
+
});
|
|
153
229
|
|
|
154
230
|
if (message) {
|
|
155
231
|
return {
|
|
156
|
-
content: [{ type:
|
|
232
|
+
content: [{ type: 'text', text: message }],
|
|
157
233
|
};
|
|
158
234
|
}
|
|
159
235
|
|
|
160
|
-
const formattedText = findSimilarCode.formatResults(results);
|
|
236
|
+
const formattedText = await findSimilarCode.formatResults(results);
|
|
161
237
|
|
|
162
238
|
return {
|
|
163
|
-
content: [{ type:
|
|
239
|
+
content: [{ type: 'text', text: formattedText }],
|
|
164
240
|
};
|
|
165
241
|
}
|