@softerist/heuristic-mcp 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +9 -4
- package/CONTRIBUTING.md +6 -6
- package/README.md +37 -18
- package/config.json +12 -2
- package/features/ann-config.js +120 -0
- package/features/find-similar-code.js +40 -2
- package/features/hybrid-search.js +69 -5
- package/features/index-codebase.js +28 -4
- package/index.js +9 -1
- package/lib/cache.js +396 -10
- package/lib/call-graph.js +281 -0
- package/lib/config.js +123 -16
- package/lib/project-detector.js +49 -36
- package/package.json +5 -8
- package/test/ann-fallback.test.js +68 -0
- package/test/call-graph.test.js +142 -0
- package/test/clear-cache.test.js +3 -6
- package/test/helpers.js +64 -7
- package/test/hybrid-search.test.js +2 -2
- package/test/index-codebase.test.js +3 -10
- package/test/integration.test.js +3 -3
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Call Graph Extractor
|
|
3
|
+
*
|
|
4
|
+
* Lightweight regex-based extraction of function definitions and calls.
|
|
5
|
+
* Works across multiple languages without external dependencies.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import path from "path";
|
|
9
|
+
|
|
10
|
+
// Language-specific patterns for function/method/class definitions.
// Keys are the language names produced by detectLanguage(); each value is a
// list of regexes whose first capture group is the name being defined.
// All patterns carry the `g` flag and are shared module-level objects, so a
// caller that drives them with exec() must reset `lastIndex` before each scan.
const DEFINITION_PATTERNS = {
  javascript: [
    // function declarations: function name() or async function name()
    /(?:async\s+)?function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(/g,
    // arrow functions: const name = () => or const name = async () =>
    /(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/g,
    // class declarations
    /class\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g,
    // method definitions at the start of a line: name() { or async name() {
    // (also matches control-flow headers such as `if (x) {`)
    /^\s*(?:async\s+)?([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\([^)]*\)\s*\{/gm,
    // object method shorthand: name() { anywhere on a line
    /([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\([^)]*\)\s*\{/g
  ],
  python: [
    // def name(
    /def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/g,
    // class Name: or class Name(
    /class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[:(]/g
  ],
  go: [
    // func name() or func (r Receiver) name()
    /func\s+(?:\([^)]*\)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/g
  ],
  rust: [
    // fn name( or fn name<
    /fn\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[<(]/g,
    // impl Name or impl<T> Name
    /impl(?:\s*<[^>]*>)?\s+([a-zA-Z_][a-zA-Z0-9_]*)/g
  ],
  java: [
    // public void name() or private static String name()
    // (modifiers are optional, so any `word name(` sequence matches)
    /(?:public|private|protected)?\s*(?:static)?\s*(?:\w+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/g,
    // class Name
    /class\s+([a-zA-Z_][a-zA-Z0-9_]*)/g
  ]
};
|
|
47
|
+
|
|
48
|
+
// Pattern for function calls (language-agnostic, catches most cases).
// Matches any identifier followed by "(" with optional whitespace in between;
// capture group 1 is the identifier. Keyword headers such as `if (` match too,
// so results need to be filtered afterwards. The regex is global and shared:
// reset `lastIndex` before driving it with exec().
const CALL_PATTERN = /\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(/g;
|
|
50
|
+
|
|
51
|
+
// Common built-ins to exclude from call detection.
// Membership is case-sensitive: an entry such as "parseInt" or "Map" only
// matches an identically-cased name. A few entries ("len", "new") are listed
// under more than one language; the Set collapses the duplicates.
const BUILTIN_EXCLUSIONS = new Set([
  // JavaScript keywords and globals that are followed by "("
  "if", "for", "while", "switch", "catch", "function", "async", "await",
  "return", "throw", "new", "typeof", "instanceof", "delete", "void",
  "console", "require", "import", "export", "super", "this",
  // Common functions that aren't meaningful for call graphs
  "parseInt", "parseFloat", "String", "Number", "Boolean", "Array", "Object",
  "Map", "Set", "Promise", "Error", "JSON", "Math", "Date", "RegExp",
  // Python
  "def", "class", "print", "len", "range", "str", "int", "float", "list", "dict",
  "tuple", "set", "bool", "type", "isinstance", "hasattr", "getattr", "setattr",
  // Go
  "func", "make", "append", "len", "cap", "new", "panic", "recover",
  // Control flow that looks like function calls
  "else", "try", "finally", "with", "assert", "raise", "yield"
]);
|
|
68
|
+
|
|
69
|
+
/**
 * Map a file path to the pattern family used for definition extraction.
 *
 * @param {string} file - File path or name; only its extension is inspected
 *   (case-insensitively).
 * @returns {string} One of "javascript", "python", "go", "rust" or "java".
 *   Unknown or missing extensions fall back to "javascript".
 */
function detectLanguage(file) {
  switch (path.extname(file).toLowerCase()) {
    case ".py":
    case ".pyw":
      return "python";
    case ".go":
      return "go";
    case ".rs":
      return "rust";
    case ".java":
    case ".kt":
    case ".scala":
      return "java";
    default:
      // Covers .js/.jsx/.ts/.tsx/.mjs/.cjs as well as anything unrecognised.
      return "javascript";
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Collect the names of functions, methods and classes defined in a source text.
 *
 * @param {string} content - Source text to scan.
 * @param {string} file - Path of the file; its extension selects the regex family.
 * @returns {string[]} Unique names in first-seen order. Single-character names
 *   and names listed in BUILTIN_EXCLUSIONS are dropped.
 */
export function extractDefinitions(content, file) {
  const regexes =
    DEFINITION_PATTERNS[detectLanguage(file)] || DEFINITION_PATTERNS.javascript;
  const found = new Set();

  for (const regex of regexes) {
    // The regexes are shared and global, so every scan starts from offset 0.
    regex.lastIndex = 0;

    for (let hit = regex.exec(content); hit !== null; hit = regex.exec(content)) {
      const symbol = hit[1];
      if (!symbol || symbol.length <= 1 || BUILTIN_EXCLUSIONS.has(symbol)) {
        continue;
      }
      found.add(symbol);
    }
  }

  return [...found];
}
|
|
114
|
+
|
|
115
|
+
/**
 * Collect the names of functions that appear to be called in a source text.
 *
 * Strings and comments are blanked first so identifiers inside them are not
 * reported. Every remaining `identifier(` occurrence counts as a call unless
 * the identifier is a single character or a known keyword/built-in.
 *
 * @param {string} content - Source text to scan.
 * @param {string} file - Path of the file; forwarded to the comment/string stripper.
 * @returns {string[]} Unique callee names in first-seen order.
 */
export function extractCalls(content, file) {
  const calls = new Set();

  // Remove string literals and comments to avoid false positives
  const cleanContent = removeStringsAndComments(content, file);

  CALL_PATTERN.lastIndex = 0;
  let match;
  while ((match = CALL_PATTERN.exec(cleanContent)) !== null) {
    const name = match[1];
    if (!name || name.length <= 1) continue;

    // BUILTIN_EXCLUSIONS is case-sensitive and contains mixed-case entries
    // ("parseInt", "Map", "Promise", ...). Looking up only the lower-cased name
    // could never match those, so the exact spelling is checked first. The
    // lower-cased lookup is kept so that nothing previously excluded reappears.
    const isBuiltin =
      BUILTIN_EXCLUSIONS.has(name) || BUILTIN_EXCLUSIONS.has(name.toLowerCase());
    if (!isBuiltin) {
      calls.add(name);
    }
  }

  return Array.from(calls);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Strip comments and blank out string literals to improve extraction accuracy.
 *
 * The text is scanned in a single left-to-right pass, so whichever construct
 * starts first wins: a `//` or `#` inside a string literal is not treated as a
 * comment, and a quote inside a comment does not open a string.
 *
 * - Python files (".py"/".pyw"): `#` comments and triple-quoted strings are
 *   removed. `//` is left alone because it is the floor-division operator.
 * - All other files: `//` line comments and C-style block comments are removed.
 * - In every file, "..." / '...' / `...` literals are replaced by an empty
 *   literal of the same quote style.
 *
 * This is a heuristic, not a parser: regex literals, nested template
 * expressions and language-specific quoting rules are not understood.
 *
 * @param {string} content - Source text to clean.
 * @param {string} file - Path of the file; only its extension is inspected.
 * @returns {string} The cleaned text.
 */
function removeStringsAndComments(content, file) {
  const ext = path.extname(file).toLowerCase();
  const isPython = ext === ".py" || ext === ".pyw";

  // Alternation order matters: triple-quoted strings must be tried before
  // ordinary quotes so that `"""` is not read as an empty string plus a quote.
  const tokens = isPython
    ? /"""[\s\S]*?"""|'''[\s\S]*?'''|#.*|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|`(?:[^`\\]|\\.)*`/g
    : /\/\/.*|\/\*[\s\S]*?\*\/|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|`(?:[^`\\]|\\.)*`/g;

  return content.replace(tokens, (token) => {
    const opener = token[0];

    // Comments disappear entirely.
    if (opener === "/" || opener === "#") return "";

    // Python triple-quoted strings (docstrings) disappear entirely as well.
    if (isPython && (token.startsWith('"""') || token.startsWith("'''"))) {
      return "";
    }

    // Ordinary string literal: keep an empty literal with the same quotes.
    return opener + opener;
  });
}
|
|
163
|
+
|
|
164
|
+
/**
 * Extract the definitions and the outgoing calls of a single file.
 *
 * @param {string} content - Source text of the file.
 * @param {string} file - Path of the file; used for language detection.
 * @returns {{definitions: string[], calls: string[]}} `definitions` holds every
 *   name defined in the file; `calls` holds the called names that are NOT
 *   defined in the same file.
 */
export function extractCallData(content, file) {
  const definitions = extractDefinitions(content, file);
  const localNames = new Set(definitions);

  // Keep only calls that point outside this file.
  const calls = [];
  for (const callee of extractCalls(content, file)) {
    if (!localNames.has(callee)) {
      calls.push(callee);
    }
  }

  return { definitions, calls };
}
|
|
180
|
+
|
|
181
|
+
/**
 * Build lookup tables describing which files define and call which symbols.
 *
 * @param {Map<string, {definitions: string[], calls: string[]}>} fileCallData
 *   Per-file extraction results, keyed by file.
 * @returns {{defines: Map<string, string[]>, calledBy: Map<string, string[]>, fileCalls: Map<string, string[]>}}
 *   `defines` maps a symbol to the files defining it, `calledBy` maps a symbol
 *   to the files calling it, and `fileCalls` maps a file to the `calls` array
 *   it was given (stored by reference, not copied).
 */
export function buildCallGraph(fileCallData) {
  const defines = new Map();
  const calledBy = new Map();
  const fileCalls = new Map();

  // Append `file` to the list stored under `symbol`, creating it on first use.
  const record = (index, symbol, file) => {
    const files = index.get(symbol);
    if (files === undefined) {
      index.set(symbol, [file]);
    } else {
      files.push(file);
    }
  };

  for (const [file, data] of fileCallData.entries()) {
    for (const symbol of data.definitions) {
      record(defines, symbol, file);
    }

    fileCalls.set(file, data.calls);
    for (const symbol of data.calls) {
      record(calledBy, symbol, file);
    }
  }

  return { defines, calledBy, fileCalls };
}
|
|
210
|
+
|
|
211
|
+
/**
 * Get files related to a set of symbols (callers + callees).
 *
 * Starting from `symbols`, every file that defines or calls one of them is a
 * hop-0 (direct) relation. For each further hop, the symbols called by the
 * files found so far are looked up in the same way. A file's score is
 * `1 / (hop + 1)` for the smallest hop at which it is reached.
 *
 * The traversal is breadth-first, so a file is always scored by its shortest
 * distance. A depth-first walk could reach a file through a deeper path first
 * and then refuse to upgrade it when a later starting symbol referenced it
 * directly.
 *
 * @param {{defines: Map<string, string[]>, calledBy: Map<string, string[]>, fileCalls: Map<string, string[]>}} callGraph
 *   Graph whose three lookup Maps are queried with `.get()`.
 * @param {Iterable<string>} symbols - Starting symbols; a missing value is
 *   treated as empty.
 * @param {number} [maxHops=1] - Highest hop to score (0 = direct relations only).
 * @returns {Map<string, number>} file -> proximity score (1 = direct, 0.5 = one hop away, ...).
 */
export function getRelatedFiles(callGraph, symbols, maxHops = 1) {
  const related = new Map();
  const seenSymbols = new Set(); // symbols already looked up at an earlier hop
  const expandedFiles = new Set(); // files whose calls were already queued

  let frontier = new Set(symbols ?? []);

  for (let hop = 0; hop <= maxHops && frontier.size > 0; hop++) {
    const score = 1 / (hop + 1); // Decay with distance
    const nextFrontier = new Set();

    for (const symbol of frontier) {
      if (seenSymbols.has(symbol)) continue;
      seenSymbols.add(symbol);

      const definers = callGraph.defines.get(symbol) || [];
      const callers = callGraph.calledBy.get(symbol) || [];

      for (const file of [...definers, ...callers]) {
        // Breadth-first order guarantees the first score seen is the highest.
        if (!related.has(file)) {
          related.set(file, score);
        }

        // Queue what this file calls for the next hop, once per file.
        if (hop < maxHops && !expandedFiles.has(file)) {
          expandedFiles.add(file);
          for (const callee of callGraph.fileCalls.get(file) || []) {
            nextFrontier.add(callee);
          }
        }
      }
    }

    frontier = nextFrontier;
  }

  return related;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Pull declared symbol names (functions, classes, variables) out of a snippet.
 *
 * Recognises `function name`, `class Name`, `def name` and
 * `const|let|var name =`. Names of two characters or fewer are ignored.
 *
 * @param {string} content - Text to scan, e.g. the content of a search result.
 * @returns {string[]} Unique names in first-seen order, grouped by pattern.
 */
export function extractSymbolsFromContent(content) {
  // Fresh regex objects on every call, so each scan starts at offset 0.
  const declarationRegexes = [
    /function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g,
    /class\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g,
    /def\s+([a-zA-Z_][a-zA-Z0-9_]*)/g,
    /(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=/g
  ];
  const names = new Set();

  for (const regex of declarationRegexes) {
    for (let hit = regex.exec(content); hit !== null; hit = regex.exec(content)) {
      const name = hit[1];
      if (!name || name.length <= 2) continue;
      names.add(name);
    }
  }

  return [...names];
}
|
package/lib/config.js
CHANGED
|
@@ -67,7 +67,20 @@ const DEFAULT_CONFIG = {
|
|
|
67
67
|
exactMatchBoost: 1.5,
|
|
68
68
|
recencyBoost: 0.1, // Boost for recently modified files (max 0.1 added to score)
|
|
69
69
|
recencyDecayDays: 30, // After this many days, recency boost is 0
|
|
70
|
-
smartIndexing: true
|
|
70
|
+
smartIndexing: true,
|
|
71
|
+
callGraphEnabled: true, // Enable call graph extraction for proximity boosting
|
|
72
|
+
callGraphBoost: 0.15, // Boost for files related via call graph (0-1)
|
|
73
|
+
callGraphMaxHops: 1, // How many levels of calls to follow (1 = direct only)
|
|
74
|
+
annEnabled: true,
|
|
75
|
+
annMinChunks: 5000,
|
|
76
|
+
annMinCandidates: 50,
|
|
77
|
+
annMaxCandidates: 200,
|
|
78
|
+
annCandidateMultiplier: 20,
|
|
79
|
+
annEfConstruction: 200,
|
|
80
|
+
annEfSearch: 64,
|
|
81
|
+
annM: 16,
|
|
82
|
+
annIndexCache: true,
|
|
83
|
+
annMetric: "cosine"
|
|
71
84
|
};
|
|
72
85
|
|
|
73
86
|
let config = { ...DEFAULT_CONFIG };
|
|
@@ -84,10 +97,12 @@ export async function loadConfig(workspaceDir = null) {
|
|
|
84
97
|
configPath = path.join(baseDir, "config.json");
|
|
85
98
|
console.error(`[Config] Workspace mode: ${baseDir}`);
|
|
86
99
|
} else {
|
|
87
|
-
// Server mode: load config from server directory
|
|
100
|
+
// Server mode: load config from server directory for global settings,
|
|
101
|
+
// but use process.cwd() as base for searching if not specified otherwise
|
|
88
102
|
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
|
|
89
|
-
|
|
90
|
-
configPath = path.join(
|
|
103
|
+
const serverDir = path.resolve(scriptDir, '..');
|
|
104
|
+
configPath = path.join(serverDir, "config.json");
|
|
105
|
+
baseDir = process.cwd();
|
|
91
106
|
}
|
|
92
107
|
|
|
93
108
|
let userConfig = {};
|
|
@@ -95,23 +110,24 @@ export async function loadConfig(workspaceDir = null) {
|
|
|
95
110
|
const configData = await fs.readFile(configPath, "utf-8");
|
|
96
111
|
userConfig = JSON.parse(configData);
|
|
97
112
|
} catch (configError) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
113
|
+
// If config not found in server dir, try CWD
|
|
114
|
+
if (!workspaceDir) {
|
|
115
|
+
try {
|
|
116
|
+
const localConfigPath = path.join(baseDir, "config.json");
|
|
117
|
+
const configData = await fs.readFile(localConfigPath, "utf-8");
|
|
118
|
+
userConfig = JSON.parse(configData);
|
|
119
|
+
configPath = localConfigPath;
|
|
120
|
+
} catch {
|
|
121
|
+
// ignore
|
|
122
|
+
}
|
|
102
123
|
}
|
|
103
124
|
}
|
|
104
125
|
|
|
105
126
|
config = { ...DEFAULT_CONFIG, ...userConfig };
|
|
106
127
|
|
|
107
|
-
// Set
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
config.cacheDirectory = path.join(baseDir, ".smart-coding-cache");
|
|
111
|
-
} else {
|
|
112
|
-
config.searchDirectory = path.resolve(baseDir, config.searchDirectory);
|
|
113
|
-
config.cacheDirectory = path.resolve(baseDir, config.cacheDirectory);
|
|
114
|
-
}
|
|
128
|
+
// Set search and cache directories
|
|
129
|
+
config.searchDirectory = baseDir;
|
|
130
|
+
config.cacheDirectory = path.join(baseDir, ".smart-coding-cache");
|
|
115
131
|
|
|
116
132
|
// Smart project detection
|
|
117
133
|
if (config.smartIndexing !== false) {
|
|
@@ -239,6 +255,97 @@ export async function loadConfig(workspaceDir = null) {
|
|
|
239
255
|
}
|
|
240
256
|
}
|
|
241
257
|
|
|
258
|
+
if (process.env.SMART_CODING_ANN_ENABLED !== undefined) {
|
|
259
|
+
const value = process.env.SMART_CODING_ANN_ENABLED;
|
|
260
|
+
if (value === 'true' || value === 'false') {
|
|
261
|
+
config.annEnabled = value === 'true';
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
if (process.env.SMART_CODING_ANN_MIN_CHUNKS !== undefined) {
|
|
266
|
+
const value = parseInt(process.env.SMART_CODING_ANN_MIN_CHUNKS, 10);
|
|
267
|
+
if (!isNaN(value) && value >= 0) {
|
|
268
|
+
config.annMinChunks = value;
|
|
269
|
+
} else {
|
|
270
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_MIN_CHUNKS: ${process.env.SMART_CODING_ANN_MIN_CHUNKS}, using default`);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
if (process.env.SMART_CODING_ANN_MIN_CANDIDATES !== undefined) {
|
|
275
|
+
const value = parseInt(process.env.SMART_CODING_ANN_MIN_CANDIDATES, 10);
|
|
276
|
+
if (!isNaN(value) && value >= 0) {
|
|
277
|
+
config.annMinCandidates = value;
|
|
278
|
+
} else {
|
|
279
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_MIN_CANDIDATES: ${process.env.SMART_CODING_ANN_MIN_CANDIDATES}, using default`);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
if (process.env.SMART_CODING_ANN_MAX_CANDIDATES !== undefined) {
|
|
284
|
+
const value = parseInt(process.env.SMART_CODING_ANN_MAX_CANDIDATES, 10);
|
|
285
|
+
if (!isNaN(value) && value > 0) {
|
|
286
|
+
config.annMaxCandidates = value;
|
|
287
|
+
} else {
|
|
288
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_MAX_CANDIDATES: ${process.env.SMART_CODING_ANN_MAX_CANDIDATES}, using default`);
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
if (process.env.SMART_CODING_ANN_CANDIDATE_MULTIPLIER !== undefined) {
|
|
293
|
+
const value = parseFloat(process.env.SMART_CODING_ANN_CANDIDATE_MULTIPLIER);
|
|
294
|
+
if (!isNaN(value) && value > 0) {
|
|
295
|
+
config.annCandidateMultiplier = value;
|
|
296
|
+
} else {
|
|
297
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_CANDIDATE_MULTIPLIER: ${process.env.SMART_CODING_ANN_CANDIDATE_MULTIPLIER}, using default`);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
if (process.env.SMART_CODING_ANN_EF_CONSTRUCTION !== undefined) {
|
|
302
|
+
const value = parseInt(process.env.SMART_CODING_ANN_EF_CONSTRUCTION, 10);
|
|
303
|
+
if (!isNaN(value) && value > 0) {
|
|
304
|
+
config.annEfConstruction = value;
|
|
305
|
+
} else {
|
|
306
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_EF_CONSTRUCTION: ${process.env.SMART_CODING_ANN_EF_CONSTRUCTION}, using default`);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
if (process.env.SMART_CODING_ANN_EF_SEARCH !== undefined) {
|
|
311
|
+
const value = parseInt(process.env.SMART_CODING_ANN_EF_SEARCH, 10);
|
|
312
|
+
if (!isNaN(value) && value > 0) {
|
|
313
|
+
config.annEfSearch = value;
|
|
314
|
+
} else {
|
|
315
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_EF_SEARCH: ${process.env.SMART_CODING_ANN_EF_SEARCH}, using default`);
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
if (process.env.SMART_CODING_ANN_M !== undefined) {
|
|
320
|
+
const value = parseInt(process.env.SMART_CODING_ANN_M, 10);
|
|
321
|
+
if (!isNaN(value) && value > 0 && value <= 64) {
|
|
322
|
+
config.annM = value;
|
|
323
|
+
} else {
|
|
324
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_M: ${process.env.SMART_CODING_ANN_M}, using default`);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
if (process.env.SMART_CODING_ANN_INDEX_CACHE !== undefined) {
|
|
329
|
+
const value = process.env.SMART_CODING_ANN_INDEX_CACHE;
|
|
330
|
+
if (value === 'true' || value === 'false') {
|
|
331
|
+
config.annIndexCache = value === 'true';
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
if (process.env.SMART_CODING_ANN_METRIC !== undefined) {
|
|
336
|
+
const value = process.env.SMART_CODING_ANN_METRIC.trim().toLowerCase();
|
|
337
|
+
if (value === "cosine" || value === "ip" || value === "l2") {
|
|
338
|
+
config.annMetric = value;
|
|
339
|
+
} else {
|
|
340
|
+
console.error(`[Config] Invalid SMART_CODING_ANN_METRIC: ${process.env.SMART_CODING_ANN_METRIC}, using default`);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
if (config.annMetric !== "cosine") {
|
|
345
|
+
console.error(`[Config] ANN metric locked to cosine, overriding "${config.annMetric}"`);
|
|
346
|
+
config.annMetric = "cosine";
|
|
347
|
+
}
|
|
348
|
+
|
|
242
349
|
// Safety cap for auto workers
|
|
243
350
|
if (config.workerThreads === 'auto') {
|
|
244
351
|
// Cap at 4 workers max by default to prevent OOM (each model ~150MB)
|
package/lib/project-detector.js
CHANGED
|
@@ -10,52 +10,65 @@ export class ProjectDetector {
|
|
|
10
10
|
|
|
11
11
|
async detectProjectTypes() {
|
|
12
12
|
const markerFiles = Object.keys(FILE_TYPE_MAP);
|
|
13
|
-
|
|
14
|
-
for (const marker of markerFiles) {
|
|
15
|
-
// Handle wildcard patterns like *.csproj
|
|
16
|
-
if (marker.includes('*')) {
|
|
17
|
-
await this.detectWithWildcard(marker);
|
|
18
|
-
} else {
|
|
19
|
-
await this.detectExactFile(marker);
|
|
20
|
-
}
|
|
21
|
-
}
|
|
13
|
+
const discoveredTypes = new Map(); // type -> first marker found
|
|
22
14
|
|
|
23
|
-
|
|
24
|
-
|
|
15
|
+
const checkDir = async (dir, depth) => {
|
|
16
|
+
if (depth > 2) return;
|
|
25
17
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
18
|
+
const items = await fs.readdir(dir, { withFileTypes: true }).catch(() => []);
|
|
19
|
+
const itemNames = items.map(i => i.name);
|
|
20
|
+
const itemSet = new Set(itemNames);
|
|
21
|
+
|
|
22
|
+
for (const marker of markerFiles) {
|
|
23
|
+
let found = false;
|
|
24
|
+
if (marker.includes('*')) {
|
|
25
|
+
const regex = new RegExp('^' + marker.replace('*', '.*') + '$');
|
|
26
|
+
found = itemNames.some(file => regex.test(file));
|
|
27
|
+
} else {
|
|
28
|
+
found = itemSet.has(marker);
|
|
29
|
+
}
|
|
37
30
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
for (const file of files) {
|
|
44
|
-
if (regex.test(file)) {
|
|
45
|
-
const projectType = FILE_TYPE_MAP[pattern];
|
|
46
|
-
this.detectedTypes.add(projectType);
|
|
47
|
-
console.error(`[Detector] Detected ${projectType} project (${file})`);
|
|
48
|
-
break;
|
|
31
|
+
if (found) {
|
|
32
|
+
const type = FILE_TYPE_MAP[marker];
|
|
33
|
+
if (!discoveredTypes.has(type)) {
|
|
34
|
+
discoveredTypes.set(type, path.relative(this.searchDirectory, path.join(dir, marker)));
|
|
35
|
+
}
|
|
49
36
|
}
|
|
50
37
|
}
|
|
51
|
-
|
|
52
|
-
//
|
|
38
|
+
|
|
39
|
+
// Recurse into subdirectories
|
|
40
|
+
if (depth < 2) {
|
|
41
|
+
for (const item of items) {
|
|
42
|
+
if (item.isDirectory()) {
|
|
43
|
+
const name = item.name;
|
|
44
|
+
if (name.startsWith('.') || [
|
|
45
|
+
'node_modules', 'dist', 'build', 'target', 'vendor', // Build outputs
|
|
46
|
+
'coverage', 'htmlcov', // Test coverage
|
|
47
|
+
'typings', 'nltk_data', 'secrets', // Data/secrets
|
|
48
|
+
'venv', 'env', // Python envs (non-dot)
|
|
49
|
+
'__pycache__', 'eggs', '.eggs' // Python artifacts
|
|
50
|
+
].includes(name)) {
|
|
51
|
+
continue;
|
|
52
|
+
}
|
|
53
|
+
await checkDir(path.join(dir, name), depth + 1);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
await checkDir(this.searchDirectory, 0);
|
|
60
|
+
|
|
61
|
+
for (const [type, marker] of discoveredTypes) {
|
|
62
|
+
this.detectedTypes.add(type);
|
|
63
|
+
console.error(`[Detector] Detected ${type} project (${marker})`);
|
|
53
64
|
}
|
|
65
|
+
|
|
66
|
+
return Array.from(this.detectedTypes);
|
|
54
67
|
}
|
|
55
68
|
|
|
56
69
|
getSmartIgnorePatterns() {
|
|
57
70
|
const patterns = [...IGNORE_PATTERNS.common];
|
|
58
|
-
|
|
71
|
+
|
|
59
72
|
for (const type of this.detectedTypes) {
|
|
60
73
|
if (IGNORE_PATTERNS[type]) {
|
|
61
74
|
patterns.push(...IGNORE_PATTERNS[type]);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@softerist/heuristic-mcp",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "An enhanced MCP server providing intelligent semantic code search with find-similar-code, recency ranking, and improved chunking. Fork of smart-coding-mcp.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -35,14 +35,8 @@
|
|
|
35
35
|
],
|
|
36
36
|
"author": {
|
|
37
37
|
"name": "Softerist",
|
|
38
|
-
"url": "https://
|
|
38
|
+
"url": "https://github.com/softerist"
|
|
39
39
|
},
|
|
40
|
-
"contributors": [
|
|
41
|
-
{
|
|
42
|
-
"name": "Omar Haris",
|
|
43
|
-
"url": "https://www.linkedin.com/in/omarharis/"
|
|
44
|
-
}
|
|
45
|
-
],
|
|
46
40
|
"repository": {
|
|
47
41
|
"type": "git",
|
|
48
42
|
"url": "https://github.com/softerist/heuristic-mcp"
|
|
@@ -56,6 +50,9 @@
|
|
|
56
50
|
"fdir": "^6.5.0",
|
|
57
51
|
"glob": "^10.3.10"
|
|
58
52
|
},
|
|
53
|
+
"optionalDependencies": {
|
|
54
|
+
"hnswlib-node": "^3.0.0"
|
|
55
|
+
},
|
|
59
56
|
"engines": {
|
|
60
57
|
"node": ">=18.0.0"
|
|
61
58
|
},
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
+
vi.mock('hnswlib-node', () => ({}));
|
|
3
|
+
import { EmbeddingsCache } from '../lib/cache.js';
|
|
4
|
+
import { HybridSearch } from '../features/hybrid-search.js';
|
|
5
|
+
import { DEFAULT_CONFIG } from '../lib/config.js';
|
|
6
|
+
|
|
7
|
+
// Exercises the search path when the optional `hnswlib-node` module provides
// nothing usable (this file mocks it as an empty module).
describe('ANN Fallback (Missing hnswlib-node)', () => {
  const QUERY_VECTOR = [0.1, 0.2, 0.3];

  let cache;
  let hybridSearch;
  let embedder;

  beforeEach(() => {
    // ANN is switched on with a threshold low enough for the 10 chunks below.
    const config = {
      ...DEFAULT_CONFIG,
      enableCache: false,
      cacheDirectory: './test-cache-ann',
      annEnabled: true,
      annMinChunks: 5,
      annIndexCache: false,
      embeddingModel: 'test-model'
    };

    // Embedder stub: always resolves to the same 3-dimensional vector.
    embedder = vi.fn().mockResolvedValue({
      data: new Float32Array([0.1, 0.2, 0.3])
    });

    cache = new EmbeddingsCache(config);

    // Ten identical dummy chunks, one per file.
    const chunks = Array.from({ length: 10 }, (_, i) => ({
      file: `file${i}.js`,
      content: `content ${i}`,
      startLine: 1,
      endLine: 5,
      vector: [0.1, 0.2, 0.3]
    }));
    cache.setVectorStore(chunks);

    hybridSearch = new HybridSearch(embedder, cache, config);
  });

  it('should fall back to linear search when ANN index is unavailable', async () => {
    const query = "test query";
    const maxResults = 5;

    const outcome = await hybridSearch.search(query, maxResults);

    expect(outcome).toBeDefined();
    expect(outcome.results.length).toBe(5);
    expect(embedder).toHaveBeenCalledWith(query, expect.any(Object));

    // The search produced results even though a direct ANN query yields null.
    const annAttempt = await cache.queryAnn(QUERY_VECTOR, 5);
    expect(annAttempt).toBeNull();
  });

  it('should handle ANN loading failure gracefully', async () => {
    const annIndex = await cache.ensureAnnIndex();
    expect(annIndex).toBeNull();

    const annResults = await cache.queryAnn(QUERY_VECTOR, 5);
    expect(annResults).toBeNull();
  });
});
|