@softerist/heuristic-mcp 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -9
- package/features/index-codebase.js +38 -11
- package/lib/cache.js +24 -0
- package/lib/call-graph.js +6 -6
- package/lib/config.js +18 -0
- package/package.json +1 -1
- package/example.png +0 -0
- package/how-its-works.png +0 -0
package/README.md
CHANGED
|
@@ -13,15 +13,78 @@ AI coding assistants work better when they can find relevant code quickly. Tradi
|
|
|
13
13
|
|
|
14
14
|
This MCP server solves that by indexing your codebase with AI embeddings. Your AI assistant can search by meaning instead of exact keywords, finding relevant code even when the terminology differs.
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
## Example: Semantic Search in Action
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
**User Query:** "How does the vector cache work?"
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
### MCP Tool Call
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"maxResults": 5,
|
|
25
|
+
"query": "vector store embeddings cache save load"
|
|
26
|
+
}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Output
|
|
30
|
+
|
|
31
|
+
**Result 1 (Relevance: 72.5%)**
|
|
32
|
+
|
|
33
|
+
**File:** `lib/cache.js` **Lines:** 42-59
|
|
34
|
+
|
|
35
|
+
```javascript
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const filtered = rawVectorStore.length - this.vectorStore.length;
|
|
39
|
+
if (filtered > 0) {
|
|
40
|
+
console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
|
|
41
|
+
}
|
|
42
|
+
console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Here's how the **vector cache** works:
|
|
46
|
+
|
|
47
|
+
### Overview
|
|
48
|
+
|
|
49
|
+
The cache system (`lib/cache.js`) stores two things:
|
|
50
|
+
|
|
51
|
+
1. **Vector embeddings** - AI-generated vectors for each code chunk
|
|
52
|
+
2. **File hashes** - MD5 hashes to detect file changes
|
|
53
|
+
|
|
54
|
+
## Why Heuristic MCP is Useful
|
|
55
|
+
|
|
56
|
+
The **Heuristic MCP** is highly effective because it bridges the gap between "having access to files" and "understanding the codebase." Here is why it is useful:
|
|
57
|
+
|
|
58
|
+
### 1. It Solves the "Needle in a Haystack" Problem
|
|
59
|
+
|
|
60
|
+
To answer your technical questions, I didn't have to manually browse 15 files or run generic `grep` commands.
|
|
61
|
+
|
|
62
|
+
- **Without MCP:** I would have listed directories, guessed `lib/utils.js` might be relevant, read the whole file, then checked `index.js`, etc.
|
|
63
|
+
- **With MCP:** I asked *"how does chunking work"* and it instantly returned lines 91-108 of `lib/utils.js`. It acted like a senior engineer pointing me to the exact lines of code.
|
|
64
|
+
|
|
65
|
+
### 2. It Finds "Concepts," Not Just Words
|
|
66
|
+
|
|
67
|
+
Standard tools like `grep` only find exact matches.
|
|
68
|
+
|
|
69
|
+
- If I searched for "authentication" using `grep`, I might miss a function named `verifyUserCredentials`.
|
|
70
|
+
- The **Heuristic MCP** links these concepts. In the test script I analyzed earlier, `authentication` correctly matched with `credentials` because of the vector similarity.
|
|
71
|
+
|
|
72
|
+
### 3. It Saves Context Window Space
|
|
73
|
+
|
|
74
|
+
AI agents have a limited memory (context window).
|
|
75
|
+
|
|
76
|
+
- Instead of reading **every file** to understand the project (which wastes thousands of tokens), the MCP lets me retrieve **only the 5-10 relevant snippets**. This leaves more room for complex reasoning and generating code.
|
|
77
|
+
|
|
78
|
+
### 4. It Is Fast & Private
|
|
79
|
+
|
|
80
|
+
Since it runs a **local embedding model** (Xenova) directly on your machine:
|
|
81
|
+
|
|
82
|
+
- **Latency is near-zero** (<50ms).
|
|
83
|
+
- **Privacy is 100%**: Your source code never leaves your laptop to be indexed by an external cloud service.
|
|
84
|
+
|
|
85
|
+
### Verdict
|
|
86
|
+
|
|
87
|
+
For a developer (or an AI agent) working on a confusing or large project, this tool is a massive productivity booster. It essentially turns the entire codebase into a searchable database of knowledge.
|
|
25
88
|
|
|
26
89
|
**Performance**
|
|
27
90
|
|
|
@@ -148,7 +211,7 @@ Returns: Relevant validation code with file paths and line numbers
|
|
|
148
211
|
**find_similar_code** - Find duplicates or patterns
|
|
149
212
|
|
|
150
213
|
```
|
|
151
|
-
Input: A snippet
|
|
214
|
+
Input: A code snippet (paste the code directly)
|
|
152
215
|
Returns: Other code in the project that looks or functions similarly
|
|
153
216
|
```
|
|
154
217
|
|
|
@@ -178,8 +241,6 @@ When you search, your query is converted to the same vector format. We use a **h
|
|
|
178
241
|
- **Exact Keyword Matching** (BM25-inspired boost)
|
|
179
242
|
- **Recency Boosting** (favoring files you're actively working on)
|
|
180
243
|
|
|
181
|
-

|
|
182
|
-
|
|
183
244
|
## Examples
|
|
184
245
|
|
|
185
246
|
**Natural language search:**
|
|
@@ -547,17 +547,44 @@ export class CodebaseIndexer {
|
|
|
547
547
|
|
|
548
548
|
if (filesToProcess.length === 0) {
|
|
549
549
|
console.error("[Indexer] All files unchanged, nothing to index");
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
550
|
+
|
|
551
|
+
// If we have no call graph data but we have cached files, we should try to rebuild it
|
|
552
|
+
if (this.config.callGraphEnabled && this.cache.getVectorStore().length > 0) {
|
|
553
|
+
// Check for files that are in cache but missing from call graph data
|
|
554
|
+
const cachedFiles = new Set(this.cache.getVectorStore().map(c => c.file));
|
|
555
|
+
const callDataFiles = new Set(this.cache.fileCallData.keys());
|
|
556
|
+
|
|
557
|
+
const missingCallData = [];
|
|
558
|
+
for (const file of cachedFiles) {
|
|
559
|
+
if (!callDataFiles.has(file)) {
|
|
560
|
+
missingCallData.push(file);
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
if (missingCallData.length > 0) {
|
|
565
|
+
console.error(`[Indexer] Found ${missingCallData.length} files missing call graph data, re-indexing...`);
|
|
566
|
+
// Add these files to filesToProcess so they get re-read and re-indexed
|
|
567
|
+
// NOTE: paths are pushed as-is here; files that no longer exist on disk are not filtered out at this point
|
|
568
|
+
for (const file of missingCallData) {
|
|
569
|
+
filesToProcess.push(file);
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// If still empty after checking for missing call data, then we are truly done
|
|
575
|
+
if (filesToProcess.length === 0) {
|
|
576
|
+
this.sendProgress(100, 100, "All files up to date");
|
|
577
|
+
await this.cache.save();
|
|
578
|
+
const vectorStore = this.cache.getVectorStore();
|
|
579
|
+
return {
|
|
580
|
+
skipped: false,
|
|
581
|
+
filesProcessed: 0,
|
|
582
|
+
chunksCreated: 0,
|
|
583
|
+
totalFiles: new Set(vectorStore.map(v => v.file)).size,
|
|
584
|
+
totalChunks: vectorStore.length,
|
|
585
|
+
message: "All files up to date"
|
|
586
|
+
};
|
|
587
|
+
}
|
|
561
588
|
}
|
|
562
589
|
|
|
563
590
|
// Send progress: filtering complete
|
package/lib/cache.js
CHANGED
|
@@ -164,6 +164,19 @@ export class EmbeddingsCache {
|
|
|
164
164
|
this.annIndex = null;
|
|
165
165
|
this.annMeta = null;
|
|
166
166
|
}
|
|
167
|
+
|
|
168
|
+
// Load call-graph data if it exists
|
|
169
|
+
const callGraphFile = path.join(this.config.cacheDirectory, "call-graph.json");
|
|
170
|
+
try {
|
|
171
|
+
const callGraphData = await fs.readFile(callGraphFile, "utf8");
|
|
172
|
+
const parsed = JSON.parse(callGraphData);
|
|
173
|
+
this.fileCallData = new Map(Object.entries(parsed));
|
|
174
|
+
if (this.config.verbose) {
|
|
175
|
+
console.error(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
|
|
176
|
+
}
|
|
177
|
+
} catch {
|
|
178
|
+
// Call-graph file doesn't exist yet, that's OK
|
|
179
|
+
}
|
|
167
180
|
} catch (error) {
|
|
168
181
|
console.error("[Cache] Failed to load cache:", error.message);
|
|
169
182
|
}
|
|
@@ -189,6 +202,12 @@ export class EmbeddingsCache {
|
|
|
189
202
|
fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
|
|
190
203
|
fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2))
|
|
191
204
|
]);
|
|
205
|
+
|
|
206
|
+
// Save call-graph data
|
|
207
|
+
if (this.fileCallData.size > 0) {
|
|
208
|
+
const callGraphFile = path.join(this.config.cacheDirectory, "call-graph.json");
|
|
209
|
+
await fs.writeFile(callGraphFile, JSON.stringify(Object.fromEntries(this.fileCallData), null, 2));
|
|
210
|
+
}
|
|
192
211
|
} catch (error) {
|
|
193
212
|
console.error("[Cache] Failed to save cache:", error.message);
|
|
194
213
|
} finally {
|
|
@@ -220,6 +239,8 @@ export class EmbeddingsCache {
|
|
|
220
239
|
removeFileFromStore(file) {
|
|
221
240
|
this.vectorStore = this.vectorStore.filter(chunk => chunk.file !== file);
|
|
222
241
|
this.invalidateAnnIndex();
|
|
242
|
+
// Also clear call-graph data for this file
|
|
243
|
+
this.removeFileCallData(file);
|
|
223
244
|
}
|
|
224
245
|
|
|
225
246
|
|
|
@@ -419,6 +440,9 @@ export class EmbeddingsCache {
|
|
|
419
440
|
this.vectorStore = [];
|
|
420
441
|
this.fileHashes = new Map();
|
|
421
442
|
this.invalidateAnnIndex();
|
|
443
|
+
// Clear call-graph data
|
|
444
|
+
this.fileCallData.clear();
|
|
445
|
+
this.callGraph = null;
|
|
422
446
|
console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
|
|
423
447
|
} catch (error) {
|
|
424
448
|
console.error("[Cache] Failed to clear cache:", error.message);
|
package/lib/call-graph.js
CHANGED
|
@@ -48,20 +48,20 @@ const DEFINITION_PATTERNS = {
|
|
|
48
48
|
// Pattern for function calls (language-agnostic, catches most cases)
|
|
49
49
|
const CALL_PATTERN = /\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(/g;
|
|
50
50
|
|
|
51
|
-
// Common built-ins to exclude from call detection
|
|
51
|
+
// Common built-ins to exclude from call detection (all lowercase for case-insensitive matching)
|
|
52
52
|
const BUILTIN_EXCLUSIONS = new Set([
|
|
53
53
|
// JavaScript
|
|
54
54
|
"if", "for", "while", "switch", "catch", "function", "async", "await",
|
|
55
55
|
"return", "throw", "new", "typeof", "instanceof", "delete", "void",
|
|
56
56
|
"console", "require", "import", "export", "super", "this",
|
|
57
57
|
// Common functions that aren't meaningful for call graphs
|
|
58
|
-
"
|
|
59
|
-
"
|
|
58
|
+
"parseint", "parsefloat", "string", "number", "boolean", "array", "object",
|
|
59
|
+
"map", "set", "promise", "error", "json", "math", "date", "regexp",
|
|
60
60
|
// Python
|
|
61
61
|
"def", "class", "print", "len", "range", "str", "int", "float", "list", "dict",
|
|
62
|
-
"tuple", "
|
|
62
|
+
"tuple", "bool", "type", "isinstance", "hasattr", "getattr", "setattr",
|
|
63
63
|
// Go
|
|
64
|
-
"func", "make", "append", "
|
|
64
|
+
"func", "make", "append", "cap", "panic", "recover",
|
|
65
65
|
// Control flow that looks like function calls
|
|
66
66
|
"else", "try", "finally", "with", "assert", "raise", "yield"
|
|
67
67
|
]);
|
|
@@ -103,7 +103,7 @@ export function extractDefinitions(content, file) {
|
|
|
103
103
|
let match;
|
|
104
104
|
while ((match = pattern.exec(content)) !== null) {
|
|
105
105
|
const name = match[1];
|
|
106
|
-
if (name && name.length > 1 && !BUILTIN_EXCLUSIONS.has(name)) {
|
|
106
|
+
if (name && name.length > 1 && !BUILTIN_EXCLUSIONS.has(name.toLowerCase())) {
|
|
107
107
|
definitions.add(name);
|
|
108
108
|
}
|
|
109
109
|
}
|
package/lib/config.js
CHANGED
|
@@ -208,6 +208,24 @@ export async function loadConfig(workspaceDir = null) {
|
|
|
208
208
|
}
|
|
209
209
|
}
|
|
210
210
|
|
|
211
|
+
if (process.env.SMART_CODING_RECENCY_BOOST !== undefined) {
|
|
212
|
+
const value = parseFloat(process.env.SMART_CODING_RECENCY_BOOST);
|
|
213
|
+
if (!isNaN(value) && value >= 0 && value <= 1) {
|
|
214
|
+
config.recencyBoost = value;
|
|
215
|
+
} else {
|
|
216
|
+
console.error(`[Config] Invalid SMART_CODING_RECENCY_BOOST: ${process.env.SMART_CODING_RECENCY_BOOST}, using default`);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
if (process.env.SMART_CODING_RECENCY_DECAY_DAYS !== undefined) {
|
|
221
|
+
const value = parseInt(process.env.SMART_CODING_RECENCY_DECAY_DAYS, 10);
|
|
222
|
+
if (!isNaN(value) && value > 0 && value <= 365) {
|
|
223
|
+
config.recencyDecayDays = value;
|
|
224
|
+
} else {
|
|
225
|
+
console.error(`[Config] Invalid SMART_CODING_RECENCY_DECAY_DAYS: ${process.env.SMART_CODING_RECENCY_DECAY_DAYS}, using default`);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
211
229
|
if (process.env.SMART_CODING_WATCH_FILES !== undefined) {
|
|
212
230
|
const value = process.env.SMART_CODING_WATCH_FILES;
|
|
213
231
|
if (value === 'true' || value === 'false') {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@softerist/heuristic-mcp",
|
|
3
|
-
"version": "2.1.0",
|
|
3
|
+
"version": "2.1.2",
|
|
4
4
|
"description": "An enhanced MCP server providing intelligent semantic code search with find-similar-code, recency ranking, and improved chunking. Fork of smart-coding-mcp.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
package/example.png
DELETED
|
Binary file
|
package/how-its-works.png
DELETED
|
Binary file
|