codedeep-mcp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -8
- package/dist/config.js +1 -1
- package/dist/fs-util.js +48 -0
- package/dist/git/git-service.js +27 -0
- package/dist/index.js +15 -1
- package/dist/indexer/code-index.js +91 -22
- package/dist/indexer/parser.js +100 -25
- package/dist/indexer/pipeline.js +64 -4
- package/dist/indexer/scanner.js +6 -4
- package/dist/indexer/watcher.js +9 -0
- package/dist/notes/note-store.js +513 -0
- package/dist/notes/staleness.js +168 -0
- package/dist/notes/types.js +19 -0
- package/dist/server.js +105 -16
- package/dist/tools/common.js +51 -41
- package/dist/tools/find-references.js +9 -11
- package/dist/tools/forget.js +26 -0
- package/dist/tools/get-context.js +149 -18
- package/dist/tools/impact.js +18 -5
- package/dist/tools/note-render.js +57 -0
- package/dist/tools/overview.js +76 -3
- package/dist/tools/recall.js +165 -0
- package/dist/tools/remember.js +207 -0
- package/dist/tools/search-structure.js +3 -2
- package/package.json +4 -2
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import { createHash, randomBytes } from 'node:crypto';
|
|
2
|
+
import { hashContent } from '../indexer/pipeline.js';
|
|
3
|
+
import { errMsg } from '../logger.js';
|
|
4
|
+
import { normalizeSymbolQuery, qualifiedSymbolName } from '../notes/note-store.js';
|
|
5
|
+
import { normalizeFilePath, pickByLine, readinessBanner, safeReadIndexedFile, textResponse, } from './common.js';
|
|
6
|
+
const MAX_NOTE_CHARS = 4000;
|
|
7
|
+
// "file" | "file:symbol" | "file:symbol:line". The file/symbol boundary is the
|
|
8
|
+
// FIRST ':' that follows the last path separator, so neither a path's own ':'
|
|
9
|
+
// (a Windows drive letter "C:\proj\a.ts") nor a symbol's "::" scope / "." member
|
|
10
|
+
// separators are mis-split. The symbol may be a simple name, "Class.member", or
|
|
11
|
+
// "Ns::Type::method" (resolved below); a trailing ":<digits>" is the 1-based line.
|
|
12
|
+
/**
 * Split a raw anchor string into its file, symbol and line parts.
 *
 * Accepted shapes: "file", "file:symbol", "file:symbol:line", "file:line" and
 * "file:line:col" (the column is discarded). The file/symbol boundary is the
 * first ':' after the last '/' or '\', so a Windows drive letter
 * ("C:\proj\a.ts") and "::" / "." separators inside the symbol are not split.
 *
 * @param {string} raw - Anchor text as supplied by the caller.
 * @returns {{file: string, symbol: (string|undefined), line: (number|undefined)} | null}
 *   `null` when `raw` is blank; otherwise the parsed parts. `file` may be the
 *   empty string (e.g. ":foo") — the caller is expected to reject that.
 *   `line` is a 1-based positive integer or `undefined`.
 */
function parseAnchor(raw) {
    const trimmed = raw.trim();
    if (trimmed.length === 0)
        return null;
    // A pin is a 1-based line: ":0" is not a pin, and neither is a digit run too
    // large to be an exact integer (Number() would yield Infinity or a rounded
    // value, which can never match a real line).
    const toLinePin = (digits) => {
        const n = Number(digits);
        return Number.isSafeInteger(n) && n >= 1 ? n : undefined;
    };
    const lastSep = Math.max(trimmed.lastIndexOf('/'), trimmed.lastIndexOf('\\'));
    // Split at the first ':' after the last path separator. A separator-less
    // "X:rest" is read as file "X" + symbol "rest" rather than a drive-relative
    // path, since anchors are almost always project-relative.
    const splitAt = trimmed.indexOf(':', lastSep + 1);
    if (splitAt === -1)
        return { file: trimmed, symbol: undefined, line: undefined };
    const file = trimmed.slice(0, splitAt).trim();
    let tail = trimmed.slice(splitAt + 1).trim();
    let line;
    // Peel a trailing ":<digits>" — the line pin, OR the column of a
    // "file:line:col" paste. Whitespace after the ':' is tolerated, matching the
    // trimming already applied to the file and symbol parts.
    const lineMatch = tail.match(/:\s*(\d+)$/);
    if (lineMatch) {
        line = toLinePin(lineMatch[1]);
        tail = tail.slice(0, lineMatch.index).trim();
    }
    // A purely numeric remainder is the real LINE, never a symbol. It always
    // replaces whatever was peeled above (which was then a column), even when the
    // remainder itself is not a valid pin — otherwise "file:0:20" would pin
    // line 20, i.e. the column.
    if (/^\d+$/.test(tail)) {
        line = toLinePin(tail);
        tail = '';
    }
    return { file, symbol: tail || undefined, line };
}
|
|
49
|
+
/**
 * Tool entry point: validate the note text, resolve each anchor, persist the
 * note through `deps.notes`, and return a text summary.
 *
 * Every failure — validation, a write block reported by the note store, a bad
 * anchor, or a thrown exception — is returned as an "Error: ..." text response
 * rather than thrown.
 *
 * @param {{note?: string, anchors?: string[]}} args - Tool arguments.
 * @param {object} deps - Collaborators read here: `notes` (load, add,
 *   writeBlockReason), `config.projectRoot`, `index`, `indexer.ready`, and
 *   `git.currentHead()`.
 * @returns {Promise<*>} Whatever `textResponse` produces for the summary or error.
 */
export async function runRemember(args, deps) {
    try {
        const text = (args.note ?? '').trim();
        if (text.length === 0) {
            return textResponse('Error: note must be non-empty.');
        }
        if (text.length > MAX_NOTE_CHARS) {
            return textResponse(`Error: note is too long (${text.length} > ${MAX_NOTE_CHARS} chars). ` +
                `Keep notes focused; split into multiple anchored notes.`);
        }
        // Load before consulting the write-block flag so the flag reflects the
        // outcome of this load attempt rather than an earlier one.
        await deps.notes.load();
        const writeBlock = deps.notes.writeBlockReason;
        if (writeBlock) {
            return textResponse(`Error: ${writeBlock}`);
        }
        // Blank anchor strings are dropped up front. A malformed or out-of-root
        // anchor aborts the whole call; anything resolveAnchor cannot pin is
        // still stored, with a warning line in the summary.
        const requested = (args.anchors ?? []).filter((a) => a.trim().length > 0);
        const resolved = [];
        const summaryLines = [];
        // Shared across anchors so one file is read and hashed at most once.
        const hashByFile = new Map();
        for (const raw of requested) {
            const parsed = parseAnchor(raw);
            if (parsed === null) {
                continue;
            }
            if (parsed.file === '') {
                return textResponse(`Error: anchor "${raw}" is missing a file part (use "file" or "file:symbol").`);
            }
            const rel = normalizeFilePath(parsed.file, deps.config.projectRoot);
            if (rel === null) {
                return textResponse(`Error: anchor "${parsed.file}" is outside the project root.`);
            }
            const outcome = await resolveAnchor(rel, parsed, deps.index, deps.config, hashByFile, deps.indexer.ready);
            resolved.push(outcome.anchor);
            summaryLines.push(outcome.line);
        }
        const head = await deps.git.currentHead();
        const createdAt = new Date().toISOString();
        const note = {
            id: noteId(createdAt, text),
            text,
            createdAt,
            anchors: resolved,
        };
        // `head` is recorded only when there is one; it is added last so the
        // key order of the stored note stays id, text, createdAt, anchors, head.
        if (head) {
            note.head = head;
        }
        await deps.notes.add(note);
        const banner = readinessBanner(deps.indexer.ready);
        const detail = resolved.length === 0
            ? [
                '⚠ No anchors — this note is stored but not staleness-tracked. ' +
                    'Re-run with anchors like "src/auth.ts:authenticate" to track it.',
            ]
            : ['Anchors:', ...summaryLines];
        return textResponse(banner + [`✓ Remembered (note ${note.id}).`, ...detail].join('\n'));
    }
    catch (err) {
        return textResponse(`Error: ${errMsg(err)}`);
    }
}
|
|
114
|
+
/**
 * Build the stored anchor record for one parsed anchor, plus the summary line
 * shown to the caller.
 *
 * The file hash is taken from the file's bytes via `safeReadIndexedFile` (not
 * from the index) and memoised in `hashCache`; an unreadable file is cached as
 * `undefined` and yields an anchor without `fileContentHash`. Symbol details
 * (id, kind, signature) come from `index.getSymbolsInFile(rel)`.
 *
 * @param {string} rel - Project-relative file path.
 * @param {{file: string, symbol?: string, line?: number}} parsed - Parsed anchor.
 * @param {object} index - Code index; only `getSymbolsInFile` is used here.
 * @param {object} config - Passed through to `safeReadIndexedFile`.
 * @param {Map<string, (string|undefined)>} hashCache - Per-call hash memo, keyed by `rel`.
 * @param {boolean} indexReady - Whether startup indexing has completed.
 * @returns {Promise<{anchor: object, line: string}>}
 */
async function resolveAnchor(rel, parsed, index, config, hashCache, indexReady) {
    // Hash the file's current bytes at most once per call; a read failure is
    // memoised too (as undefined) so it is not retried for sibling anchors.
    if (!hashCache.has(rel)) {
        let computed;
        try {
            computed = hashContent(await safeReadIndexedFile(rel, config));
        }
        catch {
            computed = undefined; // missing / unreadable → unverified anchor
        }
        hashCache.set(rel, computed);
    }
    const fileContentHash = hashCache.get(rel);
    const hasHash = fileContentHash !== undefined;
    const anchor = { file: rel };
    if (hasHash) {
        anchor.fileContentHash = fileContentHash;
    }
    const symbol = parsed.symbol;
    // File-only anchor.
    if (symbol === undefined) {
        if (hasHash) {
            return { anchor, line: `- ${rel} — file captured (hash ${fileContentHash})` };
        }
        return { anchor, line: `- ⚠ ${rel} — not readable on disk; stored as an unverified anchor` };
    }
    // Symbol anchor: accept either the simple name or the fully-qualified form
    // "<file>:<normalized symbol>".
    const wantFqn = `${rel}:${normalizeSymbolQuery(symbol)}`;
    const isMatch = (s) => s.name === symbol || s.fqn === wantFqn;
    const candidates = index.getSymbolsInFile(rel).filter(isMatch);
    if (candidates.length === 0) {
        if (indexReady) {
            // Index is complete and has no such symbol: keep the file-level anchor.
            return {
                anchor,
                line: `- ⚠ ${rel}:${symbol} — symbol not found; anchored at file level`,
            };
        }
        // Index still building: the symbol may simply not be parsed yet, so
        // record its normalized name instead of reporting it as missing.
        anchor.symbol = normalizeSymbolQuery(symbol);
        return {
            anchor,
            line: `- ⚠ ${rel}:${symbol} — index still building; not yet resolvable. ` +
                `Anchored by name (file-level staleness for now); re-remember after ` +
                `indexing completes for signature-level tracking.`,
        };
    }
    const ambiguous = candidates.length > 1;
    if (ambiguous && parsed.line === undefined) {
        // Several matches and no line to choose between them: store the
        // normalized name only, with no symbol id.
        anchor.symbol = normalizeSymbolQuery(symbol);
        return {
            anchor,
            line: `- ⚠ ${rel}:${symbol} — ${candidates.length} symbols share this name; ` +
                `anchored by name (add a line, e.g. "${rel}:${symbol}:42", to pin one)`,
        };
    }
    // NOTE(review): assumes pickByLine always returns one of the candidates — confirm.
    const target = ambiguous ? pickByLine(candidates, parsed.line) : candidates[0];
    // Store the qualified name so same-named members of one file stay distinct.
    const qualified = qualifiedSymbolName(target.fqn, rel, target.name);
    Object.assign(anchor, {
        symbol: qualified,
        symbolId: target.id,
        symbolKind: target.kind,
    });
    if (target.signature) {
        anchor.signature = target.signature;
    }
    let hashSuffix = ', file unreadable';
    if (hasHash) {
        hashSuffix = `, hash ${fileContentHash}`;
    }
    return {
        anchor,
        line: `- ${rel}:${qualified} — ${target.kind} #${target.id}${hashSuffix}`,
    };
}
|
|
199
|
+
// A random nonce (not a notes.length sequence — that repeats after a forget,
|
|
200
|
+
// so two same-text notes in the same millisecond could collide) makes the id
|
|
201
|
+
// collision-proof across the store's whole lifetime.
|
|
202
|
+
/**
 * Derive a 16-hex-character note id from the creation timestamp, the note
 * text, and 8 fresh random bytes, so repeated calls with identical arguments
 * still produce different ids.
 *
 * @param {string} createdAt - Creation timestamp string.
 * @param {string} text - Note text.
 * @returns {string} First 16 hex characters of the SHA-1 digest.
 */
function noteId(createdAt, text) {
    const nonce = randomBytes(8).toString('hex');
    const hasher = createHash('sha1');
    // NUL-separated fields: timestamp, text, nonce.
    hasher.update(`${createdAt}\0${text}\0${nonce}`);
    const hex = hasher.digest('hex');
    return hex.slice(0, 16);
}
|
|
@@ -170,8 +170,9 @@ async function runPatternMode(pattern, languages, limit, deps) {
|
|
|
170
170
|
// nothing), so validate by parsing the pattern as code with our own
|
|
171
171
|
// tree-sitter grammars. `hasError` covers both ERROR nodes and
|
|
172
172
|
// zero-width MISSING-token recovery (e.g. `function f() {`), which an
|
|
173
|
-
// ERROR-kind query would let through.
|
|
174
|
-
|
|
173
|
+
// ERROR-kind query would let through. Load ONLY the grammars pattern mode
|
|
174
|
+
// can scan — an argless initParser would pull in all 16.
|
|
175
|
+
await initParser(targets.keys());
|
|
175
176
|
const invalidLangs = [];
|
|
176
177
|
for (const id of targets.keys()) {
|
|
177
178
|
if (!patternParses(id, pattern)) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codedeep-mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "MCP server that gives AI coding agents structural understanding of a codebase — symbols, references, blast radius, cyclomatic/cognitive complexity, and behavioral-git signals across 14 languages (tree-sitter).",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -47,13 +47,15 @@
|
|
|
47
47
|
"prepare": "npm run build"
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@ast-grep/napi": "^0.43.0",
|
|
51
50
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
52
51
|
"minisearch": "^7.1.2",
|
|
53
52
|
"picomatch": "^4.0.4",
|
|
54
53
|
"web-tree-sitter": "^0.26.8",
|
|
55
54
|
"zod": "^4.0.0"
|
|
56
55
|
},
|
|
56
|
+
"optionalDependencies": {
|
|
57
|
+
"@ast-grep/napi": "^0.43.0"
|
|
58
|
+
},
|
|
57
59
|
"devDependencies": {
|
|
58
60
|
"@repomix/tree-sitter-wasms": "^0.1.17",
|
|
59
61
|
"@tree-sitter-grammars/tree-sitter-kotlin": "^1.1.0",
|