codedeep-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,207 @@
1
+ import { createHash, randomBytes } from 'node:crypto';
2
+ import { hashContent } from '../indexer/pipeline.js';
3
+ import { errMsg } from '../logger.js';
4
+ import { normalizeSymbolQuery, qualifiedSymbolName } from '../notes/note-store.js';
5
+ import { normalizeFilePath, pickByLine, readinessBanner, safeReadIndexedFile, textResponse, } from './common.js';
6
+ const MAX_NOTE_CHARS = 4000;
7
+ // "file" | "file:symbol" | "file:symbol:line". The file/symbol boundary is the
8
+ // FIRST ':' that follows the last path separator, so neither a path's own ':'
9
+ // (a Windows drive letter "C:\proj\a.ts") nor a symbol's "::" scope / "." member
10
+ // separators are mis-split. The symbol may be a simple name, "Class.member", or
11
+ // "Ns::Type::method" (resolved below); a trailing ":<digits>" is the 1-based line.
12
/**
 * Split a raw anchor string into its file, symbol and line parts.
 *
 * Accepted shapes: "file", "file:symbol", "file:symbol:line", "file:line"
 * and "file:line:col" (the column is dropped).
 *
 * @param {string} raw - Anchor text as supplied by the caller.
 * @returns {{ file: string, symbol: (string|undefined), line: (number|undefined) } | null}
 *   The parsed parts, or `null` when `raw` is empty after trimming. `line` is a
 *   1-based integer, or `undefined` when no valid pin was given.
 */
function parseAnchor(raw) {
    const trimmed = raw.trim();
    if (trimmed.length === 0) {
        return null;
    }
    // A digit run is a usable pin only when it is a finite integer >= 1
    // (":0" is not a pin; an overlong run that overflows to Infinity is rejected).
    const toPin = (digits) => {
        const n = Number(digits);
        return Number.isInteger(n) && n >= 1 ? n : undefined;
    };
    // Split at the first ':' after the last path separator. A normal Windows
    // absolute path ("C:\proj\a.ts:sym") is handled because its drive ':' precedes
    // the last '\'. A separator-less "X:rest" is read as file "X" + symbol "rest"
    // rather than a drive-relative path.
    const lastSep = Math.max(trimmed.lastIndexOf('/'), trimmed.lastIndexOf('\\'));
    const splitAt = trimmed.indexOf(':', lastSep + 1);
    const file = splitAt === -1 ? trimmed : trimmed.slice(0, splitAt);
    let tail = splitAt === -1 ? '' : trimmed.slice(splitAt + 1);
    let line;
    // Peel a trailing ":<digits>" — the line pin, OR the COLUMN of a
    // "file:line:col" paste (grep --column / editor gutter / stack-trace ref).
    const lineMatch = tail.match(/:(\d+)$/);
    if (lineMatch) {
        line = toPin(lineMatch[1]);
        tail = tail.slice(0, lineMatch.index);
    }
    // A purely-numeric remainder is a line number, not a symbol literally named
    // after a number. It covers "file:line" (nothing peeled above) and
    // "file:line:col", where the value peeled above was the column. The
    // remainder ALWAYS replaces the peeled value — even when the remainder is
    // not a valid pin — so a column can never be mistaken for the line.
    if (/^\d+$/.test(tail)) {
        line = toPin(tail);
        tail = '';
    }
    return { file: file.trim(), symbol: tail.trim() || undefined, line };
}
49
/**
 * Store a note, optionally anchored to files/symbols, and report what was captured.
 *
 * @param {{ note?: string, anchors?: string[] }} args - Note text and raw anchor strings.
 * @param {object} deps - Collaborators read here: `notes` (load / writeBlockReason / add),
 *   `config.projectRoot`, `index`, `indexer.ready`, and `git.currentHead()`.
 * @returns {Promise<*>} Whatever `textResponse` produces: a confirmation with
 *   per-anchor lines, or an "Error: …" message. Failures are reported, never thrown.
 */
export async function runRemember(args, deps) {
    try {
        const text = (args.note ?? '').trim();
        if (text.length === 0) {
            return textResponse('Error: note must be non-empty.');
        }
        if (text.length > MAX_NOTE_CHARS) {
            return textResponse(`Error: note is too long (${text.length} > ${MAX_NOTE_CHARS} chars). ` +
                `Keep notes focused; split into multiple anchored notes.`);
        }
        // Load before consulting the write-block flag, so a prior transient read
        // failure gets retried first.
        await deps.notes.load();
        const blockReason = deps.notes.writeBlockReason;
        if (blockReason) {
            return textResponse(`Error: ${blockReason}`);
        }
        // A bad path aborts the whole call (a silent skip would hide a typo).
        // An unreadable file or unknown symbol is kept as a weaker anchor and
        // flagged in the report instead.
        const usableAnchors = (args.anchors ?? []).filter((entry) => entry.trim().length > 0);
        const anchors = [];
        const reportLines = [];
        // Shared across anchors so each file is read and hashed at most once.
        const hashByFile = new Map();
        for (const raw of usableAnchors) {
            const parsed = parseAnchor(raw);
            if (parsed === null) {
                continue;
            }
            if (parsed.file === '') {
                return textResponse(`Error: anchor "${raw}" is missing a file part (use "file" or "file:symbol").`);
            }
            const rel = normalizeFilePath(parsed.file, deps.config.projectRoot);
            if (rel === null) {
                return textResponse(`Error: anchor "${parsed.file}" is outside the project root.`);
            }
            const resolved = await resolveAnchor(rel, parsed, deps.index, deps.config, hashByFile, deps.indexer.ready);
            anchors.push(resolved.anchor);
            reportLines.push(resolved.line);
        }
        const head = (await deps.git.currentHead()) ?? undefined;
        const createdAt = new Date().toISOString();
        const note = { id: noteId(createdAt, text), text, createdAt, anchors };
        if (head) {
            note.head = head;
        }
        await deps.notes.add(note);
        const banner = readinessBanner(deps.indexer.ready);
        const details = anchors.length === 0
            ? [
                '⚠ No anchors — this note is stored but not staleness-tracked. ' +
                    'Re-run with anchors like "src/auth.ts:authenticate" to track it.',
            ]
            : ['Anchors:', ...reportLines];
        const report = [`✓ Remembered (note ${note.id}).`, ...details];
        return textResponse(banner + report.join('\n'));
    }
    catch (err) {
        return textResponse(`Error: ${errMsg(err)}`);
    }
}
114
/**
 * Build the stored anchor record and its one-line report entry for one parsed anchor.
 *
 * @param {string} rel - Normalized file path of the anchor.
 * @param {{ symbol?: string, line?: number }} parsed - Output of `parseAnchor`.
 * @param {object} index - Symbol index; only `getSymbolsInFile(rel)` is used.
 * @param {object} config - Passed through to `safeReadIndexedFile`.
 * @param {Map<string, (string|undefined)>} hashCache - Per-call cache of file hashes
 *   (`undefined` records a failed read).
 * @param {boolean} indexReady - Falsy while indexing is still running.
 * @returns {Promise<{ anchor: object, line: string }>} The anchor to store and the
 *   human-readable line describing it.
 */
async function resolveAnchor(rel, parsed, index, config, hashCache, indexReady) {
    // The baseline hash is taken from the file on disk rather than from the
    // index, which can lag behind disk; symbol details still come from the index.
    if (!hashCache.has(rel)) {
        let freshHash;
        try {
            freshHash = hashContent(await safeReadIndexedFile(rel, config));
        }
        catch {
            freshHash = undefined; // missing / unreadable → unverified anchor
        }
        hashCache.set(rel, freshHash);
    }
    const fileContentHash = hashCache.get(rel);
    const readable = fileContentHash !== undefined;
    const anchor = readable ? { file: rel, fileContentHash } : { file: rel };
    const wanted = parsed.symbol;

    // File-only anchor.
    if (wanted === undefined) {
        const fileLine = readable
            ? `- ${rel} — file captured (hash ${fileContentHash})`
            : `- ⚠ ${rel} — not readable on disk; stored as an unverified anchor`;
        return { anchor, line: fileLine };
    }

    // Symbol anchor: accept the simple name, or a qualified "Class.member" /
    // "Ns::Type::method" form compared against the symbol's fqn after
    // normalizing the query.
    const label = `${rel}:${wanted}`;
    const wantFqn = `${rel}:${normalizeSymbolQuery(wanted)}`;
    const matches = index
        .getSymbolsInFile(rel)
        .filter((sym) => sym.name === wanted || sym.fqn === wantFqn);

    if (matches.length === 0) {
        if (indexReady) {
            return {
                anchor,
                line: `- ⚠ ${label} — symbol not found; anchored at file level`,
            };
        }
        // Indexing is still in progress, so the symbol may simply not be parsed
        // yet. Keep the normalized name on the anchor and ask for a re-remember
        // later rather than claiming the symbol does not exist.
        anchor.symbol = normalizeSymbolQuery(wanted);
        return {
            anchor,
            line: `- ⚠ ${label} — index still building; not yet resolvable. ` +
                `Anchored by name (file-level staleness for now); re-remember after ` +
                `indexing completes for signature-level tracking.`,
        };
    }

    const ambiguous = matches.length > 1;
    if (ambiguous && parsed.line === undefined) {
        // Several candidates and no line pin: keep the normalized name only.
        anchor.symbol = normalizeSymbolQuery(wanted);
        return {
            anchor,
            line: `- ⚠ ${label} — ${matches.length} symbols share this name; ` +
                `anchored by name (add a line, e.g. "${label}:42", to pin one)`,
        };
    }
    const target = ambiguous ? pickByLine(matches, parsed.line) : matches[0];

    // Record the qualified name so same-simple-named members of one file stay
    // distinguishable.
    const qualified = qualifiedSymbolName(target.fqn, rel, target.name);
    Object.assign(anchor, {
        symbol: qualified,
        symbolId: target.id,
        symbolKind: target.kind,
    });
    if (target.signature) {
        anchor.signature = target.signature;
    }
    const hashSuffix = readable ? `, hash ${fileContentHash}` : ', file unreadable';
    return {
        anchor,
        line: `- ${rel}:${qualified} — ${target.kind} #${target.id}${hashSuffix}`,
    };
}
199
+ // A random nonce (not a notes.length sequence — that repeats after a forget,
200
+ // so two same-text notes in the same millisecond could collide) makes the id
201
+ // collision-proof across the store's whole lifetime.
202
/**
 * Derive a 16-hex-character note id.
 *
 * The id is the first 16 hex characters of the SHA-1 of the creation timestamp,
 * the note text and 8 fresh random bytes (hex-encoded), joined by NUL characters.
 * The random part means two calls with identical arguments yield different ids.
 *
 * @param {string} createdAt - Creation timestamp string.
 * @param {string} text - Note text.
 * @returns {string} 16 lowercase hex characters.
 */
function noteId(createdAt, text) {
    const nonce = randomBytes(8).toString('hex');
    const hasher = createHash('sha1');
    hasher.update([createdAt, text, nonce].join('\0'));
    const fullDigest = hasher.digest('hex');
    return fullDigest.slice(0, 16);
}
@@ -170,8 +170,9 @@ async function runPatternMode(pattern, languages, limit, deps) {
170
170
  // nothing), so validate by parsing the pattern as code with our own
171
171
  // tree-sitter grammars. `hasError` covers both ERROR nodes and
172
172
  // zero-width MISSING-token recovery (e.g. `function f() {`), which an
173
- // ERROR-kind query would let through.
174
- await initParser();
173
+ // ERROR-kind query would let through. Load ONLY the grammars pattern mode
174
+ // can scan — an argless initParser would pull in all 16.
175
+ await initParser(targets.keys());
175
176
  const invalidLangs = [];
176
177
  for (const id of targets.keys()) {
177
178
  if (!patternParses(id, pattern)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codedeep-mcp",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "MCP server that gives AI coding agents structural understanding of a codebase — symbols, references, blast radius, cyclomatic/cognitive complexity, and behavioral-git signals across 14 languages (tree-sitter).",
5
5
  "keywords": [
6
6
  "mcp",
@@ -47,13 +47,15 @@
47
47
  "prepare": "npm run build"
48
48
  },
49
49
  "dependencies": {
50
- "@ast-grep/napi": "^0.43.0",
51
50
  "@modelcontextprotocol/sdk": "^1.29.0",
52
51
  "minisearch": "^7.1.2",
53
52
  "picomatch": "^4.0.4",
54
53
  "web-tree-sitter": "^0.26.8",
55
54
  "zod": "^4.0.0"
56
55
  },
56
+ "optionalDependencies": {
57
+ "@ast-grep/napi": "^0.43.0"
58
+ },
57
59
  "devDependencies": {
58
60
  "@repomix/tree-sitter-wasms": "^0.1.17",
59
61
  "@tree-sitter-grammars/tree-sitter-kotlin": "^1.1.0",