codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,303 @@
1
+ import { initParser, parseFile } from '../indexer/parser.js';
2
+ import { compareShallowFirst } from '../indexer/scanner.js';
3
+ import { errMsg, log } from '../logger.js';
4
+ import { MODULE_LEVEL, displaySignature, innermostEnclosing, omittedSuffix, readinessBanner, safeReadIndexedFile, textResponse, } from './common.js';
5
// Result-count bounds for search_structure: DEFAULT_LIMIT applies when the
// caller omits `limit`; MAX_LIMIT is the ceiling a requested `limit` is
// capped at.
const DEFAULT_LIMIT = 10;
const MAX_LIMIT = 100;
7
// Query-mode git churn boost: files with recent commit activity rank a
// little higher. The multiplier tops out at 1.5x (1 + GIT_BOOST_MAX_EXTRA),
// matching the exported-symbol boost, so churn only reorders near-ties and
// never outweighs MiniSearch's name relevance (name field boost is 3).
// Commit counts are heavy-tailed, hence the log1p scaling: a single
// 300-commit file must not flatten the boost across the 1-30 range where
// most files sit.
const GIT_BOOST_MAX_EXTRA = 0.5;
// Cache keyed on GitMeta OBJECT IDENTITY. Everything that changes the
// underlying commitFrequency data (applyGitAnalysis, load(), clearGitData())
// swaps or nulls the GitMeta instance, so the object itself is the single
// validity invariant, owned by CodeIndex. No generation counter and no
// cross-module guard are needed.
const boostMemo = new WeakMap();
// Returns a Map of file path -> score multiplier in (1, 1 + GIT_BOOST_MAX_EXTRA]
// for every indexed file with a positive commitFrequency.
// Returns undefined when there is no git metadata or no file has churn.
// The result (including undefined) is memoized per GitMeta instance.
function gitBoostMap(deps) {
    const gitMeta = deps.index.getGitMeta();
    if (gitMeta === null) {
        return undefined;
    }
    if (boostMemo.has(gitMeta)) {
        return boostMemo.get(gitMeta);
    }
    const hasChurn = (file) => (file.commitFrequency ?? 0) > 0;
    const activeFiles = deps.index.getAllFiles().filter(hasChurn);
    let boosts;
    if (activeFiles.length !== 0) {
        // Fold instead of Math.max(...spread): spreading throws RangeError
        // past ~125k elements, and that exception would surface as an
        // in-band tool error caused purely by git enrichment.
        const peak = activeFiles.reduce((best, file) => {
            const scaled = Math.log1p(file.commitFrequency);
            return scaled > best ? scaled : best;
        }, 0);
        boosts = new Map();
        for (const file of activeFiles) {
            const share = Math.log1p(file.commitFrequency) / peak;
            boosts.set(file.path, 1 + GIT_BOOST_MAX_EXTRA * share);
        }
    }
    boostMemo.set(gitMeta, boosts);
    return boosts;
}
48
// A pattern scan reads each candidate file from disk. This cap bounds the
// worst case (no matches anywhere in a huge repo); when it is hit the
// caller is told to narrow the scan instead.
const PATTERN_FILE_CAP = 2000;
// Longest first-line snippet shown for a match before it is truncated.
const MATCH_TEXT_CAP = 120;
52
// User-facing language names → index-internal language ids.
//
// The table has a null prototype and is frozen: keys come from caller
// input, and on an ordinary object literal a lookup such as
// LANGUAGE_ALIASES['constructor'] or LANGUAGE_ALIASES['__proto__'] would
// return an inherited Object.prototype member instead of undefined. With
// no prototype, only the aliases listed here can ever resolve.
const LANGUAGE_ALIASES = Object.freeze({
    __proto__: null,
    typescript: ['typescript', 'tsx'],
    ts: ['typescript', 'tsx'],
    tsx: ['tsx'],
    javascript: ['javascript'],
    js: ['javascript'],
    python: ['python'],
    py: ['python'],
    java: ['java'],
    go: ['go'],
    golang: ['go'],
    rust: ['rust'],
    rs: ['rust'],
    swift: ['swift'],
    kotlin: ['kotlin'],
    kt: ['kotlin'],
    dart: ['dart'],
    flutter: ['dart'],
    csharp: ['csharp'],
    cs: ['csharp'],
    php: ['php'],
    ruby: ['ruby'],
    rb: ['ruby'],
    cpp: ['cpp'],
    'c++': ['cpp'],
    cc: ['cpp'],
    c: ['c'],
    objc: ['objc'],
    'objective-c': ['objc'],
    objectivec: ['objc'],
});
// Canonical language ids, listed in the "unknown language" error message.
const SUPPORTED_LANGUAGES = 'typescript, tsx, javascript, python, java, go, rust, swift, kotlin, dart, csharp, php, ruby, cpp, c, objc';
85
// Lazily created, cached promise for the @ast-grep/napi dynamic import.
// The native binding is not touched until the first `pattern` call, and a
// load failure (e.g. unsupported platform) only degrades pattern mode; the
// rest of the server is unaffected.
let astGrepLoad;
// Resolves to the @ast-grep/napi module, or to null when the import failed
// (the failure is logged as a warning). The promise never rejects, and the
// outcome, success or failure, is cached for subsequent calls.
function loadAstGrep() {
    if (astGrepLoad == null) {
        astGrepLoad = import('@ast-grep/napi').catch((err) => {
            log.warn(`search_structure: @ast-grep/napi unavailable: ${errMsg(err)}`);
            return null;
        });
    }
    return astGrepLoad;
}
96
// Entry point of the search_structure tool.
//
// args.pattern  - ast-grep structural pattern; takes precedence over `query`.
// args.query    - keyword query, used when no non-blank pattern is given.
// args.language - optional language name or alias restricting the search.
// args.limit    - optional maximum result count (see clamping below).
// deps          - reads deps.indexer.ready here; passed through to the
//                 query/pattern mode implementations.
//
// Always resolves to a textResponse(...). Failures are reported in-band as
// an "Error: ..." text and are never thrown to the caller.
export async function runSearchStructure(args, deps) {
    try {
        const banner = readinessBanner(deps.indexer.ready);
        // Clamp into [1, MAX_LIMIT]. Without a lower bound, a limit of 0 or
        // below makes pattern mode discard every match and report "no
        // matches" next to "more matches exist"; NaN would disable the cap.
        const requested = Number(args.limit ?? DEFAULT_LIMIT);
        const limit = Number.isNaN(requested)
            ? DEFAULT_LIMIT
            : Math.min(Math.max(Math.trunc(requested), 1), MAX_LIMIT);
        let languages;
        if (args.language !== undefined) {
            const alias = args.language.trim().toLowerCase();
            // Own-property check: a bare LANGUAGE_ALIASES[alias] would resolve
            // inherited members for inputs like 'constructor' or '__proto__',
            // slip past the guard below and fail later with a confusing
            // "not iterable" error instead of the unknown-language message.
            const expanded = Object.hasOwn(LANGUAGE_ALIASES, alias)
                ? LANGUAGE_ALIASES[alias]
                : undefined;
            if (!expanded) {
                return textResponse(`Error: unknown language '${args.language}'. Supported: ${SUPPORTED_LANGUAGES}.`);
            }
            languages = new Set(expanded);
        }
        const pattern = args.pattern?.trim();
        if (pattern) {
            return textResponse(banner + (await runPatternMode(pattern, languages, limit, deps)));
        }
        const query = args.query?.trim();
        if (!query) {
            return textResponse('Error: provide a non-empty `query` or an ast-grep `pattern`.');
        }
        return textResponse(banner + runQueryMode(query, languages, limit, deps, args.language));
    }
    catch (err) {
        return textResponse(`Error: ${errMsg(err)}`);
    }
}
122
// Keyword search over the symbol index. Returns the rendered text: one
// block per symbol, followed by an "omitted" suffix when the index reports
// more hits than were returned, or a "No matches" line when nothing hit.
// `languageArg` is the caller's raw language string, echoed in that line.
function runQueryMode(query, languages, limit, deps, languageArg) {
    const result = deps.index.searchSymbols(query, {
        limit,
        languages,
        boostByFile: gitBoostMap(deps),
    });
    const hits = result.symbols;
    const totalHits = result.total;
    if (hits.length === 0) {
        let filterNote = '';
        if (languageArg) {
            filterNote = ` (language: ${languageArg})`;
        }
        return `No matches for '${query}'${filterNote}.`;
    }
    const rendered = hits.map((sym) => renderSymbolBlock(sym));
    if (totalHits > hits.length) {
        rendered.push(omittedSuffix(totalHits - hits.length));
    }
    return rendered.join('\n\n');
}
138
// One-line header for a symbol: `<file>:<start>-<end> | <kind>`, with
// ` | exported` appended when sym.exported is truthy.
function symbolHeader(sym) {
    const location = `${sym.file}:${sym.startLine}-${sym.endLine}`;
    let header = `${location} | ${sym.kind}`;
    if (sym.exported) {
        header += ' | exported';
    }
    return header;
}
142
// Text block for one query-mode hit: header line, display signature, and
// the symbol's doc text when it has a non-empty one.
function renderSymbolBlock(sym) {
    const header = symbolHeader(sym);
    const signature = displaySignature(sym);
    const docPart = sym.doc && sym.doc.length > 0 ? [sym.doc] : [];
    return [header, signature, ...docPart].join('\n');
}
148
// Structural (ast-grep) search. Resolves to the rendered text: grouped
// match blocks, or a "No structural matches" explanation, followed by any
// notes about truncation, the file cap, unreadable files, or languages the
// pattern did not parse as. Problems that prevent a scan are returned as
// text ("Error: ..." or an unsupported-language message), not thrown.
async function runPatternMode(pattern, languages, limit, deps) {
    const ag = await loadAstGrep();
    if (!ag) {
        return ('Error: structural pattern matching is unavailable — the ' +
            '@ast-grep/napi native binding failed to load on this platform. ' +
            'Keyword `query` mode still works.');
    }
    // Only the ast-grep built-in languages are wired up; other indexed
    // languages would need the pre-1.0 @ast-grep/lang-* packages.
    const builtIn = [
        ['typescript', ag.Lang.TypeScript],
        ['tsx', ag.Lang.Tsx],
        ['javascript', ag.Lang.JavaScript],
    ];
    const targets = new Map();
    for (const [id, lang] of builtIn) {
        if (!languages || languages.has(id)) {
            targets.set(id, lang);
        }
    }
    if (targets.size === 0) {
        return ('Structural patterns are not supported for this language yet — ' +
            'this phase covers TypeScript/TSX/JavaScript only. ' +
            'Keyword `query` mode works for all indexed languages.');
    }
    // ast-grep silently matches nothing for a malformed pattern rather than
    // rejecting it, so each target language is validated by parsing the
    // pattern as code with our own tree-sitter grammar (see patternParses).
    await initParser();
    const invalidLangs = [];
    for (const id of [...targets.keys()]) {
        if (patternParses(id, pattern)) {
            continue;
        }
        invalidLangs.push(id);
        targets.delete(id);
    }
    if (targets.size === 0) {
        return `Error: invalid ast-grep pattern '${pattern}' — it does not parse as ${invalidLangs.join('/')} code.`;
    }
    const candidates = deps.index.getAllFiles().filter((f) => targets.has(f.language));
    const files = candidates.sort(compareShallowFirst);
    const scan = await scanPattern(ag, pattern, targets, files, limit, deps.config);
    const { matches, skipped, fileCapHit } = scan;
    const blocks = renderPatternMatches(matches.slice(0, limit), deps.index);
    if (blocks.length === 0) {
        let emptyNote;
        if (files.length > 0) {
            emptyNote = `No structural matches for pattern '${pattern}'.`;
        }
        else if (languages !== undefined || invalidLangs.length > 0) {
            // Claiming "nothing is indexed" would be false here: the empty
            // candidate set came from the language filter or from pattern
            // validation.
            emptyNote = `No structural matches for pattern '${pattern}' — no indexed files match the scanned language(s) (${[...targets.keys()].join('/')}).`;
        }
        else {
            emptyNote = `No structural matches for pattern '${pattern}' — no TypeScript/TSX/JavaScript files are indexed.`;
        }
        blocks.push(emptyNote);
    }
    const notes = [];
    // scanPattern collects up to limit+1 matches; the extra one signals
    // that the output was truncated.
    if (matches.length > limit) {
        notes.push('(more matches exist; raise `limit` to see all)');
    }
    if (fileCapHit) {
        notes.push(`(stopped after scanning ${PATTERN_FILE_CAP} files; pass \`language\` to narrow the scan)`);
    }
    if (skipped > 0) {
        notes.push(`(${skipped} file${skipped === 1 ? '' : 's'} could not be read and were skipped)`);
    }
    if (invalidLangs.length > 0) {
        notes.push(`(pattern does not parse as ${invalidLangs.join('/')}; those files were skipped)`);
    }
    return blocks.concat(notes).join('\n\n');
}
217
// Checks that `pattern` parses cleanly as code in the given index language,
// using OUR web-tree-sitter grammar. The caller must have awaited
// initParser() first. Returns false when no tree is produced or the tree
// contains an error. `hasError` covers both ERROR nodes and zero-width
// MISSING-token recovery (e.g. `function f() {`), which an ERROR-kind query
// would let through.
// Deliberately no catch: an unexpected parser throw has to reach
// runSearchStructure's in-band handler as the REAL error instead of being
// reported as an invalid pattern. The tree is always released.
function patternParses(languageId, pattern) {
    const tree = parseFile(pattern, languageId);
    if (!tree) {
        return false;
    }
    let parsesCleanly;
    try {
        parsesCleanly = !tree.rootNode.hasError;
    }
    finally {
        tree.delete();
    }
    return parsesCleanly;
}
233
// Reads and scans candidate files in order, gathering at most limit+1
// matches (the extra one lets the caller detect truncation) and visiting
// at most PATTERN_FILE_CAP files. A file that fails to read, parse or
// search is counted in `skipped` and logged at debug level, never thrown.
// Resolves to { matches, skipped, fileCapHit }.
async function scanPattern(ag, pattern, targets, files, limit, config) {
    const found = [];
    let filesVisited = 0;
    let unreadable = 0;
    let capReached = false;
    for (const file of files) {
        if (found.length > limit) {
            break;
        }
        if (filesVisited >= PATTERN_FILE_CAP) {
            capReached = true;
            break;
        }
        filesVisited += 1;
        try {
            const source = await safeReadIndexedFile(file.path, config);
            // parseAsync hands parsing to the threadpool, so a long scan
            // keeps yielding to the event loop between files.
            const parsed = await ag.parseAsync(targets.get(file.language), source);
            const nodes = parsed.root().findAll(pattern);
            for (const node of nodes) {
                const line = node.range().start.line + 1;
                const text = firstLine(node.text());
                found.push({ file: file.path, line, text });
                if (found.length > limit) {
                    break;
                }
            }
        }
        catch (err) {
            unreadable += 1;
            log.debug(`search_structure: skipped ${file.path}: ${errMsg(err)}`);
        }
    }
    return { matches: found, skipped: unreadable, fileCapHit: capReached };
}
271
// Returns the first visual line of `text`, truncated to MATCH_TEXT_CAP
// UTF-16 code units with a trailing '…' when it is longer.
function firstLine(text) {
    // Line-ending agnostic: cut at the first CR or LF. Slicing at '\n' alone
    // would keep the '\r' of a '\r\n' pair on the rendered snippet, and a
    // lone '\r' (old Mac style) has no '\n' at all.
    const end = text.search(/[\r\n]/);
    const line = end === -1 ? text : text.slice(0, end);
    if (line.length <= MATCH_TEXT_CAP) {
        return line;
    }
    // Never cut between the two halves of a surrogate pair. If the last
    // kept code unit is a high surrogate, its low surrogate lies past the
    // cap, so drop the high one too rather than emit an ill-formed string.
    let cut = MATCH_TEXT_CAP;
    const lastKept = line.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
        cut -= 1;
    }
    return `${line.slice(0, cut)}…`;
}
280
// Buckets matches by (file, innermost enclosing symbol) and renders one
// text block per bucket: header, the symbol's signature when there is one,
// then a row per match. Matches outside any symbol are headed
// `file (module level)`. Buckets keep insertion order, i.e. scan order, so
// the output stays shallow-first and in document order.
function renderPatternMatches(matches, index) {
    const buckets = new Map();
    for (const match of matches) {
        const enclosing = innermostEnclosing(index.getSymbolsInFile(match.file), match.line);
        const bucketKey = `${match.file}\0${enclosing?.id ?? ''}`;
        if (!buckets.has(bucketKey)) {
            const fresh = enclosing
                ? { header: symbolHeader(enclosing), signature: displaySignature(enclosing), rows: [] }
                : { header: `${match.file} ${MODULE_LEVEL}`, signature: null, rows: [] };
            buckets.set(bucketKey, fresh);
        }
        buckets.get(bucketKey).rows.push(`  match :${match.line}  ${match.text}`);
    }
    return Array.from(buckets.values(), (bucket) => {
        const head = bucket.signature ? [bucket.header, bucket.signature] : [bucket.header];
        return head.concat(bucket.rows).join('\n');
    });
}
package/dist/types.js ADDED
@@ -0,0 +1,61 @@
1
+ // Shared types for codedeep-mcp.
2
+ //
3
+ // Note: the `Symbol` interface name shadows the global `Symbol` constructor
4
+ // when imported, but only as a *type* — the runtime `Symbol(...)` value is
5
+ // untouched. Module-scoped interfaces do NOT merge with `lib.es5.d.ts`'s
6
+ // global `Symbol` (that would require `declare global`).
7
// SymbolKinds a bare-identifier call (`foo()`) can never bind to. The
// extractor's call resolver keeps these out of `nameToId`, and `isCallerOf`
// rejects bare-name matches against them (member refs, where `receiver` is
// present, may still match methods).
//
//  - method:    needs member access; resolved separately through the
//               receiver and `methodsByClass`.
//  - interface, type: never exist at runtime.
//  - enum:      bare `Color()` is never a valid call, and enums have no
//               invocable members, so member refs cannot target them either.
//  - module:    namespaces. Under declaration merging (`namespace fmt` +
//               `function fmt`) the call belongs to the function; without
//               this entry the namespace symbol would win the extractor's
//               first-wins nameToId and steal the function's resolved refs.
export const NON_CALLABLE_KINDS = new Set(['method', 'interface', 'type', 'enum', 'module']);
26
// Markers stored in `ImportInfo.importedNames[].name` for non-named
// imports. Neither can clash with a real exported identifier: `default` is
// a reserved word and `*` is not a valid identifier. Consumers compare
// against these constants rather than against string literals.
export const IMPORT_DEFAULT = 'default';
export const IMPORT_NAMESPACE = '*';
// Receiver marker for member calls whose receiver is NOT one resolvable
// token: chained `a.b().c()`, indexed `arr[0].run()`, non-null `a!.b()`.
// It is not an identifier, so it cannot clash with a real receiver name or
// an import local. The consumers (memberRefMatchesTarget / rankRefs /
// edgeStrength) already treat an unknown receiver as a weak, tier-5,
// never-resolved member include, which keeps such refs FINDABLE by method
// name (recall) while never letting them form a resolved cross-file edge.
export const RECEIVER_OPAQUE = '()';
// Language tag for files with an unrecognized extension. They are still
// recorded as FileInfo (so overview can report them) but are skipped at
// parse/extract time.
export const LANGUAGE_UNKNOWN = 'unknown';
44
// Returns `Class` for a member FQN `<file>:<Class>.<member>` and null for a
// top-level FQN (`<file>:<name>`) or a string without ':'. File paths may
// themselves contain dots, so the separator is the first '.' AFTER the
// first ':'. This is the single parser of that contract (extractor member
// resolution and code-index member gating both rely on it).
export function classNameFromFqn(fqn) {
    const nameStart = fqn.indexOf(':') + 1;
    if (nameStart === 0) {
        return null;
    }
    const memberDot = fqn.indexOf('.', nameStart);
    // memberDot === -1: no dot, a top-level symbol.
    // memberDot === nameStart: empty class segment, so the dot is part of
    // the SYMBOL name (e.g. a top-level Swift operator function `func .*`
    // has FQN `file:.*`), not a member of class "".
    if (memberDot <= nameStart) {
        return null;
    }
    return fqn.slice(nameStart, memberDot);
}
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "name": "codedeep-mcp",
3
+ "version": "0.1.0",
4
+ "description": "MCP server that gives AI coding agents structural understanding of a codebase — symbols, references, blast radius, cyclomatic/cognitive complexity, and behavioral-git signals across 14 languages (tree-sitter).",
5
+ "keywords": [
6
+ "mcp",
7
+ "model-context-protocol",
8
+ "tree-sitter",
9
+ "code-intelligence",
10
+ "code-analysis",
11
+ "ast",
12
+ "cyclomatic-complexity",
13
+ "cognitive-complexity",
14
+ "ai-agents",
15
+ "developer-tools"
16
+ ],
17
+ "license": "MIT",
18
+ "type": "module",
19
+ "main": "dist/index.js",
20
+ "bin": {
21
+ "codedeep-mcp": "dist/index.js"
22
+ },
23
+ "files": [
24
+ "dist/",
25
+ "grammars/"
26
+ ],
27
+ "repository": {
28
+ "type": "git",
29
+ "url": "git+https://github.com/planexhq/codedeep-mcp.git"
30
+ },
31
+ "bugs": {
32
+ "url": "https://github.com/planexhq/codedeep-mcp/issues"
33
+ },
34
+ "homepage": "https://github.com/planexhq/codedeep-mcp#readme",
35
+ "engines": {
36
+ "node": ">=20.0.0"
37
+ },
38
+ "scripts": {
39
+ "copy:grammars": "node scripts/copy-grammars.mjs",
40
+ "prebuild": "npm run copy:grammars",
41
+ "build": "tsc -p tsconfig.json",
42
+ "dev": "tsc -p tsconfig.json --watch",
43
+ "start": "node dist/index.js",
44
+ "dogfood": "tsx scripts/dogfood/run.ts",
45
+ "test": "vitest run --passWithNoTests",
46
+ "inspect": "npm run build && npx @modelcontextprotocol/inspector node dist/index.js",
47
+ "prepare": "npm run build"
48
+ },
49
+ "dependencies": {
50
+ "@ast-grep/napi": "^0.43.0",
51
+ "@modelcontextprotocol/sdk": "^1.29.0",
52
+ "minisearch": "^7.1.2",
53
+ "picomatch": "^4.0.4",
54
+ "web-tree-sitter": "^0.26.8",
55
+ "zod": "^4.0.0"
56
+ },
57
+ "devDependencies": {
58
+ "@repomix/tree-sitter-wasms": "^0.1.17",
59
+ "@tree-sitter-grammars/tree-sitter-kotlin": "^1.1.0",
60
+ "@types/node": "^25.6.0",
61
+ "@types/picomatch": "^4.0.3",
62
+ "tree-sitter-objc": "^3.0.2",
63
+ "tsx": "^4.22.4",
64
+ "typescript": "^6.0.0",
65
+ "vitest": "^4.1.0"
66
+ }
67
+ }