codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,204 @@
1
+ // GitRunner — the project's only subprocess boundary. Wraps execFile('git')
2
+ // with a timeout, an output cap, an AbortController per call (so shutdown
3
+ // can kill every in-flight child), and an error taxonomy that callers
4
+ // branch on:
5
+ //
6
+ // 'git-missing' spawn ENOENT — git isn't installed. Disables the runner
7
+ // for the whole session (one warn, zero further spawns).
8
+ // 'disabled' a call made after disableForSession; never spawned.
9
+ // 'aborted' killed via abortAll() (shutdown) — log at debug only.
10
+ // 'timeout' exceeded timeoutMs and was SIGTERM'd by execFile.
11
+ // 'maxbuffer' output exceeded maxBuffer — skip this result, do NOT
12
+ // disable the session.
13
+ // 'exit' git RAN and exited non-zero (numeric exit code). exit
14
+ // 128 is *normal* for "not a git repository" / "no
15
+ // commits yet" — callers decide log level, the runner
16
+ // never warns on 'exit'. This is the ONLY kind callers
17
+ // may treat as an authoritative negative answer.
18
+ // 'spawn-failed' the child never ran or died abnormally: spawn errors
19
+ // other than ENOENT (EACCES/EMFILE/EAGAIN), external
20
+ // signal kills, anything unexplained. Transient — must
21
+ // never be read as "not a repository".
22
+ //
23
+ // Git failures must never surface to a tool response: callers degrade by
24
+ // omitting sections. The runner is also the warn-dedup point (warnOnce) so
25
+ // a failing command logs once per session, not once per tool call.
26
+ import { execFile } from 'node:child_process';
27
+ import { errMsg, log } from '../logger.js';
28
/**
 * Error type produced for every failed git invocation.
 *
 * `kind` carries the failure-taxonomy tag that callers branch on.
 * `exitCode` and `stderr` are populated only when the corresponding option
 * is supplied; otherwise they stay `undefined`. The underlying error, when
 * given, is attached through the standard `cause` option.
 */
export class GitError extends Error {
    kind;
    exitCode;
    stderr;
    /**
     * @param {string} kind - Taxonomy tag for this failure.
     * @param {string} message - Human-readable description.
     * @param {{ cause?: unknown, exitCode?: number, stderr?: string }} [opts]
     */
    constructor(kind, message, opts = {}) {
        const { cause, exitCode, stderr } = opts;
        // Only forward an options bag when there is a cause to attach, so
        // errors without one never gain a `cause` property.
        super(message, cause === undefined ? undefined : { cause });
        this.name = 'GitError';
        this.kind = kind;
        // The field declarations above already initialise both to
        // undefined, so an unconditional assignment is equivalent to
        // guarding each one with `!== undefined`.
        this.exitCode = exitCode;
        this.stderr = stderr;
    }
}
42
// Per-call defaults used when a caller does not supply its own limits.
const DEFAULT_TIMEOUT_MS = 5_000;
const DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
// Upper bound on how much stderr is kept on a GitError — git can echo
// whole pathspecs into stderr.
const STDERR_CAP = 500;
// User-level git config that would otherwise corrupt parsing or behavior.
// Each entry is pinned with its own `-c` flag in front of every invocation:
//  - core.quotepath=false: without it git octal-escapes non-ASCII bytes in
//    --name-only output ("\303\251"-style), which would never match the
//    raw POSIX paths used as index keys.
//  - log.showSignature=false: a user-level log.showSignature=true would
//    inject GPG status lines into every log output, which the analyzer
//    would otherwise parse as file paths.
//  - log.follow=false: a user-level log.follow=true silently turns the
//    single-pathspec recentCommits query into a full-history rename walk
//    (the documented no---follow decision).
//  - diff.relative=false: a user-level diff.relative=true makes the bulk
//    `log --name-only` emit cwd-relative paths and DROP paths outside the
//    cwd subtree — in monorepo-subdirectory mode that zeroes the whole
//    analysis. The branch diff's explicit --relative flag still wins.
const GIT_ARGS_PREFIX = [
    'core.quotepath=false',
    'log.showSignature=false',
    'log.follow=false',
    'diff.relative=false',
].flatMap((setting) => ['-c', setting]);
67
// Inherited git environment would silently override cwd-based repo
// discovery (GIT_DIR/GIT_WORK_TREE/GIT_INDEX_FILE/GIT_COMMON_DIR/
// GIT_OBJECT_DIRECTORY), block upward discovery entirely
// (GIT_CEILING_DIRECTORIES — fatal for monorepo-subdirectory roots,
// where the clean exit-128 would read as "no repo" and wipe the cache),
// or inject parent-process config (GIT_CONFIG_PARAMETERS /
// GIT_CONFIG_COUNT, set by hooks). The remaining entries complete the set
// of repository-local variables that git itself reports via
// `git rev-parse --local-env-vars`; they redirect the object store, work
// tree or path prefix in the same way when inherited from a hook or a
// wrapping git command.
const STRIPPED_GIT_ENV = [
    'GIT_DIR',
    'GIT_WORK_TREE',
    'GIT_IMPLICIT_WORK_TREE',
    'GIT_INDEX_FILE',
    'GIT_COMMON_DIR',
    'GIT_OBJECT_DIRECTORY',
    'GIT_ALTERNATE_OBJECT_DIRECTORIES',
    'GIT_CEILING_DIRECTORIES',
    'GIT_CONFIG',
    'GIT_CONFIG_PARAMETERS',
    'GIT_CONFIG_COUNT',
    'GIT_GRAFT_FILE',
    'GIT_SHALLOW_FILE',
    'GIT_NO_REPLACE_OBJECTS',
    'GIT_REPLACE_REF_BASE',
    'GIT_PREFIX',
];
/**
 * Build the environment handed to git children: a shallow copy of `source`
 * with every variable in STRIPPED_GIT_ENV removed, so discovery is always
 * anchored to the project root passed as cwd. `source` is never mutated.
 *
 * @param {Record<string, string | undefined>} [source] - Environment to
 *   copy; defaults to `process.env`.
 * @param {string} [platform] - Platform identifier; defaults to
 *   `process.platform`. Only 'win32' changes behavior.
 * @returns {Record<string, string | undefined>} A fresh plain object.
 */
function sanitizedEnv(source = process.env, platform = process.platform) {
    const env = { ...source };
    if (platform !== 'win32') {
        for (const name of STRIPPED_GIT_ENV)
            delete env[name];
        return env;
    }
    // Windows environment names are case-insensitive, but the spread copy
    // above is an ordinary case-sensitive object: `delete env.GIT_DIR`
    // would leave a `Git_Dir` key in place and the child would still
    // honour it. Match the names case-insensitively there.
    const stripped = new Set(STRIPPED_GIT_ENV);
    for (const key of Object.keys(env)) {
        if (stripped.has(key.toUpperCase()))
            delete env[key];
    }
    return env;
}
87
/**
 * Spawns git for a single working directory and turns every failure into a
 * GitError whose `kind` callers branch on.
 *
 * Each call gets its own AbortController (tracked in `controllers`) so
 * abortAll() can kill everything in flight, and runs with a timeout and an
 * output cap. Once disableForSession() has been called, run()/tryRun()
 * fail fast without spawning.
 */
export class GitRunner {
    // Working directory passed to every git child.
    cwd;
    // Executable name or path to spawn; defaults to 'git'.
    gitBin;
    // execFile-compatible function; injectable through opts.execFileImpl.
    execFileImpl;
    // AbortControllers of calls currently in flight.
    controllers = new Set();
    // Keys already passed to warnOnce().
    warned = new Set();
    // null while enabled; otherwise the reason given to disableForSession().
    disabledReason = null;
    /**
     * @param {string} cwd - Directory git runs in.
     * @param {{ gitBin?: string, execFileImpl?: Function }} [opts]
     */
    constructor(cwd, opts = {}) {
        this.cwd = cwd;
        this.gitBin = opts.gitBin ?? 'git';
        this.execFileImpl = opts.execFileImpl ?? execFile;
    }
    /** True once disableForSession() has been called. */
    get disabled() {
        return this.disabledReason !== null;
    }
    /**
     * Permanently disable the runner for this session (git missing or
     * gitEnabled=false). Only the first call takes effect and warns; later
     * run()/tryRun() calls fail fast without spawning.
     *
     * @param {string} reason - Text included in the warning and in later
     *   'disabled' errors.
     */
    disableForSession(reason) {
        if (this.disabledReason !== null)
            return;
        this.disabledReason = reason;
        this.warnOnce('disabled', `git: disabled for this session: ${reason}`);
    }
    /**
     * Log `msg` at warn level the first time `key` is seen; later calls
     * with the same key are silent.
     *
     * @param {string} key - Dedup key.
     * @param {string} msg - Message to log.
     */
    warnOnce(key, msg) {
        if (this.warned.has(key))
            return;
        this.warned.add(key);
        log.warn(msg);
    }
    /**
     * Shutdown path: abort every in-flight child. Their promises reject
     * with kind 'aborted'; callers log those at debug.
     */
    abortAll() {
        for (const controller of this.controllers)
            controller.abort();
    }
    /**
     * Run `git <pinned -c flags> ...args` and resolve with its stdout.
     *
     * @param {string[]} args - Arguments appended after GIT_ARGS_PREFIX.
     * @param {{ timeoutMs?: number, maxBuffer?: number }} [opts] - Per-call
     *   overrides for the default timeout and output cap.
     * @returns {Promise<string>} stdout decoded as UTF-8.
     * @throws {GitError} On every failure; see classify() for the kinds.
     */
    async run(args, opts = {}) {
        if (this.disabledReason !== null) {
            throw new GitError('disabled', `git disabled: ${this.disabledReason}`);
        }
        const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        const controller = new AbortController();
        this.controllers.add(controller);
        try {
            return await new Promise((resolvePromise, rejectPromise) => {
                try {
                    this.execFileImpl(this.gitBin, [...GIT_ARGS_PREFIX, ...args], {
                        cwd: this.cwd,
                        env: sanitizedEnv(),
                        encoding: 'utf8',
                        timeout: timeoutMs,
                        maxBuffer: opts.maxBuffer ?? DEFAULT_MAX_BUFFER,
                        signal: controller.signal,
                        windowsHide: true,
                    }, (err, stdout, stderr) => {
                        // `== null`: an injected execFileImpl may report
                        // success with `undefined` rather than `null`.
                        if (err == null) {
                            resolvePromise(stdout);
                            return;
                        }
                        rejectPromise(this.classify(err, stderr ?? '', controller, timeoutMs));
                    });
                }
                catch (err) {
                    // execFile validates its arguments synchronously (for
                    // example a non-integer timeout or a NUL byte in an
                    // argument) and throws before any child exists. Left
                    // alone, that would reject with a raw TypeError or
                    // RangeError and escape the GitError taxonomy. The
                    // child never ran, so report it as 'spawn-failed'.
                    rejectPromise(new GitError('spawn-failed', errMsg(err), { cause: err }));
                }
            });
        }
        finally {
            this.controllers.delete(controller);
        }
    }
    /**
     * Like run(), but resolves to null on any failure and logs it at debug.
     * For call sites where the caller doesn't branch on the failure kind
     * (cheap per-call queries).
     *
     * @param {string[]} args
     * @param {{ timeoutMs?: number, maxBuffer?: number }} [opts]
     * @returns {Promise<string | null>}
     */
    async tryRun(args, opts = {}) {
        try {
            return await this.run(args, opts);
        }
        catch (err) {
            log.debug(`git: ${args[0] ?? '?'} failed: ${errMsg(err)}`);
            return null;
        }
    }
    /**
     * Map an execFile callback error onto the GitError taxonomy. Checks run
     * in this order, first match wins:
     *   1. our own abort signal fired        -> 'aborted'
     *   2. code 'ENOENT'                     -> 'git-missing' (also disables
     *                                           the runner for the session)
     *   3. code ERR_CHILD_PROCESS_STDIO_MAXBUFFER -> 'maxbuffer'
     *   4. killed === true                   -> 'timeout'
     *   5. numeric code                      -> 'exit'
     *   6. anything else                     -> 'spawn-failed'
     *
     * @param {any} err - Error passed to the execFile callback.
     * @param {string} stderr - Child stderr; trimmed and capped at
     *   STDERR_CAP before being stored.
     * @param {AbortController} controller - Controller of this call.
     * @param {number} timeoutMs - Timeout in effect, used in the message.
     * @returns {GitError}
     */
    classify(err, stderr, controller, timeoutMs) {
        // String(): this runs inside the execFile callback, where a throw
        // would be an uncaught exception — never assume stderr is a string.
        const stderrCapped = String(stderr ?? '').trim().slice(0, STDERR_CAP);
        // Our own abort also reports killed/ABORT_ERR — check the signal
        // first so shutdown never masquerades as a timeout.
        if (controller.signal.aborted) {
            return new GitError('aborted', 'git call aborted', { cause: err });
        }
        if (err.code === 'ENOENT') {
            this.disableForSession(`'${this.gitBin}' not found on PATH`);
            return new GitError('git-missing', `git executable not found: ${this.gitBin}`, {
                cause: err,
            });
        }
        // Must precede the `killed` check: this branch returns 'maxbuffer'
        // whether or not `killed` is also set on the error.
        if (err.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER') {
            return new GitError('maxbuffer', 'git output exceeded maxBuffer', {
                stderr: stderrCapped,
                cause: err,
            });
        }
        if (err.killed === true) {
            return new GitError('timeout', `git timed out after ${timeoutMs}ms`, {
                stderr: stderrCapped,
                cause: err,
            });
        }
        if (typeof err.code === 'number') {
            return new GitError('exit', `git exited with code ${err.code}`, {
                exitCode: err.code,
                stderr: stderrCapped,
                cause: err,
            });
        }
        // Anything left never produced a numeric exit: spawn errors with
        // string errno codes (EACCES/EMFILE/EAGAIN), external signal kills
        // (code null, signal set), or unknown shapes. These are transient —
        // classifying them as 'exit' would let the detection probe read an
        // fd-pressure blip as "not a repository" and wipe the cache.
        return new GitError('spawn-failed', errMsg(err), {
            stderr: stderrCapped,
            cause: err,
        });
    }
}
package/dist/index.js ADDED
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env node
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { loadConfig, resolveCacheDir } from "./config.js";
4
+ import { GitService } from "./git/git-service.js";
5
+ import { CodeIndex } from "./indexer/code-index.js";
6
+ import { Indexer } from "./indexer/pipeline.js";
7
+ import { Watcher } from "./indexer/watcher.js";
8
+ import { errMsg, log } from "./logger.js";
9
+ import { createServer } from "./server.js";
10
// Configuration problems are fatal: report them and exit before anything
// else is constructed.
let baseConfig;
try {
    baseConfig = loadConfig();
}
catch (err) {
    log.error(`codedeep-mcp: invalid config: ${errMsg(err)}`);
    process.exit(1);
}
let resolvedCacheDir;
try {
    resolvedCacheDir = await resolveCacheDir(baseConfig);
}
catch (err) {
    log.error(`codedeep-mcp: failed to resolve cache directory: ${errMsg(err)}`);
    process.exit(1);
}
const config = Object.freeze({ ...baseConfig, cacheDir: resolvedCacheDir });
const index = new CodeIndex(config.projectRoot);
const indexer = new Indexer(config, index);
// Startup indexing: load the cache if there is one, then run an incremental
// pass over it, or a full pass when nothing was loaded.
async function runStartupIndex() {
    const cacheHit = await index.load(indexer.cachePath);
    let startupNote = "Indexer: no cache; running indexAll";
    if (cacheHit) {
        startupNote = "Indexer: cache loaded; running indexChanged";
    }
    log.debug(startupNote);
    // The run guard drops (not queues) concurrent requests. Nothing else
    // can hold it this early — the watcher defers until `ready`, which only
    // the startup run sets — so a refusal here means a code change broke
    // that invariant; surface it loudly rather than papering over it with
    // retries.
    let accepted;
    if (cacheHit) {
        accepted = await indexer.indexChanged();
    }
    else {
        accepted = await indexer.indexAll();
    }
    if (!accepted) {
        log.error("Indexer: startup indexing refused by run guard; index is stale");
    }
}
const indexingPromise = runStartupIndex();
// The handler is attached in the same tick the promise is created, so a
// rejection during indexing can't crash the process under
// --unhandled-rejections=throw. The server stays up either way.
indexingPromise.catch((err) => {
    log.error(`Indexer top-level failure: ${errMsg(err)}`);
});
// Safe to start while the cache load / background index runs: the watcher
// defers its flushes until `indexer.ready`, so it can neither race the
// load's Map swap nor steal the startup run's concurrency guard.
const watcher = config.watch ? new Watcher(indexer, index, config) : null;
watcher?.start();
// Git enrichment starts strictly AFTER the startup index resolves: the
// analyzer filters to fileByPath membership, so running against a
// cold/empty index would persist an EMPTY analysis whose gitMeta marks
// it fresh for a day. Swallowing the indexing rejection first keeps git
// alive when indexChanged fails over a successfully loaded cache. Tool
// calls served before the first analysis simply omit git sections (warm
// starts get them instantly from the v5 cache).
// git.start() never throws (it catches internally), and the leading
// .catch makes the chain unrejectable.
const git = new GitService(config, index, indexer.cachePath);
const indexSettled = indexingPromise.catch(() => { });
void indexSettled.then(() => git.start());
67
// Flush-on-shutdown — the watcher's per-flush saves bound the loss window,
// but exiting between flushes shouldn't discard the last debounce batch.
// watcher.close() drains that batch and persists through its normal save
// path (and refuses to touch the index before the startup run completes),
// so no separate save is needed here.
//
// The graceful work is raced against a watchdog — a save wedged on a
// dead network mount must not orphan the process after the client is
// gone. Exit codes are honest: 0 only when the flush completed; 1 when
// the watchdog cut it, it failed, or a signal escalated past it.
const SHUTDOWN_WATCHDOG_MS = 10_000;
let shuttingDown = false;
/**
 * Begin a graceful shutdown, or escalate one already in progress.
 *
 * @param {string} reason - Logged at debug when the shutdown starts.
 * @param {boolean} waitForStartup - When true, wait for the startup index
 *   to settle before closing git and the watcher.
 * @param {boolean} [escalateIfShuttingDown=false] - When true and a
 *   shutdown is already running, exit immediately with code 1. Signals
 *   pass true; duplicate stdin events ('end' then 'close' fire
 *   back-to-back on EOF) must NOT — they'd cut the flush they themselves
 *   started.
 */
function shutdown(reason, waitForStartup, escalateIfShuttingDown = false) {
    if (shuttingDown) {
        if (escalateIfShuttingDown) {
            log.debug("codedeep-mcp: signal during shutdown; exiting immediately");
            process.exit(1);
        }
        return;
    }
    shuttingDown = true;
    // Pessimistic until the flush is known to have completed. The watchdog
    // timer below is unref'd, so if the event loop drains while the flush
    // is still pending, Node exits on its own using process.exitCode.
    // Without this line it would report 0 for an incomplete flush. The
    // explicit process.exit(code) at the end overrides it on every other
    // path.
    process.exitCode = 1;
    log.debug(`codedeep-mcp: ${reason}; flushing watcher before exit`);
    void (async () => {
        let code = 1;
        try {
            const work = (async () => {
                // Client disconnect is not urgent — let an in-flight startup
                // index finish (it persists internally) as the pre-watcher
                // server did. Signals skip the wait: the user wants out now.
                if (waitForStartup)
                    await indexingPromise.catch(() => { });
                // Sync-fast: aborts in-flight git children. Never awaits the
                // analysis — the watchdog must not ride on a git subprocess.
                git.close();
                await watcher?.close();
            })();
            const watchdog = new Promise((resolve) => {
                const t = setTimeout(() => resolve("timeout"), SHUTDOWN_WATCHDOG_MS);
                t.unref();
            });
            const outcome = await Promise.race([
                work.then(() => "done"),
                watchdog,
            ]);
            if (outcome === "done")
                code = 0;
            else
                log.warn("codedeep-mcp: shutdown watchdog fired; exiting with flush incomplete");
        }
        catch (err) {
            log.warn(`codedeep-mcp: shutdown flush failed: ${errMsg(err)}`);
        }
        finally {
            process.exit(code);
        }
    })();
}
128
// Signals are registered with `on`, not `once`: a REPEATED signal must
// reach shutdown()'s escalate path instead of falling back to the default
// disposition (uncontrolled kill).
const SIGNAL_REASONS = [
    ["SIGINT", "SIGINT received"],
    ["SIGTERM", "SIGTERM received"],
];
for (const [signalName, reason] of SIGNAL_REASONS) {
    process.on(signalName, () => shutdown(reason, false, true));
}
// The PRIMARY MCP shutdown path is the client closing stdin — without
// this hook the unref'd watcher timer never fires again and the process
// exits with the last debounce batch unflushed and unsaved. Both events
// are hooked; neither escalates, so whichever fires second is a no-op.
for (const stdinEvent of ["end", "close"]) {
    process.stdin.once(stdinEvent, () => shutdown("stdin closed", true));
}
const server = createServer({ index, indexer, config, git });
await server.connect(new StdioServerTransport());