codedeep-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/dist/config.js +223 -0
- package/dist/git/analyzer.js +177 -0
- package/dist/git/git-service.js +568 -0
- package/dist/git/head-watcher.js +113 -0
- package/dist/git/runner.js +204 -0
- package/dist/index.js +138 -0
- package/dist/indexer/code-index.js +1801 -0
- package/dist/indexer/complexity.js +633 -0
- package/dist/indexer/extractor.js +354 -0
- package/dist/indexer/languages/cpp.js +934 -0
- package/dist/indexer/languages/csharp.js +854 -0
- package/dist/indexer/languages/dart.js +777 -0
- package/dist/indexer/languages/go.js +665 -0
- package/dist/indexer/languages/java.js +507 -0
- package/dist/indexer/languages/kotlin.js +709 -0
- package/dist/indexer/languages/objc.js +397 -0
- package/dist/indexer/languages/php.js +771 -0
- package/dist/indexer/languages/python.js +455 -0
- package/dist/indexer/languages/ruby.js +697 -0
- package/dist/indexer/languages/rust.js +754 -0
- package/dist/indexer/languages/swift.js +691 -0
- package/dist/indexer/languages/typescript.js +485 -0
- package/dist/indexer/parser.js +175 -0
- package/dist/indexer/pipeline.js +342 -0
- package/dist/indexer/scanner.js +279 -0
- package/dist/indexer/watcher.js +353 -0
- package/dist/logger.js +16 -0
- package/dist/server.js +170 -0
- package/dist/tools/common.js +207 -0
- package/dist/tools/find-references.js +224 -0
- package/dist/tools/find-symbol.js +94 -0
- package/dist/tools/get-context.js +370 -0
- package/dist/tools/impact.js +218 -0
- package/dist/tools/overview.js +482 -0
- package/dist/tools/search-structure.js +303 -0
- package/dist/types.js +61 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-dart.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-objc.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-ruby.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-swift.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +67 -0
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
// GitService — the facade the server hands to tools. Owns the runner and
|
|
2
|
+
// the analysis lifecycle:
|
|
3
|
+
//
|
|
4
|
+
// - detection at start(): one `rev-parse` probe decides 'ready' /
|
|
5
|
+
// 'no-repo' / 'disabled' for the whole session;
|
|
6
|
+
// - the bulk analysis pass (hotspots + co-change), persisted into the
|
|
7
|
+
// CodeIndex cache, refreshed when stale (HEAD moved, window changed,
|
|
8
|
+
// or older than a day);
|
|
9
|
+
// - cheap per-call queries (branch summary, recent commits per file),
|
|
10
|
+
// never persisted, memoized per generation.
|
|
11
|
+
//
|
|
12
|
+
// Degradation contract: NO method ever throws and none surface errors to
|
|
13
|
+
// tool output — every failure path returns null/empty so tools simply
|
|
14
|
+
// omit git sections. The service is constructed even when git is off so
|
|
15
|
+
// the ServerDeps shape stays uniform.
|
|
16
|
+
import { existsSync } from 'node:fs';
|
|
17
|
+
import { join, resolve } from 'node:path';
|
|
18
|
+
import { errMsg, log } from '../logger.js';
|
|
19
|
+
import { GIT_COMMIT_CAP, analyzeLog, buildLogArgs } from './analyzer.js';
|
|
20
|
+
import { HeadWatcher } from './head-watcher.js';
|
|
21
|
+
import { GitError, GitRunner } from './runner.js';
|
|
22
|
+
// Limits for the two classes of git child process. The bulk history pass
// walks the whole analysis window of a potentially large repository, so it
// gets a long timeout and a large output buffer; per-call queries get a
// short timeout and simply degrade when it expires.
const ANALYSIS_TIMEOUT_MS = 30 * 1_000;
const ANALYSIS_MAX_BUFFER = 64 * 1_024 * 1_024;
const QUICK_TIMEOUT_MS = 3 * 1_000;
// The analysis window is anchored to "now", so counts drift as commits age
// out of it — an analysis is refreshed after a day even if HEAD is unchanged.
const ANALYSIS_MAX_AGE_MS = 24 * 60 * 60 * 1_000;
// Upper bound on memoized recent-commit answers, and the default number of
// commits returned per file.
const RECENT_MEMO_CAP = 256;
const RECENT_COMMITS_DEFAULT = 5;
// Minimum spacing between per-call startup retries (maybeRetryStartup): a
// permanently failing bulk pass must not turn every tool call into a
// 30s/64MB git child.
const STARTUP_RETRY_BACKOFF_MS = 60 * 1_000;
|
|
36
|
+
/**
 * Facade the server hands to tools for git enrichment. Owns the git runner,
 * the repository detection state, the bulk analysis lifecycle, and the
 * per-generation memos for cheap per-call queries.
 *
 * `state` starts at 'unknown' and becomes 'ready', 'no-repo', or 'disabled'.
 * The public query methods (`branchSummary`, `recentCommits`) return
 * null / [] on every failure path instead of throwing.
 */
export class GitService {
    config;
    index;
    cachePath;
    runner;
    stateValue = 'unknown';
    // Resolved actual git dir (`.git` may be a FILE in worktrees); the
    // head-watcher (live refresh) attaches here.
    gitDir = null;
    // Non-empty when the project root is a SUBDIRECTORY of the git
    // toplevel (monorepo package): repo-relative log paths must be
    // stripped by this prefix to match project-relative index keys.
    pathPrefix = '';
    headWatcher = null;
    headDebounceMs;
    // Bumped when a completed analysis lands and on HEAD-watch events.
    // Memos and the search boost map key off it.
    generationValue = 0;
    branchMemo = null;
    recentMemo = new Map();
    // Single-flight: a refresh requested while one is running coalesces
    // into exactly one trailing rerun.
    inFlight = null;
    rerunRequested = false;
    // True while a startup retry (maybeRetryStartup) is in flight — at
    // most one at a time, rate-limited by nextStartupRetryAt.
    retryingStartup = false;
    nextStartupRetryAt = 0;
    // Retries must NOT run before the official start() settles: tools are
    // served while startup indexing is still populating the index, and an
    // early retry would run startInner (duplicate watcher) and persist an
    // analysis built against a PARTIAL index as fresh.
    startSettled = false;
    // Set when a bulk pass failed transiently or was skipped over an empty
    // index — lets the per-call retry re-attempt the analysis even though
    // a (stale) gitMeta exists. Cleared on successful apply.
    analysisRetryNeeded = false;
    closed = false;

    /**
     * @param {object} config - Reads `projectRoot`, `gitEnabled`, `gitWindow`.
     * @param {object} index - Code index used for git metadata and persistence.
     * @param {string} cachePath - Passed to `index.save()` when persisting.
     * @param {object} [runner] - Git runner; a `GitRunner` rooted at
     *   `config.projectRoot` is created when omitted.
     * @param {{ headDebounceMs?: number }} [options] - Forwarded to the
     *   HEAD watcher as its debounce interval.
     */
    constructor(config, index, cachePath, runner, options = {}) {
        this.config = config;
        this.index = index;
        this.cachePath = cachePath;
        this.runner = runner ?? new GitRunner(config.projectRoot);
        this.headDebounceMs = options.headDebounceMs;
    }

    /** Current detection state. */
    get state() {
        return this.stateValue;
    }

    /** Monotonic counter that invalidates memos when it changes. */
    get generation() {
        return this.generationValue;
    }

    /**
     * Detection + initial analysis. Never throws: any failure is logged and
     * swallowed. Marks startup as settled in all cases so per-call retries
     * may begin afterwards.
     */
    async start() {
        try {
            await this.startInner();
        }
        catch (err) {
            log.warn(`git: startup failed: ${errMsg(err)}`);
        }
        finally {
            this.startSettled = true;
        }
    }

    /**
     * Probes the repository once, classifies the result into a state, and —
     * when ready — attaches the HEAD watcher and ensures a fresh analysis.
     */
    async startInner() {
        if (this.closed)
            return;
        if (!this.config.gitEnabled) {
            this.stateValue = 'disabled';
            log.debug('git: disabled by config (gitEnabled=false)');
            await this.clearPersistedGitData('gitEnabled=false');
            return;
        }
        // --show-prefix is the subdirectory-of-toplevel detector: empty at
        // the repo root, 'packages/app/' when the project root is a
        // monorepo package. Without it, every log path would miss the index.
        // run() (not tryRun) so the failure KIND survives: only a clean
        // non-zero exit means "not a repository". A transient timeout /
        // maxBuffer / shutdown abort must NOT classify as no-repo — that
        // branch wipes the persisted enrichment, and the repo may be fine.
        let probe;
        try {
            probe = await this.runner.run(['rev-parse', '--is-inside-work-tree', '--git-dir', '--show-prefix'], { timeoutMs: QUICK_TIMEOUT_MS });
        }
        catch (err) {
            if (this.closed)
                return;
            const kind = err instanceof GitError ? err.kind : null;
            if (kind === 'exit') {
                // Plain exit-128 non-repo is a normal deployment, debug only.
                this.stateValue = 'no-repo';
                log.debug('git: no repository detected; enrichment off');
                await this.clearPersistedGitData('no repository');
            }
            else if (kind === 'git-missing' || kind === 'disabled') {
                // ENOENT already session-disabled the runner (with one warn).
                this.stateValue = 'disabled';
                await this.clearPersistedGitData('git unavailable');
            }
            else {
                // timeout / maxbuffer / aborted / unknown: transient. Keep the
                // persisted data (stale beats none) and stay in 'unknown' —
                // sections that need 'ready' degrade, analysis-derived ones
                // keep serving the cache.
                log.debug(`git: detection failed transiently (${errMsg(err)}); enrichment off this session`);
            }
            return;
        }
        if (this.closed)
            return;
        // Split WITHOUT trimming the whole probe first: the --show-prefix
        // line is legitimately empty at the toplevel.
        const lines = probe.split('\n');
        if (lines[0]?.trim() !== 'true') {
            // Bare repo or cwd inside .git — no work tree to enrich.
            this.stateValue = 'no-repo';
            log.debug('git: not inside a work tree; enrichment off');
            await this.clearPersistedGitData('no work tree');
            return;
        }
        // `||`, not `??`: a present-but-empty git-dir line is an empty
        // string, which `??` would accept and resolve() would turn into the
        // project root itself instead of the '.git' fallback.
        const gitDirLine = lines[1]?.trim();
        this.gitDir = resolve(this.config.projectRoot, gitDirLine || '.git');
        this.pathPrefix = lines[2]?.trim() ?? '';
        this.stateValue = 'ready';
        this.startHeadWatcher();
        await this.ensureFreshAnalysis();
    }

    /**
     * Drops persisted enrichment that can never refresh again (kill switch
     * flipped, repo deleted, git unavailable) and saves the index when
     * something was actually cleared. Never runs during shutdown, and never
     * throws — failures are logged at debug level.
     *
     * @param {string} reason - Included in the debug log line.
     */
    async clearPersistedGitData(reason) {
        if (this.closed)
            return;
        try {
            if (!(await this.index.clearGitData()))
                return;
            log.debug(`git: cleared persisted git data (${reason})`);
            await this.index.save(this.cachePath);
        }
        catch (err) {
            log.debug(`git: failed to clear persisted data: ${errMsg(err)}`);
        }
    }

    /**
     * Attaches the live-refresh watcher on <gitdir>/logs/HEAD. When the
     * reflog is missing or the watcher fails to start, the service degrades
     * to startup-only freshness.
     */
    startHeadWatcher() {
        // Idempotent: a retry re-running startInner must not overwrite (and
        // leak) an already-attached watcher — the orphan would double every
        // onHeadChanged and survive close().
        // NOTE(review): a HeadWatcher that closed itself after a watch error
        // stays referenced here, so this guard also blocks re-attachment —
        // confirm that is intended.
        if (this.headWatcher !== null)
            return;
        if (this.gitDir === null || this.closed)
            return;
        const headLogPath = join(this.gitDir, 'logs', 'HEAD');
        if (!existsSync(headLogPath)) {
            log.debug(`git: ${headLogPath} missing; live refresh unavailable`);
            return;
        }
        this.headWatcher = new HeadWatcher(headLogPath, () => this.onHeadChanged(), {
            debounceMs: this.headDebounceMs,
            // Leading edge: the moment the reflog moves, the branch/recent
            // memos describe a world that may have changed — invalidate NOW
            // rather than serving a mid-rebase snapshot until the trailing
            // debounce lands the analysis refresh.
            onWindowStart: () => this.bumpGeneration(),
        });
        if (!this.headWatcher.start())
            this.headWatcher = null;
    }

    /**
     * HEAD-watch callback. Bumps the generation immediately (memos answer
     * from HEAD's committed state, which has changed) and kicks off the
     * analysis refresh in the background; the refresh bumps again on apply.
     */
    onHeadChanged() {
        this.bumpGeneration();
        void this.ensureFreshAnalysis().catch((err) => log.debug(`git: head-change refresh failed: ${errMsg(err)}`));
    }

    /**
     * Re-analyzes when the persisted gitMeta no longer matches the current
     * HEAD, the configured window, or the maximum age — or when a previous
     * pass flagged a retry. The warm path spawns only one rev-parse.
     */
    async ensureFreshAnalysis() {
        if (this.stateValue !== 'ready' || this.closed)
            return;
        const headRaw = await this.runner.tryRun(['rev-parse', 'HEAD'], {
            timeoutMs: QUICK_TIMEOUT_MS,
        });
        if (headRaw === null)
            return; // no usable answer (unborn HEAD or transient failure)
        const head = headRaw.trim();
        const meta = this.index.getGitMeta();
        const fresh = meta !== null &&
            meta.head === head &&
            meta.windowDays === this.config.gitWindow &&
            Date.now() - meta.analyzedAt <= ANALYSIS_MAX_AGE_MS;
        if (fresh && !this.analysisRetryNeeded)
            return;
        await this.runAnalysis(head);
    }

    /**
     * Single-flight wrapper around doAnalysis. A call made while a pass is
     * running returns that pass's promise and requests one trailing rerun.
     *
     * @param {string} head - Commit hash the analysis is stamped with.
     * @returns {Promise<void>} The in-flight analysis promise.
     */
    runAnalysis(head) {
        if (this.inFlight) {
            this.rerunRequested = true;
            return this.inFlight;
        }
        this.inFlight = this.doAnalysis(head).finally(() => {
            this.inFlight = null;
            if (this.rerunRequested && !this.closed) {
                this.rerunRequested = false;
                // Re-resolve HEAD — the rerun exists because it moved mid-run.
                void this.ensureFreshAnalysis().catch((err) => log.debug(`git: rerun failed: ${errMsg(err)}`));
            }
        });
        return this.inFlight;
    }

    /**
     * Runs the bulk log pass, applies the result to the index, bumps the
     * generation, and persists the cache. Failure paths keep the existing
     * data, except a missing/disabled git, which clears it.
     *
     * @param {string} head - Commit hash recorded in the analysis meta.
     */
    async doAnalysis(head) {
        // Cheap pre-spawn guard: an empty index cannot accept an analysis —
        // don't pay a (up to 30s / 64MB) bulk pass to discover that. The
        // post-await twin below still covers an index emptied mid-pass.
        if (this.index.fileCount === 0) {
            log.debug('git: index empty; skipping analysis');
            this.analysisRetryNeeded = true;
            return;
        }
        let stdout;
        try {
            stdout = await this.runner.run(buildLogArgs(this.config.gitWindow), {
                timeoutMs: ANALYSIS_TIMEOUT_MS,
                maxBuffer: ANALYSIS_MAX_BUFFER,
            });
        }
        catch (err) {
            const kind = err instanceof GitError ? err.kind : null;
            if (kind === 'aborted') {
                log.debug('git: analysis aborted (shutdown)');
            }
            else if (kind === 'git-missing' || kind === 'disabled') {
                // git vanished MID-SESSION (toolchain/PATH swap): the persisted
                // enrichment can never refresh again — same contract as the
                // detection-time disabled path.
                this.stateValue = 'disabled';
                await this.clearPersistedGitData('git unavailable');
            }
            else {
                // Keep whatever data the cache already holds — stale beats none;
                // the per-call retry hook may re-attempt later (backoff-bounded).
                this.analysisRetryNeeded = true;
                this.runner.warnOnce('bulk-log', `git: history analysis failed (${errMsg(err)}); keeping previous git data`);
            }
            return;
        }
        // Re-check after the await: a shutdown that raced the bulk pass must
        // not apply + save behind the watchdog's back.
        if (this.closed) {
            log.debug('git: analysis discarded (shutting down)');
            return;
        }
        // An empty index means startup indexing has not populated (or failed)
        // — applying now would persist an empty analysis whose fresh gitMeta
        // suppresses the real one for up to 24h. Skip; staleness re-triggers.
        if (this.index.fileCount === 0) {
            log.debug('git: index empty; skipping analysis apply');
            this.analysisRetryNeeded = true;
            return;
        }
        const analysis = analyzeLog(stdout, (p) => this.index.hasFile(p), this.pathPrefix);
        if (analysis.commitCount >= GIT_COMMIT_CAP) {
            log.debug(`git: analysis hit the ${GIT_COMMIT_CAP}-commit cap; older activity is not counted`);
        }
        await this.index.applyGitAnalysis({
            counts: analysis.counts,
            cochanges: analysis.cochanges,
            hotspots: analysis.hotspots,
            meta: {
                head,
                windowDays: this.config.gitWindow,
                analyzedAt: Date.now(),
            },
        });
        this.analysisRetryNeeded = false;
        // Bump BEFORE the save: memo/boost invalidation must be atomic with
        // the data swap, not deferred behind a potentially slow cache write.
        this.bumpGeneration();
        // Second shutdown check: close() may have landed while the apply was
        // awaited. The in-memory swap is already done, but the cache write
        // must not start once shutdown has begun.
        if (this.closed) {
            log.debug('git: analysis applied but not persisted (shutting down)');
            return;
        }
        try {
            await this.index.save(this.cachePath);
        }
        catch (err) {
            this.runner.warnOnce('analysis-save', `git: failed to persist analysis: ${errMsg(err)}`);
        }
        log.debug(`git: analysis complete (${analysis.commitCount} commits, ` +
            `${analysis.hotspots.length} hotspots, head ${head.slice(0, 7)})`);
    }

    /** Advances the generation and drops both per-generation memos. */
    bumpGeneration() {
        this.generationValue++;
        this.branchMemo = null;
        this.recentMemo.clear();
    }

    /**
     * Current branch, default branch, commits ahead, and changed files.
     * Memoized per generation when the answer is complete.
     *
     * @returns {Promise<object|null>} null when closed, not ready, or when
     *   the branch could not be determined.
     */
    async branchSummary() {
        if (this.closed)
            return null;
        // BEFORE the ready gate: the retry hook is what heals 'unknown'
        // (transient detection failure) — gating it on 'ready' would make
        // that state permanent.
        this.maybeRetryStartup();
        if (this.stateValue !== 'ready')
            return null;
        if (this.branchMemo?.gen === this.generationValue)
            return this.branchMemo.value;
        // Capture the generation BEFORE computing: a HEAD change landing
        // mid-computation makes this result a straggler.
        const gen = this.generationValue;
        const { value, degraded } = await this.computeBranchSummary();
        // Memoize only complete, failure-free answers: a degraded summary
        // pinned for a whole generation could serve a wrong branch all
        // session. A straggler (generation moved while computing) is not
        // stored either — it could never be served, and storing it would
        // overwrite a memo a newer call has already written.
        if (value !== null && !degraded && gen === this.generationValue)
            this.branchMemo = { gen, value };
        return value;
    }

    /**
     * Per-call healing hook: at most one retry in flight, at most one per
     * STARTUP_RETRY_BACKOFF_MS, and never before start() has settled.
     *
     * - runner disabled mid-session -> state 'disabled', persisted data cleared;
     * - state 'unknown' -> re-run the whole detection probe;
     * - state 'ready' with no analysis meta, a flagged retry, or an analysis
     *   older than the daily refresh -> re-attach the watcher (if missing)
     *   and re-attempt the analysis;
     * - 'disabled' and 'no-repo' are never retried.
     */
    maybeRetryStartup() {
        if (this.closed || this.retryingStartup || !this.startSettled)
            return;
        if (this.runner.disabled && this.stateValue !== 'disabled') {
            this.stateValue = 'disabled';
            // Fire-and-forget: clearPersistedGitData catches its own errors.
            void this.clearPersistedGitData('git unavailable');
            return;
        }
        if (this.stateValue !== 'unknown') {
            if (this.stateValue !== 'ready')
                return;
            const meta = this.index.getGitMeta();
            const staleByAge = meta !== null && Date.now() - meta.analyzedAt > ANALYSIS_MAX_AGE_MS;
            if (meta !== null && !this.analysisRetryNeeded && !staleByAge)
                return;
        }
        const now = Date.now();
        if (now < this.nextStartupRetryAt)
            return;
        this.nextStartupRetryAt = now + STARTUP_RETRY_BACKOFF_MS;
        this.retryingStartup = true;
        void (async () => {
            try {
                if (this.stateValue === 'unknown') {
                    await this.startInner();
                }
                else {
                    this.startHeadWatcher();
                    await this.ensureFreshAnalysis();
                }
            }
            catch (err) {
                log.debug(`git: startup retry failed: ${errMsg(err)}`);
            }
            finally {
                this.retryingStartup = false;
            }
        })();
    }

    /**
     * Kind-aware runner call for per-call queries. Distinguishes a REAL
     * negative (clean non-zero exit) from a transient failure that must
     * never be memoized or read as a negative. Refuses to spawn after
     * close().
     *
     * @param {string[]} args - git arguments.
     * @param {object} opts - Runner options (e.g. `timeoutMs`).
     * @returns {Promise<{ out: string|null, transient: boolean }>}
     */
    async probe(args, opts) {
        if (this.closed)
            return { out: null, transient: true };
        try {
            return { out: await this.runner.run(args, opts), transient: false };
        }
        catch (err) {
            log.debug(`git: ${args[0] ?? '?'} failed: ${errMsg(err)}`);
            const kind = err instanceof GitError ? err.kind : null;
            return { out: null, transient: kind !== 'exit' };
        }
    }

    /**
     * Builds the branch summary from individual git probes.
     *
     * `degraded` = some probe failed transiently, so the value (possibly
     * null, possibly missing fields) is NOT a faithful answer and must not
     * be memoized. Real negatives (clean non-zero exits: detached HEAD, no
     * origin, no main/master) do NOT degrade — they are the true state.
     *
     * @returns {Promise<{ value: object|null, degraded: boolean }>}
     */
    async computeBranchSummary() {
        const q = { timeoutMs: QUICK_TIMEOUT_MS };
        let degraded = false;
        // Branch detection and default-branch detection are independent —
        // run their first probes concurrently. symbolic-ref works on unborn
        // branches (fresh init), where `rev-parse --abbrev-ref HEAD` errors.
        const [symR, originR] = await Promise.all([
            this.probe(['symbolic-ref', '--short', '-q', 'HEAD'], q),
            this.probe(['rev-parse', '--abbrev-ref', 'origin/HEAD'], q),
        ]);
        // A transient symbolic-ref failure is NOT detached HEAD — fabricating
        // 'HEAD (detached at ...)' from it would misreport the branch.
        if (symR.transient)
            return { value: null, degraded: true };
        if (originR.transient)
            degraded = true;
        let branch;
        if (symR.out !== null && symR.out.trim().length > 0) {
            branch = symR.out.trim();
        }
        else {
            const shortR = await this.probe(['rev-parse', '--short', 'HEAD'], q);
            if (shortR.transient)
                return { value: null, degraded: true };
            if (shortR.out === null)
                return { value: null, degraded }; // empty repo
            branch = `HEAD (detached at ${shortR.out.trim()})`;
        }
        // base = the rev we diff against (may be a remote-tracking ref);
        // defaultBranch = its display name.
        let base = null;
        let defaultBranch = null;
        if (originR.out !== null && originR.out.trim().length > 0) {
            base = originR.out.trim();
            defaultBranch = base.replace(/^origin\//, '');
        }
        else if (!originR.transient) {
            for (const candidate of ['main', 'master']) {
                const okR = await this.probe(['rev-parse', '--verify', '-q', `refs/heads/${candidate}`], q);
                if (okR.transient)
                    degraded = true;
                if (okR.out !== null) {
                    base = candidate;
                    defaultBranch = candidate;
                    break;
                }
            }
        }
        if (base === null) {
            return {
                value: { branch, defaultBranch: null, ahead: null, changedFiles: null },
                degraded,
            };
        }
        if (branch === defaultBranch) {
            return {
                value: { branch, defaultBranch, ahead: 0, changedFiles: [] },
                degraded,
            };
        }
        // Independent of each other (both only need `base`); run together.
        // '-- .' scopes the diff to the project subtree and --relative makes
        // the output PROJECT-relative — every path leaving GitService must
        // match index keys.
        const [aheadR, diffR] = await Promise.all([
            this.probe(['rev-list', '--count', `${base}..HEAD`], q),
            this.probe(['diff', '--name-only', '--relative', `${base}...HEAD`, '--', '.'], q),
        ]);
        if (aheadR.transient || diffR.transient)
            degraded = true;
        // Digits only: Number('') is 0, so a bare Number() conversion would
        // report an empty or garbled count as "0 commits ahead".
        const aheadText = aheadR.out === null ? '' : aheadR.out.trim();
        const aheadNum = Number(aheadText);
        const ahead = /^\d+$/.test(aheadText) && Number.isSafeInteger(aheadNum) ? aheadNum : null;
        const changedFiles = diffR.out === null
            ? null
            : diffR.out.split('\n').filter((l) => l.length > 0);
        return {
            value: {
                branch,
                defaultBranch,
                ahead,
                changedFiles,
            },
            degraded,
        };
    }

    /**
     * Last `n` commits touching one file, newest first as git prints them.
     * No --follow: rename tracking costs a full history walk per call.
     * NUL field separators make parsing immune to any subject content.
     *
     * @param {string} path - File path, passed to git as a literal pathspec.
     * @param {number} [n] - Maximum number of commits to return.
     * @returns {Promise<Array<{ hash: string, date: string, subject: string }>>}
     *   Empty when closed, not ready, or when git gave no answer.
     */
    async recentCommits(path, n = RECENT_COMMITS_DEFAULT) {
        if (this.closed)
            return [];
        this.maybeRetryStartup();
        if (this.stateValue !== 'ready')
            return [];
        // The requested count is part of the memo key: a 2-row answer must
        // not be served to a caller asking for 5 (or vice versa).
        const memoKey = `${n}:${path}`;
        const memo = this.recentMemo.get(memoKey);
        if (memo && memo.gen === this.generationValue)
            return memo.value;
        // Captured pre-compute for the same reason as branchSummary.
        const gen = this.generationValue;
        // :(literal) disables pathspec magic: glob metacharacters in real
        // filenames must not attribute foreign commits, and a ':'-prefixed
        // name must not parse as pathspec syntax.
        const { out } = await this.probe(['log', '-n', String(n), '--pretty=format:%h%x00%cs%x00%s', '--', `:(literal)${path}`], { timeoutMs: QUICK_TIMEOUT_MS });
        // null = git did not give an answer (transient failure, or a real
        // exit like an unborn HEAD): return empty WITHOUT memoizing so the
        // next call retries. A successful empty answer (file never
        // committed) is a real result and is memoized below.
        if (out === null)
            return [];
        const value = [];
        for (const line of out.split('\n')) {
            const parts = line.split('\u0000');
            if (parts.length === 3 && parts[0].length > 0) {
                value.push({ hash: parts[0], date: parts[1], subject: parts[2] });
            }
        }
        // Straggler (generation moved while git ran): the answer is still
        // returned, but storing it would only occupy a slot that can never
        // be served.
        if (gen !== this.generationValue)
            return value;
        // Evict the oldest entry only when a NEW key would grow the map past
        // the cap; replacing an existing key does not change its size.
        if (!this.recentMemo.has(memoKey) && this.recentMemo.size >= RECENT_MEMO_CAP) {
            const oldest = this.recentMemo.keys().next().value;
            if (oldest !== undefined)
                this.recentMemo.delete(oldest);
        }
        this.recentMemo.set(memoKey, { gen, value });
        return value;
    }

    /**
     * Shutdown: stops the watcher and aborts in-flight git children. Never
     * awaits the analysis — shutdown must not ride on a git subprocess.
     */
    close() {
        this.closed = true;
        this.headWatcher?.close();
        this.headWatcher = null;
        this.runner.abortAll();
    }
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// Live-refresh trigger for the git layer: a tiny non-recursive fs.watch
|
|
2
|
+
// on <gitdir>/logs/ filtered to the HEAD reflog. logs/HEAD is appended on
|
|
3
|
+
// every commit, checkout, merge, reset, and rebase step — exactly the
|
|
4
|
+
// events after which hotspots/co-change data should refresh. The main
|
|
5
|
+
// project watcher never sees these (.git is in DEFAULT_EXCLUDES); this
|
|
6
|
+
// watcher exists only for that one file.
|
|
7
|
+
//
|
|
8
|
+
// Watching the DIRECTORY and filtering on filename survives the
|
|
9
|
+
// rotate/replace edge cases that a direct file watch does not (same
|
|
10
|
+
// robustness reasoning as src/indexer/watcher.ts). Rebases append in
|
|
11
|
+
// bursts, hence the trailing 1s debounce — much coarser than the source
|
|
12
|
+
// watcher's 100ms because a refresh costs a whole git log pass.
|
|
13
|
+
import { watch as fsWatch } from 'node:fs';
|
|
14
|
+
import { dirname, basename } from 'node:path';
|
|
15
|
+
import { canonicalWatchPath } from '../indexer/watcher.js';
|
|
16
|
+
import { errMsg, log } from '../logger.js';
|
|
17
|
+
// Default watch backend: a non-recursive fs.watch on `dir` that forwards
// 'change' events to `onEvent` and 'error' events to `onError`.
const defaultFactory = (dir, onEvent, onError) => {
    // canonicalWatchPath: avoid the Windows 8.3-short-name libuv abort. See
    // its definition in watcher.ts for the full failure mode.
    const watchedDir = canonicalWatchPath(dir);
    const watcher = fsWatch(watchedDir, { recursive: false });
    watcher.on('change', onEvent).on('error', onError);
    // Unref'd so the watcher never keeps the process alive — the stdio
    // transport governs process lifetime.
    watcher.unref();
    return watcher;
};
|
|
27
|
+
// Trailing debounce applied to reflog events when the caller supplies none.
export const DEFAULT_HEAD_DEBOUNCE_MS = 1000;
// Max-wait cap on that debounce: a sustained stream of reflog appends
// (scripted rebases, CI bots) must not postpone the refresh forever. Kept
// coarse because each refresh costs a whole git log pass.
export const DEFAULT_HEAD_MAX_DELAY_MS = 15 * 1000;
|
|
32
|
+
/**
 * Debounced watcher for a single file inside a directory (the HEAD reflog).
 * Watches the parent directory through a pluggable factory, filters events
 * by filename, and calls `onChange` once per burst of events.
 */
export class HeadWatcher {
    onChange;
    // Optional leading-edge hook, called when a new accumulation window opens.
    onWindowStart;
    dir;
    file;
    debounceMs;
    maxDelayMs;
    factory;
    backend = null;
    timer = null;
    // Wall-clock bound for the current accumulation window.
    deadline = null;
    closed = false;

    /**
     * @param {string} headLogPath - Path of the file to watch; its directory
     *   is what actually gets watched.
     * @param {() => void} onChange - Called after the trailing debounce.
     * @param {object} [options]
     * @param {number} [options.debounceMs] - Trailing debounce; defaults to
     *   DEFAULT_HEAD_DEBOUNCE_MS.
     * @param {number} [options.maxDelayMs] - Cap on how long a continuous
     *   event stream can postpone `onChange`; defaults to
     *   DEFAULT_HEAD_MAX_DELAY_MS.
     * @param {Function} [options.watchFactory] - `(dir, onEvent, onError)`
     *   returning an object with `close()`; defaults to the fs.watch factory.
     * @param {() => void} [options.onWindowStart] - Leading-edge hook.
     */
    constructor(headLogPath, onChange, options = {}) {
        this.onChange = onChange;
        this.dir = dirname(headLogPath);
        this.file = basename(headLogPath);
        this.debounceMs = options.debounceMs ?? DEFAULT_HEAD_DEBOUNCE_MS;
        this.maxDelayMs = options.maxDelayMs ?? DEFAULT_HEAD_MAX_DELAY_MS;
        this.factory = options.watchFactory ?? defaultFactory;
        this.onWindowStart = options.onWindowStart;
    }

    /**
     * Attaches the watch backend. Never throws.
     *
     * @returns {boolean} true when a backend is attached (including one
     *   attached by an earlier call); false when the watcher is closed or
     *   the factory threw.
     */
    start() {
        if (this.backend || this.closed)
            return this.backend !== null;
        try {
            this.backend = this.factory(this.dir, (_eventType, filename) => this.handleEvent(filename), (err) => {
                // A backend error permanently closes this watcher.
                log.debug(`git: HEAD watch error (${errMsg(err)}); live refresh off`);
                this.close();
            });
            return true;
        }
        catch (err) {
            log.debug(`git: cannot watch ${this.dir} (${errMsg(err)}); live refresh off`);
            return false;
        }
    }

    /**
     * Handles one raw watch event: filters by filename, opens a window on
     * the first event, and (re)arms the trailing debounce timer.
     *
     * @param {string|Buffer|null|undefined} filename - Name reported by the
     *   backend, if any.
     */
    handleEvent(filename) {
        if (this.closed)
            return;
        // A missing filename (null OR undefined) means the backend couldn't
        // attribute the event; treat it as potentially-HEAD rather than
        // dropping a real commit. `!= null` covers both — a strict null check
        // would call .toString() on undefined and throw inside the listener.
        if (filename != null && filename.toString() !== this.file)
            return;
        if (this.timer !== null)
            clearTimeout(this.timer);
        // Trailing debounce: rebases append many reflog entries back to
        // back; only the last one should trigger a refresh. The wall-clock
        // deadline caps how long a continuous stream can keep postponing.
        if (this.deadline === null) {
            this.deadline = Date.now() + this.maxDelayMs;
            this.onWindowStart?.();
        }
        const delay = Math.max(0, Math.min(this.debounceMs, this.deadline - Date.now()));
        this.timer = setTimeout(() => {
            this.timer = null;
            this.deadline = null;
            if (!this.closed)
                this.onChange();
        }, delay);
        // The debounce timer must not keep the process alive.
        this.timer.unref();
    }

    /**
     * Stops watching and cancels any pending debounce. Idempotent; a closed
     * watcher cannot be restarted.
     */
    close() {
        if (this.closed)
            return;
        this.closed = true;
        if (this.timer !== null) {
            clearTimeout(this.timer);
            this.timer = null;
        }
        // Drop the open window along with its timer so no stale deadline
        // outlives the watcher.
        this.deadline = null;
        try {
            this.backend?.close();
        }
        catch {
            // closing a dead FSWatcher must never propagate
        }
        this.backend = null;
    }
}
|