codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,568 @@
1
+ // GitService — the facade the server hands to tools. Owns the runner and
2
+ // the analysis lifecycle:
3
+ //
4
+ // - detection at start(): one `rev-parse` probe decides 'ready' /
5
+ // 'no-repo' / 'disabled' for the whole session;
6
+ // - the bulk analysis pass (hotspots + co-change), persisted into the
7
+ // CodeIndex cache, refreshed when stale (HEAD moved, window changed,
8
+ // or older than a day);
9
+ // - cheap per-call queries (branch summary, recent commits per file),
10
+ // never persisted, memoized per generation.
11
+ //
12
+ // Degradation contract: NO method ever throws and none surface errors to
13
+ // tool output — every failure path returns null/empty so tools simply
14
+ // omit git sections. The service is constructed even when git is off so
15
+ // the ServerDeps shape stays uniform.
16
+ import { existsSync } from 'node:fs';
17
+ import { join, resolve } from 'node:path';
18
+ import { errMsg, log } from '../logger.js';
19
+ import { GIT_COMMIT_CAP, analyzeLog, buildLogArgs } from './analyzer.js';
20
+ import { HeadWatcher } from './head-watcher.js';
21
+ import { GitError, GitRunner } from './runner.js';
22
// Budgets for the bulk history pass: it may have to read the whole
// analysis window of a large repository, so both limits are generous.
const ANALYSIS_TIMEOUT_MS = 30 * 1000;
const ANALYSIS_MAX_BUFFER = 64 * 1024 ** 2;
// Per-call queries stay snappy and simply degrade on timeout.
const QUICK_TIMEOUT_MS = 3 * 1000;
// The analysis window is anchored to "now", so counts drift as commits
// age out of it — re-run at least once a day even when HEAD has not moved.
const ANALYSIS_MAX_AGE_MS = 24 * 60 * 60 * 1000;
// Upper bound on memoized recent-commit answers.
const RECENT_MEMO_CAP = 256;
// Row count used when a caller does not ask for a specific number.
const RECENT_COMMITS_DEFAULT = 5;
// Per-call startup retries (maybeRetryStartup) are rate-limited to one
// attempt per interval, so a permanently failing bulk pass cannot turn
// every tool call into a 30s/64MB git child.
const STARTUP_RETRY_BACKOFF_MS = 60 * 1000;
36
/**
 * Facade over the git layer that the server hands to tools. Owns the
 * runner, the HEAD reflog watcher and the analysis lifecycle.
 *
 * Public surface: `state`, `generation`, `start()`, `branchSummary()`,
 * `recentCommits()` and `close()`. The public query methods degrade to
 * null / [] instead of throwing, so tools can simply omit git sections.
 */
export class GitService {
    config;
    index;
    cachePath;
    runner;
    // 'unknown' until detection settles; then 'ready', 'no-repo' or
    // 'disabled'.
    stateValue = 'unknown';
    // Resolved actual git dir (`.git` may be a FILE in worktrees); the
    // head-watcher (live refresh) attaches here.
    gitDir = null;
    // Non-empty when the project root is a SUBDIRECTORY of the git
    // toplevel (monorepo package): repo-relative log paths must be
    // stripped by this prefix to match project-relative index keys.
    pathPrefix = '';
    headWatcher = null;
    headDebounceMs;
    // Bumped when a completed analysis lands and on HEAD-watch events.
    // Memos key off it.
    generationValue = 0;
    branchMemo = null;
    recentMemo = new Map();
    // Single-flight: a refresh requested while one is running coalesces
    // into exactly one trailing rerun.
    inFlight = null;
    rerunRequested = false;
    // True while a startup retry (maybeRetryStartup) is in flight — at
    // most one at a time, rate-limited by nextStartupRetryAt.
    retryingStartup = false;
    nextStartupRetryAt = 0;
    // Retries must NOT run before the official start() settles: tools are
    // served while startup indexing is still populating the index, and an
    // early retry would run startInner and persist an analysis built
    // against a PARTIAL index as fresh.
    startSettled = false;
    // Set when a bulk pass failed transiently or was skipped over an empty
    // index — lets the per-call retry re-attempt the analysis even though
    // a (stale) gitMeta exists. Cleared on successful apply.
    analysisRetryNeeded = false;
    closed = false;

    /**
     * @param config    reads `projectRoot`, `gitEnabled` and `gitWindow`.
     * @param index     code index that stores the analysis and its gitMeta.
     * @param cachePath passed to `index.save()` when git data changes.
     * @param runner    optional runner; a GitRunner rooted at
     *                  `config.projectRoot` is created when omitted.
     * @param options   `headDebounceMs` is forwarded to the HEAD watcher.
     */
    constructor(config, index, cachePath, runner, options = {}) {
        this.config = config;
        this.index = index;
        this.cachePath = cachePath;
        this.runner = runner ?? new GitRunner(config.projectRoot);
        this.headDebounceMs = options.headDebounceMs;
    }

    /** Current detection state. */
    get state() {
        return this.stateValue;
    }

    /** Monotonic counter that invalidates the per-generation memos. */
    get generation() {
        return this.generationValue;
    }

    // Detection + initial analysis. Never throws; the caller is expected
    // to background it AFTER the index is populated — analyzing against
    // an empty index would persist an empty result marked fresh.
    async start() {
        try {
            await this.startInner();
        }
        catch (err) {
            log.warn(`git: startup failed: ${errMsg(err)}`);
        }
        finally {
            // Settled either way: from here on per-call retries may run.
            this.startSettled = true;
        }
    }

    // One rev-parse probe classifies the session, then attaches the HEAD
    // watcher and runs the initial freshness check.
    async startInner() {
        if (this.closed)
            return;
        if (!this.config.gitEnabled) {
            this.stateValue = 'disabled';
            log.debug('git: disabled by config (gitEnabled=false)');
            await this.clearPersistedGitData('gitEnabled=false');
            return;
        }
        // --show-prefix is the subdirectory-of-toplevel detector: empty at
        // the repo root, 'packages/app/' when the project root is a
        // monorepo package. Without it, every log path would miss the index.
        // run() (not tryRun) so the failure KIND survives: only a clean
        // non-zero exit means "not a repository". A transient timeout /
        // maxBuffer / shutdown abort must NOT classify as no-repo — that
        // branch wipes the persisted enrichment, and the repo may be fine.
        let probe;
        try {
            probe = await this.runner.run(['rev-parse', '--is-inside-work-tree', '--git-dir', '--show-prefix'], { timeoutMs: QUICK_TIMEOUT_MS });
        }
        catch (err) {
            if (this.closed)
                return;
            const kind = err instanceof GitError ? err.kind : null;
            if (kind === 'exit') {
                // Plain exit-128 non-repo is a normal deployment, debug only.
                this.stateValue = 'no-repo';
                log.debug('git: no repository detected; enrichment off');
                await this.clearPersistedGitData('no repository');
            }
            else if (kind === 'git-missing' || kind === 'disabled') {
                // ENOENT already session-disabled the runner (with one warn).
                this.stateValue = 'disabled';
                await this.clearPersistedGitData('git unavailable');
            }
            else {
                // timeout / maxbuffer / aborted / unknown: transient. Keep the
                // persisted data (stale beats none) and stay in 'unknown' —
                // sections that need 'ready' degrade, analysis-derived ones
                // keep serving the cache.
                log.debug(`git: detection failed transiently (${errMsg(err)}); enrichment off this session`);
            }
            return;
        }
        if (this.closed)
            return;
        // Split WITHOUT trimming the whole probe first: the --show-prefix
        // line is legitimately empty at the toplevel.
        const lines = probe.split('\n');
        if (lines[0]?.trim() !== 'true') {
            // Bare repo or cwd inside .git — no work tree to enrich.
            this.stateValue = 'no-repo';
            log.debug('git: not inside a work tree; enrichment off');
            await this.clearPersistedGitData('no work tree');
            return;
        }
        // `||` rather than `??`: a blank git-dir line must fall back to
        // '.git' too — resolving '' would silently point gitDir at the
        // project root and the reflog would be looked up in the wrong place.
        this.gitDir = resolve(this.config.projectRoot, lines[1]?.trim() || '.git');
        this.pathPrefix = lines[2]?.trim() ?? '';
        this.stateValue = 'ready';
        this.startHeadWatcher();
        await this.ensureFreshAnalysis();
    }

    // Persisted enrichment from an earlier enabled session must not keep
    // rendering when it can never refresh again (kill switch flipped, repo
    // deleted, git uninstalled). Never during shutdown: an aborted probe
    // must not gut a healthy warm cache. Failures are logged, not thrown.
    async clearPersistedGitData(reason) {
        if (this.closed)
            return;
        try {
            // A falsy result from clearGitData() skips the log and the save.
            if (!(await this.index.clearGitData()))
                return;
            log.debug(`git: cleared persisted git data (${reason})`);
            await this.index.save(this.cachePath);
        }
        catch (err) {
            log.debug(`git: failed to clear persisted data: ${errMsg(err)}`);
        }
    }

    // Live refresh: <gitdir>/logs/HEAD changes on every commit, checkout,
    // merge, and rebase. When it can't be watched (no reflog, fs.watch
    // failure) we degrade to startup-only freshness.
    startHeadWatcher() {
        // Idempotent: a retry re-running startInner must not overwrite (and
        // leak) an already-attached watcher — the orphan would double every
        // onHeadChanged and survive close().
        if (this.headWatcher !== null)
            return;
        if (this.gitDir === null || this.closed)
            return;
        const headLogPath = join(this.gitDir, 'logs', 'HEAD');
        if (!existsSync(headLogPath)) {
            log.debug(`git: ${headLogPath} missing; live refresh unavailable`);
            return;
        }
        this.headWatcher = new HeadWatcher(headLogPath, () => this.onHeadChanged(), {
            debounceMs: this.headDebounceMs,
            // Leading edge: the moment the reflog moves, the branch/recent
            // memos describe a world that may have changed — invalidate NOW
            // rather than serving a mid-rebase snapshot until the trailing
            // debounce lands the analysis refresh.
            onWindowStart: () => this.bumpGeneration(),
        });
        if (!this.headWatcher.start())
            this.headWatcher = null;
    }

    // The generation bumps IMMEDIATELY (not just after the analysis lands):
    // branch summary and recent-commits memos answer from HEAD's committed
    // state, which has changed; the analysis refresh follows in the
    // background, bumping again when it applies.
    onHeadChanged() {
        this.bumpGeneration();
        void this.ensureFreshAnalysis().catch((err) => log.debug(`git: head-change refresh failed: ${errMsg(err)}`));
    }

    // Re-analyze when the persisted gitMeta no longer matches reality.
    // The warm path — matching HEAD/window and young enough — returns
    // after a single rev-parse.
    async ensureFreshAnalysis() {
        if (this.stateValue !== 'ready' || this.closed)
            return;
        const headRaw = await this.runner.tryRun(['rev-parse', 'HEAD'], {
            timeoutMs: QUICK_TIMEOUT_MS,
        });
        if (headRaw === null)
            return; // no usable answer (unborn HEAD or transient failure)
        const head = headRaw.trim();
        const meta = this.index.getGitMeta();
        const fresh = meta !== null &&
            meta.head === head &&
            meta.windowDays === this.config.gitWindow &&
            Date.now() - meta.analyzedAt <= ANALYSIS_MAX_AGE_MS;
        if (fresh && !this.analysisRetryNeeded)
            return;
        await this.runAnalysis(head);
    }

    // Single-flight wrapper around doAnalysis: concurrent requests share
    // the running pass and schedule exactly one trailing rerun.
    runAnalysis(head) {
        if (this.inFlight) {
            this.rerunRequested = true;
            return this.inFlight;
        }
        this.inFlight = this.doAnalysis(head).finally(() => {
            this.inFlight = null;
            if (this.rerunRequested && !this.closed) {
                this.rerunRequested = false;
                // Re-resolve HEAD — the rerun exists because it moved mid-run.
                void this.ensureFreshAnalysis().catch((err) => log.debug(`git: rerun failed: ${errMsg(err)}`));
            }
        });
        return this.inFlight;
    }

    // The bulk pass: one git log run, analyzed and applied to the index,
    // then persisted. `head` is recorded in the resulting gitMeta.
    async doAnalysis(head) {
        // Cheap pre-spawn guard: an empty index cannot accept an analysis —
        // don't pay a (up to 30s / 64MB) bulk pass to discover that. The
        // post-await twin below still covers an index emptied mid-pass.
        if (this.index.fileCount === 0) {
            log.debug('git: index empty; skipping analysis');
            this.analysisRetryNeeded = true;
            return;
        }
        let stdout;
        try {
            stdout = await this.runner.run(buildLogArgs(this.config.gitWindow), {
                timeoutMs: ANALYSIS_TIMEOUT_MS,
                maxBuffer: ANALYSIS_MAX_BUFFER,
            });
        }
        catch (err) {
            const kind = err instanceof GitError ? err.kind : null;
            if (kind === 'aborted') {
                log.debug('git: analysis aborted (shutdown)');
            }
            else if (kind === 'git-missing' || kind === 'disabled') {
                // git vanished MID-SESSION (toolchain/PATH swap): the persisted
                // enrichment can never refresh again — same contract as the
                // detection-time disabled path.
                this.stateValue = 'disabled';
                await this.clearPersistedGitData('git unavailable');
            }
            else {
                // Keep whatever data the cache already holds — stale beats none;
                // the per-call retry hook may re-attempt later (backoff-bounded).
                this.analysisRetryNeeded = true;
                this.runner.warnOnce('bulk-log', `git: history analysis failed (${errMsg(err)}); keeping previous git data`);
            }
            return;
        }
        // Re-check after the await: a shutdown that raced the bulk pass must
        // not apply + save.
        if (this.closed) {
            log.debug('git: analysis discarded (shutting down)');
            return;
        }
        // An empty index means startup indexing has not populated (or failed)
        // — applying now would persist an empty analysis whose fresh gitMeta
        // suppresses the real one for up to 24h. Skip; staleness re-triggers.
        if (this.index.fileCount === 0) {
            log.debug('git: index empty; skipping analysis apply');
            this.analysisRetryNeeded = true;
            return;
        }
        const analysis = analyzeLog(stdout, (p) => this.index.hasFile(p), this.pathPrefix);
        if (analysis.commitCount >= GIT_COMMIT_CAP) {
            log.debug(`git: analysis hit the ${GIT_COMMIT_CAP}-commit cap; older activity is not counted`);
        }
        await this.index.applyGitAnalysis({
            counts: analysis.counts,
            cochanges: analysis.cochanges,
            hotspots: analysis.hotspots,
            meta: {
                head,
                windowDays: this.config.gitWindow,
                analyzedAt: Date.now(),
            },
        });
        this.analysisRetryNeeded = false;
        // Bump BEFORE the save: memo invalidation must be atomic with the
        // data swap, not deferred behind a potentially slow cache write.
        this.bumpGeneration();
        try {
            await this.index.save(this.cachePath);
        }
        catch (err) {
            this.runner.warnOnce('analysis-save', `git: failed to persist analysis: ${errMsg(err)}`);
        }
        log.debug(`git: analysis complete (${analysis.commitCount} commits, ` +
            `${analysis.hotspots.length} hotspots, head ${head.slice(0, 7)})`);
    }

    // Advances the generation and drops both per-generation memos.
    bumpGeneration() {
        this.generationValue++;
        this.branchMemo = null;
        this.recentMemo.clear();
    }

    /**
     * Branch / default-branch summary for the current HEAD.
     * @returns `{ branch, defaultBranch, ahead, changedFiles }`, or null
     *          when closed, not 'ready', or no answer could be computed.
     */
    async branchSummary() {
        if (this.closed)
            return null;
        // BEFORE the ready gate: the retry hook is what heals 'unknown'
        // (transient detection failure) — gating it on 'ready' would make
        // that state permanent.
        this.maybeRetryStartup();
        if (this.stateValue !== 'ready')
            return null;
        if (this.branchMemo?.gen === this.generationValue)
            return this.branchMemo.value;
        // Capture the generation BEFORE computing: if a HEAD change lands
        // mid-computation (bumpGeneration clears the memo), the straggler
        // must not restamp its possibly-torn result as current — stamped
        // with the captured gen, it self-invalidates on the next lookup.
        const gen = this.generationValue;
        const { value, degraded } = await this.computeBranchSummary();
        // Memoize only complete, failure-free answers: a degraded summary
        // pinned for a whole generation could serve a wrong branch all
        // session.
        if (value !== null && !degraded)
            this.branchMemo = { gen, value };
        return value;
    }

    // Heals startup and mid-session races from tool calls, at most one
    // retry in flight and no more than once per backoff interval — and
    // never before the official start() has settled. Cases: runner
    // disabled mid-session -> transition to 'disabled' and clear; state
    // 'unknown' (transient detection failure) -> re-run the whole probe;
    // state 'ready' with no analysis landed, a transiently-failed/skipped
    // bulk pass, or an analysis older than the daily refresh -> re-attach
    // the reflog watcher (if missing) and re-attempt the analysis.
    // 'disabled' and 'no-repo' are permanent.
    maybeRetryStartup() {
        if (this.closed || this.retryingStartup || !this.startSettled)
            return;
        if (this.runner.disabled && this.stateValue !== 'disabled') {
            this.stateValue = 'disabled';
            void this.clearPersistedGitData('git unavailable');
            return;
        }
        if (this.stateValue !== 'unknown') {
            if (this.stateValue !== 'ready')
                return;
            const meta = this.index.getGitMeta();
            const staleByAge = meta !== null && Date.now() - meta.analyzedAt > ANALYSIS_MAX_AGE_MS;
            if (meta !== null && !this.analysisRetryNeeded && !staleByAge)
                return;
        }
        const now = Date.now();
        if (now < this.nextStartupRetryAt)
            return;
        this.nextStartupRetryAt = now + STARTUP_RETRY_BACKOFF_MS;
        this.retryingStartup = true;
        // Fire-and-forget: the tool call that triggered the retry never
        // waits for it.
        void (async () => {
            try {
                if (this.stateValue === 'unknown') {
                    await this.startInner();
                }
                else {
                    this.startHeadWatcher();
                    await this.ensureFreshAnalysis();
                }
            }
            catch (err) {
                log.debug(`git: startup retry failed: ${errMsg(err)}`);
            }
            finally {
                this.retryingStartup = false;
            }
        })();
    }

    // Kind-aware probe for per-call queries: distinguishes a REAL negative
    // (clean non-zero exit — e.g. "no such ref", a memoizable answer) from
    // a transient failure (anything else) that must never be memoized or
    // read as a negative. Also refuses to spawn after close().
    // Resolves to { out: string | null, transient: boolean }.
    async probe(args, opts) {
        if (this.closed)
            return { out: null, transient: true };
        try {
            return { out: await this.runner.run(args, opts), transient: false };
        }
        catch (err) {
            log.debug(`git: ${args[0] ?? '?'} failed: ${errMsg(err)}`);
            const kind = err instanceof GitError ? err.kind : null;
            return { out: null, transient: kind !== 'exit' };
        }
    }

    // `degraded` = some probe failed transiently, so the value (possibly
    // null, possibly missing fields) is NOT a faithful answer and must not
    // be memoized. Real negatives (clean non-zero exits: detached HEAD, no
    // origin, no main/master) do NOT degrade — they are the true state.
    async computeBranchSummary() {
        const q = { timeoutMs: QUICK_TIMEOUT_MS };
        let degraded = false;
        // Branch detection and default-branch detection are independent —
        // run their first probes concurrently.
        const [symR, originR] = await Promise.all([
            this.probe(['symbolic-ref', '--short', '-q', 'HEAD'], q),
            this.probe(['rev-parse', '--abbrev-ref', 'origin/HEAD'], q),
        ]);
        // A transient symbolic-ref failure is NOT detached HEAD — fabricating
        // 'HEAD (detached at ...)' from it would misreport the branch.
        if (symR.transient)
            return { value: null, degraded: true };
        if (originR.transient)
            degraded = true;
        let branch;
        if (symR.out !== null && symR.out.trim().length > 0) {
            branch = symR.out.trim();
        }
        else {
            const shortR = await this.probe(['rev-parse', '--short', 'HEAD'], q);
            if (shortR.transient)
                return { value: null, degraded: true };
            if (shortR.out === null)
                return { value: null, degraded }; // empty repo
            branch = `HEAD (detached at ${shortR.out.trim()})`;
        }
        // base = the rev we diff against (may be a remote-tracking ref);
        // defaultBranch = its display name.
        let base = null;
        let defaultBranch = null;
        if (originR.out !== null && originR.out.trim().length > 0) {
            base = originR.out.trim();
            defaultBranch = base.replace(/^origin\//, '');
        }
        else if (!originR.transient) {
            // No origin/HEAD: fall back to the first local main/master.
            for (const candidate of ['main', 'master']) {
                const okR = await this.probe(['rev-parse', '--verify', '-q', `refs/heads/${candidate}`], q);
                if (okR.transient)
                    degraded = true;
                if (okR.out !== null) {
                    base = candidate;
                    defaultBranch = candidate;
                    break;
                }
            }
        }
        if (base === null) {
            return {
                value: { branch, defaultBranch: null, ahead: null, changedFiles: null },
                degraded,
            };
        }
        if (branch === defaultBranch) {
            return {
                value: { branch, defaultBranch, ahead: 0, changedFiles: [] },
                degraded,
            };
        }
        // Independent of each other (both only need `base`); run together.
        // '-- .' scopes the diff to the project subtree and --relative makes
        // the output PROJECT-relative so paths match index keys.
        const [aheadR, diffR] = await Promise.all([
            this.probe(['rev-list', '--count', `${base}..HEAD`], q),
            this.probe(['diff', '--name-only', '--relative', `${base}...HEAD`, '--', '.'], q),
        ]);
        if (aheadR.transient || diffR.transient)
            degraded = true;
        // Strict digit match: Number('') is 0, so an empty or garbled
        // rev-list answer must read as "unknown" (null), never as a
        // fabricated "0 commits ahead".
        const aheadText = aheadR.out === null ? '' : aheadR.out.trim();
        const ahead = /^\d+$/.test(aheadText) ? Number(aheadText) : null;
        const changedFiles = diffR.out === null
            ? null
            : diffR.out.split('\n').filter((l) => l.length > 0);
        return {
            value: {
                branch,
                defaultBranch,
                ahead,
                changedFiles,
            },
            degraded,
        };
    }

    /**
     * Last `n` commits touching one file. No --follow: with renames the
     * new path simply has a shorter history. NUL field separators keep
     * parsing independent of the subject content.
     * @param path project-relative file path.
     * @param n    maximum number of rows (defaults to RECENT_COMMITS_DEFAULT).
     * @returns array of `{ hash, date, subject }`; [] when closed, not
     *          'ready', or git gave no answer.
     */
    async recentCommits(path, n = RECENT_COMMITS_DEFAULT) {
        if (this.closed)
            return [];
        this.maybeRetryStartup();
        if (this.stateValue !== 'ready')
            return [];
        // The requested count is part of the memo key: a 2-row answer must
        // not be served to a caller asking for 5 (or vice versa).
        const memoKey = `${n}:${path}`;
        const memo = this.recentMemo.get(memoKey);
        if (memo && memo.gen === this.generationValue)
            return memo.value;
        // Captured pre-compute for the same reason as branchSummary.
        const gen = this.generationValue;
        // :(literal) disables pathspec magic: glob metacharacters in real
        // filenames must not attribute foreign commits, and a ':'-prefixed
        // name must not parse as pathspec syntax.
        const { out } = await this.probe(['log', '-n', String(n), '--pretty=format:%h%x00%cs%x00%s', '--', `:(literal)${path}`], { timeoutMs: QUICK_TIMEOUT_MS });
        // null = git did not give an answer (transient failure, or a real
        // exit like an unborn HEAD): return empty WITHOUT memoizing so the
        // next call retries. A successful empty answer (file never
        // committed) is a real result and is memoized below.
        if (out === null)
            return [];
        const value = [];
        for (const line of out.split('\n')) {
            const parts = line.split('\u0000');
            // Exactly three fields with a non-empty hash; anything else is
            // not a record and is skipped.
            if (parts.length === 3 && parts[0].length > 0) {
                value.push({ hash: parts[0], date: parts[1], subject: parts[2] });
            }
        }
        // Evict the oldest entry only when a NEW key would exceed the cap;
        // overwriting an existing key does not grow the map and must not
        // cost an unrelated entry.
        if (!this.recentMemo.has(memoKey) && this.recentMemo.size >= RECENT_MEMO_CAP) {
            const oldest = this.recentMemo.keys().next().value;
            if (oldest !== undefined)
                this.recentMemo.delete(oldest);
        }
        this.recentMemo.set(memoKey, { gen, value });
        return value;
    }

    // Shutdown: stop the watcher and abort in-flight children; the
    // analysis is never awaited here.
    close() {
        this.closed = true;
        this.headWatcher?.close();
        this.headWatcher = null;
        this.runner.abortAll();
    }
}
@@ -0,0 +1,113 @@
1
+ // Live-refresh trigger for the git layer: a tiny non-recursive fs.watch
2
+ // on <gitdir>/logs/ filtered to the HEAD reflog. logs/HEAD is appended on
3
+ // every commit, checkout, merge, reset, and rebase step — exactly the
4
+ // events after which hotspots/co-change data should refresh. The main
5
+ // project watcher never sees these (.git is in DEFAULT_EXCLUDES); this
6
+ // watcher exists only for that one file.
7
+ //
8
+ // Watching the DIRECTORY and filtering on filename survives the
9
+ // rotate/replace edge cases that a direct file watch does not (same
10
+ // robustness reasoning as src/indexer/watcher.ts). Rebases append in
11
+ // bursts, hence the trailing 1s debounce — much coarser than the source
12
+ // watcher's 100ms because a refresh costs a whole git log pass.
13
+ import { watch as fsWatch } from 'node:fs';
14
+ import { dirname, basename } from 'node:path';
15
+ import { canonicalWatchPath } from '../indexer/watcher.js';
16
+ import { errMsg, log } from '../logger.js';
17
// Default watch backend: a non-recursive fs.watch on `dir` that forwards
// 'change' events to `onEvent` and 'error' events to `onError`.
const defaultFactory = (dir, onEvent, onError) => {
    // canonicalWatchPath: avoid the Windows 8.3-short-name libuv abort. See
    // its definition in watcher.ts for the full failure mode.
    const watchedDir = canonicalWatchPath(dir);
    const watcher = fsWatch(watchedDir, { recursive: false })
        .on('change', onEvent)
        .on('error', onError);
    // The stdio transport governs process lifetime, never this watcher.
    watcher.unref();
    return watcher;
};
27
// Trailing-debounce interval used when the caller supplies none.
export const DEFAULT_HEAD_DEBOUNCE_MS = 1 * 1000;
// Max-wait cap on the trailing debounce: a sustained commit stream
// (scripted rebases, CI bots) must not postpone the refresh forever.
// Coarse — each refresh costs a whole git log pass.
export const DEFAULT_HEAD_MAX_DELAY_MS = 15 * 1000;
32
/**
 * Debounced watcher for a single reflog file. It watches the file's
 * DIRECTORY through the watch factory and filters events on the file
 * name. `onChange` fires on the trailing edge of a burst of events;
 * `options.onWindowStart` (optional) fires on the first event of a burst.
 *
 * Options: `debounceMs`, `maxDelayMs`, `watchFactory`, `onWindowStart`.
 */
export class HeadWatcher {
    onChange;
    onWindowStart;
    dir;
    file;
    debounceMs;
    maxDelayMs;
    factory;
    backend = null;
    timer = null;
    // Wall-clock bound for the current accumulation window.
    deadline = null;
    closed = false;

    /**
     * @param headLogPath path of the file to react to; its directory is
     *                    what actually gets watched.
     * @param onChange    called once per debounced burst.
     * @param options     see the class description; missing timings fall
     *                    back to the DEFAULT_HEAD_* constants.
     */
    constructor(headLogPath, onChange, options = {}) {
        this.onChange = onChange;
        this.dir = dirname(headLogPath);
        this.file = basename(headLogPath);
        this.debounceMs = options.debounceMs ?? DEFAULT_HEAD_DEBOUNCE_MS;
        this.maxDelayMs = options.maxDelayMs ?? DEFAULT_HEAD_MAX_DELAY_MS;
        this.factory = options.watchFactory ?? defaultFactory;
        this.onWindowStart = options.onWindowStart;
    }

    // Returns true when a backend is attached (including a repeated call
    // on a running watcher), false when the factory throws or the watcher
    // was already closed. Never throws.
    start() {
        if (this.backend || this.closed)
            return this.backend !== null;
        try {
            this.backend = this.factory(this.dir, (_eventType, filename) => this.handleEvent(filename), (err) => {
                // A backend error shuts this watcher down for good.
                log.debug(`git: HEAD watch error (${errMsg(err)}); live refresh off`);
                this.close();
            });
            return true;
        }
        catch (err) {
            log.debug(`git: cannot watch ${this.dir} (${errMsg(err)}); live refresh off`);
            return false;
        }
    }

    // Filters one raw watch event and (re)arms the trailing debounce.
    handleEvent(filename) {
        if (this.closed)
            return;
        // A missing filename (null, or undefined from a backend that omits
        // the argument) means the event couldn't be attributed; treat it as
        // potentially-HEAD rather than dropping a real commit. `!= null`
        // covers both, so an undefined filename no longer throws here.
        if (filename != null && filename.toString() !== this.file)
            return;
        if (this.timer !== null)
            clearTimeout(this.timer);
        // Trailing debounce: rebases append many reflog entries back to
        // back; only the last one should trigger a refresh. The wall-clock
        // deadline caps how long a continuous stream can keep postponing.
        if (this.deadline === null) {
            this.deadline = Date.now() + this.maxDelayMs;
            this.onWindowStart?.();
        }
        const delay = Math.max(0, Math.min(this.debounceMs, this.deadline - Date.now()));
        this.timer = setTimeout(() => {
            this.timer = null;
            this.deadline = null;
            if (!this.closed)
                this.onChange();
        }, delay);
        // The pending refresh must not keep the process alive.
        this.timer.unref();
    }

    // Idempotent shutdown: cancels a pending refresh and closes the backend.
    close() {
        if (this.closed)
            return;
        this.closed = true;
        if (this.timer !== null) {
            clearTimeout(this.timer);
            this.timer = null;
        }
        this.deadline = null;
        try {
            this.backend?.close();
        }
        catch {
            // closing a dead FSWatcher must never propagate
        }
        this.backend = null;
    }
}