gitnexus 1.6.3-rc.33 → 1.6.3-rc.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@
11
11
  import path from 'path';
12
12
  import fs from 'fs/promises';
13
13
  import { getStoragePaths, loadMeta, addToGitignore, registerRepo, } from '../storage/repo-manager.js';
14
- import { getGitRoot, isGitRepo } from '../storage/git.js';
14
+ import { getGitRoot, getRemoteUrl, isGitRepo } from '../storage/git.js';
15
15
  export const indexCommand = async (inputPathParts, options) => {
16
16
  console.log('\n GitNexus Index\n');
17
17
  const inputPath = inputPathParts?.length ? inputPathParts.join(' ') : undefined;
@@ -88,6 +88,13 @@ export const indexCommand = async (inputPathParts, options) => {
88
88
  };
89
89
  }
90
90
  // ── Register in global registry ───────────────────────────────────
91
+ // Refresh the on-disk meta with a freshly captured `remoteUrl` if
92
+ // it's missing, so an `index` of an older `.gitnexus/` still gets
93
+ // sibling-clone fingerprinting on subsequent use without forcing a
94
+ // full re-analyze.
95
+ if (!meta.remoteUrl && isGitRepo(repoPath)) {
96
+ meta.remoteUrl = getRemoteUrl(repoPath);
97
+ }
91
98
  await registerRepo(repoPath, meta);
92
99
  await addToGitignore(repoPath);
93
100
  const projectName = path.basename(repoPath);
@@ -2,6 +2,7 @@
2
2
  * Git working tree vs index commit staleness (used by MCP resources, group status, etc.).
3
3
  * Lives in core/ so application code does not depend on the MCP package layer.
4
4
  */
5
+ import { type CwdMatch } from '../storage/repo-manager.js';
5
6
  export interface StalenessInfo {
6
7
  isStale: boolean;
7
8
  commitsBehind: number;
@@ -11,3 +12,20 @@ export interface StalenessInfo {
11
12
  * Check how many commits the index is behind HEAD (synchronous; uses git CLI).
12
13
  */
13
14
  export declare function checkStaleness(repoPath: string, lastCommit: string): StalenessInfo;
15
+ /**
16
+ * Resolve a working directory against the global registry. Returns:
17
+ * - `match: 'path'` when `cwd` is inside a registered entry's path
18
+ * - `match: 'sibling-by-remote'` when `cwd` lives in a different on-disk clone
19
+ * of the same repo (same `remoteUrl`)
20
+ * - `match: 'none'` when neither match applies
21
+ *
22
+ * For sibling-by-remote matches, the caller's HEAD and the drift vs the
23
+ * indexed `lastCommit` are also returned so the MCP layer can warn
24
+ * before serving silently-stale answers (issue: silent graph drift
25
+ * across sibling clones).
26
+ *
27
+ * `path` matches deliberately use the longest-prefix rule so a cwd
28
+ * inside a sub-path of a registered repo still matches that repo, not
29
+ * a coincidentally-aliased shorter entry.
30
+ */
31
+ export declare function checkCwdMatch(cwd: string): Promise<CwdMatch>;
@@ -3,6 +3,9 @@
3
3
  * Lives in core/ so application code does not depend on the MCP package layer.
4
4
  */
5
5
  import { execFileSync } from 'node:child_process';
6
+ import path from 'path';
7
+ import { readRegistry } from '../storage/repo-manager.js';
8
+ import { getGitRoot, getCurrentCommit, getRemoteUrl } from '../storage/git.js';
6
9
  /**
7
10
  * Check how many commits the index is behind HEAD (synchronous; uses git CLI).
8
11
  */
@@ -27,3 +30,108 @@ export function checkStaleness(repoPath, lastCommit) {
27
30
  return { isStale: false, commitsBehind: 0 };
28
31
  }
29
32
  }
33
+ /**
34
+ * Compare a sibling-clone HEAD against an indexed `lastCommit`. Returns
35
+ * `undefined` when the indexed commit is not reachable from the sibling
36
+ * (e.g. divergent branches, shallow clone, missing ref). The caller
37
+ * should treat `undefined` as "drift unknown" rather than "no drift".
38
+ */
39
+ function commitsAheadOfIndexed(siblingPath, indexedCommit) {
40
+ if (!indexedCommit)
41
+ return undefined;
42
+ try {
43
+ const result = execFileSync('git', ['rev-list', '--count', `${indexedCommit}..HEAD`], {
44
+ cwd: siblingPath,
45
+ encoding: 'utf-8',
46
+ stdio: ['pipe', 'pipe', 'pipe'],
47
+ }).trim();
48
+ return parseInt(result, 10) || 0;
49
+ }
50
+ catch {
51
+ return undefined;
52
+ }
53
+ }
54
/**
 * Resolve a working directory against the global registry.
 *
 * Result shapes:
 *  - `{ match: 'path', entry }` when `cwd` equals, or lies inside, a
 *    registered entry's path. The longest matching registered path wins, so
 *    a nested registration beats its parent.
 *  - `{ match: 'sibling-by-remote', entry, cwdGitRoot, cwdHead, drift, hint }`
 *    when `cwd` belongs to a different on-disk clone whose normalised origin
 *    URL equals a registered entry's `remoteUrl`. When several registered
 *    entries share that URL, the first one in registry order is returned.
 *    `hint` is left unset when the clone's HEAD equals the entry's
 *    `lastCommit` (nothing to warn about).
 *  - `{ match: 'none' }` otherwise (empty registry, cwd not in a git work
 *    tree, no origin remote, or no registered entry with that remote).
 *
 * Paths are compared after `path.resolve`, and case-insensitively on Windows.
 *
 * @param cwd Working directory to classify.
 * @returns A promise of the match description.
 */
export async function checkCwdMatch(cwd) {
    const entries = await readRegistry();
    if (entries.length === 0) {
        return { match: 'none' };
    }
    const isWin = process.platform === 'win32';
    const norm = (p) => (isWin ? path.resolve(p).toLowerCase() : path.resolve(p));
    const cwdResolved = path.resolve(cwd);
    const cwdNorm = norm(cwdResolved);

    // 1) Path-based match (longest prefix wins, boundary-safe).
    let bestPath;
    let bestLen = -1;
    for (const e of entries) {
        const p = norm(e.path);
        // A filesystem root (`/`, `c:\`) already ends with the separator;
        // appending another one would make the prefix test unmatchable.
        const prefix = p.endsWith(path.sep) ? p : p + path.sep;
        const inside = cwdNorm === p || cwdNorm.startsWith(prefix);
        if (inside && p.length > bestLen) {
            bestPath = e;
            bestLen = p.length;
        }
    }
    if (bestPath) {
        return { match: 'path', entry: bestPath };
    }

    // 2) Sibling-by-remote: find the cwd's git root, read its origin URL and
    //    look for a registered entry carrying the same fingerprint.
    const cwdGitRoot = getGitRoot(cwdResolved);
    if (!cwdGitRoot) {
        return { match: 'none' };
    }
    const cwdRemote = getRemoteUrl(cwdGitRoot);
    if (!cwdRemote) {
        return { match: 'none' };
    }
    const cwdRootNorm = norm(cwdGitRoot);
    const sibling = entries.find((e) => e.remoteUrl === cwdRemote && norm(e.path) !== cwdRootNorm);
    if (!sibling) {
        return { match: 'none' };
    }
    const cwdHead = getCurrentCommit(cwdGitRoot) || undefined;
    // `undefined` means "drift unknown" (see commitsAheadOfIndexed).
    const drift = commitsAheadOfIndexed(cwdGitRoot, sibling.lastCommit);

    // Same commit on both clones: still report the sibling relationship but
    // leave `hint` unset, so callers never have to display a no-op warning.
    let hint;
    const sameCommit = Boolean(cwdHead) && cwdHead === sibling.lastCommit;
    if (!sameCommit) {
        if (drift !== undefined && drift > 0) {
            hint =
                `⚠️ Index for "${sibling.name}" was built at ${sibling.path}; ` +
                    `your cwd (${cwdGitRoot}) is a sibling clone that is ${drift} commit${drift > 1 ? 's' : ''} ` +
                    `ahead of the indexed commit. Results may be stale or incorrect — re-run \`gitnexus analyze\` ` +
                    `to refresh the index.`;
        }
        else {
            // Drift is 0 (cwd HEAD is not ahead) or unknown, yet the commits
            // differ: warn without quoting a count.
            hint =
                `⚠️ Index for "${sibling.name}" was built at ${sibling.path}; ` +
                    `your cwd (${cwdGitRoot}) is a sibling clone whose HEAD differs from the indexed commit. ` +
                    `Results may be stale or incorrect — re-run \`gitnexus analyze\` to refresh the index.`;
        }
    }
    return {
        match: 'sibling-by-remote',
        entry: sibling,
        cwdGitRoot,
        cwdHead,
        drift,
        hint,
    };
}
@@ -13,7 +13,7 @@ import fs from 'fs/promises';
13
13
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
14
14
  import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
15
15
  import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
16
- import { getCurrentCommit, hasGitDir, getInferredRepoName } from '../storage/git.js';
16
+ import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
17
17
  import { generateAIContextFiles } from '../cli/ai-context.js';
18
18
  import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
19
19
  import { STALE_HASH_SENTINEL } from './lbug/schema.js';
@@ -203,6 +203,13 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
203
203
  repoPath,
204
204
  lastCommit: currentCommit,
205
205
  indexedAt: new Date().toISOString(),
206
+ // Captured here (not at registration) so it travels with the
207
+ // on-disk meta.json — sibling-clone fingerprinting works for
208
+ // out-of-tree consumers (group-status, future tooling) without
209
+ // a second git shellout. `undefined` when the repo has no
210
+ // origin remote, which is fine: paths-only repos behave as
211
+ // before.
212
+ remoteUrl: hasGitDir(repoPath) ? getRemoteUrl(repoPath) : undefined,
206
213
  stats: {
207
214
  files: pipelineResult.totalFileCount,
208
215
  nodes: stats.nodes,
@@ -56,6 +56,7 @@ interface RepoHandle {
56
56
  lbugPath: string;
57
57
  indexedAt: string;
58
58
  lastCommit: string;
59
+ remoteUrl?: string;
59
60
  stats?: RegistryEntry['stats'];
60
61
  }
61
62
  export declare class LocalBackend {
@@ -65,6 +66,13 @@ export declare class LocalBackend {
65
66
  private reinitPromises;
66
67
  private lastStalenessCheck;
67
68
  private groupToolSvc;
69
+ /**
70
+ * One-shot stderr warnings for sibling-clone drift, keyed by
71
+ * `${handle.id}|${process.cwd()}`. Without this guard every tool call
72
+ * from inside a sibling clone would print the same warning,
73
+ * making MCP stderr unreadable.
74
+ */
75
+ private warnedSiblingDrift;
68
76
  /**
69
77
  * Cross-repo group tools (CLI). Shares logic with MCP `group_*` handlers.
70
78
  */
@@ -110,14 +118,55 @@ export declare class LocalBackend {
110
118
  * List all registered repos with their metadata.
111
119
  * Re-reads the global registry so newly indexed repos are discovered
112
120
  * without restarting the MCP server.
121
+ *
122
+ * Each entry includes:
123
+ * - `staleness`: if the indexed clone's own HEAD has moved past
124
+ * the recorded `lastCommit` (option D in the issue's fix list).
125
+ * - `siblings`: other registered entries sharing the same
126
+ * `remoteUrl` (option B's payoff: callers can see at a glance
127
+ * that another clone of the same logical repo is registered).
128
+ * - `remoteUrl`: the canonical origin URL recorded at index time.
113
129
  */
114
130
  listRepos(): Promise<Array<{
115
131
  name: string;
116
132
  path: string;
117
133
  indexedAt: string;
118
134
  lastCommit: string;
135
+ remoteUrl?: string;
119
136
  stats?: any;
137
+ staleness?: {
138
+ commitsBehind: number;
139
+ hint?: string;
140
+ };
141
+ siblings?: Array<{
142
+ name: string;
143
+ path: string;
144
+ lastCommit: string;
145
+ }>;
120
146
  }>>;
147
+ /**
148
+ * Best-effort sibling-clone drift warning.
149
+ *
150
+ * When the resolved index has a `remoteUrl` recorded and the caller's
151
+ * `process.cwd()` is inside a *different* clone of the same repo, emit
152
+ * one stderr line per (repo, cwd) pair so the operator knows the
153
+ * graph may be stale relative to what's actually on disk under their
154
+ * cwd. Silent on path matches and on repos without a remote URL.
155
+ *
156
+ * Limitation: in MCP stdio server mode `process.cwd()` is the
157
+ * server's CWD at start time, *not* the agent client's CWD. The
158
+ * warning therefore only fires when the MCP server itself was
159
+ * launched from inside a sibling clone (typical for `npx gitnexus
160
+ * serve` from a polecat workspace). Surfacing the client's CWD
161
+ * would require a per-tool-call `cwd` parameter — out of scope for
162
+ * the current MCP contract.
163
+ *
164
+ * Pure side-effect (stderr); never affects the returned handle.
165
+ * After the first computation for a given (repo, cwd) pair the
166
+ * result is cached so subsequent `resolveRepo()` calls don't
167
+ * re-shell-out to git.
168
+ */
169
+ private maybeWarnSiblingDrift;
121
170
  callTool(method: string, params: any): Promise<any>;
122
171
  /**
123
172
  * Query tool — process-grouped search.
@@ -20,6 +20,7 @@ import { resolveAtGroupMemberRepoPath } from '../../core/group/resolve-at-member
20
20
  import { collectBestChunks } from '../../core/embeddings/types.js';
21
21
  import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/lbug/schema.js';
22
22
  import { PhaseTimer } from '../../core/search/phase-timer.js';
23
+ import { checkStaleness, checkCwdMatch } from '../../core/git-staleness.js';
23
24
  // AI context generation is CLI-only (gitnexus analyze)
24
25
  // import { generateAIContextFiles } from '../../cli/ai-context.js';
25
26
  /**
@@ -162,6 +163,13 @@ export class LocalBackend {
162
163
  reinitPromises = new Map();
163
164
  lastStalenessCheck = new Map();
164
165
  groupToolSvc = null;
166
+ /**
167
+ * One-shot stderr warnings for sibling-clone drift, keyed by
168
+ * `${handle.id}|${process.cwd()}`. Without this guard every tool call
169
+ * from inside a sibling clone would print the same warning,
170
+ * making MCP stderr unreadable.
171
+ */
172
+ warnedSiblingDrift = new Set();
165
173
  /**
166
174
  * Cross-repo group tools (CLI). Shares logic with MCP `group_*` handlers.
167
175
  */
@@ -218,6 +226,7 @@ export class LocalBackend {
218
226
  lbugPath,
219
227
  indexedAt: entry.indexedAt,
220
228
  lastCommit: entry.lastCommit,
229
+ remoteUrl: entry.remoteUrl,
221
230
  stats: entry.stats,
222
231
  };
223
232
  this.repos.set(id, handle);
@@ -270,13 +279,25 @@ export class LocalBackend {
270
279
  */
271
280
  async resolveRepo(repoParam) {
272
281
  const result = this.resolveRepoFromCache(repoParam);
273
- if (result)
282
+ if (result) {
283
+ // Issue: silent graph drift across sibling clones.
284
+ // If the caller's cwd lives in a *different* on-disk clone of
285
+ // the same repo (matched by `remoteUrl`), warn once per
286
+ // (repo, cwd) pair on stderr. We do not fail or refuse to
287
+ // serve — the index is still the best answer we have — but
288
+ // the operator/agent has to know the answer may be stale.
289
+ this.maybeWarnSiblingDrift(result).catch(() => {
290
+ /* best-effort; never throw from resolveRepo */
291
+ });
274
292
  return result;
293
+ }
275
294
  // Miss — refresh registry and try once more
276
295
  await this.refreshRepos();
277
296
  const retried = this.resolveRepoFromCache(repoParam);
278
- if (retried)
297
+ if (retried) {
298
+ this.maybeWarnSiblingDrift(retried).catch(() => { });
279
299
  return retried;
300
+ }
280
301
  // Still no match — throw with helpful message
281
302
  if (this.repos.size === 0) {
282
303
  throw new Error('No indexed repositories. Run: gitnexus analyze');
@@ -406,16 +427,113 @@ export class LocalBackend {
406
427
  * List all registered repos with their metadata.
407
428
  * Re-reads the global registry so newly indexed repos are discovered
408
429
  * without restarting the MCP server.
430
+ *
431
+ * Each entry includes:
432
+ * - `staleness`: if the indexed clone's own HEAD has moved past
433
+ * the recorded `lastCommit` (option D in the issue's fix list).
434
+ * - `siblings`: other registered entries sharing the same
435
+ * `remoteUrl` (option B's payoff: callers can see at a glance
436
+ * that another clone of the same logical repo is registered).
437
+ * - `remoteUrl`: the canonical origin URL recorded at index time.
409
438
  */
410
439
  async listRepos() {
411
440
  await this.refreshRepos();
412
- return [...this.repos.values()].map((h) => ({
413
- name: h.name,
414
- path: h.repoPath,
415
- indexedAt: h.indexedAt,
416
- lastCommit: h.lastCommit,
417
- stats: h.stats,
418
- }));
441
+ const handles = [...this.repos.values()];
442
+ // Pre-group registered handles by `remoteUrl` so the sibling
443
+ // lookup is O(1) per handle. We reuse the in-memory `this.repos`
444
+ // (already populated by `refreshRepos`) instead of doing a fresh
445
+ // `readRegistry()` per entry — that would be N file reads for N
446
+ // registered repos.
447
+ const isWin = process.platform === 'win32';
448
+ const norm = (p) => (isWin ? path.resolve(p).toLowerCase() : path.resolve(p));
449
+ const byRemote = new Map();
450
+ for (const h of handles) {
451
+ if (!h.remoteUrl)
452
+ continue;
453
+ const list = byRemote.get(h.remoteUrl) ?? [];
454
+ list.push(h);
455
+ byRemote.set(h.remoteUrl, list);
456
+ }
457
+ return handles.map((h) => {
458
+ const stale = checkStaleness(h.repoPath, h.lastCommit);
459
+ const selfNorm = norm(h.repoPath);
460
+ const siblings = h.remoteUrl
461
+ ? (byRemote.get(h.remoteUrl) ?? []).filter((e) => norm(e.repoPath) !== selfNorm)
462
+ : [];
463
+ return {
464
+ name: h.name,
465
+ path: h.repoPath,
466
+ indexedAt: h.indexedAt,
467
+ lastCommit: h.lastCommit,
468
+ remoteUrl: h.remoteUrl,
469
+ stats: h.stats,
470
+ staleness: stale.isStale
471
+ ? { commitsBehind: stale.commitsBehind, hint: stale.hint }
472
+ : undefined,
473
+ siblings: siblings.length > 0
474
+ ? siblings.map((s) => ({
475
+ name: s.name,
476
+ path: s.repoPath,
477
+ lastCommit: s.lastCommit,
478
+ }))
479
+ : undefined,
480
+ };
481
+ });
482
+ }
483
+ /**
484
+ * Best-effort sibling-clone drift warning.
485
+ *
486
+ * When the resolved index has a `remoteUrl` recorded and the caller's
487
+ * `process.cwd()` is inside a *different* clone of the same repo, emit
488
+ * one stderr line per (repo, cwd) pair so the operator knows the
489
+ * graph may be stale relative to what's actually on disk under their
490
+ * cwd. Silent on path matches and on repos without a remote URL.
491
+ *
492
+ * Limitation: in MCP stdio server mode `process.cwd()` is the
493
+ * server's CWD at start time, *not* the agent client's CWD. The
494
+ * warning therefore only fires when the MCP server itself was
495
+ * launched from inside a sibling clone (typical for `npx gitnexus
496
+ * serve` from a polecat workspace). Surfacing the client's CWD
497
+ * would require a per-tool-call `cwd` parameter — out of scope for
498
+ * the current MCP contract.
499
+ *
500
+ * Pure side-effect (stderr); never affects the returned handle.
501
+ * After the first computation for a given (repo, cwd) pair the
502
+ * result is cached so subsequent `resolveRepo()` calls don't
503
+ * re-shell-out to git.
504
+ */
505
+ async maybeWarnSiblingDrift(handle) {
506
+ if (!handle.remoteUrl)
507
+ return;
508
+ let cwd;
509
+ try {
510
+ cwd = process.cwd();
511
+ }
512
+ catch {
513
+ return;
514
+ }
515
+ // Early-exit cache: keyed on (repo, cwd) BEFORE any git shellout.
516
+ // After the first call for a given cwd, this short-circuits the
517
+ // up-to-four `execSync`/`execFileSync` calls inside `checkCwdMatch`
518
+ // — important for MCP-server mode where `process.cwd()` is constant
519
+ // and `resolveRepo` runs on every tool call.
520
+ const cacheKey = `${handle.id}|${cwd}`;
521
+ if (this.warnedSiblingDrift.has(cacheKey))
522
+ return;
523
+ const match = await checkCwdMatch(cwd);
524
+ if (match.match !== 'sibling-by-remote' ||
525
+ !match.entry ||
526
+ !match.cwdGitRoot ||
527
+ match.entry.path !== handle.repoPath ||
528
+ !match.hint) {
529
+ // Cache "nothing to warn about" outcomes too — `checkCwdMatch`
530
+ // is deterministic for a fixed (registry, cwd) pair, so re-running
531
+ // it yields nothing new.
532
+ this.warnedSiblingDrift.add(cacheKey);
533
+ return;
534
+ }
535
+ this.warnedSiblingDrift.add(cacheKey);
536
+ console.error(`GitNexus: ${match.hint}`);
419
537
  }
420
538
  // ─── Tool Dispatch ───────────────────────────────────────────────
421
539
  async callTool(method, params) {
@@ -1,5 +1,29 @@
1
1
  export declare const isGitRepo: (repoPath: string) => boolean;
2
2
  export declare const getCurrentCommit: (repoPath: string) => string;
3
+ /**
4
+ * Get a stable canonical identifier for the repo's `origin` remote, if any.
5
+ *
6
+ * Used to fingerprint two on-disk clones as the same logical repository
7
+ * (the "silent graph drift across sibling clones" issue). `path` alone
8
+ * is unreliable: worktrees, "clean clone for indexing" hygiene, and
9
+ * multi-agent workspaces routinely have the same repo at multiple
10
+ * absolute paths. The remote URL is the only on-disk signal that
11
+ * survives those conventions.
12
+ *
13
+ * Normalisation strategy:
14
+ * - Strip a trailing `.git` so `https://x/y` and `https://x/y.git` collapse.
15
+ * - Strip a trailing `/` for the same reason.
16
+ * - `git@github.com:foo/bar` and `https://github.com/foo/bar` are
17
+ * intentionally NOT collapsed — they are different remotes from
18
+ * git's perspective and we don't want to assert equivalence.
19
+ * - Lower-case the host portion so `GitHub.com` and `github.com`
20
+ * don't desync; preserves case in path because some hosts
21
+ * (Bitbucket Server) treat repo paths case-sensitively.
22
+ *
23
+ * Returns `undefined` when there is no origin remote, the directory
24
+ * isn't a git repo, or git itself isn't available.
25
+ */
26
+ export declare const getRemoteUrl: (repoPath: string) => string | undefined;
3
27
  /**
4
28
  * Find the git repository root from any path inside the repo
5
29
  */
@@ -19,6 +19,65 @@ export const getCurrentCommit = (repoPath) => {
19
19
  return '';
20
20
  }
21
21
  };
22
+ /**
23
+ * Get a stable canonical identifier for the repo's `origin` remote, if any.
24
+ *
25
+ * Used to fingerprint two on-disk clones as the same logical repository
26
+ * (issue #XXX — silent graph drift across sibling clones). `path` alone
27
+ * is unreliable: worktrees, "clean clone for indexing" hygiene, and
28
+ * multi-agent workspaces routinely have the same repo at multiple
29
+ * absolute paths. The remote URL is the only on-disk signal that
30
+ * survives those conventions.
31
+ *
32
+ * Normalisation strategy:
33
+ * - Strip a trailing `.git` so `https://x/y` and `https://x/y.git` collapse.
34
+ * - Strip a trailing `/` for the same reason.
35
+ * - `git@github.com:foo/bar` and `https://github.com/foo/bar` are
36
+ * intentionally NOT collapsed — they are different remotes from
37
+ * git's perspective and we don't want to assert equivalence.
38
+ * - Lower-case the host portion so `GitHub.com` and `github.com`
39
+ * don't desync; preserves case in path because some hosts
40
+ * (Bitbucket Server) treat repo paths case-sensitively.
41
+ *
42
+ * Returns `undefined` when there is no origin remote, the directory
43
+ * isn't a git repo, or git itself isn't available.
44
+ */
45
+ export const getRemoteUrl = (repoPath) => {
46
+ let raw;
47
+ try {
48
+ raw = execSync('git config --get remote.origin.url', {
49
+ cwd: repoPath,
50
+ stdio: ['ignore', 'pipe', 'ignore'],
51
+ })
52
+ .toString()
53
+ .trim();
54
+ }
55
+ catch {
56
+ return undefined;
57
+ }
58
+ if (!raw)
59
+ return undefined;
60
+ let normalised = raw.replace(/\/$/, '').replace(/\.git$/, '');
61
+ // Lower-case the host segment of `scheme://[user@]host[:port]/...`
62
+ // and the host segment of `git@host:owner/repo` SCP form.
63
+ // SSH user-segment regex deliberately accepts the common
64
+ // `git@`/`<alnum>-_@` cases. Less common usernames (e.g. with
65
+ // dots) fall through to the URL-form branch — they will simply
66
+ // not get host-case normalisation, which is acceptable: the raw
67
+ // `git config` output is still a valid fingerprint, just slightly
68
+ // less collapsible across host casings.
69
+ const sshMatch = normalised.match(/^(git@|[a-zA-Z0-9_-]+@)([^:/]+)(:.+)$/);
70
+ if (sshMatch) {
71
+ normalised = `${sshMatch[1]}${sshMatch[2].toLowerCase()}${sshMatch[3]}`;
72
+ }
73
+ else {
74
+ const urlMatch = normalised.match(/^([a-zA-Z][a-zA-Z0-9+.-]*:\/\/)([^/]+)(\/.*)?$/);
75
+ if (urlMatch) {
76
+ normalised = `${urlMatch[1]}${urlMatch[2].toLowerCase()}${urlMatch[3] ?? ''}`;
77
+ }
78
+ }
79
+ return normalised;
80
+ };
22
81
  /**
23
82
  * Find the git repository root from any path inside the repo
24
83
  */
@@ -40,6 +40,14 @@ export interface RepoMeta {
40
40
  repoPath: string;
41
41
  lastCommit: string;
42
42
  indexedAt: string;
43
+ /**
44
+ * Canonical `origin` remote URL captured at index time. Used to
45
+ * fingerprint the same logical repo across multiple on-disk clones
46
+ * (worktrees, agent workspaces, "clean clone for indexing"). When
47
+ * absent (no remote configured, git unavailable, etc.) the repo is
48
+ * treated as path-only and sibling-clone detection is skipped.
49
+ */
50
+ remoteUrl?: string;
43
51
  stats?: {
44
52
  files?: number;
45
53
  nodes?: number;
@@ -65,6 +73,8 @@ export interface RegistryEntry {
65
73
  storagePath: string;
66
74
  indexedAt: string;
67
75
  lastCommit: string;
76
+ /** See {@link RepoMeta.remoteUrl}. Mirrored from meta at register time. */
77
+ remoteUrl?: string;
68
78
  stats?: RepoMeta['stats'];
69
79
  }
70
80
  /**
@@ -344,3 +354,36 @@ export declare const loadCLIConfig: () => Promise<CLIConfig>;
344
354
  * Save CLI config to ~/.gitnexus/config.json
345
355
  */
346
356
  export declare const saveCLIConfig: (config: CLIConfig) => Promise<void>;
357
+ /**
358
+ * Find other registered entries whose `remoteUrl` matches the given
359
+ * one, excluding `selfPath` (case-insensitive on Windows). Entries
360
+ * without a `remoteUrl` are ignored — we cannot prove sibling-ness
361
+ * without a fingerprint.
362
+ */
363
+ export declare const findSiblingClones: (remoteUrl: string | undefined, selfPath: string) => Promise<RegistryEntry[]>;
364
+ /**
365
+ * Description of how a working directory relates to a registered index.
366
+ *
367
+ * `match` semantics:
368
+ * - `path` — `cwd` is inside the registered entry's path.
369
+ * - `sibling-by-remote` — `cwd` is in a different on-disk clone of the
370
+ * same repo (same `remoteUrl`).
371
+ * - `none` — no relationship found.
372
+ */
373
+ export interface CwdMatch {
374
+ match: 'path' | 'sibling-by-remote' | 'none';
375
+ entry?: RegistryEntry; // the registered entry that matched; not set when `match` is 'none'
376
+ /** The git toplevel of `cwd`; only populated for `sibling-by-remote` matches. */
377
+ cwdGitRoot?: string;
378
+ /** HEAD of the cwd's clone, when resolvable; only populated for `sibling-by-remote` matches. */
379
+ cwdHead?: string;
380
+ /**
381
+ * Number of commits the registered `lastCommit` is behind the
382
+ * sibling-clone HEAD, when both refs are known to the cwd's clone.
383
+ * `undefined` when the comparison cannot be performed (e.g. the
384
+ * indexed commit is unknown to the cwd's clone) — treat as "unknown", not 0.
385
+ */
386
+ drift?: number;
387
+ /** Human-readable warning for `sibling-by-remote` matches; unset when the cwd's HEAD equals the indexed `lastCommit`. */
388
+ hint?: string;
389
+ }
@@ -379,6 +379,7 @@ export const registerRepo = async (repoPath, meta, opts) => {
379
379
  storagePath,
380
380
  indexedAt: meta.indexedAt,
381
381
  lastCommit: meta.lastCommit,
382
+ remoteUrl: meta.remoteUrl,
382
383
  stats: meta.stats,
383
384
  };
384
385
  if (existingIdx >= 0) {
@@ -643,3 +644,38 @@ export const saveCLIConfig = async (config) => {
643
644
  }
644
645
  }
645
646
  };
647
+ // ─── Sibling-clone detection ─────────────────────────────────────────────
648
+ //
649
+ // A "sibling clone" is a different on-disk path that points at the same
650
+ // logical repository (same `origin` remote URL) as a registered index.
651
+ // This shows up in three operationally important shapes (see issue):
652
+ //
653
+ // 1. The same repo is checked out under multiple paths (worktrees,
654
+ // multi-agent workspaces). Only one is indexed; the others silently
655
+ // diverge from the graph.
656
+ // 2. The indexed clone is itself behind its own HEAD (the existing
657
+ // `checkStaleness` already handles this case).
658
+ // 3. A query is issued from a `cwd` that lives inside a sibling clone
659
+ // whose HEAD has drifted from the indexed `lastCommit`.
660
+ //
661
+ // Detection is intentionally remote-URL-based and does NOT walk the
662
+ // filesystem hunting for unregistered clones — only registered entries
663
+ // are considered. The `cwd`-driven branch ({@link checkCwdMatch})
664
+ // also accepts an unregistered cwd, because the live caller's working
665
+ // directory is the one place we can cheaply learn about an
666
+ // unregistered clone.
667
/**
 * List the registered entries that are other clones of the same remote.
 *
 * An entry qualifies when its `remoteUrl` is strictly equal to `remoteUrl`
 * and its resolved path differs from the resolved `selfPath` (paths are
 * compared case-insensitively on Windows). Without a `remoteUrl` there is
 * no fingerprint to compare, so the result is empty and the registry is
 * not read.
 *
 * @param remoteUrl Canonical remote URL to look for; may be undefined.
 * @param selfPath  Path of the clone to exclude from the result.
 * @returns A promise of the matching registry entries (possibly empty).
 */
export const findSiblingClones = async (remoteUrl, selfPath) => {
    if (!remoteUrl) {
        return [];
    }
    const registry = await readRegistry();
    const foldCase = process.platform === 'win32';
    const canonical = (p) => {
        const abs = path.resolve(p);
        return foldCase ? abs.toLowerCase() : abs;
    };
    const own = canonical(selfPath);
    const siblings = [];
    for (const candidate of registry) {
        if (candidate.remoteUrl === remoteUrl && canonical(candidate.path) !== own) {
            siblings.push(candidate);
        }
    }
    return siblings;
};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.3-rc.33",
3
+ "version": "1.6.3-rc.34",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",