gitnexus 1.6.3-rc.33 → 1.6.3-rc.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index-repo.js +8 -1
- package/dist/core/git-staleness.d.ts +18 -0
- package/dist/core/git-staleness.js +108 -0
- package/dist/core/run-analyze.js +8 -1
- package/dist/mcp/local/local-backend.d.ts +49 -0
- package/dist/mcp/local/local-backend.js +127 -9
- package/dist/storage/git.d.ts +24 -0
- package/dist/storage/git.js +59 -0
- package/dist/storage/repo-manager.d.ts +43 -0
- package/dist/storage/repo-manager.js +36 -0
- package/package.json +1 -1
package/dist/cli/index-repo.js
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import path from 'path';
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
13
|
import { getStoragePaths, loadMeta, addToGitignore, registerRepo, } from '../storage/repo-manager.js';
|
|
14
|
-
import { getGitRoot, isGitRepo } from '../storage/git.js';
|
|
14
|
+
import { getGitRoot, getRemoteUrl, isGitRepo } from '../storage/git.js';
|
|
15
15
|
export const indexCommand = async (inputPathParts, options) => {
|
|
16
16
|
console.log('\n GitNexus Index\n');
|
|
17
17
|
const inputPath = inputPathParts?.length ? inputPathParts.join(' ') : undefined;
|
|
@@ -88,6 +88,13 @@ export const indexCommand = async (inputPathParts, options) => {
|
|
|
88
88
|
};
|
|
89
89
|
}
|
|
90
90
|
// ── Register in global registry ───────────────────────────────────
|
|
91
|
+
// Refresh the on-disk meta with a freshly captured `remoteUrl` if
|
|
92
|
+
// it's missing, so an `index` of an older `.gitnexus/` still gets
|
|
93
|
+
// sibling-clone fingerprinting on subsequent use without forcing a
|
|
94
|
+
// full re-analyze.
|
|
95
|
+
if (!meta.remoteUrl && isGitRepo(repoPath)) {
|
|
96
|
+
meta.remoteUrl = getRemoteUrl(repoPath);
|
|
97
|
+
}
|
|
91
98
|
await registerRepo(repoPath, meta);
|
|
92
99
|
await addToGitignore(repoPath);
|
|
93
100
|
const projectName = path.basename(repoPath);
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Git working tree vs index commit staleness (used by MCP resources, group status, etc.).
|
|
3
3
|
* Lives in core/ so application code does not depend on the MCP package layer.
|
|
4
4
|
*/
|
|
5
|
+
import { type CwdMatch } from '../storage/repo-manager.js';
|
|
5
6
|
export interface StalenessInfo {
|
|
6
7
|
isStale: boolean;
|
|
7
8
|
commitsBehind: number;
|
|
@@ -11,3 +12,20 @@ export interface StalenessInfo {
|
|
|
11
12
|
* Check how many commits the index is behind HEAD (synchronous; uses git CLI).
|
|
12
13
|
*/
|
|
13
14
|
export declare function checkStaleness(repoPath: string, lastCommit: string): StalenessInfo;
|
|
15
|
+
/**
|
|
16
|
+
* Resolve a working directory against the global registry. Returns:
|
|
17
|
+
* - `match: 'path'` when `cwd` is inside a registered entry's path
|
|
18
|
+
* - `match: 'sibling-by-remote'` when `cwd` lives in a different on-disk clone
|
|
19
|
+
* of the same repo (same `remoteUrl`)
|
|
20
|
+
* - `match: 'none'` when neither match applies
|
|
21
|
+
*
|
|
22
|
+
* For sibling-by-remote matches, the caller's HEAD and the drift vs the
|
|
23
|
+
* indexed `lastCommit` are also returned so the MCP layer can warn
|
|
24
|
+
* before serving silently-stale answers (issue: silent graph drift
|
|
25
|
+
* across sibling clones).
|
|
26
|
+
*
|
|
27
|
+
* `path` matches deliberately use the longest-prefix rule so a cwd
|
|
28
|
+
* inside a sub-path of a registered repo still matches that repo, not
|
|
29
|
+
* a coincidentally-aliased shorter entry.
|
|
30
|
+
*/
|
|
31
|
+
export declare function checkCwdMatch(cwd: string): Promise<CwdMatch>;
|
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
* Lives in core/ so application code does not depend on the MCP package layer.
|
|
4
4
|
*/
|
|
5
5
|
import { execFileSync } from 'node:child_process';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import { readRegistry } from '../storage/repo-manager.js';
|
|
8
|
+
import { getGitRoot, getCurrentCommit, getRemoteUrl } from '../storage/git.js';
|
|
6
9
|
/**
|
|
7
10
|
* Check how many commits the index is behind HEAD (synchronous; uses git CLI).
|
|
8
11
|
*/
|
|
@@ -27,3 +30,108 @@ export function checkStaleness(repoPath, lastCommit) {
|
|
|
27
30
|
return { isStale: false, commitsBehind: 0 };
|
|
28
31
|
}
|
|
29
32
|
}
|
|
33
|
+
/**
 * Count the commits the clone at `siblingPath` has on top of
 * `indexedCommit`, i.e. `git rev-list --count <indexedCommit>..HEAD`
 * executed inside that clone.
 *
 * @param siblingPath   Directory the git command is run in.
 * @param indexedCommit Commit recorded for the index.
 * @returns The commit count (0 when git's output does not parse as a
 *   number), or `undefined` when no indexed commit was supplied or the
 *   git invocation fails. Callers should read `undefined` as "drift
 *   unknown", not as "no drift".
 */
function commitsAheadOfIndexed(siblingPath, indexedCommit) {
    if (!indexedCommit) {
        return undefined;
    }
    const revRange = `${indexedCommit}..HEAD`;
    let stdout;
    try {
        stdout = execFileSync('git', ['rev-list', '--count', revRange], {
            cwd: siblingPath,
            encoding: 'utf-8',
            stdio: ['pipe', 'pipe', 'pipe'],
        });
    }
    catch {
        // Any git failure (unknown commit, not a repo, git missing, bad cwd)
        // is reported as "unknown".
        return undefined;
    }
    const count = Number.parseInt(stdout.trim(), 10);
    return Number.isNaN(count) ? 0 : count;
}
|
|
54
|
+
/**
 * Work out how a working directory relates to the global registry.
 *
 * Result shapes:
 *   - `{ match: 'path', entry }` when `cwd` equals, or is nested under,
 *     a registered entry's path. If several entries contain `cwd`, the
 *     one with the longest path wins.
 *   - `{ match: 'sibling-by-remote', entry, cwdGitRoot, cwdHead, drift, hint }`
 *     when no entry contains `cwd`, but the git root of `cwd` has the
 *     same remote URL as a registered entry at a different path. The
 *     first such entry in registry order is returned.
 *   - `{ match: 'none' }` otherwise (empty registry, `cwd` not in a git
 *     work tree, no remote URL, or no entry sharing it).
 *
 * For sibling matches `hint` is left `undefined` when the cwd clone's
 * HEAD equals the entry's `lastCommit`; in every other case it carries
 * a warning that the index may be stale.
 *
 * Paths are compared after `path.resolve`, and lower-cased on win32.
 *
 * @param cwd Directory to classify.
 */
export async function checkCwdMatch(cwd) {
    const registry = await readRegistry();
    if (registry.length === 0) {
        return { match: 'none' };
    }
    const caseInsensitive = process.platform === 'win32';
    const canonical = (p) => {
        const abs = path.resolve(p);
        return caseInsensitive ? abs.toLowerCase() : abs;
    };
    const absCwd = path.resolve(cwd);
    const cwdKey = canonical(absCwd);

    // Step 1: containment. Appending the separator keeps `/repo` from
    // matching `/repo-other`; strict `>` keeps the first of equal-length hits.
    let containing;
    let containingLen = -1;
    for (const entry of registry) {
        const entryKey = canonical(entry.path);
        const contains = cwdKey === entryKey || cwdKey.startsWith(entryKey + path.sep);
        if (contains && entryKey.length > containingLen) {
            containing = entry;
            containingLen = entryKey.length;
        }
    }
    if (containing) {
        return { match: 'path', entry: containing };
    }

    // Step 2: same remote, different directory.
    const cwdGitRoot = getGitRoot(absCwd);
    if (!cwdGitRoot) {
        return { match: 'none' };
    }
    const cwdRemote = getRemoteUrl(cwdGitRoot);
    if (!cwdRemote) {
        return { match: 'none' };
    }
    const gitRootKey = canonical(cwdGitRoot);
    const sibling = registry.find((entry) => entry.remoteUrl === cwdRemote && canonical(entry.path) !== gitRootKey);
    if (!sibling) {
        return { match: 'none' };
    }

    const cwdHead = getCurrentCommit(cwdGitRoot) || undefined;
    const drift = commitsAheadOfIndexed(cwdGitRoot, sibling.lastCommit);

    // Identical commits: the sibling relationship is still reported, but
    // there is nothing to warn about, so `hint` stays undefined.
    const headMatchesIndex = Boolean(cwdHead) && cwdHead === sibling.lastCommit;
    let hint;
    if (!headMatchesIndex) {
        const lead = `⚠️ Index for "${sibling.name}" was built at ${sibling.path}; ` +
            `your cwd (${cwdGitRoot}) is a sibling clone `;
        const relation = drift && drift > 0
            ? `that is ${drift} commit${drift > 1 ? 's' : ''} ahead of the indexed commit.`
            : `whose HEAD differs from the indexed commit.`;
        const advice = ` Results may be stale or incorrect — re-run \`gitnexus analyze\` to refresh the index.`;
        hint = lead + relation + advice;
    }
    return {
        match: 'sibling-by-remote',
        entry: sibling,
        cwdGitRoot,
        cwdHead,
        drift,
        hint,
    };
}
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -13,7 +13,7 @@ import fs from 'fs/promises';
|
|
|
13
13
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
14
14
|
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
|
|
15
15
|
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
|
|
16
|
-
import { getCurrentCommit, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
16
|
+
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
17
17
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
18
18
|
import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
|
|
19
19
|
import { STALE_HASH_SENTINEL } from './lbug/schema.js';
|
|
@@ -203,6 +203,13 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
203
203
|
repoPath,
|
|
204
204
|
lastCommit: currentCommit,
|
|
205
205
|
indexedAt: new Date().toISOString(),
|
|
206
|
+
// Captured here (not at registration) so it travels with the
|
|
207
|
+
// on-disk meta.json — sibling-clone fingerprinting works for
|
|
208
|
+
// out-of-tree consumers (group-status, future tooling) without
|
|
209
|
+
// a second git shellout. `undefined` when the repo has no
|
|
210
|
+
// origin remote, which is fine: paths-only repos behave as
|
|
211
|
+
// before.
|
|
212
|
+
remoteUrl: hasGitDir(repoPath) ? getRemoteUrl(repoPath) : undefined,
|
|
206
213
|
stats: {
|
|
207
214
|
files: pipelineResult.totalFileCount,
|
|
208
215
|
nodes: stats.nodes,
|
|
@@ -56,6 +56,7 @@ interface RepoHandle {
|
|
|
56
56
|
lbugPath: string;
|
|
57
57
|
indexedAt: string;
|
|
58
58
|
lastCommit: string;
|
|
59
|
+
remoteUrl?: string;
|
|
59
60
|
stats?: RegistryEntry['stats'];
|
|
60
61
|
}
|
|
61
62
|
export declare class LocalBackend {
|
|
@@ -65,6 +66,13 @@ export declare class LocalBackend {
|
|
|
65
66
|
private reinitPromises;
|
|
66
67
|
private lastStalenessCheck;
|
|
67
68
|
private groupToolSvc;
|
|
69
|
+
/**
|
|
70
|
+
* One-shot stderr warnings for sibling-clone drift, keyed by
|
|
71
|
+
* `${repoId}|${cwd}` (the process cwd). Without this guard every tool call
|
|
72
|
+
* from inside a sibling clone would print the same warning,
|
|
73
|
+
* making MCP stderr unreadable.
|
|
74
|
+
*/
|
|
75
|
+
private warnedSiblingDrift;
|
|
68
76
|
/**
|
|
69
77
|
* Cross-repo group tools (CLI). Shares logic with MCP `group_*` handlers.
|
|
70
78
|
*/
|
|
@@ -110,14 +118,55 @@ export declare class LocalBackend {
|
|
|
110
118
|
* List all registered repos with their metadata.
|
|
111
119
|
* Re-reads the global registry so newly indexed repos are discovered
|
|
112
120
|
* without restarting the MCP server.
|
|
121
|
+
*
|
|
122
|
+
* Each entry includes:
|
|
123
|
+
* - `staleness`: if the indexed clone's own HEAD has moved past
|
|
124
|
+
* the recorded `lastCommit` (option D in the issue's fix list).
|
|
125
|
+
* - `siblings`: other registered entries sharing the same
|
|
126
|
+
* `remoteUrl` (option B's payoff: callers can see at a glance
|
|
127
|
+
* that another clone of the same logical repo is registered).
|
|
128
|
+
* - `remoteUrl`: the canonical origin URL recorded at index time.
|
|
113
129
|
*/
|
|
114
130
|
listRepos(): Promise<Array<{
|
|
115
131
|
name: string;
|
|
116
132
|
path: string;
|
|
117
133
|
indexedAt: string;
|
|
118
134
|
lastCommit: string;
|
|
135
|
+
remoteUrl?: string;
|
|
119
136
|
stats?: any;
|
|
137
|
+
staleness?: {
|
|
138
|
+
commitsBehind: number;
|
|
139
|
+
hint?: string;
|
|
140
|
+
};
|
|
141
|
+
siblings?: Array<{
|
|
142
|
+
name: string;
|
|
143
|
+
path: string;
|
|
144
|
+
lastCommit: string;
|
|
145
|
+
}>;
|
|
120
146
|
}>>;
|
|
147
|
+
/**
|
|
148
|
+
* Best-effort sibling-clone drift warning.
|
|
149
|
+
*
|
|
150
|
+
* When the resolved index has a `remoteUrl` recorded and the caller's
|
|
151
|
+
* `process.cwd()` is inside a *different* clone of the same repo, emit
|
|
152
|
+
* one stderr line per (repo, cwd) pair so the operator knows the
|
|
153
|
+
* graph may be stale relative to what's actually on disk under their
|
|
154
|
+
* cwd. Silent on path matches and on repos without a remote URL.
|
|
155
|
+
*
|
|
156
|
+
* Limitation: in MCP stdio server mode `process.cwd()` is the
|
|
157
|
+
* server's CWD at start time, *not* the agent client's CWD. The
|
|
158
|
+
* warning therefore only fires when the MCP server itself was
|
|
159
|
+
* launched from inside a sibling clone (typical for `npx gitnexus
|
|
160
|
+
* serve` from a polecat workspace). Surfacing the client's CWD
|
|
161
|
+
* would require a per-tool-call `cwd` parameter — out of scope for
|
|
162
|
+
* the current MCP contract.
|
|
163
|
+
*
|
|
164
|
+
* Pure side-effect (stderr); never affects the returned handle.
|
|
165
|
+
* After the first computation for a given (repo, cwd) pair the
|
|
166
|
+
* result is cached so subsequent `resolveRepo()` calls don't
|
|
167
|
+
* re-shell-out to git.
|
|
168
|
+
*/
|
|
169
|
+
private maybeWarnSiblingDrift;
|
|
121
170
|
callTool(method: string, params: any): Promise<any>;
|
|
122
171
|
/**
|
|
123
172
|
* Query tool — process-grouped search.
|
|
@@ -20,6 +20,7 @@ import { resolveAtGroupMemberRepoPath } from '../../core/group/resolve-at-member
|
|
|
20
20
|
import { collectBestChunks } from '../../core/embeddings/types.js';
|
|
21
21
|
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/lbug/schema.js';
|
|
22
22
|
import { PhaseTimer } from '../../core/search/phase-timer.js';
|
|
23
|
+
import { checkStaleness, checkCwdMatch } from '../../core/git-staleness.js';
|
|
23
24
|
// AI context generation is CLI-only (gitnexus analyze)
|
|
24
25
|
// import { generateAIContextFiles } from '../../cli/ai-context.js';
|
|
25
26
|
/**
|
|
@@ -162,6 +163,13 @@ export class LocalBackend {
|
|
|
162
163
|
reinitPromises = new Map();
|
|
163
164
|
lastStalenessCheck = new Map();
|
|
164
165
|
groupToolSvc = null;
|
|
166
|
+
/**
|
|
167
|
+
* One-shot stderr warnings for sibling-clone drift, keyed by
|
|
168
|
+
* `${repoId}|${cwd}` (the process cwd). Without this guard every tool call
|
|
169
|
+
* from inside a sibling clone would print the same warning,
|
|
170
|
+
* making MCP stderr unreadable.
|
|
171
|
+
*/
|
|
172
|
+
warnedSiblingDrift = new Set();
|
|
165
173
|
/**
|
|
166
174
|
* Cross-repo group tools (CLI). Shares logic with MCP `group_*` handlers.
|
|
167
175
|
*/
|
|
@@ -218,6 +226,7 @@ export class LocalBackend {
|
|
|
218
226
|
lbugPath,
|
|
219
227
|
indexedAt: entry.indexedAt,
|
|
220
228
|
lastCommit: entry.lastCommit,
|
|
229
|
+
remoteUrl: entry.remoteUrl,
|
|
221
230
|
stats: entry.stats,
|
|
222
231
|
};
|
|
223
232
|
this.repos.set(id, handle);
|
|
@@ -270,13 +279,25 @@ export class LocalBackend {
|
|
|
270
279
|
*/
|
|
271
280
|
async resolveRepo(repoParam) {
|
|
272
281
|
const result = this.resolveRepoFromCache(repoParam);
|
|
273
|
-
if (result)
|
|
282
|
+
if (result) {
|
|
283
|
+
// Issue: silent graph drift across sibling clones.
|
|
284
|
+
// If the caller's cwd lives in a *different* on-disk clone of
|
|
285
|
+
// the same repo (matched by `remoteUrl`), warn once per
|
|
286
|
+
// (repo, cwd) pair on stderr. We do not fail or refuse to
|
|
287
|
+
// serve — the index is still the best answer we have — but
|
|
288
|
+
// the operator/agent has to know the answer may be stale.
|
|
289
|
+
this.maybeWarnSiblingDrift(result).catch(() => {
|
|
290
|
+
/* best-effort; never throw from resolveRepo */
|
|
291
|
+
});
|
|
274
292
|
return result;
|
|
293
|
+
}
|
|
275
294
|
// Miss — refresh registry and try once more
|
|
276
295
|
await this.refreshRepos();
|
|
277
296
|
const retried = this.resolveRepoFromCache(repoParam);
|
|
278
|
-
if (retried)
|
|
297
|
+
if (retried) {
|
|
298
|
+
this.maybeWarnSiblingDrift(retried).catch(() => { });
|
|
279
299
|
return retried;
|
|
300
|
+
}
|
|
280
301
|
// Still no match — throw with helpful message
|
|
281
302
|
if (this.repos.size === 0) {
|
|
282
303
|
throw new Error('No indexed repositories. Run: gitnexus analyze');
|
|
@@ -406,16 +427,113 @@ export class LocalBackend {
|
|
|
406
427
|
* List all registered repos with their metadata.
|
|
407
428
|
* Re-reads the global registry so newly indexed repos are discovered
|
|
408
429
|
* without restarting the MCP server.
|
|
430
|
+
*
|
|
431
|
+
* Each entry includes:
|
|
432
|
+
* - `staleness`: if the indexed clone's own HEAD has moved past
|
|
433
|
+
* the recorded `lastCommit` (option D in the issue's fix list).
|
|
434
|
+
* - `siblings`: other registered entries sharing the same
|
|
435
|
+
* `remoteUrl` (option B's payoff: callers can see at a glance
|
|
436
|
+
* that another clone of the same logical repo is registered).
|
|
437
|
+
* - `remoteUrl`: the canonical origin URL recorded at index time.
|
|
409
438
|
*/
|
|
410
439
|
async listRepos() {
|
|
411
440
|
await this.refreshRepos();
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
441
|
+
const handles = [...this.repos.values()];
|
|
442
|
+
// Pre-group registered handles by `remoteUrl` so the sibling
|
|
443
|
+
// lookup is O(1) per handle. We reuse the in-memory `this.repos`
|
|
444
|
+
// (already populated by `refreshRepos`) instead of doing a fresh
|
|
445
|
+
// `readRegistry()` per entry — that would be N file reads for N
|
|
446
|
+
// registered repos.
|
|
447
|
+
const isWin = process.platform === 'win32';
|
|
448
|
+
const norm = (p) => (isWin ? path.resolve(p).toLowerCase() : path.resolve(p));
|
|
449
|
+
const byRemote = new Map();
|
|
450
|
+
for (const h of handles) {
|
|
451
|
+
if (!h.remoteUrl)
|
|
452
|
+
continue;
|
|
453
|
+
const list = byRemote.get(h.remoteUrl) ?? [];
|
|
454
|
+
list.push(h);
|
|
455
|
+
byRemote.set(h.remoteUrl, list);
|
|
456
|
+
}
|
|
457
|
+
return handles.map((h) => {
|
|
458
|
+
const stale = checkStaleness(h.repoPath, h.lastCommit);
|
|
459
|
+
const selfNorm = norm(h.repoPath);
|
|
460
|
+
const siblings = h.remoteUrl
|
|
461
|
+
? (byRemote.get(h.remoteUrl) ?? []).filter((e) => norm(e.repoPath) !== selfNorm)
|
|
462
|
+
: [];
|
|
463
|
+
return {
|
|
464
|
+
name: h.name,
|
|
465
|
+
path: h.repoPath,
|
|
466
|
+
indexedAt: h.indexedAt,
|
|
467
|
+
lastCommit: h.lastCommit,
|
|
468
|
+
remoteUrl: h.remoteUrl,
|
|
469
|
+
stats: h.stats,
|
|
470
|
+
staleness: stale.isStale
|
|
471
|
+
? { commitsBehind: stale.commitsBehind, hint: stale.hint }
|
|
472
|
+
: undefined,
|
|
473
|
+
siblings: siblings.length > 0
|
|
474
|
+
? siblings.map((s) => ({
|
|
475
|
+
name: s.name,
|
|
476
|
+
path: s.repoPath,
|
|
477
|
+
lastCommit: s.lastCommit,
|
|
478
|
+
}))
|
|
479
|
+
: undefined,
|
|
480
|
+
};
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
/**
|
|
484
|
+
* Best-effort sibling-clone drift warning.
|
|
485
|
+
*
|
|
486
|
+
* When the resolved index has a `remoteUrl` recorded and the caller's
|
|
487
|
+
* `process.cwd()` is inside a *different* clone of the same repo, emit
|
|
488
|
+
* one stderr line per (repo, cwd) pair so the operator knows the
|
|
489
|
+
* graph may be stale relative to what's actually on disk under their
|
|
490
|
+
* cwd. Silent on path matches and on repos without a remote URL.
|
|
491
|
+
*
|
|
492
|
+
* Limitation: in MCP stdio server mode `process.cwd()` is the
|
|
493
|
+
* server's CWD at start time, *not* the agent client's CWD. The
|
|
494
|
+
* warning therefore only fires when the MCP server itself was
|
|
495
|
+
* launched from inside a sibling clone (typical for `npx gitnexus
|
|
496
|
+
* serve` from a polecat workspace). Surfacing the client's CWD
|
|
497
|
+
* would require a per-tool-call `cwd` parameter — out of scope for
|
|
498
|
+
* the current MCP contract.
|
|
499
|
+
*
|
|
500
|
+
* Pure side-effect (stderr); never affects the returned handle.
|
|
501
|
+
* After the first computation for a given (repo, cwd) pair the
|
|
502
|
+
* result is cached so subsequent `resolveRepo()` calls don't
|
|
503
|
+
* re-shell-out to git.
|
|
504
|
+
*/
|
|
505
|
+
async maybeWarnSiblingDrift(handle) {
|
|
506
|
+
if (!handle.remoteUrl)
|
|
507
|
+
return;
|
|
508
|
+
let cwd;
|
|
509
|
+
try {
|
|
510
|
+
cwd = process.cwd();
|
|
511
|
+
}
|
|
512
|
+
catch {
|
|
513
|
+
return;
|
|
514
|
+
}
|
|
515
|
+
// Early-exit cache: keyed on (repo, cwd) BEFORE any git shellout.
|
|
516
|
+
// After the first call for a given cwd, this short-circuits the
|
|
517
|
+
// up-to-four `execSync`/`execFileSync` calls inside `checkCwdMatch`
|
|
518
|
+
// — important for MCP-server mode where `process.cwd()` is constant
|
|
519
|
+
// and `resolveRepo` runs on every tool call.
|
|
520
|
+
const cacheKey = `${handle.id}|${cwd}`;
|
|
521
|
+
if (this.warnedSiblingDrift.has(cacheKey))
|
|
522
|
+
return;
|
|
523
|
+
const match = await checkCwdMatch(cwd);
|
|
524
|
+
if (match.match !== 'sibling-by-remote' ||
|
|
525
|
+
!match.entry ||
|
|
526
|
+
!match.cwdGitRoot ||
|
|
527
|
+
match.entry.path !== handle.repoPath ||
|
|
528
|
+
!match.hint) {
|
|
529
|
+
// Cache "nothing to warn about" outcomes too — `checkCwdMatch`
|
|
530
|
+
// is deterministic for a fixed (registry, cwd) pair, so re-running
|
|
531
|
+
// it yields nothing new.
|
|
532
|
+
this.warnedSiblingDrift.add(cacheKey);
|
|
533
|
+
return;
|
|
534
|
+
}
|
|
535
|
+
this.warnedSiblingDrift.add(cacheKey);
|
|
536
|
+
console.error(`GitNexus: ${match.hint}`);
|
|
419
537
|
}
|
|
420
538
|
// ─── Tool Dispatch ───────────────────────────────────────────────
|
|
421
539
|
async callTool(method, params) {
|
package/dist/storage/git.d.ts
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
export declare const isGitRepo: (repoPath: string) => boolean;
|
|
2
2
|
export declare const getCurrentCommit: (repoPath: string) => string;
|
|
3
|
+
/**
|
|
4
|
+
* Get a stable canonical identifier for the repo's `origin` remote, if any.
|
|
5
|
+
*
|
|
6
|
+
* Used to fingerprint two on-disk clones as the same logical repository
|
|
7
|
+
* (issue #XXX — silent graph drift across sibling clones). `path` alone
|
|
8
|
+
* is unreliable: worktrees, "clean clone for indexing" hygiene, and
|
|
9
|
+
* multi-agent workspaces routinely have the same repo at multiple
|
|
10
|
+
* absolute paths. The remote URL is the only on-disk signal that
|
|
11
|
+
* survives those conventions.
|
|
12
|
+
*
|
|
13
|
+
* Normalisation strategy:
|
|
14
|
+
* - Strip a trailing `.git` so `https://x/y` and `https://x/y.git` collapse.
|
|
15
|
+
* - Strip a trailing `/` for the same reason.
|
|
16
|
+
* - `git@github.com:foo/bar` and `https://github.com/foo/bar` are
|
|
17
|
+
* intentionally NOT collapsed — they are different remotes from
|
|
18
|
+
* git's perspective and we don't want to assert equivalence.
|
|
19
|
+
* - Lower-case the host portion so `GitHub.com` and `github.com`
|
|
20
|
+
* don't desync; preserves case in path because some hosts
|
|
21
|
+
* (Bitbucket Server) treat repo paths case-sensitively.
|
|
22
|
+
*
|
|
23
|
+
* Returns `undefined` when there is no origin remote, the directory
|
|
24
|
+
* isn't a git repo, or git itself isn't available.
|
|
25
|
+
*/
|
|
26
|
+
export declare const getRemoteUrl: (repoPath: string) => string | undefined;
|
|
3
27
|
/**
|
|
4
28
|
* Find the git repository root from any path inside the repo
|
|
5
29
|
*/
|
package/dist/storage/git.js
CHANGED
|
@@ -19,6 +19,65 @@ export const getCurrentCommit = (repoPath) => {
|
|
|
19
19
|
return '';
|
|
20
20
|
}
|
|
21
21
|
};
|
|
22
|
+
/**
 * Get a stable canonical identifier for the repo's `origin` remote, if any.
 *
 * Used to fingerprint two on-disk clones as the same logical repository
 * (silent graph drift across sibling clones). `path` alone is
 * unreliable: worktrees, "clean clone for indexing" hygiene, and
 * multi-agent workspaces routinely have the same repo at multiple
 * absolute paths.
 *
 * Normalisation strategy:
 *   - Strip one trailing `/`, then a trailing `.git`, so `https://x/y`,
 *     `https://x/y/` and `https://x/y.git` collapse.
 *   - `git@github.com:foo/bar` and `https://github.com/foo/bar` are
 *     intentionally NOT collapsed: they are different remotes from
 *     git's perspective.
 *   - Lower-case ONLY the host (and port) so `GitHub.com` and
 *     `github.com` don't desync. The repo path keeps its case because
 *     some hosts treat it case-sensitively, and any `user[:password]@`
 *     prefix keeps its case because userinfo is case-sensitive.
 *
 * NOTE(review): userinfo is returned verbatim, so a remote configured
 * with embedded credentials carries them into whatever stores or
 * displays this value. Confirm whether it should be stripped.
 *
 * @param repoPath Directory `git config` is run in.
 * @returns The normalised URL, or `undefined` when the git command
 *   fails (no origin remote, not a git repo, git unavailable, bad
 *   directory) or prints nothing.
 */
export const getRemoteUrl = (repoPath) => {
    let raw;
    try {
        raw = execSync('git config --get remote.origin.url', {
            cwd: repoPath,
            stdio: ['ignore', 'pipe', 'ignore'],
        })
            .toString()
            .trim();
    }
    catch {
        return undefined;
    }
    if (!raw)
        return undefined;
    let normalised = raw.replace(/\/$/, '').replace(/\.git$/, '');
    // SCP form: `user@host:owner/repo`. The user-segment pattern accepts
    // the common `git@` / `[A-Za-z0-9_-]+@` shapes; other usernames fall
    // through and simply get no host-case normalisation.
    const sshMatch = normalised.match(/^(git@|[a-zA-Z0-9_-]+@)([^:/]+)(:.+)$/);
    if (sshMatch) {
        normalised = `${sshMatch[1]}${sshMatch[2].toLowerCase()}${sshMatch[3]}`;
    }
    else {
        // URL form: `scheme://[userinfo@]host[:port][/path]`.
        const urlMatch = normalised.match(/^([a-zA-Z][a-zA-Z0-9+.-]*:\/\/)([^/]+)(\/.*)?$/);
        if (urlMatch) {
            const authority = urlMatch[2];
            // Split at the LAST `@` so a password containing `@` stays in
            // the userinfo part; with no `@`, userinfo is the empty string.
            const at = authority.lastIndexOf('@');
            const userinfo = authority.slice(0, at + 1);
            const hostPort = authority.slice(at + 1);
            normalised = `${urlMatch[1]}${userinfo}${hostPort.toLowerCase()}${urlMatch[3] ?? ''}`;
        }
    }
    return normalised;
};
|
|
22
81
|
/**
|
|
23
82
|
* Find the git repository root from any path inside the repo
|
|
24
83
|
*/
|
|
@@ -40,6 +40,14 @@ export interface RepoMeta {
|
|
|
40
40
|
repoPath: string;
|
|
41
41
|
lastCommit: string;
|
|
42
42
|
indexedAt: string;
|
|
43
|
+
/**
|
|
44
|
+
* Canonical `origin` remote URL captured at index time. Used to
|
|
45
|
+
* fingerprint the same logical repo across multiple on-disk clones
|
|
46
|
+
* (worktrees, agent workspaces, "clean clone for indexing"). When
|
|
47
|
+
* absent (no remote configured, git unavailable, etc.) the repo is
|
|
48
|
+
* treated as path-only and sibling-clone detection is skipped.
|
|
49
|
+
*/
|
|
50
|
+
remoteUrl?: string;
|
|
43
51
|
stats?: {
|
|
44
52
|
files?: number;
|
|
45
53
|
nodes?: number;
|
|
@@ -65,6 +73,8 @@ export interface RegistryEntry {
|
|
|
65
73
|
storagePath: string;
|
|
66
74
|
indexedAt: string;
|
|
67
75
|
lastCommit: string;
|
|
76
|
+
/** See {@link RepoMeta.remoteUrl}. Mirrored from meta at register time. */
|
|
77
|
+
remoteUrl?: string;
|
|
68
78
|
stats?: RepoMeta['stats'];
|
|
69
79
|
}
|
|
70
80
|
/**
|
|
@@ -344,3 +354,36 @@ export declare const loadCLIConfig: () => Promise<CLIConfig>;
|
|
|
344
354
|
* Save CLI config to ~/.gitnexus/config.json
|
|
345
355
|
*/
|
|
346
356
|
export declare const saveCLIConfig: (config: CLIConfig) => Promise<void>;
|
|
357
|
+
/**
|
|
358
|
+
* Find other registered entries whose `remoteUrl` matches the given
|
|
359
|
+
* one, excluding `selfPath` (case-insensitive on Windows). Entries
|
|
360
|
+
* without a `remoteUrl` are ignored — we cannot prove sibling-ness
|
|
361
|
+
* without a fingerprint.
|
|
362
|
+
*/
|
|
363
|
+
export declare const findSiblingClones: (remoteUrl: string | undefined, selfPath: string) => Promise<RegistryEntry[]>;
|
|
364
|
+
/**
 * How a working directory relates to the registered indexes.
 *
 * `match` values:
 *   - `'path'`: `cwd` is the registered entry's path or lies inside it.
 *   - `'sibling-by-remote'`: `cwd` is in a different on-disk clone that
 *     shares the entry's `remoteUrl`.
 *   - `'none'`: no relationship found; `entry` is not set.
 *
 * The `cwd*`, `drift` and `hint` fields are only populated for
 * `'sibling-by-remote'` results.
 */
export interface CwdMatch {
    match: 'path' | 'sibling-by-remote' | 'none';
    /** The registered entry that was matched. */
    entry?: RegistryEntry;
    /** Git toplevel directory that contains `cwd`. */
    cwdGitRoot?: string;
    /** HEAD commit of the cwd's clone; absent when it could not be resolved. */
    cwdHead?: string;
    /**
     * How many commits the cwd clone's HEAD has on top of the entry's
     * `lastCommit`. Absent when that count could not be obtained (for
     * example, the indexed commit is unknown to the cwd's clone).
     */
    drift?: number;
    /**
     * Human-readable warning. Absent when the cwd clone's HEAD equals the
     * entry's `lastCommit`, i.e. when there is nothing to warn about.
     */
    hint?: string;
}
|
|
@@ -379,6 +379,7 @@ export const registerRepo = async (repoPath, meta, opts) => {
|
|
|
379
379
|
storagePath,
|
|
380
380
|
indexedAt: meta.indexedAt,
|
|
381
381
|
lastCommit: meta.lastCommit,
|
|
382
|
+
remoteUrl: meta.remoteUrl,
|
|
382
383
|
stats: meta.stats,
|
|
383
384
|
};
|
|
384
385
|
if (existingIdx >= 0) {
|
|
@@ -643,3 +644,38 @@ export const saveCLIConfig = async (config) => {
|
|
|
643
644
|
}
|
|
644
645
|
}
|
|
645
646
|
};
|
|
647
|
+
// ─── Sibling-clone detection ─────────────────────────────────────────────
|
|
648
|
+
//
|
|
649
|
+
// A "sibling clone" is a different on-disk path that points at the same
|
|
650
|
+
// logical repository (same `origin` remote URL) as a registered index.
|
|
651
|
+
// This shows up in three operationally important shapes (see issue):
|
|
652
|
+
//
|
|
653
|
+
// 1. The same repo is checked out under multiple paths (worktrees,
|
|
654
|
+
// multi-agent workspaces). Only one is indexed; the others silently
|
|
655
|
+
// diverge from the graph.
|
|
656
|
+
// 2. The indexed clone is itself behind its own HEAD (the existing
|
|
657
|
+
// `checkStaleness` already handles this case).
|
|
658
|
+
// 3. A query is issued from a `cwd` that lives inside a sibling clone
|
|
659
|
+
// whose HEAD has drifted from the indexed `lastCommit`.
|
|
660
|
+
//
|
|
661
|
+
// Detection is intentionally remote-URL-based and does NOT walk the
|
|
662
|
+
// filesystem hunting for unregistered clones — only registered entries
|
|
663
|
+
// are considered. The `cwd`-driven branch (`checkCwdMatch` in core/git-staleness)
|
|
664
|
+
// also accepts an unregistered cwd, because the live caller's working
|
|
665
|
+
// directory is the one place we can cheaply learn about an
|
|
666
|
+
// unregistered clone.
|
|
667
|
+
/**
 * List the registered entries that share `remoteUrl` but live at a
 * path other than `selfPath`.
 *
 * Paths are compared after `path.resolve`, lower-cased on win32. A
 * falsy `remoteUrl` short-circuits to an empty list without reading the
 * registry, since sibling-ness cannot be established without a
 * fingerprint; entries lacking a `remoteUrl` never match for the same
 * reason.
 *
 * @param remoteUrl Canonical remote URL to look for.
 * @param selfPath  Path to exclude from the result.
 * @returns Matching registry entries, in registry order.
 */
export const findSiblingClones = async (remoteUrl, selfPath) => {
    if (!remoteUrl) {
        return [];
    }
    const registry = await readRegistry();
    const caseInsensitive = process.platform === 'win32';
    const canonical = (p) => {
        const abs = path.resolve(p);
        return caseInsensitive ? abs.toLowerCase() : abs;
    };
    const ownKey = canonical(selfPath);
    const siblings = [];
    for (const entry of registry) {
        if (entry.remoteUrl === remoteUrl && canonical(entry.path) !== ownKey) {
            siblings.push(entry);
        }
    }
    return siblings;
};
|
package/package.json
CHANGED