gitnexus 1.6.4-rc.90 → 1.6.4-rc.92
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/git-staleness.d.ts +6 -0
- package/dist/core/git-staleness.js +30 -1
- package/dist/core/group/cross-impact.d.ts +44 -0
- package/dist/core/group/cross-impact.js +82 -8
- package/dist/core/group/extractors/rust-workspace-extractor.d.ts +14 -0
- package/dist/core/group/extractors/rust-workspace-extractor.js +29 -4
- package/dist/core/group/service.d.ts +1 -0
- package/dist/core/ingestion/cobol/cobol-preprocessor.d.ts +2 -0
- package/dist/core/ingestion/cobol/cobol-preprocessor.js +14 -2
- package/dist/core/lbug/lbug-config.d.ts +3 -0
- package/dist/core/lbug/lbug-config.js +15 -1
- package/dist/core/lbug/pool-adapter.js +54 -6
- package/dist/mcp/local/local-backend.d.ts +2 -0
- package/dist/mcp/local/local-backend.js +38 -4
- package/package.json +1 -1
|
@@ -12,6 +12,12 @@ export interface StalenessInfo {
|
|
|
12
12
|
* Check how many commits the index is behind HEAD (synchronous; uses git CLI).
|
|
13
13
|
*/
|
|
14
14
|
export declare function checkStaleness(repoPath: string, lastCommit: string): StalenessInfo;
|
|
15
|
+
/**
|
|
16
|
+
* Async variant of {@link checkStaleness} — spawns git as a child process
|
|
17
|
+
* instead of blocking the event loop. Used by `listRepos()` to check many
|
|
18
|
+
* repos in parallel (issue #1363: 200 repos × sync spawn ≈ 50 s).
|
|
19
|
+
*/
|
|
20
|
+
export declare function checkStalenessAsync(repoPath: string, lastCommit: string): Promise<StalenessInfo>;
|
|
15
21
|
/**
|
|
16
22
|
* Resolve a working directory against the global registry. Returns:
|
|
17
23
|
* - `match: 'path'` when `cwd` is inside a registered entry's path
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
* Git working tree vs index commit staleness (used by MCP resources, group status, etc.).
|
|
3
3
|
* Lives in core/ so application code does not depend on the MCP package layer.
|
|
4
4
|
*/
|
|
5
|
-
import { execFileSync } from 'node:child_process';
|
|
5
|
+
import { execFile, execFileSync } from 'node:child_process';
|
|
6
|
+
import { promisify } from 'node:util';
|
|
6
7
|
import path from 'path';
|
|
7
8
|
import { readRegistry } from '../storage/repo-manager.js';
|
|
8
9
|
import { findGitRootByDotGit, getCurrentCommit, getRemoteUrl } from '../storage/git.js';
|
|
10
|
+
const execFileAsync = promisify(execFile);
|
|
9
11
|
/**
|
|
10
12
|
* Check how many commits the index is behind HEAD (synchronous; uses git CLI).
|
|
11
13
|
*/
|
|
@@ -30,6 +32,33 @@ export function checkStaleness(repoPath, lastCommit) {
|
|
|
30
32
|
return { isStale: false, commitsBehind: 0 };
|
|
31
33
|
}
|
|
32
34
|
}
|
|
35
|
+
/**
|
|
36
|
+
* Async variant of {@link checkStaleness} — spawns git as a child process
|
|
37
|
+
* instead of blocking the event loop. Used by `listRepos()` to check many
|
|
38
|
+
* repos in parallel (issue #1363: 200 repos × sync spawn ≈ 50 s).
|
|
39
|
+
*/
|
|
40
|
+
export async function checkStalenessAsync(repoPath, lastCommit) {
|
|
41
|
+
try {
|
|
42
|
+
// Note: promisified execFile captures stdout/stderr by default (no stdio option needed,
|
|
43
|
+
// unlike the sync variant which requires explicit stdio: ['pipe','pipe','pipe']).
|
|
44
|
+
const { stdout } = await execFileAsync('git', ['rev-list', '--count', `${lastCommit}..HEAD`], {
|
|
45
|
+
cwd: repoPath,
|
|
46
|
+
encoding: 'utf-8',
|
|
47
|
+
});
|
|
48
|
+
const commitsBehind = parseInt(stdout.trim(), 10) || 0;
|
|
49
|
+
if (commitsBehind > 0) {
|
|
50
|
+
return {
|
|
51
|
+
isStale: true,
|
|
52
|
+
commitsBehind,
|
|
53
|
+
hint: `⚠️ Index is ${commitsBehind} commit${commitsBehind > 1 ? 's' : ''} behind HEAD. Run analyze tool to update.`,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
return { isStale: false, commitsBehind: 0 };
|
|
57
|
+
}
|
|
58
|
+
catch {
|
|
59
|
+
return { isStale: false, commitsBehind: 0 };
|
|
60
|
+
}
|
|
61
|
+
}
|
|
33
62
|
/**
|
|
34
63
|
* Compare a sibling-clone HEAD against an indexed `lastCommit`. Returns
|
|
35
64
|
* `undefined` when the indexed commit is not reachable from the sibling
|
|
@@ -12,6 +12,20 @@ export interface RunGroupImpactDeps {
|
|
|
12
12
|
port: GroupToolPort;
|
|
13
13
|
gitnexusDir: string;
|
|
14
14
|
}
|
|
15
|
+
/**
|
|
16
|
+
* Clamp the impact timeout to a sane bounded range. Callers can feed this
|
|
17
|
+
* via tool params, so an unclamped value lets a single request hold a
|
|
18
|
+
* timer slot for an arbitrarily long duration (CodeQL js/resource-
|
|
19
|
+
* exhaustion). 100ms lower bound preserves test-suite scenarios that
|
|
20
|
+
* exercise tight timeouts; 5min upper bound is well above any legitimate
|
|
21
|
+
* single-impact compute. Applied at the validate boundary so the
|
|
22
|
+
* downstream `deadline` (Date.now() + timeoutMs) and the local-leg
|
|
23
|
+
* `setTimeout` see the same clamped value — earlier shapes had a 1hr
|
|
24
|
+
* outer cap and a 5min inner clamp that disagreed.
|
|
25
|
+
*/
|
|
26
|
+
export declare const IMPACT_TIMEOUT_MIN_MS = 100;
|
|
27
|
+
export declare const IMPACT_TIMEOUT_MAX_MS: number;
|
|
28
|
+
export declare function clampTimeout(timeoutMs: number): number;
|
|
15
29
|
export declare function validateGroupImpactParams(params: Record<string, unknown>): {
|
|
16
30
|
ok: true;
|
|
17
31
|
name: string;
|
|
@@ -31,6 +45,36 @@ export declare function validateGroupImpactParams(params: Record<string, unknown
|
|
|
31
45
|
ok: false;
|
|
32
46
|
error: string;
|
|
33
47
|
};
|
|
48
|
+
/**
|
|
49
|
+
* Race a single Phase-2 `impactByUid` call against a remaining-budget
|
|
50
|
+
* timer. The Codex adversarial review on PR #1331 surfaced that the
|
|
51
|
+
* fanout loop only checked `Date.now() > deadline` *between* neighbor
|
|
52
|
+
* calls — once `await port.impactByUid(...)` was reached, a hung
|
|
53
|
+
* neighbor could pin the request indefinitely, and slow neighbors
|
|
54
|
+
* could compound past the 5-min `IMPACT_TIMEOUT_MAX_MS` cap.
|
|
55
|
+
*
|
|
56
|
+
* This helper wraps each call: a `setTimeout(remainingMs)` aborts an
|
|
57
|
+
* `AbortController` whose signal is forwarded to `impactByUid`, and a
|
|
58
|
+
* `Promise.race` resolves to `{ timedOut: true }` when the timer
|
|
59
|
+
* fires before the call completes. Implementors that ignore the
|
|
60
|
+
* signal (current local backend) still see their await resolved by
|
|
61
|
+
* the race; full cooperative cancellation inside the BFS is a future
|
|
62
|
+
* follow-up. On rejection, the value is `null` (matching the
|
|
63
|
+
* fanout's existing `if (fan == null)` truncation contract).
|
|
64
|
+
*
|
|
65
|
+
* Exported for direct unit testing — the helper IS the load-bearing
|
|
66
|
+
* mitigation surface, so the U3 regression test pins it directly
|
|
67
|
+
* rather than driving the full `runGroupImpact` path.
|
|
68
|
+
*/
|
|
69
|
+
export declare function safeNeighborImpact(port: GroupToolPort, repoId: string, uid: string, direction: string, opts: {
|
|
70
|
+
maxDepth: number;
|
|
71
|
+
relationTypes: string[];
|
|
72
|
+
minConfidence: number;
|
|
73
|
+
includeTests: boolean;
|
|
74
|
+
}, remainingMs: number): Promise<{
|
|
75
|
+
value: unknown;
|
|
76
|
+
timedOut: boolean;
|
|
77
|
+
}>;
|
|
34
78
|
export declare function collectImpactSymbolUids(local: unknown, servicePrefix: string | undefined): {
|
|
35
79
|
uids: string[];
|
|
36
80
|
targetFilePath?: string;
|
|
@@ -55,6 +55,24 @@ function clampCrossDepth(raw) {
|
|
|
55
55
|
}
|
|
56
56
|
return { depth: d };
|
|
57
57
|
}
|
|
58
|
+
/**
|
|
59
|
+
* Clamp the impact timeout to a sane bounded range. Callers can feed this
|
|
60
|
+
* via tool params, so an unclamped value lets a single request hold a
|
|
61
|
+
* timer slot for an arbitrarily long duration (CodeQL js/resource-
|
|
62
|
+
* exhaustion). 100ms lower bound preserves test-suite scenarios that
|
|
63
|
+
* exercise tight timeouts; 5min upper bound is well above any legitimate
|
|
64
|
+
* single-impact compute. Applied at the validate boundary so the
|
|
65
|
+
* downstream `deadline` (Date.now() + timeoutMs) and the local-leg
|
|
66
|
+
* `setTimeout` see the same clamped value — earlier shapes had a 1hr
|
|
67
|
+
* outer cap and a 5min inner clamp that disagreed.
|
|
68
|
+
*/
|
|
69
|
+
export const IMPACT_TIMEOUT_MIN_MS = 100;
|
|
70
|
+
export const IMPACT_TIMEOUT_MAX_MS = 5 * 60 * 1_000;
|
|
71
|
+
export function clampTimeout(timeoutMs) {
|
|
72
|
+
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0)
|
|
73
|
+
return IMPACT_TIMEOUT_MIN_MS;
|
|
74
|
+
return Math.min(IMPACT_TIMEOUT_MAX_MS, Math.max(IMPACT_TIMEOUT_MIN_MS, Math.trunc(timeoutMs)));
|
|
75
|
+
}
|
|
58
76
|
export function validateGroupImpactParams(params) {
|
|
59
77
|
const name = String(params.name ?? '').trim();
|
|
60
78
|
const repoPath = String(params.repo ?? '').trim();
|
|
@@ -88,13 +106,18 @@ export function validateGroupImpactParams(params) {
|
|
|
88
106
|
minConfidence = 1;
|
|
89
107
|
const service = normalizeServicePrefix(params.service);
|
|
90
108
|
const subgroup = typeof params.subgroup === 'string' ? params.subgroup : undefined;
|
|
91
|
-
|
|
109
|
+
// Clamp at the validate boundary so the downstream `deadline` (line
|
|
110
|
+
// ~366) and `safeLocalImpact`'s `setTimeout` both see a single
|
|
111
|
+
// bounded value. Without this, the outer deadline budgeted Phase-2
|
|
112
|
+
// cross-repo fanout up to 1hr while only the inner setTimeout was
|
|
113
|
+
// capped to 5min — the two halves of CodeQL #184's mitigation
|
|
114
|
+
// disagreed.
|
|
115
|
+
const rawTimeoutMs = typeof params.timeoutMs === 'number' && params.timeoutMs > 0
|
|
92
116
|
? params.timeoutMs
|
|
93
117
|
: typeof params.timeout === 'number' && params.timeout > 0
|
|
94
118
|
? params.timeout
|
|
95
119
|
: DEFAULT_LOCAL_IMPACT_TIMEOUT_MS;
|
|
96
|
-
|
|
97
|
-
timeoutMs = 3_600_000;
|
|
120
|
+
const timeoutMs = clampTimeout(rawTimeoutMs);
|
|
98
121
|
return {
|
|
99
122
|
ok: true,
|
|
100
123
|
name,
|
|
@@ -125,12 +148,13 @@ async function resolveGroupRepo(port, config, repoPath) {
|
|
|
125
148
|
}
|
|
126
149
|
}
|
|
127
150
|
async function safeLocalImpact(port, repo, impactParams, timeoutMs) {
|
|
151
|
+
const safeTimeoutMs = clampTimeout(timeoutMs);
|
|
128
152
|
let timer;
|
|
129
153
|
const impactP = port.impact(repo, impactParams).catch((err) => ({
|
|
130
154
|
error: err instanceof Error ? err.message : String(err),
|
|
131
155
|
}));
|
|
132
156
|
const timeoutP = new Promise((resolve) => {
|
|
133
|
-
timer = setTimeout(() => resolve('timeout'),
|
|
157
|
+
timer = setTimeout(() => resolve('timeout'), safeTimeoutMs);
|
|
134
158
|
});
|
|
135
159
|
const won = await Promise.race([
|
|
136
160
|
impactP.then((v) => ({ tag: 'impact', v })),
|
|
@@ -146,6 +170,50 @@ async function safeLocalImpact(port, repo, impactParams, timeoutMs) {
|
|
|
146
170
|
}
|
|
147
171
|
return { value: won.v, timedOut: false };
|
|
148
172
|
}
|
|
173
|
+
/**
|
|
174
|
+
* Race a single Phase-2 `impactByUid` call against a remaining-budget
|
|
175
|
+
* timer. The Codex adversarial review on PR #1331 surfaced that the
|
|
176
|
+
* fanout loop only checked `Date.now() > deadline` *between* neighbor
|
|
177
|
+
* calls — once `await port.impactByUid(...)` was reached, a hung
|
|
178
|
+
* neighbor could pin the request indefinitely, and slow neighbors
|
|
179
|
+
* could compound past the 5-min `IMPACT_TIMEOUT_MAX_MS` cap.
|
|
180
|
+
*
|
|
181
|
+
* This helper wraps each call: a `setTimeout(remainingMs)` aborts an
|
|
182
|
+
* `AbortController` whose signal is forwarded to `impactByUid`, and a
|
|
183
|
+
* `Promise.race` resolves to `{ timedOut: true }` when the timer
|
|
184
|
+
* fires before the call completes. Implementors that ignore the
|
|
185
|
+
* signal (current local backend) still see their await resolved by
|
|
186
|
+
* the race; full cooperative cancellation inside the BFS is a future
|
|
187
|
+
* follow-up. On rejection, the value is `null` (matching the
|
|
188
|
+
* fanout's existing `if (fan == null)` truncation contract).
|
|
189
|
+
*
|
|
190
|
+
* Exported for direct unit testing — the helper IS the load-bearing
|
|
191
|
+
* mitigation surface, so the U3 regression test pins it directly
|
|
192
|
+
* rather than driving the full `runGroupImpact` path.
|
|
193
|
+
*/
|
|
194
|
+
export async function safeNeighborImpact(port, repoId, uid, direction, opts, remainingMs) {
|
|
195
|
+
const controller = new AbortController();
|
|
196
|
+
let timer;
|
|
197
|
+
const callP = port
|
|
198
|
+
.impactByUid(repoId, uid, direction, { ...opts, signal: controller.signal })
|
|
199
|
+
.catch(() => null);
|
|
200
|
+
const timeoutP = new Promise((resolve) => {
|
|
201
|
+
timer = setTimeout(() => {
|
|
202
|
+
controller.abort();
|
|
203
|
+
resolve('timeout');
|
|
204
|
+
}, Math.max(0, remainingMs));
|
|
205
|
+
});
|
|
206
|
+
const won = await Promise.race([
|
|
207
|
+
callP.then((v) => ({ tag: 'impact', v })),
|
|
208
|
+
timeoutP.then(() => ({ tag: 'timeout' })),
|
|
209
|
+
]);
|
|
210
|
+
if (timer !== undefined)
|
|
211
|
+
clearTimeout(timer);
|
|
212
|
+
if (won.tag === 'timeout') {
|
|
213
|
+
return { value: null, timedOut: true };
|
|
214
|
+
}
|
|
215
|
+
return { value: won.v, timedOut: false };
|
|
216
|
+
}
|
|
149
217
|
export function collectImpactSymbolUids(local, servicePrefix) {
|
|
150
218
|
const uids = new Set();
|
|
151
219
|
let targetFilePath;
|
|
@@ -372,7 +440,8 @@ export async function runGroupImpact(deps, params) {
|
|
|
372
440
|
if (seen.has(key))
|
|
373
441
|
continue;
|
|
374
442
|
seen.add(key);
|
|
375
|
-
|
|
443
|
+
const remainingMs = deadline - Date.now();
|
|
444
|
+
if (remainingMs <= 0) {
|
|
376
445
|
truncatedRepos.push(n.neighborRepo);
|
|
377
446
|
continue;
|
|
378
447
|
}
|
|
@@ -387,13 +456,18 @@ export async function runGroupImpact(deps, params) {
|
|
|
387
456
|
truncatedRepos.push(n.neighborRepo);
|
|
388
457
|
continue;
|
|
389
458
|
}
|
|
390
|
-
|
|
459
|
+
// Phase-2 hardening: race each impactByUid against a per-call
|
|
460
|
+
// timeout derived from the remaining budget. Without this wrap a
|
|
461
|
+
// single hung neighbor would pin the request past the clamped
|
|
462
|
+
// timeout, which Codex's adversarial review on PR #1331 flagged
|
|
463
|
+
// as the still-open half of CodeQL #184 / js/resource-exhaustion.
|
|
464
|
+
const { value: fan, timedOut: neighborTimedOut } = await safeNeighborImpact(deps.port, neighborHandle.id, n.neighborUid, direction, {
|
|
391
465
|
maxDepth,
|
|
392
466
|
relationTypes: relationTypes ?? [],
|
|
393
467
|
minConfidence,
|
|
394
468
|
includeTests,
|
|
395
|
-
});
|
|
396
|
-
if (fan == null) {
|
|
469
|
+
}, remainingMs);
|
|
470
|
+
if (neighborTimedOut || fan == null) {
|
|
397
471
|
truncatedRepos.push(n.neighborRepo);
|
|
398
472
|
continue;
|
|
399
473
|
}
|
|
@@ -17,6 +17,20 @@ interface CrateMeta {
|
|
|
17
17
|
repoPath: string;
|
|
18
18
|
workspaceDeps: string[];
|
|
19
19
|
}
|
|
20
|
+
/**
|
|
21
|
+
* Linear-time `[package].name = "..."` lookup. The previous regex
|
|
22
|
+
* `^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"` had a nested
|
|
23
|
+
* lazy quantifier on `\n` that CodeQL js/redos flagged as exponential
|
|
24
|
+
* on inputs like `[package]\n` + many bare `\n`. We walk lines
|
|
25
|
+
* explicitly: scan from the first `[package]` header until we hit the
|
|
26
|
+
* next `[...]` section header, looking for the `name = "..."` line.
|
|
27
|
+
* O(n) with the line count.
|
|
28
|
+
*
|
|
29
|
+
* Exported so the U8 ReDoS regression test can drive the production
|
|
30
|
+
* line-walk directly with adversarial fixtures (multi-line strings,
|
|
31
|
+
* trailing sections, etc.) instead of duplicating it inline.
|
|
32
|
+
*/
|
|
33
|
+
export declare function parseCargoPackageName(content: string): string | null;
|
|
20
34
|
export interface RustWorkspaceResult {
|
|
21
35
|
links: GroupManifestLink[];
|
|
22
36
|
discoveredCrates: Map<string, CrateMeta>;
|
|
@@ -3,6 +3,34 @@ import path from 'node:path';
|
|
|
3
3
|
import { shouldIgnorePath } from '../../../config/ignore-service.js';
|
|
4
4
|
import { loadIgnoreRules } from '../../../config/ignore-service.js';
|
|
5
5
|
import { logger } from '../../logger.js';
|
|
6
|
+
/**
|
|
7
|
+
* Linear-time `[package].name = "..."` lookup. The previous regex
|
|
8
|
+
* `^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"` had a nested
|
|
9
|
+
* lazy quantifier on `\n` that CodeQL js/redos flagged as exponential
|
|
10
|
+
* on inputs like `[package]\n` + many bare `\n`. We walk lines
|
|
11
|
+
* explicitly: scan from the first `[package]` header until we hit the
|
|
12
|
+
* next `[...]` section header, looking for the `name = "..."` line.
|
|
13
|
+
* O(n) with the line count.
|
|
14
|
+
*
|
|
15
|
+
* Exported so the U8 ReDoS regression test can drive the production
|
|
16
|
+
* line-walk directly with adversarial fixtures (multi-line strings,
|
|
17
|
+
* trailing sections, etc.) instead of duplicating it inline.
|
|
18
|
+
*/
|
|
19
|
+
export function parseCargoPackageName(content) {
|
|
20
|
+
const lines = content.split('\n');
|
|
21
|
+
const packageStart = lines.findIndex((l) => l.trim() === '[package]');
|
|
22
|
+
if (packageStart < 0)
|
|
23
|
+
return null;
|
|
24
|
+
for (let i = packageStart + 1; i < lines.length; i++) {
|
|
25
|
+
const line = lines[i].trimStart();
|
|
26
|
+
if (line.startsWith('['))
|
|
27
|
+
break; // hit the next section header
|
|
28
|
+
const m = /^name\s*=\s*"([^"]+)"/.exec(line);
|
|
29
|
+
if (m)
|
|
30
|
+
return m[1];
|
|
31
|
+
}
|
|
32
|
+
return null;
|
|
33
|
+
}
|
|
6
34
|
/**
|
|
7
35
|
* Parse a Cargo.toml to extract the crate name and workspace dependency
|
|
8
36
|
* names. Uses simple line-based parsing — no TOML library needed for
|
|
@@ -17,11 +45,8 @@ async function parseCrateManifest(repoPath) {
|
|
|
17
45
|
catch {
|
|
18
46
|
return null;
|
|
19
47
|
}
|
|
20
|
-
|
|
48
|
+
const name = parseCargoPackageName(content) ?? '';
|
|
21
49
|
const workspaceDeps = [];
|
|
22
|
-
const nameMatch = content.match(/^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"/m);
|
|
23
|
-
if (nameMatch)
|
|
24
|
-
name = nameMatch[1];
|
|
25
50
|
// Match dependencies that use workspace = true, which indicates they
|
|
26
51
|
// are workspace-internal deps:
|
|
27
52
|
// dep_name = { workspace = true }
|
|
@@ -202,6 +202,8 @@ export interface CobolRegexResults {
|
|
|
202
202
|
* Preserves exact line count for position mapping.
|
|
203
203
|
*/
|
|
204
204
|
export declare function preprocessCobolSource(content: string): string;
|
|
205
|
+
export declare const RE_SET_TO_TRUE: RegExp;
|
|
206
|
+
export declare const RE_SET_INDEX: RegExp;
|
|
205
207
|
/**
|
|
206
208
|
* Extract COBOL symbols using a single-pass state machine.
|
|
207
209
|
* Extracts program name, paragraphs, sections, CALL, PERFORM, COPY,
|
|
@@ -178,8 +178,20 @@ const RE_DECLARATIVES_START = /^\s*DECLARATIVES\s*\.\s*$/i;
|
|
|
178
178
|
const RE_DECLARATIVES_END = /^\s*END\s+DECLARATIVES\s*\.\s*$/i;
|
|
179
179
|
const RE_USE_AFTER = /\bUSE\s+(?:AFTER\s+)?(?:STANDARD\s+)?(?:EXCEPTION|ERROR)\s+ON\s+([A-Z][A-Z0-9-]+|INPUT|OUTPUT|I-O|EXTEND)\b/i;
|
|
180
180
|
// SET statement (condition, index)
|
|
181
|
-
|
|
182
|
-
|
|
181
|
+
//
|
|
182
|
+
// Catastrophic-backtracking note (CodeQL js/redos): the previous shape
|
|
183
|
+
// `((?:[A-Z][A-Z0-9-]+(?:\s+OF\s+[A-Z][A-Z0-9-]+)?\s+)+)TO\s+TRUE`
|
|
184
|
+
// nested `\s+` quantifiers across alternations and was exponential on
|
|
185
|
+
// inputs like "SET a OF a OF a ... TO TRUE". Replaced with a lazy
|
|
186
|
+
// dot-match bounded by the explicit `\s+TO\s+TRUE` suffix — `.+?` is
|
|
187
|
+
// O(n) with the trailing anchor, and the captured group is parsed
|
|
188
|
+
// downstream the same way as before.
|
|
189
|
+
// Exported so the U8 ReDoS regression test can pin the exact production
|
|
190
|
+
// pattern. Direct import is the only way to ensure the test's
|
|
191
|
+
// pathological-input timing assertion exercises the production regex
|
|
192
|
+
// instead of an inline copy that drifts.
|
|
193
|
+
export const RE_SET_TO_TRUE = /\bSET\s+(.+?)\s+TO\s+TRUE\b/i;
|
|
194
|
+
export const RE_SET_INDEX = /\bSET\s+(.+?)\s+(TO|UP\s+BY|DOWN\s+BY)\s+(\d+|[A-Z][A-Z0-9-]+)/i;
|
|
183
195
|
// INITIALIZE statement — data reset (captures targets before REPLACING/WITH clause)
|
|
184
196
|
const RE_INITIALIZE = /\bINITIALIZE\s+([\s\S]*?)(?=\bREPLACING\b|\bWITH\b|\.\s*$|$)/i;
|
|
185
197
|
const INITIALIZE_CLAUSE_KEYWORDS = new Set([
|
|
@@ -32,9 +32,12 @@ import type lbug from '@ladybugdb/core';
|
|
|
32
32
|
* integer; anything invalid falls back to the default.
|
|
33
33
|
*/
|
|
34
34
|
export declare const LBUG_MAX_DB_SIZE: number;
|
|
35
|
+
export declare const WAL_RECOVERY_SUGGESTION = "WAL corruption detected. Run `gitnexus analyze` to rebuild the index.";
|
|
36
|
+
export declare function isWalCorruptionError(err: unknown): boolean;
|
|
35
37
|
type LbugModule = typeof lbug;
|
|
36
38
|
export interface LbugDatabaseOptions {
|
|
37
39
|
readOnly?: boolean;
|
|
40
|
+
throwOnWalReplayFailure?: boolean;
|
|
38
41
|
}
|
|
39
42
|
export interface LbugConnectionHandle {
|
|
40
43
|
db: lbug.Database;
|
|
@@ -39,8 +39,22 @@ export const LBUG_MAX_DB_SIZE = (() => {
|
|
|
39
39
|
}
|
|
40
40
|
return 16 * 1024 * 1024 * 1024;
|
|
41
41
|
})();
|
|
42
|
+
/** Matches WAL corruption errors from the LadybugDB engine. */
|
|
43
|
+
const WAL_CORRUPTION_RE = /corrupt(ed)?\s+wal|invalid\s+wal\s+record|wal.*corrupt|checksum.*wal/i;
|
|
44
|
+
export const WAL_RECOVERY_SUGGESTION = 'WAL corruption detected. Run `gitnexus analyze` to rebuild the index.';
|
|
45
|
+
export function isWalCorruptionError(err) {
|
|
46
|
+
if (!err)
|
|
47
|
+
return false;
|
|
48
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
49
|
+
return WAL_CORRUPTION_RE.test(msg);
|
|
50
|
+
}
|
|
42
51
|
export function createLbugDatabase(lbugModule, databasePath, options = {}) {
|
|
43
|
-
|
|
52
|
+
// .d.ts declares fewer args than the native constructor accepts.
|
|
53
|
+
return new lbugModule.Database(databasePath, 0, // bufferManagerSize
|
|
54
|
+
false, // enableCompression (pinned for v0.16.0)
|
|
55
|
+
options.readOnly ?? false, LBUG_MAX_DB_SIZE, true, // autoCheckpoint
|
|
56
|
+
-1, // checkpointThreshold
|
|
57
|
+
options.throwOnWalReplayFailure ?? true, true);
|
|
44
58
|
}
|
|
45
59
|
export async function openLbugConnection(lbugModule, databasePath, options = {}) {
|
|
46
60
|
let db;
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
import fs from 'fs/promises';
|
|
18
18
|
import lbug from '@ladybugdb/core';
|
|
19
19
|
import { loadFTSExtension } from './lbug-adapter.js';
|
|
20
|
-
import { createLbugDatabase } from './lbug-config.js';
|
|
20
|
+
import { createLbugDatabase, isWalCorruptionError } from './lbug-config.js';
|
|
21
21
|
const pool = new Map();
|
|
22
22
|
const poolCloseListeners = new Set();
|
|
23
23
|
/**
|
|
@@ -51,7 +51,7 @@ let idleTimer = null;
|
|
|
51
51
|
// @ladybugdb/core), corrupting stdout in the pre-sentinel window. Routing
|
|
52
52
|
// through the leaf breaks that chain.
|
|
53
53
|
export { realStdoutWrite, realStderrWrite, setActiveStdoutWrite } from '../../mcp/stdio-capture.js';
|
|
54
|
-
import { getActiveStdoutWrite } from '../../mcp/stdio-capture.js';
|
|
54
|
+
import { getActiveStdoutWrite, realStderrWrite } from '../../mcp/stdio-capture.js';
|
|
55
55
|
let stdoutSilenceCount = 0;
|
|
56
56
|
/** True while pre-warming connections — prevents watchdog from prematurely restoring stdout */
|
|
57
57
|
let preWarmActive = false;
|
|
@@ -203,6 +203,44 @@ const QUERY_TIMEOUT_MS = 30_000;
|
|
|
203
203
|
const WAITER_TIMEOUT_MS = 15_000;
|
|
204
204
|
const LOCK_RETRY_ATTEMPTS = 3;
|
|
205
205
|
const LOCK_RETRY_DELAY_MS = 2000;
|
|
206
|
+
async function openReadOnlyDatabase(dbPath) {
|
|
207
|
+
let db;
|
|
208
|
+
silenceStdout();
|
|
209
|
+
try {
|
|
210
|
+
db = createLbugDatabase(lbug, dbPath, {
|
|
211
|
+
readOnly: true,
|
|
212
|
+
throwOnWalReplayFailure: false,
|
|
213
|
+
});
|
|
214
|
+
await db.init();
|
|
215
|
+
return db;
|
|
216
|
+
}
|
|
217
|
+
catch (err) {
|
|
218
|
+
if (db)
|
|
219
|
+
await db.close().catch(() => { });
|
|
220
|
+
throw err;
|
|
221
|
+
}
|
|
222
|
+
finally {
|
|
223
|
+
restoreStdout();
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Quarantine the .wal file and retry opening the database.
|
|
228
|
+
* Used when the initial open fails with a WAL corruption error.
|
|
229
|
+
*/
|
|
230
|
+
async function tryQuarantineAndReopen(dbPath, repoId) {
|
|
231
|
+
const walPath = dbPath + '.wal';
|
|
232
|
+
const quarantineName = `${walPath}.corrupt.${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
233
|
+
try {
|
|
234
|
+
await fs.rename(walPath, quarantineName);
|
|
235
|
+
}
|
|
236
|
+
catch {
|
|
237
|
+
throw new Error(`LadybugDB WAL corruption detected for ${repoId}. ` +
|
|
238
|
+
`Run \`gitnexus analyze\` to rebuild the index. (quarantine failed)`);
|
|
239
|
+
}
|
|
240
|
+
realStderrWrite(`GitNexus: LadybugDB WAL quarantined for ${repoId}; graph may be stale. ` +
|
|
241
|
+
`Run \`gitnexus analyze\` to rebuild the index.\n`);
|
|
242
|
+
return await openReadOnlyDatabase(dbPath);
|
|
243
|
+
}
|
|
206
244
|
/** Deduplicates concurrent initLbug calls for the same repoId */
|
|
207
245
|
const initPromises = new Map();
|
|
208
246
|
/**
|
|
@@ -256,17 +294,27 @@ async function doInitLbug(repoId, dbPath) {
|
|
|
256
294
|
// avoids lock conflicts when `gitnexus analyze` is writing.
|
|
257
295
|
let lastError = null;
|
|
258
296
|
for (let attempt = 1; attempt <= LOCK_RETRY_ATTEMPTS; attempt++) {
|
|
259
|
-
silenceStdout();
|
|
260
297
|
try {
|
|
261
|
-
const db =
|
|
262
|
-
restoreStdout();
|
|
298
|
+
const db = await openReadOnlyDatabase(dbPath);
|
|
263
299
|
shared = { db, refCount: 0, ftsLoaded: false };
|
|
264
300
|
dbCache.set(dbPath, shared);
|
|
265
301
|
break;
|
|
266
302
|
}
|
|
267
303
|
catch (err) {
|
|
268
|
-
restoreStdout();
|
|
269
304
|
lastError = err instanceof Error ? err : new Error(String(err));
|
|
305
|
+
if (isWalCorruptionError(lastError)) {
|
|
306
|
+
try {
|
|
307
|
+
const db = await tryQuarantineAndReopen(dbPath, repoId);
|
|
308
|
+
shared = { db, refCount: 0, ftsLoaded: false };
|
|
309
|
+
dbCache.set(dbPath, shared);
|
|
310
|
+
break;
|
|
311
|
+
}
|
|
312
|
+
catch (retryErr) {
|
|
313
|
+
throw new Error(`LadybugDB WAL corruption detected for ${repoId}. ` +
|
|
314
|
+
`Run \`gitnexus analyze\` to rebuild the index. ` +
|
|
315
|
+
`(${retryErr instanceof Error ? retryErr.message : String(retryErr)})`);
|
|
316
|
+
}
|
|
317
|
+
}
|
|
270
318
|
const isLockError = lastError.message.includes('Could not set lock') || lastError.message.includes('lock');
|
|
271
319
|
if (!isLockError || attempt === LOCK_RETRY_ATTEMPTS)
|
|
272
320
|
break;
|
|
@@ -259,6 +259,7 @@ export declare class LocalBackend {
|
|
|
259
259
|
* UID-based direct lookup. No cluster in output.
|
|
260
260
|
*/
|
|
261
261
|
private context;
|
|
262
|
+
private _contextImpl;
|
|
262
263
|
/**
|
|
263
264
|
* Legacy explore — kept for backwards compatibility with resources.ts.
|
|
264
265
|
* Routes cluster/process types to direct graph queries.
|
|
@@ -290,6 +291,7 @@ export declare class LocalBackend {
|
|
|
290
291
|
relationTypes: string[];
|
|
291
292
|
minConfidence: number;
|
|
292
293
|
includeTests: boolean;
|
|
294
|
+
signal?: AbortSignal;
|
|
293
295
|
}): Promise<any | null>;
|
|
294
296
|
private handleGroupTool;
|
|
295
297
|
/**
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import fs from 'fs/promises';
|
|
9
9
|
import path from 'path';
|
|
10
10
|
import { initLbug, executeQuery, executeParameterized, closeLbug, isLbugReady, isWriteQuery, } from '../../core/lbug/pool-adapter.js';
|
|
11
|
+
import { isWalCorruptionError, WAL_RECOVERY_SUGGESTION } from '../../core/lbug/lbug-config.js';
|
|
11
12
|
export { isWriteQuery };
|
|
12
13
|
// Embedding imports are lazy (dynamic import) to avoid loading onnxruntime-node
|
|
13
14
|
// at MCP server startup — crashes on unsupported Node ABI versions (#89)
|
|
@@ -22,7 +23,7 @@ import { rankExactEmbeddingRows, } from '../../core/embeddings/exact-search.js';
|
|
|
22
23
|
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/lbug/schema.js';
|
|
23
24
|
import { getExactScanLimit, isVectorExtensionSupportedByPlatform, } from '../../core/platform/capabilities.js';
|
|
24
25
|
import { PhaseTimer } from '../../core/search/phase-timer.js';
|
|
25
|
-
import {
|
|
26
|
+
import { checkStalenessAsync, checkCwdMatch } from '../../core/git-staleness.js';
|
|
26
27
|
import { logger } from '../../core/logger.js';
|
|
27
28
|
// AI context generation is CLI-only (gitnexus analyze)
|
|
28
29
|
// import { generateAIContextFiles } from '../../cli/ai-context.js';
|
|
@@ -464,8 +465,12 @@ export class LocalBackend {
|
|
|
464
465
|
list.push(h);
|
|
465
466
|
byRemote.set(h.remoteUrl, list);
|
|
466
467
|
}
|
|
467
|
-
|
|
468
|
-
|
|
468
|
+
// Check staleness for all repos in parallel instead of sequentially.
|
|
469
|
+
// Each check spawns an async `git rev-list` — with 200 repos the sync
|
|
470
|
+
// variant took ~50 s; parallel async brings it under a second (#1363).
|
|
471
|
+
const stalenessResults = await Promise.all(handles.map((h) => checkStalenessAsync(h.repoPath, h.lastCommit)));
|
|
472
|
+
return handles.map((h, i) => {
|
|
473
|
+
const stale = stalenessResults[i];
|
|
469
474
|
const selfNorm = norm(h.repoPath);
|
|
470
475
|
const siblings = h.remoteUrl
|
|
471
476
|
? (byRemote.get(h.remoteUrl) ?? []).filter((e) => norm(e.repoPath) !== selfNorm)
|
|
@@ -1018,7 +1023,14 @@ export class LocalBackend {
|
|
|
1018
1023
|
return result;
|
|
1019
1024
|
}
|
|
1020
1025
|
catch (err) {
|
|
1021
|
-
|
|
1026
|
+
const msg = err.message || 'Query failed';
|
|
1027
|
+
if (isWalCorruptionError(err)) {
|
|
1028
|
+
return {
|
|
1029
|
+
error: msg,
|
|
1030
|
+
recoverySuggestion: WAL_RECOVERY_SUGGESTION,
|
|
1031
|
+
};
|
|
1032
|
+
}
|
|
1033
|
+
return { error: msg };
|
|
1022
1034
|
}
|
|
1023
1035
|
}
|
|
1024
1036
|
/**
|
|
@@ -1385,6 +1397,21 @@ export class LocalBackend {
|
|
|
1385
1397
|
* UID-based direct lookup. No cluster in output.
|
|
1386
1398
|
*/
|
|
1387
1399
|
async context(repo, params) {
|
|
1400
|
+
try {
|
|
1401
|
+
return await this._contextImpl(repo, params);
|
|
1402
|
+
}
|
|
1403
|
+
catch (err) {
|
|
1404
|
+
const msg = (err instanceof Error ? err.message : String(err)) || 'Context query failed';
|
|
1405
|
+
if (isWalCorruptionError(err)) {
|
|
1406
|
+
return {
|
|
1407
|
+
error: msg,
|
|
1408
|
+
recoverySuggestion: WAL_RECOVERY_SUGGESTION,
|
|
1409
|
+
};
|
|
1410
|
+
}
|
|
1411
|
+
throw err;
|
|
1412
|
+
}
|
|
1413
|
+
}
|
|
1414
|
+
async _contextImpl(repo, params) {
|
|
1388
1415
|
await this.ensureInitialized(repo.id);
|
|
1389
1416
|
const { name, uid, file_path, kind, include_content } = params;
|
|
1390
1417
|
if (!name && !uid) {
|
|
@@ -1986,6 +2013,7 @@ export class LocalBackend {
|
|
|
1986
2013
|
impactedCount: 0,
|
|
1987
2014
|
risk: 'UNKNOWN',
|
|
1988
2015
|
suggestion: 'The graph query failed — try gitnexus context <symbol> as a fallback',
|
|
2016
|
+
...(isWalCorruptionError(err) ? { recoverySuggestion: WAL_RECOVERY_SUGGESTION } : {}),
|
|
1989
2017
|
};
|
|
1990
2018
|
}
|
|
1991
2019
|
}
|
|
@@ -2423,6 +2451,12 @@ export class LocalBackend {
|
|
|
2423
2451
|
* Returns null if the repo is unknown, the UID is missing, or analysis fails.
|
|
2424
2452
|
*/
|
|
2425
2453
|
async impactByUid(repoId, uid, direction, opts) {
|
|
2454
|
+
// Honor an already-aborted signal at the entry boundary as a fast
|
|
2455
|
+
// path. Cooperative cancellation inside _runImpactBFS is out of
|
|
2456
|
+
// scope — the caller's Promise.race against the same signal
|
|
2457
|
+
// resolves the await regardless of how long this body runs.
|
|
2458
|
+
if (opts.signal?.aborted)
|
|
2459
|
+
return null;
|
|
2426
2460
|
try {
|
|
2427
2461
|
await this.refreshRepos();
|
|
2428
2462
|
await this.ensureInitialized(repoId);
|
package/package.json
CHANGED