gitnexus 1.6.4-rc.91 → 1.6.4-rc.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,12 +32,6 @@ export interface RelCsvSplitResult {
32
32
  export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
33
33
  /** Expose the current Database for pool adapter reuse in tests. */
34
34
  export declare const getDatabase: () => lbug.Database | null;
35
- /**
36
- * Return true when the error message indicates that another process holds
37
- * an exclusive lock on the LadybugDB file (e.g. `gitnexus analyze` or
38
- * `gitnexus serve` running at the same time).
39
- */
40
- export declare const isDbBusyError: (err: unknown) => boolean;
41
35
  /**
42
36
  * Return true when the error message indicates a write was attempted against
43
37
  * a read-only LadybugDB connection. The MCP query pool opens DBs read-only,
@@ -8,7 +8,7 @@ import lbug from '@ladybugdb/core';
8
8
  import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, STALE_HASH_SENTINEL, } from './schema.js';
9
9
  import { streamAllCSVsToDisk } from './csv-generator.js';
10
10
  import { extensionManager } from './extension-loader.js';
11
- import { closeLbugConnection, openLbugConnection, } from './lbug-config.js';
11
+ import { closeLbugConnection, isDbBusyError, isOpenRetryExhausted, openLbugConnection, waitForWindowsHandleRelease, } from './lbug-config.js';
12
12
  import { isVectorExtensionSupportedByPlatform } from '../platform/capabilities.js';
13
13
  import { logger } from '../logger.js';
14
14
  /**
@@ -140,18 +140,6 @@ let sessionLock = Promise.resolve();
140
140
  const DB_LOCK_RETRY_ATTEMPTS = 3;
141
141
  /** Base back-off in ms between BUSY retries (multiplied by attempt number). */
142
142
  const DB_LOCK_RETRY_DELAY_MS = 500;
143
- /**
144
- * Return true when the error message indicates that another process holds
145
- * an exclusive lock on the LadybugDB file (e.g. `gitnexus analyze` or
146
- * `gitnexus serve` running at the same time).
147
- */
148
- export const isDbBusyError = (err) => {
149
- const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
150
- return (msg.includes('busy') ||
151
- msg.includes('lock') ||
152
- msg.includes('already in use') ||
153
- msg.includes('could not set lock'));
154
- };
155
143
  /**
156
144
  * Return true when the error message indicates a write was attempted against
157
145
  * a read-only LadybugDB connection. The MCP query pool opens DBs read-only,
@@ -201,7 +189,11 @@ export const withLbugDb = async (dbPath, operation) => {
201
189
  }
202
190
  catch (err) {
203
191
  lastError = err;
204
- if (!isDbBusyError(err) || attempt === DB_LOCK_RETRY_ATTEMPTS) {
192
+ // Skip outer retry when the inner open-retry already exhausted: the
193
+ // ~1.5s open-time budget was just spent, repeating the full reset+
194
+ // reopen cycle would only add 4-5s of tail latency without changing
195
+ // the outcome (both layers consult the same isDbBusyError matcher).
196
+ if (!isDbBusyError(err) || isOpenRetryExhausted(err) || attempt === DB_LOCK_RETRY_ATTEMPTS) {
205
197
  throw err;
206
198
  }
207
199
  // Close stale connection inside the session lock to prevent race conditions
@@ -274,7 +266,16 @@ const doInitLbug = async (dbPath) => {
274
266
  }
275
267
  catch (err) {
276
268
  const msg = err instanceof Error ? err.message : String(err);
277
- if (!msg.includes('already exists')) {
269
+ // Suppression list:
270
+ // - "already exists": expected idempotent re-create on existing DBs
271
+ // - busy/lock errors matched by isDbBusyError, e.g. "could not set lock on file": LadybugDB v0.16.1 emits this on
272
+ // Windows when CREATE NODE TABLE runs against a path that was
273
+ // just opened (the WAL handle from a fresh Database briefly
274
+ // contests the table's first-write lock). The table is created
275
+ // anyway and any genuine cross-process lock contention surfaces
276
+ // on the next operation via withLbugDb's retry. Logging it here
277
+ // would just be noise in CI.
278
+ if (!msg.includes('already exists') && !isDbBusyError(err)) {
278
279
  logger.warn(`⚠️ Schema creation warning: ${msg.slice(0, 120)}`);
279
280
  }
280
281
  }
@@ -940,6 +941,9 @@ export const flushWAL = async () => {
940
941
  */
941
942
  export const safeClose = async () => {
942
943
  await flushWAL();
944
+ // Capture before close — currentDbPath stays set so the Windows post-close
945
+ // probe below knows which file to wait on.
946
+ const closingDbPath = currentDbPath;
943
947
  if (conn) {
944
948
  try {
945
949
  // eslint-disable-next-line no-restricted-syntax -- sole authorised close site
@@ -960,6 +964,21 @@ export const safeClose = async () => {
960
964
  }
961
965
  db = null;
962
966
  }
967
+ // Windows: libuv reports `db.close()` resolved before the kernel has
968
+ // released the file handle. A subsequent `new Database(samePath)` in
969
+ // the same process can race the release. The probe (lbug-config.ts)
970
+ // forces any residual lock to surface as EBUSY/EPERM/EACCES so the
971
+ // open-time retry absorbs the lag.
972
+ if (process.platform === 'win32' && closingDbPath) {
973
+ const released = await waitForWindowsHandleRelease(closingDbPath);
974
+ if (!released) {
975
+ // Probe exhausted with a lock code still in flight. The next
976
+ // openLbugConnection will absorb whatever residual lag remains, but
977
+ // a chronic warning helps operators spot AV interference (Windows
978
+ // Defender holding the file far past the 250ms budget).
979
+ logger.warn({ dbPath: closingDbPath }, '⚠️ LadybugDB file handle still locked after close (Windows). If this repeats, check antivirus/Defender exclusions for the GitNexus storage directory.');
980
+ }
981
+ }
963
982
  };
964
983
  export const closeLbug = async () => {
965
984
  await safeClose();
@@ -32,15 +32,71 @@ import type lbug from '@ladybugdb/core';
32
32
  * integer; anything invalid falls back to the default.
33
33
  */
34
34
  export declare const LBUG_MAX_DB_SIZE: number;
35
+ export declare const WAL_RECOVERY_SUGGESTION = "WAL corruption detected. Run `gitnexus analyze` to rebuild the index.";
36
+ export declare function isWalCorruptionError(err: unknown): boolean;
35
37
  type LbugModule = typeof lbug;
36
38
  export interface LbugDatabaseOptions {
37
39
  readOnly?: boolean;
40
+ throwOnWalReplayFailure?: boolean;
38
41
  }
39
42
  export interface LbugConnectionHandle {
40
43
  db: lbug.Database;
41
44
  conn: lbug.Connection;
42
45
  }
46
+ /**
47
+ * Return true when the error message indicates that a LadybugDB file lock
48
+ * could not be acquired — either at construction time
49
+ * (`new lbug.Database(...)` raises from `local_file_system.cpp`) or during
50
+ * a query (another writer holds the exclusive lock).
51
+ *
52
+ * Lives here (not in `lbug-adapter.ts`) so both the construction-time
53
+ * retry (`openWithLockRetry` in this file) and the query-time retry
54
+ * (`withLbugDb` in `lbug-adapter.ts`) consult the same matcher. Callers
55
+ * import directly from this module — no re-export to keep in sync.
56
+ */
57
+ export declare const isDbBusyError: (err: unknown) => boolean;
43
58
  export declare function createLbugDatabase(lbugModule: LbugModule, databasePath: string, options?: LbugDatabaseOptions): lbug.Database;
59
+ /**
60
+ * Marker symbol attached to lock errors after `openWithLockRetry` exhausts
61
+ * its budget. `withLbugDb`'s outer query-time retry consults this so it
62
+ * does not re-retry a path that just spent up to ~1.5s in the open-time
63
+ * loop — preventing 6s tail latencies (3× outer × 5× inner attempts).
64
+ *
65
+ * The symbol is internal to GitNexus; consumers should treat the underlying
66
+ * error message as the user-visible signal.
67
+ */
68
+ export declare const LBUG_OPEN_RETRY_EXHAUSTED: unique symbol;
69
+ export declare const isOpenRetryExhausted: (err: unknown) => boolean;
70
+ /** Exported only for direct unit testing — production callers use `openWithLockRetry`. */
71
+ export declare const _isTestFixturePathForTest: (dbPath: string) => boolean;
44
72
  export declare function openLbugConnection(lbugModule: LbugModule, databasePath: string, options?: LbugDatabaseOptions): Promise<LbugConnectionHandle>;
45
73
  export declare function closeLbugConnection(handle: LbugConnectionHandle): Promise<void>;
74
+ /**
75
+ * Probe `dbPath` AND its `.wal` sidecar after `db.close()` so any
76
+ * residual native file handle surfaces as EBUSY/EPERM/EACCES and the
77
+ * bounded retry absorbs the release lag. Windows-only — Linux/macOS do
78
+ * not exhibit this race.
79
+ *
80
+ * Both files matter. Empirically, on rapid open→close→reopen cycles the
81
+ * main `dbPath` handle releases first; the `.wal` handle from the
82
+ * previous Database lingers and the new Database's first write (CREATE
83
+ * NODE TABLE during schema init) fails with "Could not set lock on
84
+ * file". Probing both makes safeClose actually return when the kernel
85
+ * is fully done with the path.
86
+ *
87
+ * Returns `true` when both probes succeeded (or skipped on non-lock
88
+ * errors / missing files). Returns `false` when either probe exhausted
89
+ * its budget with a lock code still in flight.
90
+ *
91
+ * Defensive shape:
92
+ * - Opens read+write (`'r+'`) so the probe actually surfaces exclusive
93
+ * locks held by the previous Database. A read-only probe (`'r'`) is
94
+ * insufficient — Windows will grant read access while the previous
95
+ * handle's exclusive write lock is still in flight, which lets
96
+ * `safeClose` return before the next CREATE NODE TABLE can lock the
97
+ * file.
98
+ * - A `finally` block always closes the probe handle, and any error
99
+ * thrown by `handle.close()` itself is swallowed.
100
+ */
101
+ export declare const waitForWindowsHandleRelease: (dbPath: string) => Promise<boolean>;
46
102
  export {};
@@ -1,3 +1,6 @@
1
+ import fs from 'fs/promises';
2
+ import os from 'os';
3
+ import path from 'path';
1
4
  /**
2
5
  * Shared configuration for `@ladybugdb/core` `Database` construction.
3
6
  *
@@ -39,13 +42,193 @@ export const LBUG_MAX_DB_SIZE = (() => {
39
42
  }
40
43
  return 16 * 1024 * 1024 * 1024;
41
44
  })();
45
+ /** Matches WAL corruption errors from the LadybugDB engine. */
46
+ const WAL_CORRUPTION_RE = /corrupt(ed)?\s+wal|invalid\s+wal\s+record|wal.*corrupt|checksum.*wal/i;
47
+ export const WAL_RECOVERY_SUGGESTION = 'WAL corruption detected. Run `gitnexus analyze` to rebuild the index.';
48
+ export function isWalCorruptionError(err) {
49
+ if (!err)
50
+ return false;
51
+ const msg = err instanceof Error ? err.message : String(err);
52
+ return WAL_CORRUPTION_RE.test(msg);
53
+ }
54
+ /**
55
+ * Return true when the error message indicates that a LadybugDB file lock
56
+ * could not be acquired — either at construction time
57
+ * (`new lbug.Database(...)` raises from `local_file_system.cpp`) or during
58
+ * a query (another writer holds the exclusive lock).
59
+ *
60
+ * Lives here (not in `lbug-adapter.ts`) so both the construction-time
61
+ * retry (`openWithLockRetry` in this file) and the query-time retry
62
+ * (`withLbugDb` in `lbug-adapter.ts`) consult the same matcher. Callers
63
+ * import directly from this module — no re-export to keep in sync.
64
+ */
65
+ export const isDbBusyError = (err) => {
66
+ const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
67
+ // `lock` already subsumes `could not set lock`; the broader term is kept
68
+ // because graph-DB transient errors include "deadlock", "lock contention",
69
+ // and the LadybugDB native module's "could not set lock on file" — all of
70
+ // which deserve a retry. If a non-transient lock-shaped error ever
71
+ // surfaces (e.g., "lock file missing" during recovery), tighten this
72
+ // matcher rather than raising the retry budget.
73
+ return msg.includes('busy') || msg.includes('lock') || msg.includes('already in use');
74
+ };
42
75
  export function createLbugDatabase(lbugModule, databasePath, options = {}) {
43
- return new lbugModule.Database(databasePath, 0, false, options.readOnly ?? false, LBUG_MAX_DB_SIZE);
76
+ // .d.ts declares fewer args than the native constructor accepts.
77
+ return new lbugModule.Database(databasePath, 0, // bufferManagerSize
78
+ false, // enableCompression (pinned for v0.16.0)
79
+ options.readOnly ?? false, LBUG_MAX_DB_SIZE, true, // autoCheckpoint
80
+ -1, // checkpointThreshold
81
+ options.throwOnWalReplayFailure ?? true, true);
44
82
  }
83
+ // ─── Lock-busy retry tuning knobs ───────────────────────────────────────────
84
+ //
85
+ // The GitNexus retry pairs listed below, which touch native LadybugDB locks, live with
86
+ // a comment cross-reference here so an SRE tuning Windows flakes finds them
87
+ // in one grep:
88
+ //
89
+ // 1. OPEN_LOCK_RETRY_ATTEMPTS / OPEN_LOCK_RETRY_DELAY_MS (this file)
90
+ // → `new lbug.Database()` constructor lock failures
91
+ // 2. HANDLE_RELEASE_PROBE_ATTEMPTS / HANDLE_RELEASE_PROBE_DELAY_MS (this file)
92
+ // → post-close fs.open probe to absorb Windows handle-release lag
93
+ // 3. DB_LOCK_RETRY_ATTEMPTS / DB_LOCK_RETRY_DELAY_MS (lbug-adapter.ts withLbugDb)
94
+ // → query-time busy/lock retry around already-open connections
95
+ //
96
+ // `new lbug.Database()` calls into the native module which performs an
97
+ // OS-level exclusive lock on `<dbPath>`. On Windows that lock can fail
98
+ // for reasons specific to the OS (Defender briefly opens new files,
99
+ // libuv handle release lags the JS-side close). 5 attempts × 100ms
100
+ // linear back-off (max sleep 100+200+300+400 = 1s, plus 5 ctor RTTs
101
+ // of 10–50ms each = ~1.0–1.2s worst case) clears the typical
102
+ // AV-scanner hold without masking real cross-process conflicts.
103
+ //
104
+ // Source: https://github.com/LadybugDB/ladybug/blob/v0.16.1/src/common/file_system/local_file_system.cpp#L126
105
+ const OPEN_LOCK_RETRY_ATTEMPTS = 5;
106
+ const OPEN_LOCK_RETRY_DELAY_MS = 100;
107
+ const HANDLE_RELEASE_PROBE_ATTEMPTS = 5;
108
+ const HANDLE_RELEASE_PROBE_DELAY_MS = 50;
109
+ const HANDLE_RELEASE_LOCK_CODES = new Set(['EBUSY', 'EPERM', 'EACCES']);
110
+ /**
111
+ * Test-fixture directory prefixes recognized by `isTestFixturePath`.
112
+ *
113
+ * IMPORTANT: this list must stay in sync with the prefixes passed to
114
+ * `createTempDir` in `gitnexus/test/helpers/test-db.ts` and the prefixes
115
+ * used by `withTestLbugDB` (`gitnexus/test/helpers/test-indexed-db.ts`).
116
+ * If you add a new test that passes a custom prefix to `createTempDir`,
117
+ * add it here too — otherwise the stale-sidecar sweep silently won't
118
+ * fire for that fixture and CI flakes return.
119
+ *
120
+ * The default `createTempDir('gitnexus-test-')` and the lbug variant
121
+ * `'gitnexus-lbug-'` cover today's call sites.
122
+ */
123
+ const TEST_FIXTURE_PREFIXES = ['gitnexus-lbug-', 'gitnexus-test-'];
124
+ /**
125
+ * Marker symbol attached to lock errors after `openWithLockRetry` exhausts
126
+ * its budget. `withLbugDb`'s outer query-time retry consults this so it
127
+ * does not re-retry a path that just spent up to ~1.5s in the open-time
128
+ * loop — preventing 6s tail latencies (3× outer × 5× inner attempts).
129
+ *
130
+ * The symbol is internal to GitNexus; consumers should treat the underlying
131
+ * error message as the user-visible signal.
132
+ */
133
+ export const LBUG_OPEN_RETRY_EXHAUSTED = Symbol.for('gitnexus.lbug.openRetryExhausted');
134
+ export const isOpenRetryExhausted = (err) => {
135
+ if (err === null || err === undefined || typeof err !== 'object')
136
+ return false;
137
+ return err[LBUG_OPEN_RETRY_EXHAUSTED] === true;
138
+ };
139
+ const tagOpenRetryExhausted = (err) => {
140
+ if (err && typeof err === 'object') {
141
+ err[LBUG_OPEN_RETRY_EXHAUSTED] = true;
142
+ }
143
+ return err;
144
+ };
145
+ /**
146
+ * True when `dbPath` resolves to a recognized test fixture under the OS
147
+ * temp directory. Used to gate the stale-sidecar sweep so production
148
+ * paths never have their `.wal` / `.lock` files deleted.
149
+ *
150
+ * Defensive shape:
151
+ * - `path.resolve` normalizes `..` segments before the prefix check, so
152
+ * `<tmp>/gitnexus-lbug-x/../../etc/passwd` is rejected.
153
+ * - The tmpRoot check trims any trailing separator returned by some
154
+ * Windows TMP configurations (`C:\Users\X\Temp\`) so the startsWith
155
+ * comparison stays correct.
156
+ * - Only the IMMEDIATE parent directory is matched against the prefix
157
+ * list. An ancestor walk would let a tmpdir whose own basename starts
158
+ * with `gitnexus-lbug-` accept arbitrary nested paths under it.
159
+ */
160
+ const isTestFixturePath = (dbPath) => {
161
+ const tmpRoot = os.tmpdir().replace(new RegExp(`${path.sep === '\\' ? '\\\\' : path.sep}+$`), '');
162
+ const resolved = path.resolve(dbPath);
163
+ if (!resolved.startsWith(tmpRoot + path.sep) && resolved !== tmpRoot)
164
+ return false;
165
+ const parentBase = path.basename(path.dirname(resolved));
166
+ return TEST_FIXTURE_PREFIXES.some((p) => parentBase.startsWith(p));
167
+ };
168
+ /** Exported only for direct unit testing — production callers use `openWithLockRetry`. */
169
+ export const _isTestFixturePathForTest = isTestFixturePath;
170
+ const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
171
+ /**
172
+ * Attempt to remove stale `.wal` / `.lock` sidecars that a previous aborted
173
+ * test run may have left behind. Best-effort: ENOENT is normal, anything
174
+ * else is swallowed so the caller's retry can surface the original error.
175
+ */
176
+ const sweepStaleSidecars = async (dbPath) => {
177
+ for (const suffix of ['.wal', '.lock']) {
178
+ try {
179
+ await fs.unlink(dbPath + suffix);
180
+ }
181
+ catch {
182
+ /* missing sidecar or permission error — let the open retry surface it */
183
+ }
184
+ }
185
+ };
186
+ /**
187
+ * Run `construct` with bounded retries when `new lbug.Database(...)` throws
188
+ * a busy/lock error. The original (loop-captured) error is preferred over
189
+ * any post-sweep error so triage sees the real LadybugDB lock message.
190
+ * On exhaustion the rethrown error is tagged via
191
+ * `LBUG_OPEN_RETRY_EXHAUSTED` so the outer query-time retry in
192
+ * `withLbugDb` skips re-retrying a freshly-exhausted path.
193
+ */
194
+ const openWithLockRetry = async (construct, dbPath) => {
195
+ let originalLockError;
196
+ for (let attempt = 1; attempt <= OPEN_LOCK_RETRY_ATTEMPTS; attempt++) {
197
+ try {
198
+ return construct();
199
+ }
200
+ catch (err) {
201
+ if (!isDbBusyError(err))
202
+ throw err;
203
+ originalLockError = err;
204
+ if (attempt === OPEN_LOCK_RETRY_ATTEMPTS)
205
+ break;
206
+ await sleep(OPEN_LOCK_RETRY_DELAY_MS * attempt);
207
+ }
208
+ }
209
+ // Final defense: only for recognized test fixtures, sweep stale sidecars
210
+ // (a prior aborted test run can leave a `.wal` lock that survives the
211
+ // tmp dir cleanup). Production paths never reach this branch — the guard
212
+ // requires the immediate parent dir to match a test prefix AND the
213
+ // resolved path to live under the OS temp directory.
214
+ if (isTestFixturePath(dbPath)) {
215
+ await sweepStaleSidecars(dbPath);
216
+ try {
217
+ return construct();
218
+ }
219
+ catch {
220
+ // Intentionally do NOT overwrite originalLockError. The user-actionable
221
+ // signal is "we exhausted lock retries" — a different error from the
222
+ // post-sweep attempt is less useful than the lock failure that drove
223
+ // the sweep in the first place.
224
+ }
225
+ }
226
+ throw tagOpenRetryExhausted(originalLockError);
227
+ };
45
228
  export async function openLbugConnection(lbugModule, databasePath, options = {}) {
46
229
  let db;
47
230
  try {
48
- db = createLbugDatabase(lbugModule, databasePath, options);
231
+ db = await openWithLockRetry(() => createLbugDatabase(lbugModule, databasePath, options), databasePath);
49
232
  return { db, conn: new lbugModule.Connection(db) };
50
233
  }
51
234
  catch (err) {
@@ -58,3 +241,63 @@ export async function closeLbugConnection(handle) {
58
241
  await handle.conn.close().catch(() => { });
59
242
  await handle.db.close().catch(() => { });
60
243
  }
244
+ /**
245
+ * Probe `dbPath` AND its `.wal` sidecar after `db.close()` so any
246
+ * residual native file handle surfaces as EBUSY/EPERM/EACCES and the
247
+ * bounded retry absorbs the release lag. Windows-only — Linux/macOS do
248
+ * not exhibit this race.
249
+ *
250
+ * Both files matter. Empirically, on rapid open→close→reopen cycles the
251
+ * main `dbPath` handle releases first; the `.wal` handle from the
252
+ * previous Database lingers and the new Database's first write (CREATE
253
+ * NODE TABLE during schema init) fails with "Could not set lock on
254
+ * file". Probing both makes safeClose actually return when the kernel
255
+ * is fully done with the path.
256
+ *
257
+ * Returns `true` when both probes succeeded (or skipped on non-lock
258
+ * errors / missing files). Returns `false` when either probe exhausted
259
+ * its budget with a lock code still in flight.
260
+ *
261
+ * Defensive shape:
262
+ * - Opens read+write (`'r+'`) so the probe actually surfaces exclusive
263
+ * locks held by the previous Database. A read-only probe (`'r'`) is
264
+ * insufficient — Windows will grant read access while the previous
265
+ * handle's exclusive write lock is still in flight, which lets
266
+ * `safeClose` return before the next CREATE NODE TABLE can lock the
267
+ * file.
268
+ * - A `finally` block always closes the probe handle, and any error
269
+ * thrown by `handle.close()` itself is swallowed.
270
+ */
271
+ export const waitForWindowsHandleRelease = async (dbPath) => {
272
+ const mainReleased = await probeSinglePath(dbPath);
273
+ const walReleased = await probeSinglePath(dbPath + '.wal');
274
+ return mainReleased && walReleased;
275
+ };
276
+ const probeSinglePath = async (filePath) => {
277
+ for (let attempt = 1; attempt <= HANDLE_RELEASE_PROBE_ATTEMPTS; attempt++) {
278
+ let handle;
279
+ try {
280
+ handle = await fs.open(filePath, 'r+');
281
+ return true;
282
+ }
283
+ catch (err) {
284
+ const code = err?.code;
285
+ if (!code || !HANDLE_RELEASE_LOCK_CODES.has(code))
286
+ return true; // ENOENT / unrelated → not our problem
287
+ if (attempt === HANDLE_RELEASE_PROBE_ATTEMPTS)
288
+ return false;
289
+ await sleep(HANDLE_RELEASE_PROBE_DELAY_MS * attempt);
290
+ }
291
+ finally {
292
+ if (handle) {
293
+ try {
294
+ await handle.close();
295
+ }
296
+ catch {
297
+ /* swallow — caller cannot do anything useful with a probe-close failure */
298
+ }
299
+ }
300
+ }
301
+ }
302
+ return false;
303
+ };
@@ -17,7 +17,7 @@
17
17
  import fs from 'fs/promises';
18
18
  import lbug from '@ladybugdb/core';
19
19
  import { loadFTSExtension } from './lbug-adapter.js';
20
- import { createLbugDatabase } from './lbug-config.js';
20
+ import { createLbugDatabase, isWalCorruptionError } from './lbug-config.js';
21
21
  const pool = new Map();
22
22
  const poolCloseListeners = new Set();
23
23
  /**
@@ -51,7 +51,7 @@ let idleTimer = null;
51
51
  // @ladybugdb/core), corrupting stdout in the pre-sentinel window. Routing
52
52
  // through the leaf breaks that chain.
53
53
  export { realStdoutWrite, realStderrWrite, setActiveStdoutWrite } from '../../mcp/stdio-capture.js';
54
- import { getActiveStdoutWrite } from '../../mcp/stdio-capture.js';
54
+ import { getActiveStdoutWrite, realStderrWrite } from '../../mcp/stdio-capture.js';
55
55
  let stdoutSilenceCount = 0;
56
56
  /** True while pre-warming connections — prevents watchdog from prematurely restoring stdout */
57
57
  let preWarmActive = false;
@@ -203,6 +203,44 @@ const QUERY_TIMEOUT_MS = 30_000;
203
203
  const WAITER_TIMEOUT_MS = 15_000;
204
204
  const LOCK_RETRY_ATTEMPTS = 3;
205
205
  const LOCK_RETRY_DELAY_MS = 2000;
206
+ async function openReadOnlyDatabase(dbPath) {
207
+ let db;
208
+ silenceStdout();
209
+ try {
210
+ db = createLbugDatabase(lbug, dbPath, {
211
+ readOnly: true,
212
+ throwOnWalReplayFailure: false,
213
+ });
214
+ await db.init();
215
+ return db;
216
+ }
217
+ catch (err) {
218
+ if (db)
219
+ await db.close().catch(() => { });
220
+ throw err;
221
+ }
222
+ finally {
223
+ restoreStdout();
224
+ }
225
+ }
226
+ /**
227
+ * Quarantine the .wal file and retry opening the database.
228
+ * Used when the initial open fails with a WAL corruption error.
229
+ */
230
+ async function tryQuarantineAndReopen(dbPath, repoId) {
231
+ const walPath = dbPath + '.wal';
232
+ const quarantineName = `${walPath}.corrupt.${Date.now()}-${Math.random().toString(36).slice(2)}`;
233
+ try {
234
+ await fs.rename(walPath, quarantineName);
235
+ }
236
+ catch {
237
+ throw new Error(`LadybugDB WAL corruption detected for ${repoId}. ` +
238
+ `Run \`gitnexus analyze\` to rebuild the index. (quarantine failed)`);
239
+ }
240
+ realStderrWrite(`GitNexus: LadybugDB WAL quarantined for ${repoId}; graph may be stale. ` +
241
+ `Run \`gitnexus analyze\` to rebuild the index.\n`);
242
+ return await openReadOnlyDatabase(dbPath);
243
+ }
206
244
  /** Deduplicates concurrent initLbug calls for the same repoId */
207
245
  const initPromises = new Map();
208
246
  /**
@@ -256,17 +294,27 @@ async function doInitLbug(repoId, dbPath) {
256
294
  // avoids lock conflicts when `gitnexus analyze` is writing.
257
295
  let lastError = null;
258
296
  for (let attempt = 1; attempt <= LOCK_RETRY_ATTEMPTS; attempt++) {
259
- silenceStdout();
260
297
  try {
261
- const db = createLbugDatabase(lbug, dbPath, { readOnly: true });
262
- restoreStdout();
298
+ const db = await openReadOnlyDatabase(dbPath);
263
299
  shared = { db, refCount: 0, ftsLoaded: false };
264
300
  dbCache.set(dbPath, shared);
265
301
  break;
266
302
  }
267
303
  catch (err) {
268
- restoreStdout();
269
304
  lastError = err instanceof Error ? err : new Error(String(err));
305
+ if (isWalCorruptionError(lastError)) {
306
+ try {
307
+ const db = await tryQuarantineAndReopen(dbPath, repoId);
308
+ shared = { db, refCount: 0, ftsLoaded: false };
309
+ dbCache.set(dbPath, shared);
310
+ break;
311
+ }
312
+ catch (retryErr) {
313
+ throw new Error(`LadybugDB WAL corruption detected for ${repoId}. ` +
314
+ `Run \`gitnexus analyze\` to rebuild the index. ` +
315
+ `(${retryErr instanceof Error ? retryErr.message : String(retryErr)})`);
316
+ }
317
+ }
270
318
  const isLockError = lastError.message.includes('Could not set lock') || lastError.message.includes('lock');
271
319
  if (!isLockError || attempt === LOCK_RETRY_ATTEMPTS)
272
320
  break;
@@ -259,6 +259,7 @@ export declare class LocalBackend {
259
259
  * UID-based direct lookup. No cluster in output.
260
260
  */
261
261
  private context;
262
+ private _contextImpl;
262
263
  /**
263
264
  * Legacy explore — kept for backwards compatibility with resources.ts.
264
265
  * Routes cluster/process types to direct graph queries.
@@ -8,6 +8,7 @@
8
8
  import fs from 'fs/promises';
9
9
  import path from 'path';
10
10
  import { initLbug, executeQuery, executeParameterized, closeLbug, isLbugReady, isWriteQuery, } from '../../core/lbug/pool-adapter.js';
11
+ import { isWalCorruptionError, WAL_RECOVERY_SUGGESTION } from '../../core/lbug/lbug-config.js';
11
12
  export { isWriteQuery };
12
13
  // Embedding imports are lazy (dynamic import) to avoid loading onnxruntime-node
13
14
  // at MCP server startup — crashes on unsupported Node ABI versions (#89)
@@ -1022,7 +1023,14 @@ export class LocalBackend {
1022
1023
  return result;
1023
1024
  }
1024
1025
  catch (err) {
1025
- return { error: err.message || 'Query failed' };
1026
+ const msg = err.message || 'Query failed';
1027
+ if (isWalCorruptionError(err)) {
1028
+ return {
1029
+ error: msg,
1030
+ recoverySuggestion: WAL_RECOVERY_SUGGESTION,
1031
+ };
1032
+ }
1033
+ return { error: msg };
1026
1034
  }
1027
1035
  }
1028
1036
  /**
@@ -1389,6 +1397,21 @@ export class LocalBackend {
1389
1397
  * UID-based direct lookup. No cluster in output.
1390
1398
  */
1391
1399
  async context(repo, params) {
1400
+ try {
1401
+ return await this._contextImpl(repo, params);
1402
+ }
1403
+ catch (err) {
1404
+ const msg = (err instanceof Error ? err.message : String(err)) || 'Context query failed';
1405
+ if (isWalCorruptionError(err)) {
1406
+ return {
1407
+ error: msg,
1408
+ recoverySuggestion: WAL_RECOVERY_SUGGESTION,
1409
+ };
1410
+ }
1411
+ throw err;
1412
+ }
1413
+ }
1414
+ async _contextImpl(repo, params) {
1392
1415
  await this.ensureInitialized(repo.id);
1393
1416
  const { name, uid, file_path, kind, include_content } = params;
1394
1417
  if (!name && !uid) {
@@ -1990,6 +2013,7 @@ export class LocalBackend {
1990
2013
  impactedCount: 0,
1991
2014
  risk: 'UNKNOWN',
1992
2015
  suggestion: 'The graph query failed — try gitnexus context <symbol> as a fallback',
2016
+ ...(isWalCorruptionError(err) ? { recoverySuggestion: WAL_RECOVERY_SUGGESTION } : {}),
1993
2017
  };
1994
2018
  }
1995
2019
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.91",
3
+ "version": "1.6.4-rc.93",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",