gitnexus 1.6.4-rc.91 → 1.6.4-rc.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/lbug/lbug-adapter.d.ts +0 -6
- package/dist/core/lbug/lbug-adapter.js +34 -15
- package/dist/core/lbug/lbug-config.d.ts +56 -0
- package/dist/core/lbug/lbug-config.js +245 -2
- package/dist/core/lbug/pool-adapter.js +54 -6
- package/dist/mcp/local/local-backend.d.ts +1 -0
- package/dist/mcp/local/local-backend.js +25 -1
- package/package.json +1 -1
|
@@ -32,12 +32,6 @@ export interface RelCsvSplitResult {
|
|
|
32
32
|
export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
|
|
33
33
|
/** Expose the current Database for pool adapter reuse in tests. */
|
|
34
34
|
export declare const getDatabase: () => lbug.Database | null;
|
|
35
|
-
/**
|
|
36
|
-
* Return true when the error message indicates that another process holds
|
|
37
|
-
* an exclusive lock on the LadybugDB file (e.g. `gitnexus analyze` or
|
|
38
|
-
* `gitnexus serve` running at the same time).
|
|
39
|
-
*/
|
|
40
|
-
export declare const isDbBusyError: (err: unknown) => boolean;
|
|
41
35
|
/**
|
|
42
36
|
* Return true when the error message indicates a write was attempted against
|
|
43
37
|
* a read-only LadybugDB connection. The MCP query pool opens DBs read-only,
|
|
@@ -8,7 +8,7 @@ import lbug from '@ladybugdb/core';
|
|
|
8
8
|
import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, STALE_HASH_SENTINEL, } from './schema.js';
|
|
9
9
|
import { streamAllCSVsToDisk } from './csv-generator.js';
|
|
10
10
|
import { extensionManager } from './extension-loader.js';
|
|
11
|
-
import { closeLbugConnection, openLbugConnection, } from './lbug-config.js';
|
|
11
|
+
import { closeLbugConnection, isDbBusyError, isOpenRetryExhausted, openLbugConnection, waitForWindowsHandleRelease, } from './lbug-config.js';
|
|
12
12
|
import { isVectorExtensionSupportedByPlatform } from '../platform/capabilities.js';
|
|
13
13
|
import { logger } from '../logger.js';
|
|
14
14
|
/**
|
|
@@ -140,18 +140,6 @@ let sessionLock = Promise.resolve();
|
|
|
140
140
|
const DB_LOCK_RETRY_ATTEMPTS = 3;
|
|
141
141
|
/** Base back-off in ms between BUSY retries (multiplied by attempt number). */
|
|
142
142
|
const DB_LOCK_RETRY_DELAY_MS = 500;
|
|
143
|
-
/**
|
|
144
|
-
* Return true when the error message indicates that another process holds
|
|
145
|
-
* an exclusive lock on the LadybugDB file (e.g. `gitnexus analyze` or
|
|
146
|
-
* `gitnexus serve` running at the same time).
|
|
147
|
-
*/
|
|
148
|
-
export const isDbBusyError = (err) => {
|
|
149
|
-
const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
|
|
150
|
-
return (msg.includes('busy') ||
|
|
151
|
-
msg.includes('lock') ||
|
|
152
|
-
msg.includes('already in use') ||
|
|
153
|
-
msg.includes('could not set lock'));
|
|
154
|
-
};
|
|
155
143
|
/**
|
|
156
144
|
* Return true when the error message indicates a write was attempted against
|
|
157
145
|
* a read-only LadybugDB connection. The MCP query pool opens DBs read-only,
|
|
@@ -201,7 +189,11 @@ export const withLbugDb = async (dbPath, operation) => {
|
|
|
201
189
|
}
|
|
202
190
|
catch (err) {
|
|
203
191
|
lastError = err;
|
|
204
|
-
|
|
192
|
+
// Skip outer retry when the inner open-retry already exhausted: the
|
|
193
|
+
// ~1.5s open-time budget was just spent, repeating the full reset+
|
|
194
|
+
// reopen cycle would only add 4-5s of tail latency without changing
|
|
195
|
+
// the outcome (both layers consult the same isDbBusyError matcher).
|
|
196
|
+
if (!isDbBusyError(err) || isOpenRetryExhausted(err) || attempt === DB_LOCK_RETRY_ATTEMPTS) {
|
|
205
197
|
throw err;
|
|
206
198
|
}
|
|
207
199
|
// Close stale connection inside the session lock to prevent race conditions
|
|
@@ -274,7 +266,16 @@ const doInitLbug = async (dbPath) => {
|
|
|
274
266
|
}
|
|
275
267
|
catch (err) {
|
|
276
268
|
const msg = err instanceof Error ? err.message : String(err);
|
|
277
|
-
|
|
269
|
+
// Suppression list:
|
|
270
|
+
// - "already exists": expected idempotent re-create on existing DBs
|
|
271
|
+
// - "could not set lock on file": LadybugDB v0.16.1 emits this on
|
|
272
|
+
// Windows when CREATE NODE TABLE runs against a path that was
|
|
273
|
+
// just opened (the WAL handle from a fresh Database briefly
|
|
274
|
+
// contests the table's first-write lock). The table is created
|
|
275
|
+
// anyway and any genuine cross-process lock contention surfaces
|
|
276
|
+
// on the next operation via withLbugDb's retry. Logging it here
|
|
277
|
+
// would just be noise in CI.
|
|
278
|
+
if (!msg.includes('already exists') && !isDbBusyError(err)) {
|
|
278
279
|
logger.warn(`⚠️ Schema creation warning: ${msg.slice(0, 120)}`);
|
|
279
280
|
}
|
|
280
281
|
}
|
|
@@ -940,6 +941,9 @@ export const flushWAL = async () => {
|
|
|
940
941
|
*/
|
|
941
942
|
export const safeClose = async () => {
|
|
942
943
|
await flushWAL();
|
|
944
|
+
// Capture before close — currentDbPath stays set so the Windows post-close
|
|
945
|
+
// probe below knows which file to wait on.
|
|
946
|
+
const closingDbPath = currentDbPath;
|
|
943
947
|
if (conn) {
|
|
944
948
|
try {
|
|
945
949
|
// eslint-disable-next-line no-restricted-syntax -- sole authorised close site
|
|
@@ -960,6 +964,21 @@ export const safeClose = async () => {
|
|
|
960
964
|
}
|
|
961
965
|
db = null;
|
|
962
966
|
}
|
|
967
|
+
// Windows: libuv reports `db.close()` resolved before the kernel has
|
|
968
|
+
// released the file handle. A subsequent `new Database(samePath)` in
|
|
969
|
+
// the same process can race the release. The probe (lbug-config.ts)
|
|
970
|
+
// forces any residual lock to surface as EBUSY/EPERM/EACCES so the
|
|
971
|
+
// open-time retry absorbs the lag.
|
|
972
|
+
if (process.platform === 'win32' && closingDbPath) {
|
|
973
|
+
const released = await waitForWindowsHandleRelease(closingDbPath);
|
|
974
|
+
if (!released) {
|
|
975
|
+
// Probe exhausted with a lock code still in flight. The next
|
|
976
|
+
// openLbugConnection will absorb whatever residual lag remains, but
|
|
977
|
+
// a chronic warning helps operators spot AV interference (Windows
|
|
978
|
+
// Defender holding the file far past the ~500ms-per-file probe budget).
|
|
979
|
+
logger.warn({ dbPath: closingDbPath }, '⚠️ LadybugDB file handle still locked after close (Windows). If this repeats, check antivirus/Defender exclusions for the GitNexus storage directory.');
|
|
980
|
+
}
|
|
981
|
+
}
|
|
963
982
|
};
|
|
964
983
|
export const closeLbug = async () => {
|
|
965
984
|
await safeClose();
|
|
@@ -32,15 +32,71 @@ import type lbug from '@ladybugdb/core';
|
|
|
32
32
|
* integer; anything invalid falls back to the default.
|
|
33
33
|
*/
|
|
34
34
|
export declare const LBUG_MAX_DB_SIZE: number;
|
|
35
|
+
export declare const WAL_RECOVERY_SUGGESTION = "WAL corruption detected. Run `gitnexus analyze` to rebuild the index.";
|
|
36
|
+
export declare function isWalCorruptionError(err: unknown): boolean;
|
|
35
37
|
type LbugModule = typeof lbug;
|
|
36
38
|
export interface LbugDatabaseOptions {
|
|
37
39
|
readOnly?: boolean;
|
|
40
|
+
throwOnWalReplayFailure?: boolean;
|
|
38
41
|
}
|
|
39
42
|
export interface LbugConnectionHandle {
|
|
40
43
|
db: lbug.Database;
|
|
41
44
|
conn: lbug.Connection;
|
|
42
45
|
}
|
|
46
|
+
/**
|
|
47
|
+
* Return true when the error message indicates that a LadybugDB file lock
|
|
48
|
+
* could not be acquired — either at construction time
|
|
49
|
+
* (`new lbug.Database(...)` raises from `local_file_system.cpp`) or during
|
|
50
|
+
* a query (another writer holds the exclusive lock).
|
|
51
|
+
*
|
|
52
|
+
* Lives here (not in `lbug-adapter.ts`) so both the construction-time
|
|
53
|
+
* retry (`openWithLockRetry` in this file) and the query-time retry
|
|
54
|
+
* (`withLbugDb` in `lbug-adapter.ts`) consult the same matcher. Callers
|
|
55
|
+
* import directly from this module — no re-export to keep in sync.
|
|
56
|
+
*/
|
|
57
|
+
export declare const isDbBusyError: (err: unknown) => boolean;
|
|
43
58
|
export declare function createLbugDatabase(lbugModule: LbugModule, databasePath: string, options?: LbugDatabaseOptions): lbug.Database;
|
|
59
|
+
/**
|
|
60
|
+
* Marker symbol attached to lock errors after `openWithLockRetry` exhausts
|
|
61
|
+
* its budget. `withLbugDb`'s outer query-time retry consults this so it
|
|
62
|
+
* does not re-retry a path that just spent up to ~1.5s in the open-time
|
|
63
|
+
* loop — preventing 6s tail latencies (3× outer × 5× inner attempts).
|
|
64
|
+
*
|
|
65
|
+
* The symbol is internal to GitNexus; consumers should treat the underlying
|
|
66
|
+
* error message as the user-visible signal.
|
|
67
|
+
*/
|
|
68
|
+
export declare const LBUG_OPEN_RETRY_EXHAUSTED: unique symbol;
|
|
69
|
+
export declare const isOpenRetryExhausted: (err: unknown) => boolean;
|
|
70
|
+
/** Exported only for direct unit testing — production callers use `openWithLockRetry`. */
|
|
71
|
+
export declare const _isTestFixturePathForTest: (dbPath: string) => boolean;
|
|
44
72
|
export declare function openLbugConnection(lbugModule: LbugModule, databasePath: string, options?: LbugDatabaseOptions): Promise<LbugConnectionHandle>;
|
|
45
73
|
export declare function closeLbugConnection(handle: LbugConnectionHandle): Promise<void>;
|
|
74
|
+
/**
|
|
75
|
+
* Probe `dbPath` AND its `.wal` sidecar after `db.close()` so any
|
|
76
|
+
* residual native file handle surfaces as EBUSY/EPERM/EACCES and the
|
|
77
|
+
* bounded retry absorbs the release lag. Windows-only — Linux/macOS do
|
|
78
|
+
* not exhibit this race.
|
|
79
|
+
*
|
|
80
|
+
* Both files matter. Empirically, on rapid open→close→reopen cycles the
|
|
81
|
+
* main `dbPath` handle releases first; the `.wal` handle from the
|
|
82
|
+
* previous Database lingers and the new Database's first write (CREATE
|
|
83
|
+
* NODE TABLE during schema init) fails with "Could not set lock on
|
|
84
|
+
* file". Probing both makes safeClose actually return when the kernel
|
|
85
|
+
* is fully done with the path.
|
|
86
|
+
*
|
|
87
|
+
* Returns `true` when both probes succeeded (or skipped on non-lock
|
|
88
|
+
* errors / missing files). Returns `false` when either probe exhausted
|
|
89
|
+
* its budget with a lock code still in flight.
|
|
90
|
+
*
|
|
91
|
+
* Defensive shape:
|
|
92
|
+
* - Opens read+write (`'r+'`) so the probe actually surfaces exclusive
|
|
93
|
+
* locks held by the previous Database. A read-only probe (`'r'`) is
|
|
94
|
+
* insufficient — Windows will grant read access while the previous
|
|
95
|
+
* handle's exclusive write lock is still in flight, which lets
|
|
96
|
+
* `safeClose` return before the next CREATE NODE TABLE can lock the
|
|
97
|
+
* file.
|
|
98
|
+
* - `try/finally` around `handle.close()` guarantees no fd leak even
|
|
99
|
+
* if close itself throws.
|
|
100
|
+
*/
|
|
101
|
+
export declare const waitForWindowsHandleRelease: (dbPath: string) => Promise<boolean>;
|
|
46
102
|
export {};
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import os from 'os';
|
|
3
|
+
import path from 'path';
|
|
1
4
|
/**
|
|
2
5
|
* Shared configuration for `@ladybugdb/core` `Database` construction.
|
|
3
6
|
*
|
|
@@ -39,13 +42,193 @@ export const LBUG_MAX_DB_SIZE = (() => {
|
|
|
39
42
|
}
|
|
40
43
|
return 16 * 1024 * 1024 * 1024;
|
|
41
44
|
})();
|
|
45
|
+
/** Matches WAL corruption errors from the LadybugDB engine. */
|
|
46
|
+
const WAL_CORRUPTION_RE = /corrupt(ed)?\s+wal|invalid\s+wal\s+record|wal.*corrupt|checksum.*wal/i;
|
|
47
|
+
export const WAL_RECOVERY_SUGGESTION = 'WAL corruption detected. Run `gitnexus analyze` to rebuild the index.';
|
|
48
|
+
export function isWalCorruptionError(err) {
|
|
49
|
+
if (!err)
|
|
50
|
+
return false;
|
|
51
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
52
|
+
return WAL_CORRUPTION_RE.test(msg);
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Return true when the error message indicates that a LadybugDB file lock
|
|
56
|
+
* could not be acquired — either at construction time
|
|
57
|
+
* (`new lbug.Database(...)` raises from `local_file_system.cpp`) or during
|
|
58
|
+
* a query (another writer holds the exclusive lock).
|
|
59
|
+
*
|
|
60
|
+
* Lives here (not in `lbug-adapter.ts`) so both the construction-time
|
|
61
|
+
* retry (`openWithLockRetry` in this file) and the query-time retry
|
|
62
|
+
* (`withLbugDb` in `lbug-adapter.ts`) consult the same matcher. Callers
|
|
63
|
+
* import directly from this module — no re-export to keep in sync.
|
|
64
|
+
*/
|
|
65
|
+
export const isDbBusyError = (err) => {
|
|
66
|
+
const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
|
|
67
|
+
// `lock` already subsumes `could not set lock`; the broader term is kept
|
|
68
|
+
// because graph-DB transient errors include "deadlock", "lock contention",
|
|
69
|
+
// and the LadybugDB native module's "could not set lock on file" — all of
|
|
70
|
+
// which deserve a retry. If a non-transient lock-shaped error ever
|
|
71
|
+
// surfaces (e.g., "lock file missing" during recovery), tighten this
|
|
72
|
+
// matcher rather than raising the retry budget.
|
|
73
|
+
return msg.includes('busy') || msg.includes('lock') || msg.includes('already in use');
|
|
74
|
+
};
|
|
42
75
|
export function createLbugDatabase(lbugModule, databasePath, options = {}) {
|
|
43
|
-
|
|
76
|
+
// .d.ts declares fewer args than the native constructor accepts.
|
|
77
|
+
return new lbugModule.Database(databasePath, 0, // bufferManagerSize
|
|
78
|
+
false, // enableCompression (pinned for v0.16.0)
|
|
79
|
+
options.readOnly ?? false, LBUG_MAX_DB_SIZE, true, // autoCheckpoint
|
|
80
|
+
-1, // checkpointThreshold
|
|
81
|
+
options.throwOnWalReplayFailure ?? true, true);
|
|
44
82
|
}
|
|
83
|
+
// ─── Lock-busy retry tuning knobs ───────────────────────────────────────────
|
|
84
|
+
//
|
|
85
|
+
// All three GitNexus retry pairs that touch native LadybugDB locks live with
|
|
86
|
+
// a comment cross-reference here so an SRE tuning Windows flakes finds them
|
|
87
|
+
// in one grep:
|
|
88
|
+
//
|
|
89
|
+
// 1. OPEN_LOCK_RETRY_ATTEMPTS / OPEN_LOCK_RETRY_DELAY_MS (this file)
|
|
90
|
+
// → `new lbug.Database()` constructor lock failures
|
|
91
|
+
// 2. HANDLE_RELEASE_PROBE_ATTEMPTS / HANDLE_RELEASE_PROBE_DELAY_MS (this file)
|
|
92
|
+
// → post-close fs.open probe to absorb Windows handle-release lag
|
|
93
|
+
// 3. DB_LOCK_RETRY_ATTEMPTS / DB_LOCK_RETRY_DELAY_MS (lbug-adapter.ts withLbugDb)
|
|
94
|
+
// → query-time busy/lock retry around already-open connections
|
|
95
|
+
//
|
|
96
|
+
// `new lbug.Database()` calls into the native module which performs an
|
|
97
|
+
// OS-level exclusive lock on `<dbPath>`. On Windows that lock can fail
|
|
98
|
+
// for reasons specific to the OS (Defender briefly opens new files,
|
|
99
|
+
// libuv handle release lags the JS-side close). 5 attempts × 100ms
|
|
100
|
+
// linear back-off (max sleep 100+200+300+400 = 1s, plus 5 ctor RTTs
|
|
101
|
+
// of 10–50ms each = ~1.0–1.2s worst case) clears the typical
|
|
102
|
+
// AV-scanner hold without masking real cross-process conflicts.
|
|
103
|
+
//
|
|
104
|
+
// Source: https://github.com/LadybugDB/ladybug/blob/v0.16.1/src/common/file_system/local_file_system.cpp#L126
|
|
105
|
+
const OPEN_LOCK_RETRY_ATTEMPTS = 5;
|
|
106
|
+
const OPEN_LOCK_RETRY_DELAY_MS = 100;
|
|
107
|
+
const HANDLE_RELEASE_PROBE_ATTEMPTS = 5;
|
|
108
|
+
const HANDLE_RELEASE_PROBE_DELAY_MS = 50;
|
|
109
|
+
const HANDLE_RELEASE_LOCK_CODES = new Set(['EBUSY', 'EPERM', 'EACCES']);
|
|
110
|
+
/**
|
|
111
|
+
* Test-fixture directory prefixes recognized by `isTestFixturePath`.
|
|
112
|
+
*
|
|
113
|
+
* IMPORTANT: this list must stay in sync with the prefixes passed to
|
|
114
|
+
* `createTempDir` in `gitnexus/test/helpers/test-db.ts` and the prefixes
|
|
115
|
+
* used by `withTestLbugDB` (`gitnexus/test/helpers/test-indexed-db.ts`).
|
|
116
|
+
* If you add a new test that passes a custom prefix to `createTempDir`,
|
|
117
|
+
* add it here too — otherwise the stale-sidecar sweep silently won't
|
|
118
|
+
* fire for that fixture and CI flakes return.
|
|
119
|
+
*
|
|
120
|
+
* The default `createTempDir('gitnexus-test-')` and the lbug variant
|
|
121
|
+
* `'gitnexus-lbug-'` cover today's call sites.
|
|
122
|
+
*/
|
|
123
|
+
const TEST_FIXTURE_PREFIXES = ['gitnexus-lbug-', 'gitnexus-test-'];
|
|
124
|
+
/**
|
|
125
|
+
* Marker symbol attached to lock errors after `openWithLockRetry` exhausts
|
|
126
|
+
* its budget. `withLbugDb`'s outer query-time retry consults this so it
|
|
127
|
+
* does not re-retry a path that just spent up to ~1.5s in the open-time
|
|
128
|
+
* loop — preventing 6s tail latencies (3× outer × 5× inner attempts).
|
|
129
|
+
*
|
|
130
|
+
* The symbol is internal to GitNexus; consumers should treat the underlying
|
|
131
|
+
* error message as the user-visible signal.
|
|
132
|
+
*/
|
|
133
|
+
export const LBUG_OPEN_RETRY_EXHAUSTED = Symbol.for('gitnexus.lbug.openRetryExhausted');
|
|
134
|
+
export const isOpenRetryExhausted = (err) => {
|
|
135
|
+
if (err === null || err === undefined || typeof err !== 'object')
|
|
136
|
+
return false;
|
|
137
|
+
return err[LBUG_OPEN_RETRY_EXHAUSTED] === true;
|
|
138
|
+
};
|
|
139
|
+
const tagOpenRetryExhausted = (err) => {
|
|
140
|
+
if (err && typeof err === 'object') {
|
|
141
|
+
err[LBUG_OPEN_RETRY_EXHAUSTED] = true;
|
|
142
|
+
}
|
|
143
|
+
return err;
|
|
144
|
+
};
|
|
145
|
+
/**
|
|
146
|
+
* True when `dbPath` resolves to a recognized test fixture under the OS
|
|
147
|
+
* temp directory. Used to gate the stale-sidecar sweep so production
|
|
148
|
+
* paths never have their `.wal` / `.lock` files deleted.
|
|
149
|
+
*
|
|
150
|
+
* Defensive shape:
|
|
151
|
+
* - `path.resolve` normalizes `..` segments before the prefix check, so
|
|
152
|
+
* `<tmp>/gitnexus-lbug-x/../../etc/passwd` is rejected.
|
|
153
|
+
* - The tmpRoot check trims any trailing separator returned by some
|
|
154
|
+
* Windows TMP configurations (`C:\Users\X\Temp\`) so the startsWith
|
|
155
|
+
* comparison stays correct.
|
|
156
|
+
* - Only the IMMEDIATE parent directory is matched against the prefix
|
|
157
|
+
* list. An ancestor walk would let a tmpdir whose own basename starts
|
|
158
|
+
* with `gitnexus-lbug-` accept arbitrary nested paths under it.
|
|
159
|
+
*/
|
|
160
|
+
const isTestFixturePath = (dbPath) => {
|
|
161
|
+
const tmpRoot = os.tmpdir().replace(new RegExp(`${path.sep === '\\' ? '\\\\' : path.sep}+$`), '');
|
|
162
|
+
const resolved = path.resolve(dbPath);
|
|
163
|
+
if (!resolved.startsWith(tmpRoot + path.sep) && resolved !== tmpRoot)
|
|
164
|
+
return false;
|
|
165
|
+
const parentBase = path.basename(path.dirname(resolved));
|
|
166
|
+
return TEST_FIXTURE_PREFIXES.some((p) => parentBase.startsWith(p));
|
|
167
|
+
};
|
|
168
|
+
/** Exported only for direct unit testing — production callers use `openWithLockRetry`. */
|
|
169
|
+
export const _isTestFixturePathForTest = isTestFixturePath;
|
|
170
|
+
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
171
|
+
/**
|
|
172
|
+
* Attempt to remove stale `.wal` / `.lock` sidecars that a previous aborted
|
|
173
|
+
* test run may have left behind. Best-effort: ENOENT is normal, anything
|
|
174
|
+
* else is swallowed so the caller's retry can surface the original error.
|
|
175
|
+
*/
|
|
176
|
+
const sweepStaleSidecars = async (dbPath) => {
|
|
177
|
+
for (const suffix of ['.wal', '.lock']) {
|
|
178
|
+
try {
|
|
179
|
+
await fs.unlink(dbPath + suffix);
|
|
180
|
+
}
|
|
181
|
+
catch {
|
|
182
|
+
/* missing sidecar or permission error — let the open retry surface it */
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
};
|
|
186
|
+
/**
|
|
187
|
+
* Run `construct` with bounded retries when `new lbug.Database(...)` throws
|
|
188
|
+
* a busy/lock error. The original (loop-captured) error is preferred over
|
|
189
|
+
* any post-sweep error so triage sees the real LadybugDB lock message.
|
|
190
|
+
* On exhaustion the rethrown error is tagged via
|
|
191
|
+
* `LBUG_OPEN_RETRY_EXHAUSTED` so the outer query-time retry in
|
|
192
|
+
* `withLbugDb` skips re-retrying a freshly-exhausted path.
|
|
193
|
+
*/
|
|
194
|
+
const openWithLockRetry = async (construct, dbPath) => {
|
|
195
|
+
let originalLockError;
|
|
196
|
+
for (let attempt = 1; attempt <= OPEN_LOCK_RETRY_ATTEMPTS; attempt++) {
|
|
197
|
+
try {
|
|
198
|
+
return construct();
|
|
199
|
+
}
|
|
200
|
+
catch (err) {
|
|
201
|
+
if (!isDbBusyError(err))
|
|
202
|
+
throw err;
|
|
203
|
+
originalLockError = err;
|
|
204
|
+
if (attempt === OPEN_LOCK_RETRY_ATTEMPTS)
|
|
205
|
+
break;
|
|
206
|
+
await sleep(OPEN_LOCK_RETRY_DELAY_MS * attempt);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
// Final defense: only for recognized test fixtures, sweep stale sidecars
|
|
210
|
+
// (a prior aborted test run can leave a `.wal` lock that survives the
|
|
211
|
+
// tmp dir cleanup). Production paths never reach this branch — the guard
|
|
212
|
+
// requires the immediate parent dir to match a test prefix AND the
|
|
213
|
+
// resolved path to live under the OS temp directory.
|
|
214
|
+
if (isTestFixturePath(dbPath)) {
|
|
215
|
+
await sweepStaleSidecars(dbPath);
|
|
216
|
+
try {
|
|
217
|
+
return construct();
|
|
218
|
+
}
|
|
219
|
+
catch {
|
|
220
|
+
// Intentionally do NOT overwrite originalLockError. The user-actionable
|
|
221
|
+
// signal is "we exhausted lock retries" — a different error from the
|
|
222
|
+
// post-sweep attempt is less useful than the lock failure that drove
|
|
223
|
+
// the sweep in the first place.
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
throw tagOpenRetryExhausted(originalLockError);
|
|
227
|
+
};
|
|
45
228
|
export async function openLbugConnection(lbugModule, databasePath, options = {}) {
|
|
46
229
|
let db;
|
|
47
230
|
try {
|
|
48
|
-
db = createLbugDatabase(lbugModule, databasePath, options);
|
|
231
|
+
db = await openWithLockRetry(() => createLbugDatabase(lbugModule, databasePath, options), databasePath);
|
|
49
232
|
return { db, conn: new lbugModule.Connection(db) };
|
|
50
233
|
}
|
|
51
234
|
catch (err) {
|
|
@@ -58,3 +241,63 @@ export async function closeLbugConnection(handle) {
|
|
|
58
241
|
await handle.conn.close().catch(() => { });
|
|
59
242
|
await handle.db.close().catch(() => { });
|
|
60
243
|
}
|
|
244
|
+
/**
|
|
245
|
+
* Probe `dbPath` AND its `.wal` sidecar after `db.close()` so any
|
|
246
|
+
* residual native file handle surfaces as EBUSY/EPERM/EACCES and the
|
|
247
|
+
* bounded retry absorbs the release lag. Windows-only — Linux/macOS do
|
|
248
|
+
* not exhibit this race.
|
|
249
|
+
*
|
|
250
|
+
* Both files matter. Empirically, on rapid open→close→reopen cycles the
|
|
251
|
+
* main `dbPath` handle releases first; the `.wal` handle from the
|
|
252
|
+
* previous Database lingers and the new Database's first write (CREATE
|
|
253
|
+
* NODE TABLE during schema init) fails with "Could not set lock on
|
|
254
|
+
* file". Probing both makes safeClose actually return when the kernel
|
|
255
|
+
* is fully done with the path.
|
|
256
|
+
*
|
|
257
|
+
* Returns `true` when both probes succeeded (or skipped on non-lock
|
|
258
|
+
* errors / missing files). Returns `false` when either probe exhausted
|
|
259
|
+
* its budget with a lock code still in flight.
|
|
260
|
+
*
|
|
261
|
+
* Defensive shape:
|
|
262
|
+
* - Opens read+write (`'r+'`) so the probe actually surfaces exclusive
|
|
263
|
+
* locks held by the previous Database. A read-only probe (`'r'`) is
|
|
264
|
+
* insufficient — Windows will grant read access while the previous
|
|
265
|
+
* handle's exclusive write lock is still in flight, which lets
|
|
266
|
+
* `safeClose` return before the next CREATE NODE TABLE can lock the
|
|
267
|
+
* file.
|
|
268
|
+
* - `try/finally` around `handle.close()` guarantees no fd leak even
|
|
269
|
+
* if close itself throws.
|
|
270
|
+
*/
|
|
271
|
+
export const waitForWindowsHandleRelease = async (dbPath) => {
|
|
272
|
+
const mainReleased = await probeSinglePath(dbPath);
|
|
273
|
+
const walReleased = await probeSinglePath(dbPath + '.wal');
|
|
274
|
+
return mainReleased && walReleased;
|
|
275
|
+
};
|
|
276
|
+
const probeSinglePath = async (filePath) => {
|
|
277
|
+
for (let attempt = 1; attempt <= HANDLE_RELEASE_PROBE_ATTEMPTS; attempt++) {
|
|
278
|
+
let handle;
|
|
279
|
+
try {
|
|
280
|
+
handle = await fs.open(filePath, 'r+');
|
|
281
|
+
return true;
|
|
282
|
+
}
|
|
283
|
+
catch (err) {
|
|
284
|
+
const code = err?.code;
|
|
285
|
+
if (!code || !HANDLE_RELEASE_LOCK_CODES.has(code))
|
|
286
|
+
return true; // ENOENT / unrelated → not our problem
|
|
287
|
+
if (attempt === HANDLE_RELEASE_PROBE_ATTEMPTS)
|
|
288
|
+
return false;
|
|
289
|
+
await sleep(HANDLE_RELEASE_PROBE_DELAY_MS * attempt);
|
|
290
|
+
}
|
|
291
|
+
finally {
|
|
292
|
+
if (handle) {
|
|
293
|
+
try {
|
|
294
|
+
await handle.close();
|
|
295
|
+
}
|
|
296
|
+
catch {
|
|
297
|
+
/* swallow — caller cannot do anything useful with a probe-close failure */
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
return false;
|
|
303
|
+
};
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
import fs from 'fs/promises';
|
|
18
18
|
import lbug from '@ladybugdb/core';
|
|
19
19
|
import { loadFTSExtension } from './lbug-adapter.js';
|
|
20
|
-
import { createLbugDatabase } from './lbug-config.js';
|
|
20
|
+
import { createLbugDatabase, isWalCorruptionError } from './lbug-config.js';
|
|
21
21
|
const pool = new Map();
|
|
22
22
|
const poolCloseListeners = new Set();
|
|
23
23
|
/**
|
|
@@ -51,7 +51,7 @@ let idleTimer = null;
|
|
|
51
51
|
// @ladybugdb/core), corrupting stdout in the pre-sentinel window. Routing
|
|
52
52
|
// through the leaf breaks that chain.
|
|
53
53
|
export { realStdoutWrite, realStderrWrite, setActiveStdoutWrite } from '../../mcp/stdio-capture.js';
|
|
54
|
-
import { getActiveStdoutWrite } from '../../mcp/stdio-capture.js';
|
|
54
|
+
import { getActiveStdoutWrite, realStderrWrite } from '../../mcp/stdio-capture.js';
|
|
55
55
|
let stdoutSilenceCount = 0;
|
|
56
56
|
/** True while pre-warming connections — prevents watchdog from prematurely restoring stdout */
|
|
57
57
|
let preWarmActive = false;
|
|
@@ -203,6 +203,44 @@ const QUERY_TIMEOUT_MS = 30_000;
|
|
|
203
203
|
const WAITER_TIMEOUT_MS = 15_000;
|
|
204
204
|
const LOCK_RETRY_ATTEMPTS = 3;
|
|
205
205
|
const LOCK_RETRY_DELAY_MS = 2000;
|
|
206
|
+
async function openReadOnlyDatabase(dbPath) {
|
|
207
|
+
let db;
|
|
208
|
+
silenceStdout();
|
|
209
|
+
try {
|
|
210
|
+
db = createLbugDatabase(lbug, dbPath, {
|
|
211
|
+
readOnly: true,
|
|
212
|
+
throwOnWalReplayFailure: false,
|
|
213
|
+
});
|
|
214
|
+
await db.init();
|
|
215
|
+
return db;
|
|
216
|
+
}
|
|
217
|
+
catch (err) {
|
|
218
|
+
if (db)
|
|
219
|
+
await db.close().catch(() => { });
|
|
220
|
+
throw err;
|
|
221
|
+
}
|
|
222
|
+
finally {
|
|
223
|
+
restoreStdout();
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Quarantine the .wal file and retry opening the database.
|
|
228
|
+
* Used when the initial open fails with a WAL corruption error.
|
|
229
|
+
*/
|
|
230
|
+
async function tryQuarantineAndReopen(dbPath, repoId) {
|
|
231
|
+
const walPath = dbPath + '.wal';
|
|
232
|
+
const quarantineName = `${walPath}.corrupt.${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
|
233
|
+
try {
|
|
234
|
+
await fs.rename(walPath, quarantineName);
|
|
235
|
+
}
|
|
236
|
+
catch {
|
|
237
|
+
throw new Error(`LadybugDB WAL corruption detected for ${repoId}. ` +
|
|
238
|
+
`Run \`gitnexus analyze\` to rebuild the index. (quarantine failed)`);
|
|
239
|
+
}
|
|
240
|
+
realStderrWrite(`GitNexus: LadybugDB WAL quarantined for ${repoId}; graph may be stale. ` +
|
|
241
|
+
`Run \`gitnexus analyze\` to rebuild the index.\n`);
|
|
242
|
+
return await openReadOnlyDatabase(dbPath);
|
|
243
|
+
}
|
|
206
244
|
/** Deduplicates concurrent initLbug calls for the same repoId */
|
|
207
245
|
const initPromises = new Map();
|
|
208
246
|
/**
|
|
@@ -256,17 +294,27 @@ async function doInitLbug(repoId, dbPath) {
|
|
|
256
294
|
// avoids lock conflicts when `gitnexus analyze` is writing.
|
|
257
295
|
let lastError = null;
|
|
258
296
|
for (let attempt = 1; attempt <= LOCK_RETRY_ATTEMPTS; attempt++) {
|
|
259
|
-
silenceStdout();
|
|
260
297
|
try {
|
|
261
|
-
const db =
|
|
262
|
-
restoreStdout();
|
|
298
|
+
const db = await openReadOnlyDatabase(dbPath);
|
|
263
299
|
shared = { db, refCount: 0, ftsLoaded: false };
|
|
264
300
|
dbCache.set(dbPath, shared);
|
|
265
301
|
break;
|
|
266
302
|
}
|
|
267
303
|
catch (err) {
|
|
268
|
-
restoreStdout();
|
|
269
304
|
lastError = err instanceof Error ? err : new Error(String(err));
|
|
305
|
+
if (isWalCorruptionError(lastError)) {
|
|
306
|
+
try {
|
|
307
|
+
const db = await tryQuarantineAndReopen(dbPath, repoId);
|
|
308
|
+
shared = { db, refCount: 0, ftsLoaded: false };
|
|
309
|
+
dbCache.set(dbPath, shared);
|
|
310
|
+
break;
|
|
311
|
+
}
|
|
312
|
+
catch (retryErr) {
|
|
313
|
+
throw new Error(`LadybugDB WAL corruption detected for ${repoId}. ` +
|
|
314
|
+
`Run \`gitnexus analyze\` to rebuild the index. ` +
|
|
315
|
+
`(${retryErr instanceof Error ? retryErr.message : String(retryErr)})`);
|
|
316
|
+
}
|
|
317
|
+
}
|
|
270
318
|
const isLockError = lastError.message.includes('Could not set lock') || lastError.message.includes('lock');
|
|
271
319
|
if (!isLockError || attempt === LOCK_RETRY_ATTEMPTS)
|
|
272
320
|
break;
|
|
@@ -259,6 +259,7 @@ export declare class LocalBackend {
|
|
|
259
259
|
* UID-based direct lookup. No cluster in output.
|
|
260
260
|
*/
|
|
261
261
|
private context;
|
|
262
|
+
private _contextImpl;
|
|
262
263
|
/**
|
|
263
264
|
* Legacy explore — kept for backwards compatibility with resources.ts.
|
|
264
265
|
* Routes cluster/process types to direct graph queries.
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import fs from 'fs/promises';
|
|
9
9
|
import path from 'path';
|
|
10
10
|
import { initLbug, executeQuery, executeParameterized, closeLbug, isLbugReady, isWriteQuery, } from '../../core/lbug/pool-adapter.js';
|
|
11
|
+
import { isWalCorruptionError, WAL_RECOVERY_SUGGESTION } from '../../core/lbug/lbug-config.js';
|
|
11
12
|
export { isWriteQuery };
|
|
12
13
|
// Embedding imports are lazy (dynamic import) to avoid loading onnxruntime-node
|
|
13
14
|
// at MCP server startup — crashes on unsupported Node ABI versions (#89)
|
|
@@ -1022,7 +1023,14 @@ export class LocalBackend {
|
|
|
1022
1023
|
return result;
|
|
1023
1024
|
}
|
|
1024
1025
|
catch (err) {
|
|
1025
|
-
|
|
1026
|
+
const msg = err.message || 'Query failed';
|
|
1027
|
+
if (isWalCorruptionError(err)) {
|
|
1028
|
+
return {
|
|
1029
|
+
error: msg,
|
|
1030
|
+
recoverySuggestion: WAL_RECOVERY_SUGGESTION,
|
|
1031
|
+
};
|
|
1032
|
+
}
|
|
1033
|
+
return { error: msg };
|
|
1026
1034
|
}
|
|
1027
1035
|
}
|
|
1028
1036
|
/**
|
|
@@ -1389,6 +1397,21 @@ export class LocalBackend {
|
|
|
1389
1397
|
* UID-based direct lookup. No cluster in output.
|
|
1390
1398
|
*/
|
|
1391
1399
|
async context(repo, params) {
|
|
1400
|
+
try {
|
|
1401
|
+
return await this._contextImpl(repo, params);
|
|
1402
|
+
}
|
|
1403
|
+
catch (err) {
|
|
1404
|
+
const msg = (err instanceof Error ? err.message : String(err)) || 'Context query failed';
|
|
1405
|
+
if (isWalCorruptionError(err)) {
|
|
1406
|
+
return {
|
|
1407
|
+
error: msg,
|
|
1408
|
+
recoverySuggestion: WAL_RECOVERY_SUGGESTION,
|
|
1409
|
+
};
|
|
1410
|
+
}
|
|
1411
|
+
throw err;
|
|
1412
|
+
}
|
|
1413
|
+
}
|
|
1414
|
+
async _contextImpl(repo, params) {
|
|
1392
1415
|
await this.ensureInitialized(repo.id);
|
|
1393
1416
|
const { name, uid, file_path, kind, include_content } = params;
|
|
1394
1417
|
if (!name && !uid) {
|
|
@@ -1990,6 +2013,7 @@ export class LocalBackend {
|
|
|
1990
2013
|
impactedCount: 0,
|
|
1991
2014
|
risk: 'UNKNOWN',
|
|
1992
2015
|
suggestion: 'The graph query failed — try gitnexus context <symbol> as a fallback',
|
|
2016
|
+
...(isWalCorruptionError(err) ? { recoverySuggestion: WAL_RECOVERY_SUGGESTION } : {}),
|
|
1993
2017
|
};
|
|
1994
2018
|
}
|
|
1995
2019
|
}
|
package/package.json
CHANGED