gitnexus 1.6.4-rc.17 → 1.6.4-rc.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/dist/core/embeddings/embedding-pipeline.js +5 -2
- package/dist/core/group/config-parser.js +2 -0
- package/dist/core/group/matching.d.ts +3 -3
- package/dist/core/group/matching.js +46 -6
- package/dist/core/group/storage.js +2 -0
- package/dist/core/group/sync.js +1 -1
- package/dist/core/group/types.d.ts +18 -0
- package/dist/core/lbug/extension-loader.d.ts +86 -0
- package/dist/core/lbug/extension-loader.js +184 -0
- package/dist/core/lbug/lbug-adapter.d.ts +15 -13
- package/dist/core/lbug/lbug-adapter.js +32 -61
- package/dist/core/lbug/pool-adapter.js +11 -22
- package/package.json +1 -1
- package/scripts/install-duckdb-extension.mjs +37 -0
package/README.md
CHANGED
|
@@ -296,6 +296,25 @@ If `npm install -g gitnexus` fails on native modules:
|
|
|
296
296
|
npm install -g gitnexus
|
|
297
297
|
```
|
|
298
298
|
|
|
299
|
+
### Analyze warns about unavailable FTS or VECTOR extensions
|
|
300
|
+
|
|
301
|
+
GitNexus uses optional DuckDB extensions for BM25 and vector search. The `gitnexus serve` and MCP read paths only ever try to `LOAD` the extensions — they never block on a network install. The `analyze` command, by default, attempts one bounded out-of-process `INSTALL` if `LOAD` fails and proceeds even when that install times out, so the index is always written to disk; BM25/vector search degrade gracefully until the extensions become available.
|
|
302
|
+
|
|
303
|
+
Configure the behavior with two environment variables:
|
|
304
|
+
|
|
305
|
+
| Variable | Values | Default | Effect |
|
|
306
|
+
|----------|--------|---------|--------|
|
|
307
|
+
| `GITNEXUS_LBUG_EXTENSION_INSTALL` | `auto`, `load-only`, `never` | `auto` | `auto` runs one bounded INSTALL if LOAD fails. `load-only` only uses already-installed extensions (recommended for offline / firewalled environments). `never` skips optional extensions entirely. |
|
|
308
|
+
| `GITNEXUS_LBUG_EXTENSION_INSTALL_TIMEOUT_MS` | positive integer | `15000` | Wall-clock budget for the out-of-process `INSTALL` child before it is killed. |
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
# Offline/airgapped: never reach the network for extensions
|
|
312
|
+
GITNEXUS_LBUG_EXTENSION_INSTALL=load-only npx gitnexus analyze
|
|
313
|
+
|
|
314
|
+
# Slow network: give extension downloads more time
|
|
315
|
+
GITNEXUS_LBUG_EXTENSION_INSTALL_TIMEOUT_MS=30000 npx gitnexus analyze
|
|
316
|
+
```
|
|
317
|
+
|
|
299
318
|
### Analysis runs out of memory
|
|
300
319
|
|
|
301
320
|
For very large repositories:
|
|
package/dist/core/embeddings/embedding-pipeline.js
CHANGED
|
@@ -132,8 +132,11 @@ export const batchInsertEmbeddings = async (executeWithReusedStatement, updates)
|
|
|
132
132
|
|
|
133
133
|
*/
|
|
134
134
|
const createVectorIndex = async (executeQuery) => {
|
|
135
|
-
// Delegate to the adapter which tracks loaded state and handles DB reconnect resets
|
|
136
|
-
|
|
135
|
+
// Delegate to the adapter which tracks loaded state and handles DB reconnect resets.
|
|
136
|
+
// If the optional VECTOR extension cannot be loaded, semantic search degrades gracefully.
|
|
137
|
+
if (!(await loadVectorExtension())) {
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
137
140
|
try {
|
|
138
141
|
await executeQuery(CREATE_VECTOR_INDEX_QUERY);
|
|
139
142
|
}
|
|
package/dist/core/group/config-parser.js
CHANGED
|
@@ -14,6 +14,8 @@ const DEFAULT_MATCHING = {
|
|
|
14
14
|
bm25_threshold: 0.7,
|
|
15
15
|
embedding_threshold: 0.65,
|
|
16
16
|
max_candidates_per_step: 3,
|
|
17
|
+
exclude_links_paths: [],
|
|
18
|
+
exclude_links_param_only_paths: false,
|
|
17
19
|
};
|
|
18
20
|
export function parseGroupConfig(yamlContent) {
|
|
19
21
|
const raw = yaml.load(yamlContent, { schema: yaml.JSON_SCHEMA });
|
|
package/dist/core/group/matching.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { StoredContract, CrossLink } from './types.js';
|
|
1
|
+
import type { StoredContract, CrossLink, MatchingConfig } from './types.js';
|
|
2
2
|
export interface MatchResult {
|
|
3
3
|
matched: CrossLink[];
|
|
4
4
|
unmatched: StoredContract[];
|
|
@@ -8,6 +8,6 @@ export interface WildcardMatchResult {
|
|
|
8
8
|
remaining: StoredContract[];
|
|
9
9
|
}
|
|
10
10
|
export declare function normalizeContractId(id: string): string;
|
|
11
|
-
export declare function buildProviderIndex(contracts: StoredContract[]): Map<string, StoredContract[]>;
|
|
12
|
-
export declare function runExactMatch(contracts: StoredContract[], providerIndex?: Map<string, StoredContract[]>): MatchResult;
|
|
11
|
+
export declare function buildProviderIndex(contracts: StoredContract[], matchingConfig?: MatchingConfig): Map<string, StoredContract[]>;
|
|
12
|
+
export declare function runExactMatch(contracts: StoredContract[], providerIndex?: Map<string, StoredContract[]>, matchingConfig?: MatchingConfig): MatchResult;
|
|
13
13
|
export declare function runWildcardMatch(unmatched: StoredContract[], providerIndex: Map<string, StoredContract[]>): WildcardMatchResult;
|
|
package/dist/core/group/matching.js
CHANGED
|
@@ -1,6 +1,43 @@
|
|
|
1
1
|
function isGrpcWildcard(cid) {
|
|
2
2
|
return cid.startsWith('grpc::') && cid.endsWith('/*');
|
|
3
3
|
}
|
|
4
|
+
/**
|
|
5
|
+
* Detect HTTP contracts that are too generic or infrastructure-level to
|
|
6
|
+
* produce meaningful cross-repo links. These are still extracted (useful
|
|
7
|
+
* for documentation / route maps) but excluded from cross-link matching.
|
|
8
|
+
*
|
|
9
|
+
* Two categories:
|
|
10
|
+
* 1. Health-check / readiness endpoints — every service has one, matching
|
|
11
|
+
* them produces N×M false links.
|
|
12
|
+
* 2. Param-only paths — routes like `/{param}` or `/{param}/{param}` that
|
|
13
|
+
* collapse to a single catch-all after normalization. These match any
|
|
14
|
+
* service with a similar shape, producing false positives.
|
|
15
|
+
*
|
|
16
|
+
* Both are configurable via matching.exclude_links_paths and
|
|
17
|
+
* matching.exclude_links_param_only_paths in group.yaml.
|
|
18
|
+
*/
|
|
19
|
+
function buildNoisyContractFilter(matchingConfig) {
|
|
20
|
+
const excludePaths = matchingConfig?.exclude_links_paths?.length
|
|
21
|
+
? new Set(matchingConfig.exclude_links_paths.map((p) => p.replace(/\/+$/, '')))
|
|
22
|
+
: new Set();
|
|
23
|
+
const excludeParamOnly = matchingConfig?.exclude_links_param_only_paths === true;
|
|
24
|
+
return function isNoisyHttpContract(contractId) {
|
|
25
|
+
if (!contractId.startsWith('http::'))
|
|
26
|
+
return false;
|
|
27
|
+
const parts = contractId.split('::');
|
|
28
|
+
if (parts.length < 3)
|
|
29
|
+
return false;
|
|
30
|
+
const pathPart = parts.slice(2).join('::').replace(/\/+$/, '');
|
|
31
|
+
if (excludePaths.has(pathPart))
|
|
32
|
+
return true;
|
|
33
|
+
if (excludeParamOnly) {
|
|
34
|
+
const segments = pathPart.split('/').filter(Boolean);
|
|
35
|
+
if (segments.length > 0 && segments.every((s) => s === '{param}'))
|
|
36
|
+
return true;
|
|
37
|
+
}
|
|
38
|
+
return false;
|
|
39
|
+
};
|
|
40
|
+
}
|
|
4
41
|
export function normalizeContractId(id) {
|
|
5
42
|
const colonIdx = id.indexOf('::');
|
|
6
43
|
if (colonIdx === -1)
|
|
@@ -74,8 +111,9 @@ function findMatchingKeys(contractId, index) {
|
|
|
74
111
|
}
|
|
75
112
|
return [];
|
|
76
113
|
}
|
|
77
|
-
export function buildProviderIndex(contracts) {
|
|
78
|
-
const providers = contracts.filter((c) => c.role === 'provider');
|
|
114
|
+
export function buildProviderIndex(contracts, matchingConfig) {
|
|
115
|
+
const isNoisy = buildNoisyContractFilter(matchingConfig);
|
|
116
|
+
const providers = contracts.filter((c) => c.role === 'provider' && !isNoisy(c.contractId));
|
|
79
117
|
const index = new Map();
|
|
80
118
|
for (const p of providers) {
|
|
81
119
|
const key = normalizeContractId(p.contractId);
|
|
@@ -85,10 +123,10 @@ export function buildProviderIndex(contracts) {
|
|
|
85
123
|
}
|
|
86
124
|
return index;
|
|
87
125
|
}
|
|
88
|
-
export function runExactMatch(contracts, providerIndex) {
|
|
89
|
-
const
|
|
90
|
-
|
|
91
|
-
const consumers = contracts.filter((c) => c.role === 'consumer' && !isGrpcWildcard(c.contractId));
|
|
126
|
+
export function runExactMatch(contracts, providerIndex, matchingConfig) {
|
|
127
|
+
const isNoisy = buildNoisyContractFilter(matchingConfig);
|
|
128
|
+
const index = providerIndex ?? buildProviderIndex(contracts, matchingConfig);
|
|
129
|
+
const consumers = contracts.filter((c) => c.role === 'consumer' && !isGrpcWildcard(c.contractId) && !isNoisy(c.contractId));
|
|
92
130
|
const matched = [];
|
|
93
131
|
const matchedConsumerIds = new Set();
|
|
94
132
|
const matchedProviderIds = new Set();
|
|
@@ -129,6 +167,8 @@ export function runExactMatch(contracts, providerIndex) {
|
|
|
129
167
|
const normalUnmatched = contracts.filter((c) => {
|
|
130
168
|
if (isGrpcWildcard(c.contractId))
|
|
131
169
|
return false; // excluded from exact, handled separately
|
|
170
|
+
if (isNoisy(c.contractId))
|
|
171
|
+
return false; // excluded from matching — don't surface as unmatched
|
|
132
172
|
const id = `${c.repo}::${c.contractId}`;
|
|
133
173
|
return c.role === 'provider' ? !matchedProviderIds.has(id) : !matchedConsumerIds.has(id);
|
|
134
174
|
});
|
|
package/dist/core/group/storage.js
CHANGED
|
@@ -85,6 +85,8 @@ matching:
|
|
|
85
85
|
bm25_threshold: 0.7
|
|
86
86
|
embedding_threshold: 0.65
|
|
87
87
|
max_candidates_per_step: 3
|
|
88
|
+
# exclude_links_paths: [/ping, /health, /healthcheck]
|
|
89
|
+
# exclude_links_param_only_paths: false
|
|
88
90
|
`;
|
|
89
91
|
await fsp.writeFile(path.join(groupDir, 'group.yaml'), template, 'utf-8');
|
|
90
92
|
return groupDir;
|
package/dist/core/group/sync.js
CHANGED
|
@@ -168,7 +168,7 @@ export async function syncGroup(config, opts) {
|
|
|
168
168
|
console.log(` manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`);
|
|
169
169
|
}
|
|
170
170
|
}
|
|
171
|
-
const { matched, unmatched } = runExactMatch(autoContracts);
|
|
171
|
+
const { matched, unmatched } = runExactMatch(autoContracts, undefined, config.matching);
|
|
172
172
|
// Dedupe cross-links. Manifest contracts participate in runExactMatch, so a
|
|
173
173
|
// manifest-declared link can also emit a matchType:'exact' CrossLink with the
|
|
174
174
|
// same endpoints. Prefer the manifest version — it reflects operator intent
|
|
package/dist/core/group/types.d.ts
CHANGED
|
@@ -29,6 +29,24 @@ export interface MatchingConfig {
|
|
|
29
29
|
bm25_threshold: number;
|
|
30
30
|
embedding_threshold: number;
|
|
31
31
|
max_candidates_per_step: number;
|
|
32
|
+
/**
|
|
33
|
+
* HTTP paths to exclude from cross-link matching. Contracts at these paths
|
|
34
|
+
* are still extracted and visible in the registry, but they don't produce
|
|
35
|
+
* cross-repo links. Useful for health-check endpoints (`/ping`, `/health`)
|
|
36
|
+
* that every service exposes and would otherwise create N×M false links.
|
|
37
|
+
* Trailing slashes are normalized before comparison.
|
|
38
|
+
* @default []
|
|
39
|
+
*/
|
|
40
|
+
exclude_links_paths?: string[];
|
|
41
|
+
/**
|
|
42
|
+
* When `true`, exclude HTTP routes where every path segment is `{param}`
|
|
43
|
+
* (e.g. `/{param}`, `/{param}/{param}`) from cross-link matching. Mixed
|
|
44
|
+
* routes like `/users/{param}` are not affected. These param-only routes
|
|
45
|
+
* collapse to a single catch-all after normalization and produce false
|
|
46
|
+
* positives across unrelated services.
|
|
47
|
+
* @default false
|
|
48
|
+
*/
|
|
49
|
+
exclude_links_param_only_paths?: boolean;
|
|
32
50
|
}
|
|
33
51
|
export interface SymbolRef {
|
|
34
52
|
filePath: string;
|
|
package/dist/core/lbug/extension-loader.d.ts
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lifecycle policy for an optional DuckDB extension.
|
|
3
|
+
*
|
|
4
|
+
* - `auto` — try `LOAD`, fall back to one bounded out-of-process `INSTALL`
|
|
5
|
+
* attempt per process if `LOAD` fails. Default for analyze.
|
|
6
|
+
* - `load-only`— try `LOAD` only; never spawn an installer. Used by serve/MCP
|
|
7
|
+
* read paths so user queries never block on a network install.
|
|
8
|
+
* - `never` — skip the extension entirely. Operators can use this to
|
|
9
|
+
* forcibly disable optional search features.
|
|
10
|
+
*/
|
|
11
|
+
export type ExtensionInstallPolicy = 'auto' | 'load-only' | 'never';
|
|
12
|
+
export interface ExtensionInstallResult {
|
|
13
|
+
success: boolean;
|
|
14
|
+
timedOut: boolean;
|
|
15
|
+
message: string;
|
|
16
|
+
}
|
|
17
|
+
/** Snapshot of one optional extension's resolved capability state. */
|
|
18
|
+
export interface ExtensionCapability {
|
|
19
|
+
name: string;
|
|
20
|
+
loaded: boolean;
|
|
21
|
+
/** Human-readable reason when `loaded` is false. */
|
|
22
|
+
reason?: string;
|
|
23
|
+
}
|
|
24
|
+
/** Per-call overrides applied on top of `ExtensionManager` defaults. */
|
|
25
|
+
export interface ExtensionEnsureOptions {
|
|
26
|
+
policy?: ExtensionInstallPolicy;
|
|
27
|
+
installTimeoutMs?: number;
|
|
28
|
+
}
|
|
29
|
+
export interface ExtensionManagerOptions {
|
|
30
|
+
policy?: ExtensionInstallPolicy;
|
|
31
|
+
installTimeoutMs?: number;
|
|
32
|
+
installExtension?: (extensionName: string, timeoutMs: number) => Promise<ExtensionInstallResult>;
|
|
33
|
+
warn?: (message: string) => void;
|
|
34
|
+
}
|
|
35
|
+
export declare const getExtensionInstallTimeoutMs: () => number;
|
|
36
|
+
export declare const getExtensionInstallChildProcessArgs: (extensionName: string) => string[];
|
|
37
|
+
/**
|
|
38
|
+
* Run `INSTALL <extension>` in a short-lived child Node process so the parent
|
|
39
|
+
* event loop is never blocked by DuckDB's synchronous network call.
|
|
40
|
+
*
|
|
41
|
+
* The child opens its own scratch LadybugDB, executes the install, and exits.
|
|
42
|
+
* If the child exceeds `timeoutMs` the parent kills it with SIGKILL and
|
|
43
|
+
* resolves with `timedOut: true`.
|
|
44
|
+
*/
|
|
45
|
+
export declare const installDuckDbExtensionOutOfProcess: (extensionName: string, timeoutMs?: number) => Promise<ExtensionInstallResult>;
|
|
46
|
+
/**
|
|
47
|
+
* Centralized lifecycle manager for optional LadybugDB extensions.
|
|
48
|
+
*
|
|
49
|
+
* Always tries `LOAD EXTENSION <name>` first — it is per-connection,
|
|
50
|
+
* idempotent, and never touches the network. If `LOAD` fails and the active
|
|
51
|
+
* policy permits, the manager runs a single bounded out-of-process `INSTALL`
|
|
52
|
+
* attempt per process and retries `LOAD`. Capability outcomes are cached so
|
|
53
|
+
* unavailable extensions degrade search features without ever blocking
|
|
54
|
+
* subsequent analyze or query calls.
|
|
55
|
+
*
|
|
56
|
+
* Policy precedence (most specific wins):
|
|
57
|
+
* per-call `opts.policy` → constructor `options.policy` → env → `auto`
|
|
58
|
+
*/
|
|
59
|
+
export declare class ExtensionManager {
|
|
60
|
+
private readonly options;
|
|
61
|
+
private readonly capabilities;
|
|
62
|
+
private readonly installAttempted;
|
|
63
|
+
private readonly warnedKeys;
|
|
64
|
+
constructor(options?: ExtensionManagerOptions);
|
|
65
|
+
/** Reset cached capability and install state. Test-only. */
|
|
66
|
+
reset(): void;
|
|
67
|
+
/** Snapshot of currently-known optional extension capabilities. */
|
|
68
|
+
getCapabilities(): ExtensionCapability[];
|
|
69
|
+
/**
|
|
70
|
+
* Ensure an optional extension is loaded on the supplied connection.
|
|
71
|
+
*
|
|
72
|
+
* Returns `true` when the extension is usable on `query`, `false` when it
|
|
73
|
+
* is unavailable. Never throws on install failure — analyze and query
|
|
74
|
+
* paths are expected to degrade gracefully.
|
|
75
|
+
*/
|
|
76
|
+
ensure(query: (sql: string) => Promise<unknown>, name: string, label: string, opts?: ExtensionEnsureOptions): Promise<boolean>;
|
|
77
|
+
private tryLoad;
|
|
78
|
+
private markLoaded;
|
|
79
|
+
private markUnavailable;
|
|
80
|
+
}
|
|
81
|
+
/** Process-wide singleton shared by core and pool adapters. */
|
|
82
|
+
export declare const extensionManager: ExtensionManager;
|
|
83
|
+
/** Snapshot of which optional DuckDB extensions are loaded in this process. */
|
|
84
|
+
export declare const getExtensionCapabilities: () => ExtensionCapability[];
|
|
85
|
+
/** Test-only: clear the singleton's cached capability and install state. */
|
|
86
|
+
export declare const resetExtensionState: () => void;
|
|
package/dist/core/lbug/extension-loader.js
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import { spawn } from 'child_process';
|
|
2
|
+
import { fileURLToPath } from 'node:url';
|
|
3
|
+
const DEFAULT_EXTENSION_INSTALL_TIMEOUT_MS = 15_000;
|
|
4
|
+
const EXTENSION_NAME_PATTERN = /^[A-Za-z][A-Za-z0-9_]*$/;
|
|
5
|
+
const alreadyAvailable = (message) => message.includes('already loaded') ||
|
|
6
|
+
message.includes('already installed') ||
|
|
7
|
+
message.includes('already exists');
|
|
8
|
+
const resolvePolicyFromEnv = () => {
|
|
9
|
+
const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
|
|
10
|
+
if (raw === 'load-only' || raw === 'never' || raw === 'auto')
|
|
11
|
+
return raw;
|
|
12
|
+
return 'auto';
|
|
13
|
+
};
|
|
14
|
+
export const getExtensionInstallTimeoutMs = () => {
|
|
15
|
+
const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL_TIMEOUT_MS;
|
|
16
|
+
const parsed = raw ? Number(raw) : NaN;
|
|
17
|
+
return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_EXTENSION_INSTALL_TIMEOUT_MS;
|
|
18
|
+
};
|
|
19
|
+
export const getExtensionInstallChildProcessArgs = (extensionName) => {
|
|
20
|
+
const childScript = new URL('../../../scripts/install-duckdb-extension.mjs', import.meta.url);
|
|
21
|
+
return [fileURLToPath(childScript), extensionName];
|
|
22
|
+
};
|
|
23
|
+
/**
|
|
24
|
+
* Run `INSTALL <extension>` in a short-lived child Node process so the parent
|
|
25
|
+
* event loop is never blocked by DuckDB's synchronous network call.
|
|
26
|
+
*
|
|
27
|
+
* The child opens its own scratch LadybugDB, executes the install, and exits.
|
|
28
|
+
* If the child exceeds `timeoutMs` the parent kills it with SIGKILL and
|
|
29
|
+
* resolves with `timedOut: true`.
|
|
30
|
+
*/
|
|
31
|
+
export const installDuckDbExtensionOutOfProcess = async (extensionName, timeoutMs = getExtensionInstallTimeoutMs()) => {
|
|
32
|
+
if (!EXTENSION_NAME_PATTERN.test(extensionName)) {
|
|
33
|
+
throw new Error(`Invalid DuckDB extension name: ${extensionName}`);
|
|
34
|
+
}
|
|
35
|
+
return await new Promise((resolve) => {
|
|
36
|
+
const child = spawn(process.execPath, getExtensionInstallChildProcessArgs(extensionName), {
|
|
37
|
+
env: {
|
|
38
|
+
...process.env,
|
|
39
|
+
GITNEXUS_LBUG_EXTENSION_NAME: extensionName,
|
|
40
|
+
},
|
|
41
|
+
stdio: ['ignore', 'ignore', 'pipe'],
|
|
42
|
+
windowsHide: true,
|
|
43
|
+
});
|
|
44
|
+
let stderr = '';
|
|
45
|
+
child.stderr?.setEncoding('utf8');
|
|
46
|
+
child.stderr?.on('data', (chunk) => {
|
|
47
|
+
stderr = (stderr + chunk).slice(-4000);
|
|
48
|
+
});
|
|
49
|
+
let settled = false;
|
|
50
|
+
const timer = setTimeout(() => {
|
|
51
|
+
if (settled)
|
|
52
|
+
return;
|
|
53
|
+
settled = true;
|
|
54
|
+
child.kill('SIGKILL');
|
|
55
|
+
resolve({
|
|
56
|
+
success: false,
|
|
57
|
+
timedOut: true,
|
|
58
|
+
message: `INSTALL ${extensionName} timed out after ${timeoutMs}ms`,
|
|
59
|
+
});
|
|
60
|
+
}, timeoutMs);
|
|
61
|
+
child.on('error', (err) => {
|
|
62
|
+
if (settled)
|
|
63
|
+
return;
|
|
64
|
+
settled = true;
|
|
65
|
+
clearTimeout(timer);
|
|
66
|
+
resolve({ success: false, timedOut: false, message: err.message });
|
|
67
|
+
});
|
|
68
|
+
child.on('exit', (code, signal) => {
|
|
69
|
+
if (settled)
|
|
70
|
+
return;
|
|
71
|
+
settled = true;
|
|
72
|
+
clearTimeout(timer);
|
|
73
|
+
resolve({
|
|
74
|
+
success: code === 0,
|
|
75
|
+
timedOut: false,
|
|
76
|
+
message: code === 0
|
|
77
|
+
? `INSTALL ${extensionName} completed`
|
|
78
|
+
: `INSTALL ${extensionName} failed with ${signal ?? `exit code ${code}`}${stderr ? `: ${stderr.trim()}` : ''}`,
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
});
|
|
82
|
+
};
|
|
83
|
+
/**
|
|
84
|
+
* Centralized lifecycle manager for optional LadybugDB extensions.
|
|
85
|
+
*
|
|
86
|
+
* Always tries `LOAD EXTENSION <name>` first — it is per-connection,
|
|
87
|
+
* idempotent, and never touches the network. If `LOAD` fails and the active
|
|
88
|
+
* policy permits, the manager runs a single bounded out-of-process `INSTALL`
|
|
89
|
+
* attempt per process and retries `LOAD`. Capability outcomes are cached so
|
|
90
|
+
* unavailable extensions degrade search features without ever blocking
|
|
91
|
+
* subsequent analyze or query calls.
|
|
92
|
+
*
|
|
93
|
+
* Policy precedence (most specific wins):
|
|
94
|
+
* per-call `opts.policy` → constructor `options.policy` → env → `auto`
|
|
95
|
+
*/
|
|
96
|
+
export class ExtensionManager {
|
|
97
|
+
options;
|
|
98
|
+
capabilities = new Map();
|
|
99
|
+
installAttempted = new Map();
|
|
100
|
+
warnedKeys = new Set();
|
|
101
|
+
constructor(options = {}) {
|
|
102
|
+
this.options = options;
|
|
103
|
+
}
|
|
104
|
+
/** Reset cached capability and install state. Test-only. */
|
|
105
|
+
reset() {
|
|
106
|
+
this.capabilities.clear();
|
|
107
|
+
this.installAttempted.clear();
|
|
108
|
+
this.warnedKeys.clear();
|
|
109
|
+
}
|
|
110
|
+
/** Snapshot of currently-known optional extension capabilities. */
|
|
111
|
+
getCapabilities() {
|
|
112
|
+
return Array.from(this.capabilities.values());
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Ensure an optional extension is loaded on the supplied connection.
|
|
116
|
+
*
|
|
117
|
+
* Returns `true` when the extension is usable on `query`, `false` when it
|
|
118
|
+
* is unavailable. Never throws on install failure — analyze and query
|
|
119
|
+
* paths are expected to degrade gracefully.
|
|
120
|
+
*/
|
|
121
|
+
async ensure(query, name, label, opts = {}) {
|
|
122
|
+
if (!EXTENSION_NAME_PATTERN.test(name)) {
|
|
123
|
+
throw new Error(`Invalid DuckDB extension name: ${name}`);
|
|
124
|
+
}
|
|
125
|
+
const policy = opts.policy ?? this.options.policy ?? resolvePolicyFromEnv();
|
|
126
|
+
const timeoutMs = opts.installTimeoutMs ?? this.options.installTimeoutMs ?? getExtensionInstallTimeoutMs();
|
|
127
|
+
const warn = this.options.warn ?? console.warn;
|
|
128
|
+
if (policy === 'never') {
|
|
129
|
+
this.markUnavailable(name, label, 'extension install policy is "never"', warn);
|
|
130
|
+
return false;
|
|
131
|
+
}
|
|
132
|
+
if (await this.tryLoad(query, name)) {
|
|
133
|
+
this.markLoaded(name);
|
|
134
|
+
return true;
|
|
135
|
+
}
|
|
136
|
+
if (policy === 'load-only') {
|
|
137
|
+
this.markUnavailable(name, label, 'load-only policy: extension not pre-installed', warn);
|
|
138
|
+
return false;
|
|
139
|
+
}
|
|
140
|
+
let install = this.installAttempted.get(name);
|
|
141
|
+
if (!install) {
|
|
142
|
+
const installFn = this.options.installExtension ?? installDuckDbExtensionOutOfProcess;
|
|
143
|
+
install = await installFn(name, timeoutMs);
|
|
144
|
+
this.installAttempted.set(name, install);
|
|
145
|
+
}
|
|
146
|
+
if (!install.success) {
|
|
147
|
+
this.markUnavailable(name, label, install.message, warn);
|
|
148
|
+
return false;
|
|
149
|
+
}
|
|
150
|
+
if (await this.tryLoad(query, name)) {
|
|
151
|
+
this.markLoaded(name);
|
|
152
|
+
return true;
|
|
153
|
+
}
|
|
154
|
+
this.markUnavailable(name, label, `LOAD ${name} failed after successful INSTALL`, warn);
|
|
155
|
+
return false;
|
|
156
|
+
}
|
|
157
|
+
async tryLoad(query, name) {
|
|
158
|
+
try {
|
|
159
|
+
await query(`LOAD EXTENSION ${name}`);
|
|
160
|
+
return true;
|
|
161
|
+
}
|
|
162
|
+
catch (err) {
|
|
163
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
164
|
+
return alreadyAvailable(msg);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
markLoaded(name) {
|
|
168
|
+
this.capabilities.set(name, { name, loaded: true });
|
|
169
|
+
}
|
|
170
|
+
markUnavailable(name, label, reason, warn) {
|
|
171
|
+
this.capabilities.set(name, { name, loaded: false, reason });
|
|
172
|
+
const key = `${name}:${reason}`;
|
|
173
|
+
if (this.warnedKeys.has(key))
|
|
174
|
+
return;
|
|
175
|
+
this.warnedKeys.add(key);
|
|
176
|
+
warn(`GitNexus: ${label} extension unavailable; continuing without ${label} features. ${reason}`);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
/** Process-wide singleton shared by core and pool adapters. */
|
|
180
|
+
export const extensionManager = new ExtensionManager();
|
|
181
|
+
/** Snapshot of which optional DuckDB extensions are loaded in this process. */
|
|
182
|
+
export const getExtensionCapabilities = () => extensionManager.getCapabilities();
|
|
183
|
+
/** Test-only: clear the singleton's cached capability and install state. */
|
|
184
|
+
export const resetExtensionState = () => extensionManager.reset();
|
|
package/dist/core/lbug/lbug-adapter.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import lbug from '@ladybugdb/core';
|
|
2
2
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
3
3
|
import type { CachedEmbedding } from '../embeddings/types.js';
|
|
4
|
+
import { type ExtensionEnsureOptions } from './extension-loader.js';
|
|
4
5
|
/** Factory for creating WriteStreams — injectable for testing. */
|
|
5
6
|
export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
|
|
6
7
|
/** Result of splitting the relationship CSV into per-label-pair files. */
|
|
@@ -122,23 +123,24 @@ export declare const deleteNodesForFile: (filePath: string, dbPath?: string) =>
|
|
|
122
123
|
}>;
|
|
123
124
|
export declare const getEmbeddingTableName: () => string;
|
|
124
125
|
/**
|
|
125
|
-
* Load the FTS extension
|
|
126
|
+
* Load the FTS extension on the supplied connection (or the singleton
|
|
127
|
+
* writable connection when none is given).
|
|
126
128
|
*
|
|
127
|
-
*
|
|
128
|
-
*
|
|
129
|
-
*
|
|
130
|
-
*
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
134
|
-
* hasn't been cached yet.
|
|
129
|
+
* Delegates to the shared `ExtensionManager` so install policy (auto /
|
|
130
|
+
* load-only / never), out-of-process bounded INSTALL, and capability
|
|
131
|
+
* caching are owned in one place. The module-level `ftsLoaded` flag is
|
|
132
|
+
* kept purely as a per-call short-circuit on the singleton writable
|
|
133
|
+
* connection so repeated callers (e.g. createFTSIndex) avoid an extra
|
|
134
|
+
* `LOAD` round-trip per invocation. Pool adapter callers pass
|
|
135
|
+
* `{ policy: 'load-only' }` so query paths never block on a network install.
|
|
135
136
|
*/
|
|
136
|
-
export declare const loadFTSExtension: (targetConn?: lbug.Connection) => Promise<boolean>;
|
|
137
|
+
export declare const loadFTSExtension: (targetConn?: lbug.Connection, opts?: ExtensionEnsureOptions) => Promise<boolean>;
|
|
137
138
|
/**
|
|
138
|
-
* Load the VECTOR extension (
|
|
139
|
-
*
|
|
139
|
+
* Load the VECTOR extension on the supplied connection (or the singleton
|
|
140
|
+
* writable connection when none is given). See `loadFTSExtension` for the
|
|
141
|
+
* policy / capability contract — the same `ExtensionManager` owns both.
|
|
140
142
|
*/
|
|
141
|
-
export declare const loadVectorExtension: () => Promise<void>;
|
|
143
|
+
export declare const loadVectorExtension: (targetConn?: lbug.Connection, opts?: ExtensionEnsureOptions) => Promise<boolean>;
|
|
142
144
|
/**
|
|
143
145
|
* Create a full-text search index on a table
|
|
144
146
|
* @param tableName - The node table name (e.g., 'File', 'CodeSymbol')
|
|
package/dist/core/lbug/lbug-adapter.js
CHANGED
|
@@ -7,6 +7,7 @@ import path from 'path';
|
|
|
7
7
|
import lbug from '@ladybugdb/core';
|
|
8
8
|
import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, STALE_HASH_SENTINEL, } from './schema.js';
|
|
9
9
|
import { streamAllCSVsToDisk } from './csv-generator.js';
|
|
10
|
+
import { extensionManager } from './extension-loader.js';
|
|
10
11
|
/**
|
|
11
12
|
* Split a relationship CSV into per-label-pair files on disk.
|
|
12
13
|
*
|
|
@@ -287,7 +288,8 @@ const doInitLbug = async (dbPath) => {
|
|
|
287
288
|
}
|
|
288
289
|
}
|
|
289
290
|
}
|
|
290
|
-
// Load query extensions once per core adapter session.
|
|
291
|
+
// Load query extensions once per core adapter session. Missing optional
|
|
292
|
+
// extensions degrade search features but must not block analyze completion.
|
|
291
293
|
await loadFTSExtension();
|
|
292
294
|
await loadVectorExtension();
|
|
293
295
|
currentDbPath = dbPath;
|
|
@@ -1024,18 +1026,18 @@ export const getEmbeddingTableName = () => EMBEDDING_TABLE_NAME;
|
|
|
1024
1026
|
// Full-Text Search (FTS) Functions
|
|
1025
1027
|
// ============================================================================
|
|
1026
1028
|
/**
|
|
1027
|
-
* Load the FTS extension
|
|
1029
|
+
* Load the FTS extension on the supplied connection (or the singleton
|
|
1030
|
+
* writable connection when none is given).
|
|
1028
1031
|
*
|
|
1029
|
-
*
|
|
1030
|
-
*
|
|
1031
|
-
*
|
|
1032
|
-
*
|
|
1033
|
-
*
|
|
1034
|
-
*
|
|
1035
|
-
*
|
|
1036
|
-
* hasn't been cached yet.
|
|
1032
|
+
* Delegates to the shared `ExtensionManager` so install policy (auto /
|
|
1033
|
+
* load-only / never), out-of-process bounded INSTALL, and capability
|
|
1034
|
+
* caching are owned in one place. The module-level `ftsLoaded` flag is
|
|
1035
|
+
* kept purely as a per-call short-circuit on the singleton writable
|
|
1036
|
+
* connection so repeated callers (e.g. createFTSIndex) avoid an extra
|
|
1037
|
+
* `LOAD` round-trip per invocation. Pool adapter callers pass
|
|
1038
|
+
* `{ policy: 'load-only' }` so query paths never block on a network install.
|
|
1037
1039
|
*/
|
|
1038
|
-
export const loadFTSExtension = async (targetConn) => {
|
|
1040
|
+
export const loadFTSExtension = async (targetConn, opts = {}) => {
|
|
1039
1041
|
const useModuleState = targetConn === undefined;
|
|
1040
1042
|
if (useModuleState && ftsLoaded)
|
|
1041
1043
|
return true;
|
|
@@ -1043,61 +1045,28 @@ export const loadFTSExtension = async (targetConn) => {
|
|
|
1043
1045
|
if (!c) {
|
|
1044
1046
|
throw new Error('LadybugDB not initialized. Call initLbug first.');
|
|
1045
1047
|
}
|
|
1046
|
-
const
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
};
|
|
1051
|
-
try {
|
|
1052
|
-
// Try loading locally first (no network required)
|
|
1053
|
-
await c.query('LOAD EXTENSION fts');
|
|
1054
|
-
return markLoaded();
|
|
1055
|
-
}
|
|
1056
|
-
catch {
|
|
1057
|
-
// Fall back to install + load (requires network)
|
|
1058
|
-
try {
|
|
1059
|
-
await c.query('INSTALL fts');
|
|
1060
|
-
await c.query('LOAD EXTENSION fts');
|
|
1061
|
-
return markLoaded();
|
|
1062
|
-
}
|
|
1063
|
-
catch (err) {
|
|
1064
|
-
const msg = err?.message || '';
|
|
1065
|
-
if (msg.includes('already loaded') ||
|
|
1066
|
-
msg.includes('already installed') ||
|
|
1067
|
-
msg.includes('already exists')) {
|
|
1068
|
-
return markLoaded();
|
|
1069
|
-
}
|
|
1070
|
-
console.error('GitNexus: FTS extension load failed:', msg);
|
|
1071
|
-
return false;
|
|
1072
|
-
}
|
|
1073
|
-
}
|
|
1048
|
+
const loaded = await extensionManager.ensure((sql) => c.query(sql), 'fts', 'FTS', opts);
|
|
1049
|
+
if (loaded && useModuleState)
|
|
1050
|
+
ftsLoaded = true;
|
|
1051
|
+
return loaded;
|
|
1074
1052
|
};
|
|
1075
1053
|
/**
|
|
1076
|
-
* Load the VECTOR extension (
|
|
1077
|
-
*
|
|
1054
|
+
* Load the VECTOR extension on the supplied connection (or the singleton
|
|
1055
|
+
* writable connection when none is given). See `loadFTSExtension` for the
|
|
1056
|
+
* policy / capability contract — the same `ExtensionManager` owns both.
|
|
1078
1057
|
*/
|
|
1079
|
-
export const loadVectorExtension = async () => {
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1058
|
+
export const loadVectorExtension = async (targetConn, opts = {}) => {
|
|
1059
|
+
const useModuleState = targetConn === undefined;
|
|
1060
|
+
if (useModuleState && vectorExtensionLoaded)
|
|
1061
|
+
return true;
|
|
1062
|
+
const c = targetConn ?? conn;
|
|
1063
|
+
if (!c) {
|
|
1083
1064
|
throw new Error('LadybugDB not initialized. Call initLbug first.');
|
|
1084
1065
|
}
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
await conn.query('LOAD EXTENSION VECTOR');
|
|
1066
|
+
const loaded = await extensionManager.ensure((sql) => c.query(sql), 'VECTOR', 'VECTOR', opts);
|
|
1067
|
+
if (loaded && useModuleState)
|
|
1088
1068
|
vectorExtensionLoaded = true;
|
|
1089
|
-
|
|
1090
|
-
catch (err) {
|
|
1091
|
-
const msg = err?.message || '';
|
|
1092
|
-
if (msg.includes('already loaded') ||
|
|
1093
|
-
msg.includes('already installed') ||
|
|
1094
|
-
msg.includes('already exists')) {
|
|
1095
|
-
vectorExtensionLoaded = true;
|
|
1096
|
-
}
|
|
1097
|
-
else {
|
|
1098
|
-
console.error('GitNexus: VECTOR extension load failed:', msg);
|
|
1099
|
-
}
|
|
1100
|
-
}
|
|
1069
|
+
return loaded;
|
|
1101
1070
|
};
|
|
1102
1071
|
/**
|
|
1103
1072
|
* Create a full-text search index on a table
|
|
@@ -1110,7 +1079,9 @@ export const createFTSIndex = async (tableName, indexName, properties, stemmer =
|
|
|
1110
1079
|
if (!conn) {
|
|
1111
1080
|
throw new Error('LadybugDB not initialized. Call initLbug first.');
|
|
1112
1081
|
}
|
|
1113
|
-
await loadFTSExtension()
|
|
1082
|
+
if (!(await loadFTSExtension())) {
|
|
1083
|
+
return;
|
|
1084
|
+
}
|
|
1114
1085
|
const propList = properties.map((p) => `'${p}'`).join(', ');
|
|
1115
1086
|
const query = `CALL CREATE_FTS_INDEX('${tableName}', '${indexName}', [${propList}], stemmer := '${stemmer}')`;
|
|
1116
1087
|
try {
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
*/
|
|
17
17
|
import fs from 'fs/promises';
|
|
18
18
|
import lbug from '@ladybugdb/core';
|
|
19
|
-
import { loadFTSExtension } from './lbug-adapter.js';
|
|
19
|
+
import { loadFTSExtension, loadVectorExtension } from './lbug-adapter.js';
|
|
20
20
|
const pool = new Map();
|
|
21
21
|
const poolCloseListeners = new Set();
|
|
22
22
|
/**
|
|
@@ -284,19 +284,14 @@ async function doInitLbug(repoId, dbPath) {
|
|
|
284
284
|
// Load FTS extension once per shared Database.
|
|
285
285
|
// Done BEFORE pool registration so no concurrent checkout can grab
|
|
286
286
|
// the connection while the async FTS load is in progress.
|
|
287
|
+
// policy: 'load-only' — the read pool must never trigger a network
|
|
288
|
+
// install; analyze owns extension installation. If LOAD fails, search
|
|
289
|
+
// features degrade gracefully and the user-facing query path proceeds.
|
|
287
290
|
if (!shared.ftsLoaded) {
|
|
288
|
-
shared.ftsLoaded = await loadFTSExtension(available[0]);
|
|
291
|
+
shared.ftsLoaded = await loadFTSExtension(available[0], { policy: 'load-only' });
|
|
289
292
|
}
|
|
290
|
-
// Load VECTOR extension once per shared Database for semantic search support.
|
|
291
293
|
if (!shared.vectorLoaded) {
|
|
292
|
-
|
|
293
|
-
await available[0].query('INSTALL VECTOR');
|
|
294
|
-
await available[0].query('LOAD EXTENSION VECTOR');
|
|
295
|
-
shared.vectorLoaded = true;
|
|
296
|
-
}
|
|
297
|
-
catch {
|
|
298
|
-
// VECTOR extension may not be available
|
|
299
|
-
}
|
|
294
|
+
shared.vectorLoaded = await loadVectorExtension(available[0], { policy: 'load-only' });
|
|
300
295
|
}
|
|
301
296
|
// Register pool entry only after all connections are pre-warmed and FTS is
|
|
302
297
|
// loaded. Concurrent executeQuery calls see either "not initialized"
|
|
@@ -349,20 +344,14 @@ export async function initLbugWithDb(repoId, existingDb, dbPath) {
|
|
|
349
344
|
finally {
|
|
350
345
|
preWarmActive = false;
|
|
351
346
|
}
|
|
352
|
-
// Load FTS extension if not already loaded on this Database
|
|
347
|
+
// Load FTS extension if not already loaded on this Database.
|
|
348
|
+
// policy: 'load-only' — same contract as initLbug above; the read pool
|
|
349
|
+
// must not block on a network install during query execution.
|
|
353
350
|
if (!shared.ftsLoaded) {
|
|
354
|
-
shared.ftsLoaded = await loadFTSExtension(available[0]);
|
|
351
|
+
shared.ftsLoaded = await loadFTSExtension(available[0], { policy: 'load-only' });
|
|
355
352
|
}
|
|
356
|
-
// Load VECTOR extension for semantic search support
|
|
357
353
|
if (!shared.vectorLoaded) {
|
|
358
|
-
|
|
359
|
-
await available[0].query('INSTALL VECTOR');
|
|
360
|
-
await available[0].query('LOAD EXTENSION VECTOR');
|
|
361
|
-
shared.vectorLoaded = true;
|
|
362
|
-
}
|
|
363
|
-
catch {
|
|
364
|
-
// VECTOR extension may not be available
|
|
365
|
-
}
|
|
354
|
+
shared.vectorLoaded = await loadVectorExtension(available[0], { policy: 'load-only' });
|
|
366
355
|
}
|
|
367
356
|
pool.set(repoId, {
|
|
368
357
|
db: existingDb,
|
package/package.json
CHANGED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { createRequire } from 'node:module';
|
|
6
|
+
|
|
7
|
+
const EXTENSION_NAME_PATTERN = /^[A-Za-z][A-Za-z0-9_]*$/;
|
|
8
|
+
|
|
9
|
+
async function installDuckDbExtension(extensionName) {
|
|
10
|
+
if (!extensionName || !EXTENSION_NAME_PATTERN.test(extensionName)) {
|
|
11
|
+
throw new Error(`Invalid DuckDB extension name: ${extensionName ?? '<missing>'}`);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
const require = createRequire(import.meta.url);
|
|
15
|
+
const lbugModule = require('@ladybugdb/core');
|
|
16
|
+
const lbug = lbugModule.default ?? lbugModule;
|
|
17
|
+
|
|
18
|
+
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'gitnexus-ext-install-'));
|
|
19
|
+
const dbPath = path.join(tmpDir, 'install.lbug');
|
|
20
|
+
let db;
|
|
21
|
+
let conn;
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
db = new lbug.Database(dbPath);
|
|
25
|
+
conn = new lbug.Connection(db);
|
|
26
|
+
await conn.query(`INSTALL ${extensionName}`);
|
|
27
|
+
} finally {
|
|
28
|
+
if (conn) await conn.close().catch(() => {});
|
|
29
|
+
if (db) await db.close().catch(() => {});
|
|
30
|
+
await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
 * Print an installation failure to stderr and flag the process as failed.
 * Uses `process.exitCode` rather than `process.exit()` so the exit code is
 * set without terminating the process immediately.
 */
function reportInstallFailure(err) {
  let details;
  if (err instanceof Error) {
    details = err.stack ?? err.message;
  } else {
    details = String(err);
  }
  console.error(details);
  process.exitCode = 1;
}

// Script entry point: the extension name comes from the first CLI argument,
// falling back to the GITNEXUS_LBUG_EXTENSION_NAME environment variable.
installDuckDbExtension(process.argv[2] ?? process.env.GITNEXUS_LBUG_EXTENSION_NAME).catch(
  reportInstallFailure,
);
|