gitnexus 1.6.6-rc.89 → 1.6.6-rc.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +8 -0
- package/dist/cli/doctor.js +11 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +14 -0
- package/dist/core/embeddings/embedding-pipeline.js +29 -8
- package/dist/core/ingestion/languages/javascript/captures.js +15 -0
- package/dist/core/ingestion/languages/javascript/query.js +6 -0
- package/dist/core/ingestion/languages/typescript/array-callback.d.ts +67 -0
- package/dist/core/ingestion/languages/typescript/array-callback.js +94 -0
- package/dist/core/ingestion/languages/typescript/captures.js +15 -0
- package/dist/core/ingestion/languages/typescript/query.js +16 -14
- package/dist/core/ingestion/scope-extractor.d.ts +33 -1
- package/dist/core/ingestion/scope-extractor.js +53 -0
- package/dist/core/lbug/extension-loader.d.ts +17 -1
- package/dist/core/lbug/extension-loader.js +22 -1
- package/dist/core/run-analyze.d.ts +7 -0
- package/dist/core/run-analyze.js +51 -14
- package/dist/mcp/server.d.ts +16 -1
- package/dist/mcp/server.js +39 -8
- package/package.json +1 -1
package/dist/cli/analyze.js
CHANGED
|
@@ -828,6 +828,14 @@ const analyzeCommandImpl = async (inputPath, options) => {
|
|
|
828
828
|
console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
|
|
829
829
|
console.log(` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
|
|
830
830
|
console.log(` ${repoPath}`);
|
|
831
|
+
// Persistent (non-scrolling) warning when FTS indexing was skipped — the
|
|
832
|
+
// progress-bar log() that fired mid-run has already scrolled away, so the
|
|
833
|
+
// degraded-search state must also appear in the final summary (#1161).
|
|
834
|
+
if (result.ftsSkipped) {
|
|
835
|
+
console.log(`\n Warning: full-text/BM25 search is disabled — the LadybugDB FTS extension was unavailable.\n` +
|
|
836
|
+
` Install it once with network access (GITNEXUS_LBUG_EXTENSION_INSTALL=auto) then rerun, or\n` +
|
|
837
|
+
` run \`gitnexus analyze --repair-fts\` when connected. Run \`gitnexus doctor\` for details.`);
|
|
838
|
+
}
|
|
831
839
|
try {
|
|
832
840
|
await fs.access(getGlobalRegistryPath());
|
|
833
841
|
}
|
package/dist/cli/doctor.js
CHANGED
|
@@ -2,6 +2,7 @@ import { getRuntimeCapabilities, getRuntimeFingerprint } from '../core/platform/
|
|
|
2
2
|
import { resolveEmbeddingConfig } from '../core/embeddings/config.js';
|
|
3
3
|
import { isHttpMode } from '../core/embeddings/http-client.js';
|
|
4
4
|
import { checkLbugNative } from '../core/lbug/native-check.js';
|
|
5
|
+
import { getExtensionInstallPolicy } from '../core/lbug/extension-loader.js';
|
|
5
6
|
import { t } from './i18n/index.js';
|
|
6
7
|
function isCombiningMark(codePoint) {
|
|
7
8
|
return ((codePoint >= 0x0300 && codePoint <= 0x036f) ||
|
|
@@ -66,6 +67,16 @@ export const doctorCommand = async () => {
|
|
|
66
67
|
console.log(` ${label('doctor.labels.fullTextSearch', 18)}${capabilities.fts}`);
|
|
67
68
|
console.log(` ${label('doctor.labels.vectorIndex', 18)}${capabilities.vector}`);
|
|
68
69
|
console.log(` ${label('doctor.labels.semanticMode', 18)}${capabilities.semanticMode}`);
|
|
70
|
+
// Surface the optional-extension install policy so offline users can see
|
|
71
|
+
// whether analyze/query will reach the network (extension.ladybugdb.com).
|
|
72
|
+
// Literal label (like the 'native' line) to avoid adding i18n keys.
|
|
73
|
+
const installPolicy = getExtensionInstallPolicy();
|
|
74
|
+
const policyHint = installPolicy === 'load-only'
|
|
75
|
+
? ' (offline; load only, no network install)'
|
|
76
|
+
: installPolicy === 'never'
|
|
77
|
+
? ' (optional extensions disabled)'
|
|
78
|
+
: ' (installs missing extensions over network)';
|
|
79
|
+
console.log(` ${padDisplayEnd('Ext install:', 18)}${installPolicy}${policyHint}`);
|
|
69
80
|
console.log(` ${label('doctor.labels.exactScanLimit', 18)}${t('doctor.chunks', { count: capabilities.exactScanLimit })}`);
|
|
70
81
|
if (capabilities.reason)
|
|
71
82
|
console.log(` ${label('doctor.labels.note', 18)}${capabilities.reason}`);
|
|
package/dist/core/embeddings/embedding-pipeline.d.ts
CHANGED
|
@@ -9,6 +9,20 @@
|
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
11
|
import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
|
|
12
|
+
import type { ExtensionInstallPolicy } from '../lbug/extension-loader.js';
|
|
13
|
+
/**
|
|
14
|
+
* Resolve the extension-install policy for the embedding WRITE path (analyze).
|
|
15
|
+
*
|
|
16
|
+
* Generating embeddings is an explicit opt-in to a feature that requires the
|
|
17
|
+
* VECTOR extension, so when the operator has NOT pinned a policy we default to
|
|
18
|
+
* `auto` (one bounded, out-of-process INSTALL) — matching the documented
|
|
19
|
+
* "auto = default for analyze" intent in extension-loader.ts. An explicit
|
|
20
|
+
* GITNEXUS_LBUG_EXTENSION_INSTALL=load-only|never|auto always wins, so an
|
|
21
|
+
* offline or locked-down operator is never silently forced onto the network
|
|
22
|
+
* (the #1153 regression caused by hard-coding `auto` here). Read on every call
|
|
23
|
+
* (not memoized) so test env stubbing works.
|
|
24
|
+
*/
|
|
25
|
+
export declare const resolveEmbeddingInstallPolicy: () => ExtensionInstallPolicy;
|
|
12
26
|
/**
|
|
13
27
|
* Bump this when the embedding text template changes in a way that should
|
|
14
28
|
* invalidate existing vectors, such as metadata/header shape changes,
|
|
package/dist/core/embeddings/embedding-pipeline.js
CHANGED
|
@@ -21,13 +21,30 @@ import { loadVectorExtension } from '../lbug/lbug-adapter.js';
|
|
|
21
21
|
import { getExactScanLimit } from '../platform/capabilities.js';
|
|
22
22
|
import { logger } from '../logger.js';
|
|
23
23
|
const isDev = process.env.NODE_ENV === 'development';
|
|
24
|
-
const vectorUnavailableMessage = 'VECTOR extension
|
|
24
|
+
const vectorUnavailableMessage = 'VECTOR extension unavailable; semantic embeddings fall back to exact scan. ' +
|
|
25
|
+
'To enable vector search, install it once with network access ' +
|
|
26
|
+
'(GITNEXUS_LBUG_EXTENSION_INSTALL=auto), or pre-install it for offline use. ' +
|
|
27
|
+
'Set GITNEXUS_LBUG_EXTENSION_INSTALL=never to skip installs and silence this.';
|
|
28
|
+
/**
|
|
29
|
+
* Resolve the extension-install policy for the embedding WRITE path (analyze).
|
|
30
|
+
*
|
|
31
|
+
* Generating embeddings is an explicit opt-in to a feature that requires the
|
|
32
|
+
* VECTOR extension, so when the operator has NOT pinned a policy we default to
|
|
33
|
+
* `auto` (one bounded, out-of-process INSTALL) — matching the documented
|
|
34
|
+
* "auto = default for analyze" intent in extension-loader.ts. An explicit
|
|
35
|
+
* GITNEXUS_LBUG_EXTENSION_INSTALL=load-only|never|auto always wins, so an
|
|
36
|
+
* offline or locked-down operator is never silently forced onto the network
|
|
37
|
+
* (the #1153 regression caused by hard-coding `auto` here). Read on every call
|
|
38
|
+
* (not memoized) so test env stubbing works.
|
|
39
|
+
*/
|
|
40
|
+
export const resolveEmbeddingInstallPolicy = () => {
|
|
41
|
+
const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
|
|
42
|
+
if (raw === 'load-only' || raw === 'never' || raw === 'auto')
|
|
43
|
+
return raw;
|
|
44
|
+
return 'auto';
|
|
45
|
+
};
|
|
25
46
|
const ensureVectorExtensionAvailable = async () => {
|
|
26
|
-
|
|
27
|
-
if (!vectorReady) {
|
|
28
|
-
return false;
|
|
29
|
-
}
|
|
30
|
-
return true;
|
|
47
|
+
return loadVectorExtension(undefined, { policy: resolveEmbeddingInstallPolicy() });
|
|
31
48
|
};
|
|
32
49
|
/**
|
|
33
50
|
* Bump this when the embedding text template changes in a way that should
|
|
@@ -176,7 +193,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
176
193
|
let totalChunks = 0;
|
|
177
194
|
try {
|
|
178
195
|
const vectorAvailable = await ensureVectorExtensionAvailable();
|
|
179
|
-
if (!vectorAvailable
|
|
196
|
+
if (!vectorAvailable) {
|
|
180
197
|
logger.warn(vectorUnavailableMessage);
|
|
181
198
|
}
|
|
182
199
|
// Phase 1: Load embedding model
|
|
@@ -427,7 +444,11 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
|
|
|
427
444
|
const queryVec = embeddingToArray(queryEmbedding);
|
|
428
445
|
const queryVecStr = `[${queryVec.join(',')}]`;
|
|
429
446
|
let bestChunks = new Map();
|
|
430
|
-
|
|
447
|
+
// Query/read path: NEVER spawn a network INSTALL on a user query. If the
|
|
448
|
+
// VECTOR extension was not pre-installed, fall back to exact scan rather than
|
|
449
|
+
// blocking the query on a download (offline-first; see extension-loader.ts
|
|
450
|
+
// "load-only" — used by all serve/MCP query paths).
|
|
451
|
+
if (await loadVectorExtension(undefined, { policy: 'load-only' })) {
|
|
431
452
|
try {
|
|
432
453
|
bestChunks = await collectBestChunks(k, async (fetchLimit) => {
|
|
433
454
|
const vectorQuery = `
|
|
package/dist/core/ingestion/languages/javascript/captures.js
CHANGED
|
@@ -31,6 +31,7 @@ import { splitImportStatement } from '../typescript/import-decomposer.js';
|
|
|
31
31
|
import { getJsParser, getJsScopeQuery, jsCachedTreeMatchesGrammar } from './query.js';
|
|
32
32
|
import { computeTsArityMetadata } from '../typescript/arity-metadata.js';
|
|
33
33
|
import { synthesizeTsReceiverBinding } from '../typescript/receiver-binding.js';
|
|
34
|
+
import { isArrayMethodCallbackArrow } from '../typescript/array-callback.js';
|
|
34
35
|
import { getTreeSitterBufferSize } from '../../constants.js';
|
|
35
36
|
import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js';
|
|
36
37
|
/** JS function-like node types that may carry a synthesized `this` binding.
|
|
@@ -601,6 +602,20 @@ export function emitJsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
601
602
|
continue;
|
|
602
603
|
}
|
|
603
604
|
}
|
|
605
|
+
// #1876: drop @declaration.function for array higher-order-method
|
|
606
|
+
// callbacks (`const x = arr.map(a => …)`). The HOC-wrapped-arrow
|
|
607
|
+
// pattern matches them, but the binding holds a value, not a callable.
|
|
608
|
+
// The binding keeps its separate @declaration.const / .variable match,
|
|
609
|
+
// and the arrow's own @scope.function match (a different pattern) is
|
|
610
|
+
// untouched, so inner-call attribution falls through to the enclosing
|
|
611
|
+
// scope instead of a phantom Function.
|
|
612
|
+
const fnDeclAnchor = grouped['@declaration.function'];
|
|
613
|
+
if (fnDeclAnchor !== undefined) {
|
|
614
|
+
const arrowNode = findFunctionNode(tree.rootNode, fnDeclAnchor.range);
|
|
615
|
+
if (arrowNode !== null && isArrayMethodCallbackArrow(arrowNode)) {
|
|
616
|
+
continue;
|
|
617
|
+
}
|
|
618
|
+
}
|
|
604
619
|
// Synthesize arity metadata on function-like declarations.
|
|
605
620
|
const declAnchor = pickFirstDefined(grouped, FUNCTION_DECL_TAGS);
|
|
606
621
|
if (declAnchor !== undefined) {
|
|
package/dist/core/ingestion/languages/javascript/query.js
CHANGED
|
@@ -144,6 +144,12 @@ const JAVASCRIPT_SCOPE_QUERY = `
|
|
|
144
144
|
;; HOC-wrapped variable declarations: const X = HOC((args) => { ... }).
|
|
145
145
|
;; Covers React.forwardRef, memo, useCallback, useMemo, observer,
|
|
146
146
|
;; debounce, and any user-defined HOC factory.
|
|
147
|
+
;;
|
|
148
|
+
;; #1876: this shape also matches array higher-order-method callbacks
|
|
149
|
+
;; (const x = arr.map(a => ...)), where x is a value, not a function.
|
|
150
|
+
;; Those are filtered out emit-side in captures.ts via
|
|
151
|
+
;; isArrayMethodCallbackArrow (member-expression callee whose property
|
|
152
|
+
;; is a known Array method), so only the @declaration.const survives.
|
|
147
153
|
(lexical_declaration
|
|
148
154
|
(variable_declarator
|
|
149
155
|
name: (identifier) @declaration.name
|
|
package/dist/core/ingestion/languages/typescript/array-callback.d.ts
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Array higher-order-method callback detection (issue #1876).
|
|
3
|
+
*
|
|
4
|
+
* The HOC-wrapped-arrow declaration pattern in the JS/TS scope queries
|
|
5
|
+
* (`const X = call((args) => …)`) was added for React idioms
|
|
6
|
+
* (`forwardRef` / `memo` / `useCallback`). It has the same AST shape as
|
|
7
|
+
* an array higher-order-method call (`const x = arr.map(a => …)`), so
|
|
8
|
+
* those callbacks also match and produce a spurious `@declaration.function`
|
|
9
|
+
* named after the binding — duplicating the `@declaration.const` /
|
|
10
|
+
* `@declaration.variable` def that the same binding already gets.
|
|
11
|
+
*
|
|
12
|
+
* For an array-method callback the binding holds a *value* (the method's
|
|
13
|
+
* result), not a callable, so the `Function` def is semantically wrong.
|
|
14
|
+
* `isArrayMethodCallbackArrow` lets the emitter (`captures.ts`) drop that
|
|
15
|
+
* `@declaration.function` match, leaving only the value def.
|
|
16
|
+
*
|
|
17
|
+
* Shared by both the JavaScript and TypeScript capture emitters — the
|
|
18
|
+
* relevant grammar nodes (`arrow_function`, `function_expression`,
|
|
19
|
+
* `arguments`, `call_expression`, `member_expression`,
|
|
20
|
+
* `property_identifier`) are identical across `tree-sitter-javascript`
|
|
21
|
+
* and `tree-sitter-typescript`.
|
|
22
|
+
*
|
|
23
|
+
* Pure given the input node. No I/O, no globals.
|
|
24
|
+
*/
|
|
25
|
+
import type { SyntaxNode } from '../../utils/ast-helpers.js';
|
|
26
|
+
/**
|
|
27
|
+
* Array prototype higher-order methods whose result is a value, not a
|
|
28
|
+
* function. A callback passed to one of these is an anonymous callback,
|
|
29
|
+
* never a top-level function definition. Identifier-callee HOCs
|
|
30
|
+
* (`forwardRef(...)`, `useCallback(...)`, custom factories) are
|
|
31
|
+
* deliberately NOT listed — they keep their `Function` classification.
|
|
32
|
+
*
|
|
33
|
+
* Trade-off (unchanged from before #1876): a custom *fluent-API* member
|
|
34
|
+
* call with a callback whose method name is not in this set
|
|
35
|
+
* (`qb.where(x => …)`) still classifies as `Function`. There is no clean
|
|
36
|
+
* syntactic line beyond the well-known Array surface, so the set is
|
|
37
|
+
* intentionally closed and easy to extend.
|
|
38
|
+
*
|
|
39
|
+
* Receiver-blind, by design: the match keys on the method NAME only, never
|
|
40
|
+
* the receiver type (tree-sitter has no type information here). So an in-set
|
|
41
|
+
* name on a NON-array receiver — `Map`/`Set` `.forEach`, an RxJS
|
|
42
|
+
* `observable.map(…)`, a query builder `.sort(…)`, a lodash chain
|
|
43
|
+
* `.filter(…)` — is ALSO treated as a callback and has its
|
|
44
|
+
* `@declaration.function` dropped. This is an accepted limitation, not a
|
|
45
|
+
* regression: those bindings hold the call's *result value*, not a callable,
|
|
46
|
+
* so a value def is the correct classification anyway. The only genuine loss
|
|
47
|
+
* is a bespoke DSL whose in-set-named method returns something callable —
|
|
48
|
+
* rare enough to accept rather than guard with type inference. Pinned by the
|
|
49
|
+
* "in-set method on a non-array receiver" case in `*-captures.test.ts`.
|
|
50
|
+
*/
|
|
51
|
+
export declare const ARRAY_CALLBACK_METHODS: ReadonlySet<string>;
|
|
52
|
+
/**
|
|
53
|
+
* True when `node` (an `arrow_function` / `function_expression`) is the
|
|
54
|
+
* callback argument of an array higher-order-method call, i.e. the
|
|
55
|
+
* enclosing call's callee is a `member_expression` whose property is one
|
|
56
|
+
* of {@link ARRAY_CALLBACK_METHODS}.
|
|
57
|
+
*
|
|
58
|
+
* Returns false for direct assignments (`const fn = () => {}` — parent is
|
|
59
|
+
* `variable_declarator`, not `arguments`) and for identifier-callee HOCs
|
|
60
|
+
* (`forwardRef(() => …)` — callee is an `identifier`, not a
|
|
61
|
+
* `member_expression`), so neither is ever suppressed.
|
|
62
|
+
*
|
|
63
|
+
* Intentional non-suppressing gaps (preserve current behavior, no
|
|
64
|
+
* regression): parenthesized callee `(arr.map)(cb)` (`parenthesized_expression`)
|
|
65
|
+
* and computed callee `arr['map'](cb)` (`subscript_expression`).
|
|
66
|
+
*/
|
|
67
|
+
export declare function isArrayMethodCallbackArrow(node: SyntaxNode): boolean;
|
|
package/dist/core/ingestion/languages/typescript/array-callback.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Array higher-order-method callback detection (issue #1876).
|
|
3
|
+
*
|
|
4
|
+
* The HOC-wrapped-arrow declaration pattern in the JS/TS scope queries
|
|
5
|
+
* (`const X = call((args) => …)`) was added for React idioms
|
|
6
|
+
* (`forwardRef` / `memo` / `useCallback`). It has the same AST shape as
|
|
7
|
+
* an array higher-order-method call (`const x = arr.map(a => …)`), so
|
|
8
|
+
* those callbacks also match and produce a spurious `@declaration.function`
|
|
9
|
+
* named after the binding — duplicating the `@declaration.const` /
|
|
10
|
+
* `@declaration.variable` def that the same binding already gets.
|
|
11
|
+
*
|
|
12
|
+
* For an array-method callback the binding holds a *value* (the method's
|
|
13
|
+
* result), not a callable, so the `Function` def is semantically wrong.
|
|
14
|
+
* `isArrayMethodCallbackArrow` lets the emitter (`captures.ts`) drop that
|
|
15
|
+
* `@declaration.function` match, leaving only the value def.
|
|
16
|
+
*
|
|
17
|
+
* Shared by both the JavaScript and TypeScript capture emitters — the
|
|
18
|
+
* relevant grammar nodes (`arrow_function`, `function_expression`,
|
|
19
|
+
* `arguments`, `call_expression`, `member_expression`,
|
|
20
|
+
* `property_identifier`) are identical across `tree-sitter-javascript`
|
|
21
|
+
* and `tree-sitter-typescript`.
|
|
22
|
+
*
|
|
23
|
+
* Pure given the input node. No I/O, no globals.
|
|
24
|
+
*/
|
|
25
|
+
/**
|
|
26
|
+
* Array prototype higher-order methods whose result is a value, not a
|
|
27
|
+
* function. A callback passed to one of these is an anonymous callback,
|
|
28
|
+
* never a top-level function definition. Identifier-callee HOCs
|
|
29
|
+
* (`forwardRef(...)`, `useCallback(...)`, custom factories) are
|
|
30
|
+
* deliberately NOT listed — they keep their `Function` classification.
|
|
31
|
+
*
|
|
32
|
+
* Trade-off (unchanged from before #1876): a custom *fluent-API* member
|
|
33
|
+
* call with a callback whose method name is not in this set
|
|
34
|
+
* (`qb.where(x => …)`) still classifies as `Function`. There is no clean
|
|
35
|
+
* syntactic line beyond the well-known Array surface, so the set is
|
|
36
|
+
* intentionally closed and easy to extend.
|
|
37
|
+
*
|
|
38
|
+
* Receiver-blind, by design: the match keys on the method NAME only, never
|
|
39
|
+
* the receiver type (tree-sitter has no type information here). So an in-set
|
|
40
|
+
* name on a NON-array receiver — `Map`/`Set` `.forEach`, an RxJS
|
|
41
|
+
* `observable.map(…)`, a query builder `.sort(…)`, a lodash chain
|
|
42
|
+
* `.filter(…)` — is ALSO treated as a callback and has its
|
|
43
|
+
* `@declaration.function` dropped. This is an accepted limitation, not a
|
|
44
|
+
* regression: those bindings hold the call's *result value*, not a callable,
|
|
45
|
+
* so a value def is the correct classification anyway. The only genuine loss
|
|
46
|
+
* is a bespoke DSL whose in-set-named method returns something callable —
|
|
47
|
+
* rare enough to accept rather than guard with type inference. Pinned by the
|
|
48
|
+
* "in-set method on a non-array receiver" case in `*-captures.test.ts`.
|
|
49
|
+
*/
|
|
50
|
+
export const ARRAY_CALLBACK_METHODS = new Set([
|
|
51
|
+
'map',
|
|
52
|
+
'filter',
|
|
53
|
+
'find',
|
|
54
|
+
'findIndex',
|
|
55
|
+
'findLast',
|
|
56
|
+
'findLastIndex',
|
|
57
|
+
'forEach',
|
|
58
|
+
'reduce',
|
|
59
|
+
'reduceRight',
|
|
60
|
+
'some',
|
|
61
|
+
'every',
|
|
62
|
+
'flatMap',
|
|
63
|
+
'sort',
|
|
64
|
+
]);
|
|
65
|
+
/**
|
|
66
|
+
* True when `node` (an `arrow_function` / `function_expression`) is the
|
|
67
|
+
* callback argument of an array higher-order-method call, i.e. the
|
|
68
|
+
* enclosing call's callee is a `member_expression` whose property is one
|
|
69
|
+
* of {@link ARRAY_CALLBACK_METHODS}.
|
|
70
|
+
*
|
|
71
|
+
* Returns false for direct assignments (`const fn = () => {}` — parent is
|
|
72
|
+
* `variable_declarator`, not `arguments`) and for identifier-callee HOCs
|
|
73
|
+
* (`forwardRef(() => …)` — callee is an `identifier`, not a
|
|
74
|
+
* `member_expression`), so neither is ever suppressed.
|
|
75
|
+
*
|
|
76
|
+
* Intentional non-suppressing gaps (preserve current behavior, no
|
|
77
|
+
* regression): parenthesized callee `(arr.map)(cb)` (`parenthesized_expression`)
|
|
78
|
+
* and computed callee `arr['map'](cb)` (`subscript_expression`).
|
|
79
|
+
*/
|
|
80
|
+
export function isArrayMethodCallbackArrow(node) {
|
|
81
|
+
const args = node.parent;
|
|
82
|
+
if (args === null || args.type !== 'arguments')
|
|
83
|
+
return false;
|
|
84
|
+
const call = args.parent;
|
|
85
|
+
if (call === null || call.type !== 'call_expression')
|
|
86
|
+
return false;
|
|
87
|
+
const callee = call.childForFieldName('function');
|
|
88
|
+
if (callee === null || callee.type !== 'member_expression')
|
|
89
|
+
return false;
|
|
90
|
+
const property = callee.childForFieldName('property');
|
|
91
|
+
if (property === null || property.type !== 'property_identifier')
|
|
92
|
+
return false;
|
|
93
|
+
return ARRAY_CALLBACK_METHODS.has(property.text);
|
|
94
|
+
}
|
|
package/dist/core/ingestion/languages/typescript/captures.js
CHANGED
|
@@ -30,6 +30,7 @@ import { getTsParser, getTsScopeQuery, tsCachedTreeMatchesGrammar } from './quer
|
|
|
30
30
|
import { recordCacheHit, recordCacheMiss } from './cache-stats.js';
|
|
31
31
|
import { synthesizeTsReceiverBinding } from './receiver-binding.js';
|
|
32
32
|
import { computeTsArityMetadata } from './arity-metadata.js';
|
|
33
|
+
import { isArrayMethodCallbackArrow } from './array-callback.js';
|
|
33
34
|
import { getTreeSitterBufferSize } from '../../constants.js';
|
|
34
35
|
import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js';
|
|
35
36
|
/** tree-sitter-typescript node types for function-like scopes that may
|
|
@@ -228,6 +229,20 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
|
|
|
228
229
|
continue;
|
|
229
230
|
}
|
|
230
231
|
}
|
|
232
|
+
// #1876: drop @declaration.function for array higher-order-method
|
|
233
|
+
// callbacks (`const x = arr.map(a => …)`). The HOC-wrapped-arrow
|
|
234
|
+
// pattern matches them, but the binding holds a value, not a callable.
|
|
235
|
+
// The binding keeps its separate @declaration.const / .variable match,
|
|
236
|
+
// and the arrow's own @scope.function match (a different pattern) is
|
|
237
|
+
// untouched, so inner-call attribution falls through to the enclosing
|
|
238
|
+
// scope instead of a phantom Function.
|
|
239
|
+
const fnDeclAnchor = grouped['@declaration.function'];
|
|
240
|
+
if (fnDeclAnchor !== undefined) {
|
|
241
|
+
const arrowNode = findFunctionNode(tree.rootNode, fnDeclAnchor.range, groupedNodes['@declaration.function']);
|
|
242
|
+
if (arrowNode !== null && isArrayMethodCallbackArrow(arrowNode)) {
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
245
|
+
}
|
|
231
246
|
// Synthesize arity metadata on function-like declaration anchors
|
|
232
247
|
// before pushing the match. The registry uses these to narrow
|
|
233
248
|
// overloads — TypeScript supports overload signatures via
|
|
package/dist/core/ingestion/languages/typescript/query.js
CHANGED
|
@@ -246,20 +246,22 @@ const TYPESCRIPT_SCOPE_QUERY = `
|
|
|
246
246
|
;; that promotes the binding to the parent scope (where \`const X\`
|
|
247
247
|
;; lives).
|
|
248
248
|
;;
|
|
249
|
-
;;
|
|
250
|
-
;; has the same syntactic shape and
|
|
251
|
-
;; \`.find\` callback as \`x\`.
|
|
252
|
-
;;
|
|
253
|
-
;;
|
|
254
|
-
;;
|
|
255
|
-
;;
|
|
256
|
-
;;
|
|
257
|
-
;;
|
|
258
|
-
;;
|
|
259
|
-
;;
|
|
260
|
-
;;
|
|
261
|
-
;;
|
|
262
|
-
;;
|
|
249
|
+
;; #1876 — chained array-method form: \`const x = arr.find((y) => p(y))\`
|
|
250
|
+
;; has the same syntactic shape and matches here too, naming the
|
|
251
|
+
;; \`.find\` callback as \`x\`. Because \`x\` holds a value (the method
|
|
252
|
+
;; result), not a callable, the spurious \`Function:x\` def is dropped
|
|
253
|
+
;; emit-side in captures.ts: \`isArrayMethodCallbackArrow\` skips any
|
|
254
|
+
;; \`@declaration.function\` whose enclosing call has a member-expression
|
|
255
|
+
;; callee with a known Array-method property (\`ARRAY_CALLBACK_METHODS\`:
|
|
256
|
+
;; \`map\` / \`filter\` / \`find\` / \`reduce\` / \`forEach\` / \`some\` /
|
|
257
|
+
;; \`every\` / …). Only the \`@declaration.variable\` survives, so the
|
|
258
|
+
;; binding is a single value def and calls inside the callback attribute
|
|
259
|
+
;; to the enclosing scope rather than \`Function:x\`.
|
|
260
|
+
;;
|
|
261
|
+
;; Residual (intentional): a user-defined fluent-API method with a
|
|
262
|
+
;; callback (\`qb.where(x => …)\`) is NOT in the blocklist and still
|
|
263
|
+
;; classifies as \`Function\` — there's no clean syntactic line beyond
|
|
264
|
+
;; the well-known Array surface, so the set is closed and easy to extend.
|
|
263
265
|
;;
|
|
264
266
|
;; Trade-off — multi-arrow arguments: \`const x = call(arrow1, arrow2)\`
|
|
265
267
|
;; would emit TWO matches with the same name \`x\`. tree-sitter-query
|
|
package/dist/core/ingestion/scope-extractor.d.ts
CHANGED
|
@@ -58,7 +58,7 @@
|
|
|
58
58
|
* - `ParsedFile.localDefs` — flattened union of `Scope.ownedDefs`.
|
|
59
59
|
* - `ParsedFile.referenceSites` — pre-resolution usage facts.
|
|
60
60
|
*/
|
|
61
|
-
import type { CaptureMatch, ParsedFile } from '../../_shared/index.js';
|
|
61
|
+
import type { CaptureMatch, ParsedFile, SymbolDefinition } from '../../_shared/index.js';
|
|
62
62
|
import type { LanguageProvider } from './language-provider.js';
|
|
63
63
|
/**
|
|
64
64
|
* The subset of `LanguageProvider` hooks that `extract()` reads. Declared
|
|
@@ -86,3 +86,35 @@ export type ScopeExtractorHooks = Pick<LanguageProvider, 'resolveScopeKind' | 'b
|
|
|
86
86
|
* templates with mixed PHP/HTML/JS).
|
|
87
87
|
*/
|
|
88
88
|
export declare function extract(matches: readonly CaptureMatch[], filePath: string, provider: ScopeExtractorHooks): ParsedFile;
|
|
89
|
+
/**
|
|
90
|
+
* Collapse rule for the deferred node-creation migration (#1876).
|
|
91
|
+
*
|
|
92
|
+
* When graph-node creation moves from the legacy DAG onto the
|
|
93
|
+
* registry-primary path, a single source binding can carry more than one
|
|
94
|
+
* `SymbolDefinition` for the same name in the same scope — e.g. a direct
|
|
95
|
+
* arrow `const fn = () => {}` is classified BOTH as a `Function` (the
|
|
96
|
+
* arrow) and a `Variable` (the binding). Emitting one graph node per def
|
|
97
|
+
* would reproduce exactly the duplicate-node bug this issue tracks.
|
|
98
|
+
*
|
|
99
|
+
* `selectNodeBearingDef` picks the ONE def that should bear the graph node
|
|
100
|
+
* for such a binding group:
|
|
101
|
+
*
|
|
102
|
+
* 1. a function-like def (`Function` / `Method` / `Constructor`) if any —
|
|
103
|
+
* the binding is callable and must keep incoming `CALLS` edges;
|
|
104
|
+
* 2. otherwise a value def (`Const` / `Variable`) — the binding holds a
|
|
105
|
+
* value (e.g. an array-method result after the U1/U2 narrowing);
|
|
106
|
+
* 3. otherwise the first def — deterministic fallback for label sets this
|
|
107
|
+
* rule does not rank.
|
|
108
|
+
*
|
|
109
|
+
* INPUT CONTRACT: `group` must be the defs bound to ONE name within ONE
|
|
110
|
+
* scope (a binding group). It deliberately does NOT dedup by range —
|
|
111
|
+
* `SymbolDefinition` carries no range and `makeDefId` encodes only the
|
|
112
|
+
* start position, so containment is uncomputable here; the caller forms the
|
|
113
|
+
* group (e.g. from a scope's `ownedDefs` keyed by name) before calling.
|
|
114
|
+
*
|
|
115
|
+
* Pure. No production call site yet — this dead export is intentional and
|
|
116
|
+
* tracked by #1876 (the deferred node-creation migration); it is the
|
|
117
|
+
* executable contract that follow-up will consume, pinned today by the
|
|
118
|
+
* scope-extractor unit test.
|
|
119
|
+
*/
|
|
120
|
+
export declare function selectNodeBearingDef(group: readonly SymbolDefinition[]): SymbolDefinition | undefined;
|
|
package/dist/core/ingestion/scope-extractor.js
CHANGED
|
@@ -569,6 +569,59 @@ function normalizeNodeLabel(kindStr) {
|
|
|
569
569
|
return undefined;
|
|
570
570
|
}
|
|
571
571
|
}
|
|
572
|
+
/** Function-like labels: callable defs that must keep incoming CALLS edges. */
|
|
573
|
+
const NODE_BEARING_FUNCTION_LABELS = new Set([
|
|
574
|
+
'Function',
|
|
575
|
+
'Method',
|
|
576
|
+
'Constructor',
|
|
577
|
+
]);
|
|
578
|
+
/** Value labels: non-callable bindings (a `const`/`let`/`var` holds a value). */
|
|
579
|
+
const NODE_BEARING_VALUE_LABELS = new Set([
|
|
580
|
+
'Const',
|
|
581
|
+
'Variable',
|
|
582
|
+
]);
|
|
583
|
+
/**
|
|
584
|
+
* Collapse rule for the deferred node-creation migration (#1876).
|
|
585
|
+
*
|
|
586
|
+
* When graph-node creation moves from the legacy DAG onto the
|
|
587
|
+
* registry-primary path, a single source binding can carry more than one
|
|
588
|
+
* `SymbolDefinition` for the same name in the same scope — e.g. a direct
|
|
589
|
+
* arrow `const fn = () => {}` is classified BOTH as a `Function` (the
|
|
590
|
+
* arrow) and a `Variable` (the binding). Emitting one graph node per def
|
|
591
|
+
* would reproduce exactly the duplicate-node bug this issue tracks.
|
|
592
|
+
*
|
|
593
|
+
* `selectNodeBearingDef` picks the ONE def that should bear the graph node
|
|
594
|
+
* for such a binding group:
|
|
595
|
+
*
|
|
596
|
+
* 1. a function-like def (`Function` / `Method` / `Constructor`) if any —
|
|
597
|
+
* the binding is callable and must keep incoming `CALLS` edges;
|
|
598
|
+
* 2. otherwise a value def (`Const` / `Variable`) — the binding holds a
|
|
599
|
+
* value (e.g. an array-method result after the U1/U2 narrowing);
|
|
600
|
+
* 3. otherwise the first def — deterministic fallback for label sets this
|
|
601
|
+
* rule does not rank.
|
|
602
|
+
*
|
|
603
|
+
* INPUT CONTRACT: `group` must be the defs bound to ONE name within ONE
|
|
604
|
+
* scope (a binding group). It deliberately does NOT dedup by range —
|
|
605
|
+
* `SymbolDefinition` carries no range and `makeDefId` encodes only the
|
|
606
|
+
* start position, so containment is uncomputable here; the caller forms the
|
|
607
|
+
* group (e.g. from a scope's `ownedDefs` keyed by name) before calling.
|
|
608
|
+
*
|
|
609
|
+
* Pure. No production call site yet — this dead export is intentional and
|
|
610
|
+
* tracked by #1876 (the deferred node-creation migration); it is the
|
|
611
|
+
* executable contract that follow-up will consume, pinned today by the
|
|
612
|
+
* scope-extractor unit test.
|
|
613
|
+
*/
|
|
614
|
+
export function selectNodeBearingDef(group) {
|
|
615
|
+
if (group.length === 0)
|
|
616
|
+
return undefined;
|
|
617
|
+
const functionLike = group.find((def) => NODE_BEARING_FUNCTION_LABELS.has(def.type));
|
|
618
|
+
if (functionLike !== undefined)
|
|
619
|
+
return functionLike;
|
|
620
|
+
const value = group.find((def) => NODE_BEARING_VALUE_LABELS.has(def.type));
|
|
621
|
+
if (value !== undefined)
|
|
622
|
+
return value;
|
|
623
|
+
return group[0];
|
|
624
|
+
}
|
|
572
625
|
function makeDefId(filePath, range, type, name) {
|
|
573
626
|
return `def:${filePath}#${range.startLine}:${range.startCol}:${type}:${name}`;
|
|
574
627
|
}
|
|
package/dist/core/lbug/extension-loader.d.ts
CHANGED
|
@@ -32,6 +32,22 @@ export interface ExtensionManagerOptions {
|
|
|
32
32
|
installExtension?: (extensionName: string, timeoutMs: number) => Promise<ExtensionInstallResult>;
|
|
33
33
|
warn?: (message: string) => void;
|
|
34
34
|
}
|
|
35
|
+
export declare const getExtensionInstallPolicy: () => ExtensionInstallPolicy;
|
|
36
|
+
/**
|
|
37
|
+
* Install policy for the **analyze (write) path**.
|
|
38
|
+
*
|
|
39
|
+
* The global default (`resolvePolicyFromEnv`) is `load-only` so serve/query
|
|
40
|
+
* read paths never require outbound network access (PR #1161, offline-first).
|
|
41
|
+
* The analyze path is different: it owns building the search indexes, so it
|
|
42
|
+
* defaults to `auto` — LOAD the extension if present, otherwise attempt one
|
|
43
|
+
* bounded out-of-process INSTALL. This keeps FTS symmetric with the
|
|
44
|
+
* VECTOR/embeddings path (which already defaults to `auto`) and matches the
|
|
45
|
+
* #726 contract. An explicit `GITNEXUS_LBUG_EXTENSION_INSTALL` value still
|
|
46
|
+
* wins, so operators can force `load-only`/`never` for fully offline analyze;
|
|
47
|
+
* `auto` LOADs-first, so offline machines still degrade gracefully when the
|
|
48
|
+
* INSTALL cannot reach the network.
|
|
49
|
+
*/
|
|
50
|
+
export declare const resolveAnalyzeInstallPolicy: () => ExtensionInstallPolicy;
|
|
35
51
|
export declare const getExtensionInstallTimeoutMs: () => number;
|
|
36
52
|
export declare const getExtensionInstallChildProcessArgs: (extensionName: string, maxDbSize?: number) => string[];
|
|
37
53
|
/**
|
|
@@ -54,7 +70,7 @@ export declare const installDuckDbExtensionOutOfProcess: (extensionName: string,
|
|
|
54
70
|
* subsequent analyze or query calls.
|
|
55
71
|
*
|
|
56
72
|
* Policy precedence (most specific wins):
|
|
57
|
-
* per-call `opts.policy` → constructor `options.policy` → env → `
|
|
73
|
+
* per-call `opts.policy` → constructor `options.policy` → env → `load-only`
|
|
58
74
|
*/
|
|
59
75
|
export declare class ExtensionManager {
|
|
60
76
|
private readonly options;
|
|
@@ -8,6 +8,27 @@ const alreadyAvailable = (message) => message.includes('already loaded') ||
|
|
|
8
8
|
message.includes('already installed') ||
|
|
9
9
|
message.includes('already exists');
|
|
10
10
|
/**
 * Read the extension install policy from `GITNEXUS_LBUG_EXTENSION_INSTALL`.
 *
 * Only the exact values `load-only`, `never` and `auto` are honored. Anything
 * else (unset, empty, different casing, unknown word) falls back to
 * `load-only`.
 */
const resolvePolicyFromEnv = () => {
    const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
    switch (raw) {
        case 'load-only':
        case 'never':
        case 'auto':
            return raw;
        default:
            return 'load-only';
    }
};
/** Public accessor for the env-derived install policy (default `load-only`). */
export const getExtensionInstallPolicy = () => resolvePolicyFromEnv();
|
|
17
|
+
/**
|
|
18
|
+
* Install policy for the **analyze (write) path**.
|
|
19
|
+
*
|
|
20
|
+
* The global default (`resolvePolicyFromEnv`) is `load-only` so serve/query
|
|
21
|
+
* read paths never require outbound network access (PR #1161, offline-first).
|
|
22
|
+
* The analyze path is different: it owns building the search indexes, so it
|
|
23
|
+
* defaults to `auto` — LOAD the extension if present, otherwise attempt one
|
|
24
|
+
* bounded out-of-process INSTALL. This keeps FTS symmetric with the
|
|
25
|
+
* VECTOR/embeddings path (which already defaults to `auto`) and matches the
|
|
26
|
+
* #726 contract. An explicit `GITNEXUS_LBUG_EXTENSION_INSTALL` value still
|
|
27
|
+
* wins, so operators can force `load-only`/`never` for fully offline analyze;
|
|
28
|
+
* `auto` LOADs-first, so offline machines still degrade gracefully when the
|
|
29
|
+
* INSTALL cannot reach the network.
|
|
30
|
+
*/
|
|
31
|
+
export const resolveAnalyzeInstallPolicy = () => {
|
|
11
32
|
const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
|
|
12
33
|
if (raw === 'load-only' || raw === 'never' || raw === 'auto')
|
|
13
34
|
return raw;
|
|
@@ -93,7 +114,7 @@ export const installDuckDbExtensionOutOfProcess = async (extensionName, timeoutM
|
|
|
93
114
|
* subsequent analyze or query calls.
|
|
94
115
|
*
|
|
95
116
|
* Policy precedence (most specific wins):
|
|
96
|
-
* per-call `opts.policy` → constructor `options.policy` → env → `
|
|
117
|
+
* per-call `opts.policy` → constructor `options.policy` → env → `load-only`
|
|
97
118
|
*/
|
|
98
119
|
export class ExtensionManager {
|
|
99
120
|
options;
|
|
@@ -88,6 +88,13 @@ export interface AnalyzeResult {
|
|
|
88
88
|
pipelineResult?: any;
|
|
89
89
|
/** True when analyze only repaired FTS indexes and skipped pipeline re-analysis. */
|
|
90
90
|
ftsRepairedOnly?: boolean;
|
|
91
|
+
/**
|
|
92
|
+
* True when the FTS extension was unavailable so search-index creation was
|
|
93
|
+
* skipped (offline-first degradation). The graph is fully queryable; only
|
|
94
|
+
* full-text/BM25 search is disabled. Lets callers (CLI summary, server) and
|
|
95
|
+
* the persisted meta surface the degraded state instead of reporting healthy.
|
|
96
|
+
*/
|
|
97
|
+
ftsSkipped?: boolean;
|
|
91
98
|
}
|
|
92
99
|
export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
|
|
93
100
|
export type { EmbeddingMode } from './embedding-mode.js';
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -12,8 +12,9 @@ import path from 'path';
|
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
13
|
import { execFileSync } from 'child_process';
|
|
14
14
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
15
|
-
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, deleteNodesForFile, deleteAllCommunitiesAndProcesses, queryImporters, } from './lbug/lbug-adapter.js';
|
|
15
|
+
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, deleteNodesForFile, deleteAllCommunitiesAndProcesses, queryImporters, loadFTSExtension, } from './lbug/lbug-adapter.js';
|
|
16
16
|
import { createSearchFTSIndexes, verifySearchFTSIndexes } from './search/fts-indexes.js';
|
|
17
|
+
import { resolveAnalyzeInstallPolicy } from './lbug/extension-loader.js';
|
|
17
18
|
import { startWalCheckpointDriver, } from './lbug/wal-checkpoint-driver.js';
|
|
18
19
|
import { getStoragePaths, saveMeta, loadMeta, ensureGitNexusIgnored, registerRepo, cleanupOldKuzuFiles, INCREMENTAL_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
19
20
|
import { computeFileHashes, diffFileHashes } from '../storage/file-hash.js';
|
|
@@ -24,6 +25,15 @@ import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName, resolve
|
|
|
24
25
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
25
26
|
import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
|
|
26
27
|
import { STALE_HASH_SENTINEL } from './lbug/schema.js';
|
|
28
|
+
/**
 * Message logged when the optional FTS extension can be neither loaded nor
 * installed during a full analyze. It lives in a single named constant so the
 * env-var / command guidance is written in exactly one place (mirrors the
 * VECTOR message in embedding-pipeline.ts).
 */
const FTS_UNAVAILABLE_MESSAGE = [
    'FTS extension unavailable; skipping search-index creation. ',
    'Full-text/BM25 search will be disabled until the LadybugDB FTS extension is ',
    'installed once with network access (GITNEXUS_LBUG_EXTENSION_INSTALL=auto) or ',
    'pre-installed for offline use. Run `gitnexus doctor` for details.',
].join('');
|
|
27
37
|
// Re-export the pure flag-derivation helper so external callers (and tests)
|
|
28
38
|
// keep importing from this module's stable surface.
|
|
29
39
|
export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
|
|
@@ -495,21 +505,40 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
495
505
|
});
|
|
496
506
|
}
|
|
497
507
|
// ── Phase 3: FTS (85–90%) ─────────────────────────────────────────
|
|
508
|
+
// The analyze (write) path owns building the search indexes, so it uses
|
|
509
|
+
// the `auto` install policy (LOAD-first, then one bounded INSTALL) —
|
|
510
|
+
// symmetric with the VECTOR/embeddings path below and consistent with the
|
|
511
|
+
// #726 contract. The global `load-only` default (PR #1161) governs the
|
|
512
|
+
// serve/query read paths, not this one. When the extension still cannot be
|
|
513
|
+
// loaded (genuinely offline + not pre-installed, or policy forced to
|
|
514
|
+
// load-only/never), degrade gracefully — exactly like the VECTOR path — so
|
|
515
|
+
// analyze still produces a fully queryable graph; only full-text/BM25
|
|
516
|
+
// search falls back. `--repair-fts` (whose sole job is FTS) still fails
|
|
517
|
+
// loudly on its own path above.
|
|
498
518
|
progress('fts', 85, 'Creating search indexes...');
|
|
499
|
-
await
|
|
500
|
-
|
|
501
|
-
? (table, indexName) => log(`FTS: creating ${table}.${indexName}`)
|
|
502
|
-
: undefined,
|
|
503
|
-
onIndexReady: options.verbose
|
|
504
|
-
? (table, indexName) => log(`FTS: ready ${table}.${indexName}`)
|
|
505
|
-
: undefined,
|
|
519
|
+
const ftsAvailable = await loadFTSExtension(undefined, {
|
|
520
|
+
policy: resolveAnalyzeInstallPolicy(),
|
|
506
521
|
});
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
522
|
+
if (ftsAvailable) {
|
|
523
|
+
await createSearchFTSIndexes({
|
|
524
|
+
onIndexStart: options.verbose
|
|
525
|
+
? (table, indexName) => log(`FTS: creating ${table}.${indexName}`)
|
|
526
|
+
: undefined,
|
|
527
|
+
onIndexReady: options.verbose
|
|
528
|
+
? (table, indexName) => log(`FTS: ready ${table}.${indexName}`)
|
|
529
|
+
: undefined,
|
|
530
|
+
});
|
|
531
|
+
const missingIndexNames = await verifySearchFTSIndexes(executeQuery);
|
|
532
|
+
if (missingIndexNames.length > 0) {
|
|
533
|
+
throw new Error(`FTS verification failed - missing indexes after analyze: ${missingIndexNames.join(', ')}. ` +
|
|
534
|
+
'Check FTS extension availability, then retry `gitnexus analyze --force` for a full rebuild.');
|
|
535
|
+
}
|
|
536
|
+
progress('fts', 90, 'Search indexes ready');
|
|
537
|
+
}
|
|
538
|
+
else {
|
|
539
|
+
log(FTS_UNAVAILABLE_MESSAGE);
|
|
540
|
+
progress('fts', 90, 'Search indexes skipped (FTS unavailable)');
|
|
511
541
|
}
|
|
512
|
-
progress('fts', 90, 'Search indexes ready');
|
|
513
542
|
// ── Phase 3.5: Re-insert cached embeddings ────────────────────────
|
|
514
543
|
// Runs on BOTH the full-rebuild path and the incremental path:
|
|
515
544
|
// - Full rebuild: DB was wiped, every cached row needs to come back.
|
|
@@ -661,7 +690,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
661
690
|
},
|
|
662
691
|
capabilities: {
|
|
663
692
|
graph: { provider: 'ladybugdb', status: runtimeCapabilities.graph },
|
|
664
|
-
|
|
693
|
+
// Reflect what this analyze run actually produced: when the FTS
|
|
694
|
+
// extension was unavailable the indexes were skipped, so record
|
|
695
|
+
// 'unavailable' rather than the static runtime default. Keeps
|
|
696
|
+
// meta.json / `gitnexus doctor` honest about degraded search.
|
|
697
|
+
fts: {
|
|
698
|
+
provider: 'ladybugdb-fts',
|
|
699
|
+
status: ftsAvailable ? runtimeCapabilities.fts : 'unavailable',
|
|
700
|
+
},
|
|
665
701
|
vectorSearch: {
|
|
666
702
|
provider: effectiveSemanticMode === 'vector-index' ? 'ladybugdb-vector' : 'exact-scan',
|
|
667
703
|
status: embeddingCount > 0 ? effectiveSemanticMode : 'unavailable',
|
|
@@ -748,6 +784,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
748
784
|
repoPath,
|
|
749
785
|
stats: meta.stats,
|
|
750
786
|
pipelineResult,
|
|
787
|
+
ftsSkipped: !ftsAvailable,
|
|
751
788
|
};
|
|
752
789
|
}
|
|
753
790
|
catch (err) {
|
package/dist/mcp/server.d.ts
CHANGED
|
@@ -17,7 +17,22 @@ import type { LocalBackend } from './local/local-backend.js';
|
|
|
17
17
|
* Transport-agnostic — caller connects the desired transport.
|
|
18
18
|
*/
|
|
19
19
|
export declare function createMCPServer(backend: LocalBackend): Server;
|
|
20
|
+
/** Conventional 128 + signal-number exit codes for graceful termination. */
|
|
21
|
+
export declare const SHUTDOWN_EXIT_CODES: {
|
|
22
|
+
readonly SIGINT: 130;
|
|
23
|
+
readonly SIGTERM: 143;
|
|
24
|
+
};
|
|
25
|
+
type SignalRegistrar = (event: 'SIGINT' | 'SIGTERM', listener: (...args: unknown[]) => void) => void;
|
|
20
26
|
/**
|
|
21
|
-
*
|
|
27
|
+
* Wire SIGINT/SIGTERM to a graceful shutdown using NUMERIC exit codes.
|
|
28
|
+
*
|
|
29
|
+
* Node invokes signal listeners with the signal NAME string as the first
|
|
30
|
+
* argument, so registering an `(exitCode = 0) => process.exit(exitCode)`
|
|
31
|
+
* shutdown directly passes `'SIGTERM'` into `process.exit()` and crashes with
|
|
32
|
+
* `ERR_INVALID_ARG_TYPE` (#1132). These wrappers discard the signal argument
|
|
33
|
+
* and pass the conventional 128+signal code instead. `on` is injectable so the
|
|
34
|
+
* mapping can be unit-tested without touching the real process.
|
|
22
35
|
*/
|
|
36
|
+
export declare function installSignalShutdown(shutdown: (exitCode?: number) => unknown, on?: SignalRegistrar): void;
|
|
23
37
|
export declare function startMCPServer(backend: LocalBackend): Promise<void>;
|
|
38
|
+
export {};
|
package/dist/mcp/server.js
CHANGED
|
@@ -245,6 +245,26 @@ Follow these steps:
|
|
|
245
245
|
/**
|
|
246
246
|
* Start the MCP server on stdio transport (for CLI use).
|
|
247
247
|
*/
|
|
248
|
+
/** Force-exit fallback budget if graceful shutdown cleanup hangs. */
|
|
249
|
+
const SHUTDOWN_FORCE_EXIT_MS = 5_000;
|
|
250
|
+
/** Conventional 128 + signal-number exit codes for graceful termination. */
|
|
251
|
+
export const SHUTDOWN_EXIT_CODES = { SIGINT: 130, SIGTERM: 143 };
|
|
252
|
+
/**
|
|
253
|
+
* Wire SIGINT/SIGTERM to a graceful shutdown using NUMERIC exit codes.
|
|
254
|
+
*
|
|
255
|
+
* Node invokes signal listeners with the signal NAME string as the first
|
|
256
|
+
* argument, so registering an `(exitCode = 0) => process.exit(exitCode)`
|
|
257
|
+
* shutdown directly passes `'SIGTERM'` into `process.exit()` and crashes with
|
|
258
|
+
* `ERR_INVALID_ARG_TYPE` (#1132). These wrappers discard the signal argument
|
|
259
|
+
* and pass the conventional 128+signal code instead. `on` is injectable so the
|
|
260
|
+
* mapping can be unit-tested without touching the real process.
|
|
261
|
+
*/
|
|
262
|
+
export function installSignalShutdown(shutdown, on = (event, listener) => {
|
|
263
|
+
process.on(event, listener);
|
|
264
|
+
}) {
|
|
265
|
+
on('SIGINT', () => void shutdown(SHUTDOWN_EXIT_CODES.SIGINT));
|
|
266
|
+
on('SIGTERM', () => void shutdown(SHUTDOWN_EXIT_CODES.SIGTERM));
|
|
267
|
+
}
|
|
248
268
|
export async function startMCPServer(backend) {
|
|
249
269
|
const server = createMCPServer(backend);
|
|
250
270
|
// Idempotent global sentinel install. cli/mcp.ts calls this first thing
|
|
@@ -281,6 +301,11 @@ export async function startMCPServer(backend) {
|
|
|
281
301
|
if (shuttingDown)
|
|
282
302
|
return;
|
|
283
303
|
shuttingDown = true;
|
|
304
|
+
// Safety net: if backend.disconnect()/server.close() hangs, still exit so a
|
|
305
|
+
// SIGINT/SIGTERM reliably terminates the process. Unref'd so the timer alone
|
|
306
|
+
// never keeps the event loop alive.
|
|
307
|
+
const forceExit = setTimeout(() => process.exit(exitCode), SHUTDOWN_FORCE_EXIT_MS);
|
|
308
|
+
forceExit.unref();
|
|
284
309
|
try {
|
|
285
310
|
await backend.disconnect();
|
|
286
311
|
}
|
|
@@ -291,24 +316,30 @@ export async function startMCPServer(backend) {
|
|
|
291
316
|
catch { }
|
|
292
317
|
const { flushLoggerSync } = await import('../core/logger.js');
|
|
293
318
|
flushLoggerSync();
|
|
319
|
+
clearTimeout(forceExit);
|
|
294
320
|
process.exit(exitCode);
|
|
295
321
|
};
|
|
296
|
-
// Handle graceful shutdown
|
|
297
|
-
|
|
298
|
-
process.
|
|
322
|
+
// Handle graceful shutdown. Node invokes signal listeners with the signal
|
|
323
|
+
// NAME (e.g. 'SIGTERM') as the first argument; registering `shutdown`
|
|
324
|
+
// directly passed that string to process.exit() and crashed with
|
|
325
|
+
// ERR_INVALID_ARG_TYPE (#1132). Map each signal to its conventional
|
|
326
|
+
// 128+signal exit code instead.
|
|
327
|
+
installSignalShutdown(shutdown);
|
|
299
328
|
// Log crashes to stderr so they aren't silently lost.
|
|
300
329
|
// uncaughtException is fatal — shut down.
|
|
301
330
|
// unhandledRejection is logged but kept non-fatal (availability-first):
|
|
302
331
|
// killing the server for one missed catch would be worse than logging it.
|
|
303
332
|
process.on('uncaughtException', (err) => {
|
|
304
333
|
process.stderr.write(`GitNexus MCP uncaughtException: ${err?.stack || err}\n`);
|
|
305
|
-
shutdown(1);
|
|
334
|
+
void shutdown(1);
|
|
306
335
|
});
|
|
307
336
|
process.on('unhandledRejection', (reason) => {
|
|
308
337
|
process.stderr.write(`GitNexus MCP unhandledRejection: ${reason?.stack || reason}\n`);
|
|
309
338
|
});
|
|
310
|
-
// Handle stdio errors — stdin close means the parent process is gone
|
|
311
|
-
|
|
312
|
-
process.
|
|
313
|
-
process.
|
|
339
|
+
// Handle stdio errors — stdin close means the parent process is gone.
|
|
340
|
+
// Wrap so the event payload (e.g. an Error for 'error') can never reach
|
|
341
|
+
// process.exit() as a non-numeric exit code, and void the returned promise.
|
|
342
|
+
process.stdin.on('end', () => void shutdown(0));
|
|
343
|
+
process.stdin.on('error', () => void shutdown(0));
|
|
344
|
+
process.stdout.on('error', () => void shutdown(0));
|
|
314
345
|
}
|
package/package.json
CHANGED