gitnexus 1.6.6-rc.89 → 1.6.6-rc.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -828,6 +828,14 @@ const analyzeCommandImpl = async (inputPath, options) => {
828
828
  console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
829
829
  console.log(` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`);
830
830
  console.log(` ${repoPath}`);
831
+ // Persistent (non-scrolling) warning when FTS indexing was skipped — the
832
+ // progress-bar log() that fired mid-run has already scrolled away, so the
833
+ // degraded-search state must also appear in the final summary (#1161).
834
+ if (result.ftsSkipped) {
835
+ console.log(`\n Warning: full-text/BM25 search is disabled — the LadybugDB FTS extension was unavailable.\n` +
836
+ ` Install it once with network access (GITNEXUS_LBUG_EXTENSION_INSTALL=auto) then rerun, or\n` +
837
+ ` run \`gitnexus analyze --repair-fts\` when connected. Run \`gitnexus doctor\` for details.`);
838
+ }
831
839
  try {
832
840
  await fs.access(getGlobalRegistryPath());
833
841
  }
@@ -2,6 +2,7 @@ import { getRuntimeCapabilities, getRuntimeFingerprint } from '../core/platform/
2
2
  import { resolveEmbeddingConfig } from '../core/embeddings/config.js';
3
3
  import { isHttpMode } from '../core/embeddings/http-client.js';
4
4
  import { checkLbugNative } from '../core/lbug/native-check.js';
5
+ import { getExtensionInstallPolicy } from '../core/lbug/extension-loader.js';
5
6
  import { t } from './i18n/index.js';
6
7
  function isCombiningMark(codePoint) {
7
8
  return ((codePoint >= 0x0300 && codePoint <= 0x036f) ||
@@ -66,6 +67,16 @@ export const doctorCommand = async () => {
66
67
  console.log(` ${label('doctor.labels.fullTextSearch', 18)}${capabilities.fts}`);
67
68
  console.log(` ${label('doctor.labels.vectorIndex', 18)}${capabilities.vector}`);
68
69
  console.log(` ${label('doctor.labels.semanticMode', 18)}${capabilities.semanticMode}`);
70
+ // Surface the optional-extension install policy so offline users can see
71
+ // whether analyze/query will reach the network (extension.ladybugdb.com).
72
+ // Literal label (like the 'native' line) to avoid adding i18n keys.
73
+ const installPolicy = getExtensionInstallPolicy();
74
+ const policyHint = installPolicy === 'load-only'
75
+ ? ' (offline; load only, no network install)'
76
+ : installPolicy === 'never'
77
+ ? ' (optional extensions disabled)'
78
+ : ' (installs missing extensions over network)';
79
+ console.log(` ${padDisplayEnd('Ext install:', 18)}${installPolicy}${policyHint}`);
69
80
  console.log(` ${label('doctor.labels.exactScanLimit', 18)}${t('doctor.chunks', { count: capabilities.exactScanLimit })}`);
70
81
  if (capabilities.reason)
71
82
  console.log(` ${label('doctor.labels.note', 18)}${capabilities.reason}`);
@@ -9,6 +9,20 @@
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
11
  import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
12
+ import type { ExtensionInstallPolicy } from '../lbug/extension-loader.js';
13
+ /**
14
+ * Resolve the extension-install policy for the embedding WRITE path (analyze).
15
+ *
16
+ * Generating embeddings is an explicit opt-in to a feature that requires the
17
+ * VECTOR extension, so when the operator has NOT pinned a policy we default to
18
+ * `auto` (one bounded, out-of-process INSTALL) — matching the documented
19
+ * "auto = default for analyze" intent in extension-loader.ts. An explicit
20
+ * GITNEXUS_LBUG_EXTENSION_INSTALL=load-only|never|auto always wins, so an
21
+ * offline or locked-down operator is never silently forced onto the network
22
+ * (the #1153 regression caused by hard-coding `auto` here). Read on every call
23
+ * (not memoized) so test env stubbing works.
24
+ */
25
+ export declare const resolveEmbeddingInstallPolicy: () => ExtensionInstallPolicy;
12
26
  /**
13
27
  * Bump this when the embedding text template changes in a way that should
14
28
  * invalidate existing vectors, such as metadata/header shape changes,
@@ -21,13 +21,30 @@ import { loadVectorExtension } from '../lbug/lbug-adapter.js';
21
21
  import { getExactScanLimit } from '../platform/capabilities.js';
22
22
  import { logger } from '../logger.js';
23
23
  const isDev = process.env.NODE_ENV === 'development';
24
- const vectorUnavailableMessage = 'VECTOR extension is unavailable for this LadybugDB runtime; semantic search will use exact scan when embeddings exist.';
24
+ const vectorUnavailableMessage = 'VECTOR extension unavailable; semantic embeddings fall back to exact scan. ' +
25
+ 'To enable vector search, install it once with network access ' +
26
+ '(GITNEXUS_LBUG_EXTENSION_INSTALL=auto), or pre-install it for offline use. ' +
27
+ 'Set GITNEXUS_LBUG_EXTENSION_INSTALL=never to skip installs and silence this.';
28
+ /**
29
+ * Resolve the extension-install policy for the embedding WRITE path (analyze).
30
+ *
31
+ * Generating embeddings is an explicit opt-in to a feature that requires the
32
+ * VECTOR extension, so when the operator has NOT pinned a policy we default to
33
+ * `auto` (one bounded, out-of-process INSTALL) — matching the documented
34
+ * "auto = default for analyze" intent in extension-loader.ts. An explicit
35
+ * GITNEXUS_LBUG_EXTENSION_INSTALL=load-only|never|auto always wins, so an
36
+ * offline or locked-down operator is never silently forced onto the network
37
+ * (the #1153 regression caused by hard-coding `auto` here). Read on every call
38
+ * (not memoized) so test env stubbing works.
39
+ */
40
export const resolveEmbeddingInstallPolicy = () => {
    // Read the environment on every call (no caching) so a value changed
    // between calls is picked up immediately.
    const pinned = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
    switch (pinned) {
        // An explicitly pinned, recognised policy always wins.
        case 'load-only':
        case 'never':
        case 'auto':
            return pinned;
        // Unset or unrecognised: the embedding write path defaults to `auto`.
        default:
            return 'auto';
    }
};
25
46
  const ensureVectorExtensionAvailable = async () => {
26
- const vectorReady = await loadVectorExtension();
27
- if (!vectorReady) {
28
- return false;
29
- }
30
- return true;
47
+ return loadVectorExtension(undefined, { policy: resolveEmbeddingInstallPolicy() });
31
48
  };
32
49
  /**
33
50
  * Bump this when the embedding text template changes in a way that should
@@ -176,7 +193,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
176
193
  let totalChunks = 0;
177
194
  try {
178
195
  const vectorAvailable = await ensureVectorExtensionAvailable();
179
- if (!vectorAvailable && isDev) {
196
+ if (!vectorAvailable) {
180
197
  logger.warn(vectorUnavailableMessage);
181
198
  }
182
199
  // Phase 1: Load embedding model
@@ -427,7 +444,11 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
427
444
  const queryVec = embeddingToArray(queryEmbedding);
428
445
  const queryVecStr = `[${queryVec.join(',')}]`;
429
446
  let bestChunks = new Map();
430
- if (await loadVectorExtension()) {
447
+ // Query/read path: NEVER spawn a network INSTALL on a user query. If the
448
+ // VECTOR extension was not pre-installed, fall back to exact scan rather than
449
+ // blocking the query on a download (offline-first; see extension-loader.ts
450
+ // "load-only" — used by all serve/MCP query paths).
451
+ if (await loadVectorExtension(undefined, { policy: 'load-only' })) {
431
452
  try {
432
453
  bestChunks = await collectBestChunks(k, async (fetchLimit) => {
433
454
  const vectorQuery = `
@@ -31,6 +31,7 @@ import { splitImportStatement } from '../typescript/import-decomposer.js';
31
31
  import { getJsParser, getJsScopeQuery, jsCachedTreeMatchesGrammar } from './query.js';
32
32
  import { computeTsArityMetadata } from '../typescript/arity-metadata.js';
33
33
  import { synthesizeTsReceiverBinding } from '../typescript/receiver-binding.js';
34
+ import { isArrayMethodCallbackArrow } from '../typescript/array-callback.js';
34
35
  import { getTreeSitterBufferSize } from '../../constants.js';
35
36
  import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js';
36
37
  /** JS function-like node types that may carry a synthesized `this` binding.
@@ -601,6 +602,20 @@ export function emitJsScopeCaptures(sourceText, filePath, cachedTree) {
601
602
  continue;
602
603
  }
603
604
  }
605
+ // #1876: drop @declaration.function for array higher-order-method
606
+ // callbacks (`const x = arr.map(a => …)`). The HOC-wrapped-arrow
607
+ // pattern matches them, but the binding holds a value, not a callable.
608
+ // The binding keeps its separate @declaration.const / .variable match,
609
+ // and the arrow's own @scope.function match (a different pattern) is
610
+ // untouched, so inner-call attribution falls through to the enclosing
611
+ // scope instead of a phantom Function.
612
+ const fnDeclAnchor = grouped['@declaration.function'];
613
+ if (fnDeclAnchor !== undefined) {
614
+ const arrowNode = findFunctionNode(tree.rootNode, fnDeclAnchor.range);
615
+ if (arrowNode !== null && isArrayMethodCallbackArrow(arrowNode)) {
616
+ continue;
617
+ }
618
+ }
604
619
  // Synthesize arity metadata on function-like declarations.
605
620
  const declAnchor = pickFirstDefined(grouped, FUNCTION_DECL_TAGS);
606
621
  if (declAnchor !== undefined) {
@@ -144,6 +144,12 @@ const JAVASCRIPT_SCOPE_QUERY = `
144
144
  ;; HOC-wrapped variable declarations: const X = HOC((args) => { ... }).
145
145
  ;; Covers React.forwardRef, memo, useCallback, useMemo, observer,
146
146
  ;; debounce, and any user-defined HOC factory.
147
+ ;;
148
+ ;; #1876: this shape also matches array higher-order-method callbacks
149
+ ;; (const x = arr.map(a => ...)), where x is a value, not a function.
150
+ ;; Those are filtered out emit-side in captures.ts via
151
+ ;; isArrayMethodCallbackArrow (member-expression callee whose property
152
+ ;; is a known Array method), so only the @declaration.const survives.
147
153
  (lexical_declaration
148
154
  (variable_declarator
149
155
  name: (identifier) @declaration.name
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Array higher-order-method callback detection (issue #1876).
3
+ *
4
+ * The HOC-wrapped-arrow declaration pattern in the JS/TS scope queries
5
+ * (`const X = call((args) => …)`) was added for React idioms
6
+ * (`forwardRef` / `memo` / `useCallback`). It has the same AST shape as
7
+ * an array higher-order-method call (`const x = arr.map(a => …)`), so
8
+ * those callbacks also match and produce a spurious `@declaration.function`
9
+ * named after the binding — duplicating the `@declaration.const` /
10
+ * `@declaration.variable` def that the same binding already gets.
11
+ *
12
+ * For an array-method callback the binding holds a *value* (the method's
13
+ * result), not a callable, so the `Function` def is semantically wrong.
14
+ * `isArrayMethodCallbackArrow` lets the emitter (`captures.ts`) drop that
15
+ * `@declaration.function` match, leaving only the value def.
16
+ *
17
+ * Shared by both the JavaScript and TypeScript capture emitters — the
18
+ * relevant grammar nodes (`arrow_function`, `function_expression`,
19
+ * `arguments`, `call_expression`, `member_expression`,
20
+ * `property_identifier`) are identical across `tree-sitter-javascript`
21
+ * and `tree-sitter-typescript`.
22
+ *
23
+ * Pure given the input node. No I/O, no globals.
24
+ */
25
+ import type { SyntaxNode } from '../../utils/ast-helpers.js';
26
+ /**
27
+ * Array prototype higher-order methods whose result is a value, not a
28
+ * function. A callback passed to one of these is an anonymous callback,
29
+ * never a top-level function definition. Identifier-callee HOCs
30
+ * (`forwardRef(...)`, `useCallback(...)`, custom factories) are
31
+ * deliberately NOT listed — they keep their `Function` classification.
32
+ *
33
+ * Trade-off (unchanged from before #1876): a custom *fluent-API* member
34
+ * call with a callback whose method name is not in this set
35
+ * (`qb.where(x => …)`) still classifies as `Function`. There is no clean
36
+ * syntactic line beyond the well-known Array surface, so the set is
37
+ * intentionally closed and easy to extend.
38
+ *
39
+ * Receiver-blind, by design: the match keys on the method NAME only, never
40
+ * the receiver type (tree-sitter has no type information here). So an in-set
41
+ * name on a NON-array receiver — `Map`/`Set` `.forEach`, an RxJS
42
+ * `observable.map(…)`, a query builder `.sort(…)`, a lodash chain
43
+ * `.filter(…)` — is ALSO treated as a callback and has its
44
+ * `@declaration.function` dropped. This is an accepted limitation, not a
45
+ * regression: those bindings hold the call's *result value*, not a callable,
46
+ * so a value def is the correct classification anyway. The only genuine loss
47
+ * is a bespoke DSL whose in-set-named method returns something callable —
48
+ * rare enough to accept rather than guard with type inference. Pinned by the
49
+ * "in-set method on a non-array receiver" case in `*-captures.test.ts`.
50
+ */
51
+ export declare const ARRAY_CALLBACK_METHODS: ReadonlySet<string>;
52
+ /**
53
+ * True when `node` (an `arrow_function` / `function_expression`) is the
54
+ * callback argument of an array higher-order-method call, i.e. the
55
+ * enclosing call's callee is a `member_expression` whose property is one
56
+ * of {@link ARRAY_CALLBACK_METHODS}.
57
+ *
58
+ * Returns false for direct assignments (`const fn = () => {}` — parent is
59
+ * `variable_declarator`, not `arguments`) and for identifier-callee HOCs
60
+ * (`forwardRef(() => …)` — callee is an `identifier`, not a
61
+ * `member_expression`), so neither is ever suppressed.
62
+ *
63
+ * Intentional non-suppressing gaps (preserve current behavior, no
64
+ * regression): parenthesized callee `(arr.map)(cb)` (`parenthesized_expression`)
65
+ * and computed callee `arr['map'](cb)` (`subscript_expression`).
66
+ */
67
+ export declare function isArrayMethodCallbackArrow(node: SyntaxNode): boolean;
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Array higher-order-method callback detection (issue #1876).
3
+ *
4
+ * The HOC-wrapped-arrow declaration pattern in the JS/TS scope queries
5
+ * (`const X = call((args) => …)`) was added for React idioms
6
+ * (`forwardRef` / `memo` / `useCallback`). It has the same AST shape as
7
+ * an array higher-order-method call (`const x = arr.map(a => …)`), so
8
+ * those callbacks also match and produce a spurious `@declaration.function`
9
+ * named after the binding — duplicating the `@declaration.const` /
10
+ * `@declaration.variable` def that the same binding already gets.
11
+ *
12
+ * For an array-method callback the binding holds a *value* (the method's
13
+ * result), not a callable, so the `Function` def is semantically wrong.
14
+ * `isArrayMethodCallbackArrow` lets the emitter (`captures.ts`) drop that
15
+ * `@declaration.function` match, leaving only the value def.
16
+ *
17
+ * Shared by both the JavaScript and TypeScript capture emitters — the
18
+ * relevant grammar nodes (`arrow_function`, `function_expression`,
19
+ * `arguments`, `call_expression`, `member_expression`,
20
+ * `property_identifier`) are identical across `tree-sitter-javascript`
21
+ * and `tree-sitter-typescript`.
22
+ *
23
+ * Pure given the input node. No I/O, no globals.
24
+ */
25
+ /**
26
+ * Array prototype higher-order methods whose result is a value, not a
27
+ * function. A callback passed to one of these is an anonymous callback,
28
+ * never a top-level function definition. Identifier-callee HOCs
29
+ * (`forwardRef(...)`, `useCallback(...)`, custom factories) are
30
+ * deliberately NOT listed — they keep their `Function` classification.
31
+ *
32
+ * Trade-off (unchanged from before #1876): a custom *fluent-API* member
33
+ * call with a callback whose method name is not in this set
34
+ * (`qb.where(x => …)`) still classifies as `Function`. There is no clean
35
+ * syntactic line beyond the well-known Array surface, so the set is
36
+ * intentionally closed and easy to extend.
37
+ *
38
+ * Receiver-blind, by design: the match keys on the method NAME only, never
39
+ * the receiver type (tree-sitter has no type information here). So an in-set
40
+ * name on a NON-array receiver — `Map`/`Set` `.forEach`, an RxJS
41
+ * `observable.map(…)`, a query builder `.sort(…)`, a lodash chain
42
+ * `.filter(…)` — is ALSO treated as a callback and has its
43
+ * `@declaration.function` dropped. This is an accepted limitation, not a
44
+ * regression: those bindings hold the call's *result value*, not a callable,
45
+ * so a value def is the correct classification anyway. The only genuine loss
46
+ * is a bespoke DSL whose in-set-named method returns something callable —
47
+ * rare enough to accept rather than guard with type inference. Pinned by the
48
+ * "in-set method on a non-array receiver" case in `*-captures.test.ts`.
49
+ */
50
+ export const ARRAY_CALLBACK_METHODS = new Set([
51
+ 'map',
52
+ 'filter',
53
+ 'find',
54
+ 'findIndex',
55
+ 'findLast',
56
+ 'findLastIndex',
57
+ 'forEach',
58
+ 'reduce',
59
+ 'reduceRight',
60
+ 'some',
61
+ 'every',
62
+ 'flatMap',
63
+ 'sort',
64
+ ]);
65
+ /**
66
+ * True when `node` (an `arrow_function` / `function_expression`) is the
67
+ * callback argument of an array higher-order-method call, i.e. the
68
+ * enclosing call's callee is a `member_expression` whose property is one
69
+ * of {@link ARRAY_CALLBACK_METHODS}.
70
+ *
71
+ * Returns false for direct assignments (`const fn = () => {}` — parent is
72
+ * `variable_declarator`, not `arguments`) and for identifier-callee HOCs
73
+ * (`forwardRef(() => …)` — callee is an `identifier`, not a
74
+ * `member_expression`), so neither is ever suppressed.
75
+ *
76
+ * Intentional non-suppressing gaps (preserve current behavior, no
77
+ * regression): parenthesized callee `(arr.map)(cb)` (`parenthesized_expression`)
78
+ * and computed callee `arr['map'](cb)` (`subscript_expression`).
79
+ */
80
+ export function isArrayMethodCallbackArrow(node) {
81
+ const args = node.parent;
82
+ if (args === null || args.type !== 'arguments')
83
+ return false;
84
+ const call = args.parent;
85
+ if (call === null || call.type !== 'call_expression')
86
+ return false;
87
+ const callee = call.childForFieldName('function');
88
+ if (callee === null || callee.type !== 'member_expression')
89
+ return false;
90
+ const property = callee.childForFieldName('property');
91
+ if (property === null || property.type !== 'property_identifier')
92
+ return false;
93
+ return ARRAY_CALLBACK_METHODS.has(property.text);
94
+ }
@@ -30,6 +30,7 @@ import { getTsParser, getTsScopeQuery, tsCachedTreeMatchesGrammar } from './quer
30
30
  import { recordCacheHit, recordCacheMiss } from './cache-stats.js';
31
31
  import { synthesizeTsReceiverBinding } from './receiver-binding.js';
32
32
  import { computeTsArityMetadata } from './arity-metadata.js';
33
+ import { isArrayMethodCallbackArrow } from './array-callback.js';
33
34
  import { getTreeSitterBufferSize } from '../../constants.js';
34
35
  import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js';
35
36
  /** tree-sitter-typescript node types for function-like scopes that may
@@ -228,6 +229,20 @@ export function emitTsScopeCaptures(sourceText, filePath, cachedTree) {
228
229
  continue;
229
230
  }
230
231
  }
232
+ // #1876: drop @declaration.function for array higher-order-method
233
+ // callbacks (`const x = arr.map(a => …)`). The HOC-wrapped-arrow
234
+ // pattern matches them, but the binding holds a value, not a callable.
235
+ // The binding keeps its separate @declaration.const / .variable match,
236
+ // and the arrow's own @scope.function match (a different pattern) is
237
+ // untouched, so inner-call attribution falls through to the enclosing
238
+ // scope instead of a phantom Function.
239
+ const fnDeclAnchor = grouped['@declaration.function'];
240
+ if (fnDeclAnchor !== undefined) {
241
+ const arrowNode = findFunctionNode(tree.rootNode, fnDeclAnchor.range, groupedNodes['@declaration.function']);
242
+ if (arrowNode !== null && isArrayMethodCallbackArrow(arrowNode)) {
243
+ continue;
244
+ }
245
+ }
231
246
  // Synthesize arity metadata on function-like declaration anchors
232
247
  // before pushing the match. The registry uses these to narrow
233
248
  // overloads — TypeScript supports overload signatures via
@@ -246,20 +246,22 @@ const TYPESCRIPT_SCOPE_QUERY = `
246
246
  ;; that promotes the binding to the parent scope (where \`const X\`
247
247
  ;; lives).
248
248
  ;;
249
- ;; Trade-off — chained array-method form: \`const x = arr.find((y) => p(y))\`
250
- ;; has the same syntactic shape and would also match, naming the
251
- ;; \`.find\` callback as \`x\`. The resulting \`Function:x\` is mostly
252
- ;; harmless: \`x\` is consumed as a value (\`if (x) { ... }\`), never
253
- ;; invoked as a function, so it gets zero incoming \`CALLS\` edges. The
254
- ;; one outgoing edge \`Function:x p\` is a minor mis-attribution that
255
- ;; could in principle be fixed by adding a \`function: [(identifier)
256
- ;; (member_expression)]\` predicate that excludes property-identifiers
257
- ;; matching a known array-method blocklist (\`map\` / \`filter\` / \`find\`
258
- ;; / \`reduce\` / \`forEach\` / \`some\` / \`every\`). We don't do that here
259
- ;; because (a) the false-positive cost is negligible, (b) the blocklist
260
- ;; would need maintenance, and (c) any user-defined fluent-API method
261
- ;; with a callback argument would still false-positive there's no
262
- ;; clean syntactic line.
249
+ ;; #1876 — chained array-method form: \`const x = arr.find((y) => p(y))\`
250
+ ;; has the same syntactic shape and matches here too, naming the
251
+ ;; \`.find\` callback as \`x\`. Because \`x\` holds a value (the method
252
+ ;; result), not a callable, the spurious \`Function:x\` def is dropped
253
+ ;; emit-side in captures.ts: \`isArrayMethodCallbackArrow\` skips any
254
+ ;; \`@declaration.function\` whose enclosing call has a member-expression
255
+ ;; callee with a known Array-method property (\`ARRAY_CALLBACK_METHODS\`:
256
+ ;; \`map\` / \`filter\` / \`find\` / \`reduce\` / \`forEach\` / \`some\` /
257
+ ;; \`every\` / …). Only the \`@declaration.variable\` survives, so the
258
+ ;; binding is a single value def and calls inside the callback attribute
259
+ ;; to the enclosing scope rather than \`Function:x\`.
260
+ ;;
261
+ ;; Residual (intentional): a user-defined fluent-API method with a
262
+ ;; callback (\`qb.where(x => …)\`) is NOT in the blocklist and still
263
+ ;; classifies as \`Function\` — there's no clean syntactic line beyond
264
+ ;; the well-known Array surface, so the set is closed and easy to extend.
263
265
  ;;
264
266
  ;; Trade-off — multi-arrow arguments: \`const x = call(arrow1, arrow2)\`
265
267
  ;; would emit TWO matches with the same name \`x\`. tree-sitter-query
@@ -58,7 +58,7 @@
58
58
  * - `ParsedFile.localDefs` — flattened union of `Scope.ownedDefs`.
59
59
  * - `ParsedFile.referenceSites` — pre-resolution usage facts.
60
60
  */
61
- import type { CaptureMatch, ParsedFile } from '../../_shared/index.js';
61
+ import type { CaptureMatch, ParsedFile, SymbolDefinition } from '../../_shared/index.js';
62
62
  import type { LanguageProvider } from './language-provider.js';
63
63
  /**
64
64
  * The subset of `LanguageProvider` hooks that `extract()` reads. Declared
@@ -86,3 +86,35 @@ export type ScopeExtractorHooks = Pick<LanguageProvider, 'resolveScopeKind' | 'b
86
86
  * templates with mixed PHP/HTML/JS).
87
87
  */
88
88
  export declare function extract(matches: readonly CaptureMatch[], filePath: string, provider: ScopeExtractorHooks): ParsedFile;
89
+ /**
90
+ * Collapse rule for the deferred node-creation migration (#1876).
91
+ *
92
+ * When graph-node creation moves from the legacy DAG onto the
93
+ * registry-primary path, a single source binding can carry more than one
94
+ * `SymbolDefinition` for the same name in the same scope — e.g. a direct
95
+ * arrow `const fn = () => {}` is classified BOTH as a `Function` (the
96
+ * arrow) and a `Variable` (the binding). Emitting one graph node per def
97
+ * would reproduce exactly the duplicate-node bug this issue tracks.
98
+ *
99
+ * `selectNodeBearingDef` picks the ONE def that should bear the graph node
100
+ * for such a binding group:
101
+ *
102
+ * 1. a function-like def (`Function` / `Method` / `Constructor`) if any —
103
+ * the binding is callable and must keep incoming `CALLS` edges;
104
+ * 2. otherwise a value def (`Const` / `Variable`) — the binding holds a
105
+ * value (e.g. an array-method result after the U1/U2 narrowing);
106
+ * 3. otherwise the first def — deterministic fallback for label sets this
107
+ * rule does not rank.
108
+ *
109
+ * INPUT CONTRACT: `group` must be the defs bound to ONE name within ONE
110
+ * scope (a binding group). It deliberately does NOT dedup by range —
111
+ * `SymbolDefinition` carries no range and `makeDefId` encodes only the
112
+ * start position, so containment is uncomputable here; the caller forms the
113
+ * group (e.g. from a scope's `ownedDefs` keyed by name) before calling.
114
+ *
115
+ * Pure. No production call site yet — this dead export is intentional and
116
+ * tracked by #1876 (the deferred node-creation migration); it is the
117
+ * executable contract that follow-up will consume, pinned today by the
118
+ * scope-extractor unit test.
119
+ */
120
+ export declare function selectNodeBearingDef(group: readonly SymbolDefinition[]): SymbolDefinition | undefined;
@@ -569,6 +569,59 @@ function normalizeNodeLabel(kindStr) {
569
569
  return undefined;
570
570
  }
571
571
  }
572
/** Labels of callable defs; a def with one of these wins the graph node. */
const NODE_BEARING_FUNCTION_LABELS = new Set(['Function', 'Method', 'Constructor']);
/** Labels of plain value bindings; chosen only when no callable def exists. */
const NODE_BEARING_VALUE_LABELS = new Set(['Const', 'Variable']);
/**
 * Choose the single `SymbolDefinition` that should bear the graph node for
 * one binding group (collapse rule for the deferred node-creation
 * migration, #1876).
 *
 * A single source binding can carry several defs for the same name in the
 * same scope (e.g. `const fn = () => {}` is both a `Function` and a
 * `Variable`); emitting a node per def would duplicate the node. The
 * ranking, applied in input order, is:
 *
 *   1. the first function-like def (`Function` / `Method` / `Constructor`);
 *   2. otherwise the first value def (`Const` / `Variable`);
 *   3. otherwise the first def in the group, as a deterministic fallback
 *      for labels this rule does not rank.
 *
 * INPUT CONTRACT: `group` must already be the defs bound to ONE name within
 * ONE scope; the caller forms the group. No range-based dedup is attempted
 * here.
 *
 * Pure. There is no production call site yet — the export is intentional
 * and tracked by #1876 for the follow-up migration.
 *
 * @param group Defs of one binding group, in caller-defined order.
 * @returns The node-bearing def, or `undefined` when `group` is empty.
 */
export function selectNodeBearingDef(group) {
    // Single pass: a callable def wins immediately; meanwhile remember the
    // earliest value def in case no callable def shows up.
    let earliestValueDef;
    for (const candidate of group) {
        if (NODE_BEARING_FUNCTION_LABELS.has(candidate.type)) {
            return candidate;
        }
        if (earliestValueDef === undefined && NODE_BEARING_VALUE_LABELS.has(candidate.type)) {
            earliestValueDef = candidate;
        }
    }
    // `group[0]` is `undefined` for an empty group, which is the documented
    // empty-input result.
    return earliestValueDef !== undefined ? earliestValueDef : group[0];
}
572
625
  function makeDefId(filePath, range, type, name) {
573
626
  return `def:${filePath}#${range.startLine}:${range.startCol}:${type}:${name}`;
574
627
  }
@@ -32,6 +32,22 @@ export interface ExtensionManagerOptions {
32
32
  installExtension?: (extensionName: string, timeoutMs: number) => Promise<ExtensionInstallResult>;
33
33
  warn?: (message: string) => void;
34
34
  }
35
+ export declare const getExtensionInstallPolicy: () => ExtensionInstallPolicy;
36
+ /**
37
+ * Install policy for the **analyze (write) path**.
38
+ *
39
+ * The global default (`resolvePolicyFromEnv`) is `load-only` so serve/query
40
+ * read paths never require outbound network access (PR #1161, offline-first).
41
+ * The analyze path is different: it owns building the search indexes, so it
42
+ * defaults to `auto` — LOAD the extension if present, otherwise attempt one
43
+ * bounded out-of-process INSTALL. This keeps FTS symmetric with the
44
+ * VECTOR/embeddings path (which already defaults to `auto`) and matches the
45
+ * #726 contract. An explicit `GITNEXUS_LBUG_EXTENSION_INSTALL` value still
46
+ * wins, so operators can force `load-only`/`never` for fully offline analyze;
47
+ * `auto` LOADs-first, so offline machines still degrade gracefully when the
48
+ * INSTALL cannot reach the network.
49
+ */
50
+ export declare const resolveAnalyzeInstallPolicy: () => ExtensionInstallPolicy;
35
51
  export declare const getExtensionInstallTimeoutMs: () => number;
36
52
  export declare const getExtensionInstallChildProcessArgs: (extensionName: string, maxDbSize?: number) => string[];
37
53
  /**
@@ -54,7 +70,7 @@ export declare const installDuckDbExtensionOutOfProcess: (extensionName: string,
54
70
  * subsequent analyze or query calls.
55
71
  *
56
72
  * Policy precedence (most specific wins):
57
- * per-call `opts.policy` → constructor `options.policy` → env → `auto`
73
+ * per-call `opts.policy` → constructor `options.policy` → env → `load-only`
58
74
  */
59
75
  export declare class ExtensionManager {
60
76
  private readonly options;
@@ -8,6 +8,27 @@ const alreadyAvailable = (message) => message.includes('already loaded') ||
8
8
  message.includes('already installed') ||
9
9
  message.includes('already exists');
10
10
  const resolvePolicyFromEnv = () => {
11
+ const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
12
+ if (raw === 'load-only' || raw === 'never' || raw === 'auto')
13
+ return raw;
14
+ return 'load-only';
15
+ };
16
+ export const getExtensionInstallPolicy = () => resolvePolicyFromEnv();
17
+ /**
18
+ * Install policy for the **analyze (write) path**.
19
+ *
20
+ * The global default (`resolvePolicyFromEnv`) is `load-only` so serve/query
21
+ * read paths never require outbound network access (PR #1161, offline-first).
22
+ * The analyze path is different: it owns building the search indexes, so it
23
+ * defaults to `auto` — LOAD the extension if present, otherwise attempt one
24
+ * bounded out-of-process INSTALL. This keeps FTS symmetric with the
25
+ * VECTOR/embeddings path (which already defaults to `auto`) and matches the
26
+ * #726 contract. An explicit `GITNEXUS_LBUG_EXTENSION_INSTALL` value still
27
+ * wins, so operators can force `load-only`/`never` for fully offline analyze;
28
+ * `auto` LOADs-first, so offline machines still degrade gracefully when the
29
+ * INSTALL cannot reach the network.
30
+ */
31
+ export const resolveAnalyzeInstallPolicy = () => {
11
32
  const raw = process.env.GITNEXUS_LBUG_EXTENSION_INSTALL;
12
33
  if (raw === 'load-only' || raw === 'never' || raw === 'auto')
13
34
  return raw;
@@ -93,7 +114,7 @@ export const installDuckDbExtensionOutOfProcess = async (extensionName, timeoutM
93
114
  * subsequent analyze or query calls.
94
115
  *
95
116
  * Policy precedence (most specific wins):
96
- * per-call `opts.policy` → constructor `options.policy` → env → `auto`
117
+ * per-call `opts.policy` → constructor `options.policy` → env → `load-only`
97
118
  */
98
119
  export class ExtensionManager {
99
120
  options;
@@ -88,6 +88,13 @@ export interface AnalyzeResult {
88
88
  pipelineResult?: any;
89
89
  /** True when analyze only repaired FTS indexes and skipped pipeline re-analysis. */
90
90
  ftsRepairedOnly?: boolean;
91
+ /**
92
+ * True when the FTS extension was unavailable so search-index creation was
93
+ * skipped (offline-first degradation). The graph is fully queryable; only
94
+ * full-text/BM25 search is disabled. Lets callers (CLI summary, server) and
95
+ * the persisted meta surface the degraded state instead of reporting healthy.
96
+ */
97
+ ftsSkipped?: boolean;
91
98
  }
92
99
  export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
93
100
  export type { EmbeddingMode } from './embedding-mode.js';
@@ -12,8 +12,9 @@ import path from 'path';
12
12
  import fs from 'fs/promises';
13
13
  import { execFileSync } from 'child_process';
14
14
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
15
- import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, deleteNodesForFile, deleteAllCommunitiesAndProcesses, queryImporters, } from './lbug/lbug-adapter.js';
15
+ import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, deleteNodesForFile, deleteAllCommunitiesAndProcesses, queryImporters, loadFTSExtension, } from './lbug/lbug-adapter.js';
16
16
  import { createSearchFTSIndexes, verifySearchFTSIndexes } from './search/fts-indexes.js';
17
+ import { resolveAnalyzeInstallPolicy } from './lbug/extension-loader.js';
17
18
  import { startWalCheckpointDriver, } from './lbug/wal-checkpoint-driver.js';
18
19
  import { getStoragePaths, saveMeta, loadMeta, ensureGitNexusIgnored, registerRepo, cleanupOldKuzuFiles, INCREMENTAL_SCHEMA_VERSION, } from '../storage/repo-manager.js';
19
20
  import { computeFileHashes, diffFileHashes } from '../storage/file-hash.js';
@@ -24,6 +25,15 @@ import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName, resolve
24
25
  import { generateAIContextFiles } from '../cli/ai-context.js';
25
26
  import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
26
27
  import { STALE_HASH_SENTINEL } from './lbug/schema.js';
28
+ /**
29
+ * Logged when the optional FTS extension cannot be loaded or installed during
30
+ * a full analyze. Kept as a named constant so the env-var/command guidance
31
+ * stays in one place (mirrors the VECTOR message in embedding-pipeline.ts).
32
+ */
33
+ const FTS_UNAVAILABLE_MESSAGE = 'FTS extension unavailable; skipping search-index creation. ' +
34
+ 'Full-text/BM25 search will be disabled until the LadybugDB FTS extension is ' +
35
+ 'installed once with network access (GITNEXUS_LBUG_EXTENSION_INSTALL=auto) or ' +
36
+ 'pre-installed for offline use. Run `gitnexus doctor` for details.';
27
37
  // Re-export the pure flag-derivation helper so external callers (and tests)
28
38
  // keep importing from this module's stable surface.
29
39
  export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
@@ -495,21 +505,40 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
495
505
  });
496
506
  }
497
507
  // ── Phase 3: FTS (85–90%) ─────────────────────────────────────────
508
+ // The analyze (write) path owns building the search indexes, so it uses
509
+ // the `auto` install policy (LOAD-first, then one bounded INSTALL) —
510
+ // symmetric with the VECTOR/embeddings path below and consistent with the
511
+ // #726 contract. The global `load-only` default (PR #1161) governs the
512
+ // serve/query read paths, not this one. When the extension still cannot be
513
+ // loaded (genuinely offline + not pre-installed, or policy forced to
514
+ // load-only/never), degrade gracefully — exactly like the VECTOR path — so
515
+ // analyze still produces a fully queryable graph; only full-text/BM25
516
+ // search falls back. `--repair-fts` (whose sole job is FTS) still fails
517
+ // loudly on its own path above.
498
518
  progress('fts', 85, 'Creating search indexes...');
499
- await createSearchFTSIndexes({
500
- onIndexStart: options.verbose
501
- ? (table, indexName) => log(`FTS: creating ${table}.${indexName}`)
502
- : undefined,
503
- onIndexReady: options.verbose
504
- ? (table, indexName) => log(`FTS: ready ${table}.${indexName}`)
505
- : undefined,
519
+ const ftsAvailable = await loadFTSExtension(undefined, {
520
+ policy: resolveAnalyzeInstallPolicy(),
506
521
  });
507
- const missingIndexNames = await verifySearchFTSIndexes(executeQuery);
508
- if (missingIndexNames.length > 0) {
509
- throw new Error(`FTS verification failed - missing indexes after analyze: ${missingIndexNames.join(', ')}. ` +
510
- 'Check FTS extension availability, then retry `gitnexus analyze --force` for a full rebuild.');
522
+ if (ftsAvailable) {
523
+ await createSearchFTSIndexes({
524
+ onIndexStart: options.verbose
525
+ ? (table, indexName) => log(`FTS: creating ${table}.${indexName}`)
526
+ : undefined,
527
+ onIndexReady: options.verbose
528
+ ? (table, indexName) => log(`FTS: ready ${table}.${indexName}`)
529
+ : undefined,
530
+ });
531
+ const missingIndexNames = await verifySearchFTSIndexes(executeQuery);
532
+ if (missingIndexNames.length > 0) {
533
+ throw new Error(`FTS verification failed - missing indexes after analyze: ${missingIndexNames.join(', ')}. ` +
534
+ 'Check FTS extension availability, then retry `gitnexus analyze --force` for a full rebuild.');
535
+ }
536
+ progress('fts', 90, 'Search indexes ready');
537
+ }
538
+ else {
539
+ log(FTS_UNAVAILABLE_MESSAGE);
540
+ progress('fts', 90, 'Search indexes skipped (FTS unavailable)');
511
541
  }
512
- progress('fts', 90, 'Search indexes ready');
513
542
  // ── Phase 3.5: Re-insert cached embeddings ────────────────────────
514
543
  // Runs on BOTH the full-rebuild path and the incremental path:
515
544
  // - Full rebuild: DB was wiped, every cached row needs to come back.
@@ -661,7 +690,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
661
690
  },
662
691
  capabilities: {
663
692
  graph: { provider: 'ladybugdb', status: runtimeCapabilities.graph },
664
- fts: { provider: 'ladybugdb-fts', status: runtimeCapabilities.fts },
693
+ // Reflect what this analyze run actually produced: when the FTS
694
+ // extension was unavailable the indexes were skipped, so record
695
+ // 'unavailable' rather than the static runtime default. Keeps
696
+ // meta.json / `gitnexus doctor` honest about degraded search.
697
+ fts: {
698
+ provider: 'ladybugdb-fts',
699
+ status: ftsAvailable ? runtimeCapabilities.fts : 'unavailable',
700
+ },
665
701
  vectorSearch: {
666
702
  provider: effectiveSemanticMode === 'vector-index' ? 'ladybugdb-vector' : 'exact-scan',
667
703
  status: embeddingCount > 0 ? effectiveSemanticMode : 'unavailable',
@@ -748,6 +784,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
748
784
  repoPath,
749
785
  stats: meta.stats,
750
786
  pipelineResult,
787
+ ftsSkipped: !ftsAvailable,
751
788
  };
752
789
  }
753
790
  catch (err) {
@@ -17,7 +17,22 @@ import type { LocalBackend } from './local/local-backend.js';
17
17
  * Transport-agnostic — caller connects the desired transport.
18
18
  */
19
19
  export declare function createMCPServer(backend: LocalBackend): Server;
20
+ /** Conventional 128 + signal-number exit codes for graceful termination. */
21
+ export declare const SHUTDOWN_EXIT_CODES: {
22
+ readonly SIGINT: 130;
23
+ readonly SIGTERM: 143;
24
+ };
25
+ type SignalRegistrar = (event: 'SIGINT' | 'SIGTERM', listener: (...args: unknown[]) => void) => void;
20
26
  /**
21
- * Start the MCP server on stdio transport (for CLI use).
27
+ * Wire SIGINT/SIGTERM to a graceful shutdown using NUMERIC exit codes.
28
+ *
29
+ * Node invokes signal listeners with the signal NAME string as the first
30
+ * argument, so registering an `(exitCode = 0) => process.exit(exitCode)`
31
+ * shutdown directly passes `'SIGTERM'` into `process.exit()` and crashes with
32
+ * `ERR_INVALID_ARG_TYPE` (#1132). These wrappers discard the signal argument
33
+ * and pass the conventional 128+signal code instead. `on` is injectable so the
34
+ * mapping can be unit-tested without touching the real process.
22
35
  */
36
+ export declare function installSignalShutdown(shutdown: (exitCode?: number) => unknown, on?: SignalRegistrar): void;
23
37
  export declare function startMCPServer(backend: LocalBackend): Promise<void>;
38
+ export {};
@@ -245,6 +245,26 @@ Follow these steps:
245
245
  /**
246
246
  * Start the MCP server on stdio transport (for CLI use).
247
247
  */
248
+ /** Force-exit fallback budget if graceful shutdown cleanup hangs. */
249
+ const SHUTDOWN_FORCE_EXIT_MS = 5_000;
250
+ /** Conventional 128 + signal-number exit codes for graceful termination. */
251
+ export const SHUTDOWN_EXIT_CODES = { SIGINT: 130, SIGTERM: 143 };
252
+ /**
253
+ * Wire SIGINT/SIGTERM to a graceful shutdown using NUMERIC exit codes.
254
+ *
255
+ * Node invokes signal listeners with the signal NAME string as the first
256
+ * argument, so registering an `(exitCode = 0) => process.exit(exitCode)`
257
+ * shutdown directly passes `'SIGTERM'` into `process.exit()` and crashes with
258
+ * `ERR_INVALID_ARG_TYPE` (#1132). These wrappers discard the signal argument
259
+ * and pass the conventional 128+signal code instead. `on` is injectable so the
260
+ * mapping can be unit-tested without touching the real process.
261
+ */
262
+ export function installSignalShutdown(shutdown, on = (event, listener) => {
263
+ process.on(event, listener);
264
+ }) {
265
+ on('SIGINT', () => void shutdown(SHUTDOWN_EXIT_CODES.SIGINT));
266
+ on('SIGTERM', () => void shutdown(SHUTDOWN_EXIT_CODES.SIGTERM));
267
+ }
248
268
  export async function startMCPServer(backend) {
249
269
  const server = createMCPServer(backend);
250
270
  // Idempotent global sentinel install. cli/mcp.ts calls this first thing
@@ -281,6 +301,11 @@ export async function startMCPServer(backend) {
281
301
  if (shuttingDown)
282
302
  return;
283
303
  shuttingDown = true;
304
+ // Safety net: if backend.disconnect()/server.close() hangs, still exit so a
305
+ // SIGINT/SIGTERM reliably terminates the process. Unref'd so the timer alone
306
+ // never keeps the event loop alive.
307
+ const forceExit = setTimeout(() => process.exit(exitCode), SHUTDOWN_FORCE_EXIT_MS);
308
+ forceExit.unref();
284
309
  try {
285
310
  await backend.disconnect();
286
311
  }
@@ -291,24 +316,30 @@ export async function startMCPServer(backend) {
291
316
  catch { }
292
317
  const { flushLoggerSync } = await import('../core/logger.js');
293
318
  flushLoggerSync();
319
+ clearTimeout(forceExit);
294
320
  process.exit(exitCode);
295
321
  };
296
- // Handle graceful shutdown
297
- process.on('SIGINT', shutdown);
298
- process.on('SIGTERM', shutdown);
322
+ // Handle graceful shutdown. Node invokes signal listeners with the signal
323
+ // NAME (e.g. 'SIGTERM') as the first argument; registering `shutdown`
324
+ // directly passed that string to process.exit() and crashed with
325
+ // ERR_INVALID_ARG_TYPE (#1132). Map each signal to its conventional
326
+ // 128+signal exit code instead.
327
+ installSignalShutdown(shutdown);
299
328
  // Log crashes to stderr so they aren't silently lost.
300
329
  // uncaughtException is fatal — shut down.
301
330
  // unhandledRejection is logged but kept non-fatal (availability-first):
302
331
  // killing the server for one missed catch would be worse than logging it.
303
332
  process.on('uncaughtException', (err) => {
304
333
  process.stderr.write(`GitNexus MCP uncaughtException: ${err?.stack || err}\n`);
305
- shutdown(1);
334
+ void shutdown(1);
306
335
  });
307
336
  process.on('unhandledRejection', (reason) => {
308
337
  process.stderr.write(`GitNexus MCP unhandledRejection: ${reason?.stack || reason}\n`);
309
338
  });
310
- // Handle stdio errors — stdin close means the parent process is gone
311
- process.stdin.on('end', shutdown);
312
- process.stdin.on('error', () => shutdown());
313
- process.stdout.on('error', () => shutdown());
339
+ // Handle stdio errors — stdin close means the parent process is gone.
340
+ // Wrap so the event payload (e.g. an Error for 'error') can never reach
341
+ // process.exit() as a non-numeric exit code, and void the returned promise.
342
+ process.stdin.on('end', () => void shutdown(0));
343
+ process.stdin.on('error', () => void shutdown(0));
344
+ process.stdout.on('error', () => void shutdown(0));
314
345
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.6-rc.89",
3
+ "version": "1.6.6-rc.90",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",