gitnexus 1.6.8-rc.46 → 1.6.8-rc.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. package/dist/_shared/scope-resolution/symbol-definition.d.ts +2 -0
  2. package/dist/_shared/scope-resolution/symbol-definition.d.ts.map +1 -1
  3. package/dist/core/ingestion/languages/cpp/arity-metadata.js +35 -0
  4. package/dist/core/ingestion/languages/cpp/captures.js +24 -0
  5. package/dist/core/ingestion/languages/cpp/conversion-rank.d.ts +2 -0
  6. package/dist/core/ingestion/languages/cpp/conversion-rank.js +89 -0
  7. package/dist/core/ingestion/languages/cpp/inline-namespaces.js +7 -2
  8. package/dist/core/ingestion/languages/cpp/member-lookup.js +2 -1
  9. package/dist/core/ingestion/languages/cpp/scope-resolver.js +2 -1
  10. package/dist/core/ingestion/scope-extractor.js +9 -2
  11. package/dist/core/ingestion/scope-resolution/contract/scope-resolver.d.ts +9 -0
  12. package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.d.ts +3 -1
  13. package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.js +9 -2
  14. package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.d.ts +4 -0
  15. package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.js +9 -0
  16. package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.d.ts +1 -1
  17. package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.js +4 -0
  18. package/dist/core/ingestion/scope-resolution/pipeline/run.js +1 -0
  19. package/dist/core/ingestion/type-extractors/c-cpp.js +21 -0
  20. package/dist/core/lbug/csv-generator.d.ts +18 -1
  21. package/dist/core/lbug/csv-generator.js +60 -25
  22. package/dist/core/lbug/lbug-adapter.d.ts +15 -0
  23. package/dist/core/lbug/lbug-adapter.js +162 -57
  24. package/package.json +1 -1
@@ -18,6 +18,8 @@ export interface ParameterTypeClass {
18
18
  indirection: 'value' | 'lvalue-ref' | 'rvalue-ref' | 'pointer' | 'unknown';
19
19
  /** Number of pointer markers when indirection is `pointer`; otherwise 0. */
20
20
  pointerDepth: number;
21
+ /** Normalized top-level template arguments, when a language preserves them. */
22
+ templateArguments?: string[];
21
23
  }
22
24
  export interface SymbolDefinition {
23
25
  nodeId: string;
@@ -1 +1 @@
1
- {"version":3,"file":"symbol-definition.d.ts","sourceRoot":"","sources":["../../src/scope-resolution/symbol-definition.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IACjC,wFAAwF;IACxF,IAAI,EAAE,MAAM,CAAC;IACb,8EAA8E;IAC9E,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,UAAU,GAAG,gBAAgB,GAAG,SAAS,CAAC;IACjE,4CAA4C;IAC5C,WAAW,EAAE,OAAO,GAAG,YAAY,GAAG,YAAY,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3E,4EAA4E;IAC5E,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,SAAS,CAAC;IAChB;;+FAE2F;IAC3F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;iHAC6G;IAC7G,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC;uFACmF;IACnF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B;4FACwF;IACxF,oBAAoB,CAAC,EAAE,kBAAkB,EAAE,CAAC;IAC5C,6EAA6E;IAC7E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gGAAgG;IAChG,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gGAAgG;IAChG,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;;;;;yEAKqE;IACrE,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B;;4DAEwD;IACxD,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;mEAE+D;IAC/D,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,4EAA4E;IAC5E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;iEAO6D;IAC7D,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B"}
1
+ {"version":3,"file":"symbol-definition.d.ts","sourceRoot":"","sources":["../../src/scope-resolution/symbol-definition.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IACjC,wFAAwF;IACxF,IAAI,EAAE,MAAM,CAAC;IACb,8EAA8E;IAC9E,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,UAAU,GAAG,gBAAgB,GAAG,SAAS,CAAC;IACjE,4CAA4C;IAC5C,WAAW,EAAE,OAAO,GAAG,YAAY,GAAG,YAAY,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3E,4EAA4E;IAC5E,YAAY,EAAE,MAAM,CAAC;IACrB,+EAA+E;IAC/E,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,SAAS,CAAC;IAChB;;+FAE2F;IAC3F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;iHAC6G;IAC7G,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC;uFACmF;IACnF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B;4FACwF;IACxF,oBAAoB,CAAC,EAAE,kBAAkB,EAAE,CAAC;IAC5C,6EAA6E;IAC7E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gGAAgG;IAChG,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gGAAgG;IAChG,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;;;;;yEAKqE;IACrE,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B;;4DAEwD;IACxD,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;mEAE+D;IAC/D,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,4EAA4E;IAC5E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;iEAO6D;IAC7D,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B"}
@@ -173,6 +173,7 @@ export function classifyCppParameterType(rawType, declaratorText, fullParameterT
173
173
  cv,
174
174
  indirection,
175
175
  pointerDepth,
176
+ ...templateArgumentsFor(`${source} ${rawType} ${declaratorText ?? ''}`),
176
177
  };
177
178
  }
178
179
  function unknownTypeClass(base) {
@@ -183,6 +184,40 @@ function unknownTypeClass(base) {
183
184
  pointerDepth: 0,
184
185
  };
185
186
  }
187
/**
 * Build the optional `templateArguments` fragment for a parameter type class.
 *
 * The result is meant to be spread into a larger object literal, so "no
 * template arguments" is expressed as an empty object rather than
 * `{ templateArguments: undefined }`.
 *
 * @param rawType Type text to scan for a template argument list.
 * @returns `{ templateArguments }` when `parseTopLevelTemplateArguments`
 *   yields a list, otherwise `{}`.
 */
function templateArgumentsFor(rawType) {
    const parsed = parseTopLevelTemplateArguments(rawType);
    if (parsed === undefined) {
        return {};
    }
    return { templateArguments: parsed };
}
191
/**
 * Split the first template argument list found in `rawType` into its
 * top-level arguments.
 *
 * Scanning starts at the first `<` and stops at the `>` that closes it.
 * Commas only separate arguments when they are not nested inside another
 * template argument list or inside `()`, `[]` or `{}`. Function-type
 * arguments such as `std::function<void(int, int)>` therefore stay in one
 * piece. The `>` of a trailing-return arrow (`->`) is not treated as a
 * closing bracket.
 *
 * Each argument is trimmed and passed through `normalizeCppParamType`
 * (defined elsewhere in this module; its exact normalization is not visible
 * here). Empty arguments are dropped.
 *
 * @param rawType Type text that may contain a template argument list.
 * @returns The normalized top-level arguments, or `undefined` when there is
 *   no `<`, the list is never closed, or it contains no non-empty argument.
 */
function parseTopLevelTemplateArguments(rawType) {
    const open = rawType.indexOf('<');
    if (open < 0)
        return undefined;
    const args = [];
    let angleDepth = 0;
    // Nesting level of (), [] and {} — delimiters inside these never count.
    let groupDepth = 0;
    let argStart = open + 1;
    const pushArgument = (end) => {
        const text = rawType.slice(argStart, end).trim();
        if (text.length > 0)
            args.push(normalizeCppParamType(text));
    };
    for (let i = open + 1; i < rawType.length; i++) {
        const ch = rawType[i];
        if (ch === '(' || ch === '[' || ch === '{') {
            groupDepth++;
            continue;
        }
        if (ch === ')' || ch === ']' || ch === '}') {
            // Tolerate stray closers instead of going negative.
            if (groupDepth > 0)
                groupDepth--;
            continue;
        }
        if (groupDepth > 0)
            continue;
        if (ch === '<') {
            angleDepth++;
        }
        else if (ch === '>') {
            // `->` introduces a trailing return type; it is not a bracket.
            if (rawType[i - 1] === '-')
                continue;
            if (angleDepth === 0) {
                pushArgument(i);
                return args.length > 0 ? args : undefined;
            }
            angleDepth--;
        }
        else if (ch === ',' && angleDepth === 0) {
            pushArgument(i);
            argStart = i + 1;
        }
    }
    // Ran off the end without finding the closing `>`.
    return undefined;
}
186
221
  function findFuncDeclarator(node) {
187
222
  let decl = node.childForFieldName('declarator');
188
223
  if (decl === null) {
@@ -11,6 +11,7 @@ import { markCppAdlSiteArgs, markCppAdlSiteNoAdl } from './adl.js';
11
11
  import { markCppInlineNamespaceRange } from './inline-namespaces.js';
12
12
  import { extractCppTemplateConstraints } from './constraint-extractor.js';
13
13
  import { captureCppMemberLookupFacts } from './member-lookup.js';
14
+ import { CPP_BRACED_INIT_TYPE_PREFIX } from './conversion-rank.js';
14
15
  export function emitCppScopeCaptures(sourceText, filePath, cachedTree) {
15
16
  let tree = cachedTree;
16
17
  if (tree === undefined) {
@@ -906,6 +907,8 @@ function unknownTypeClass(base) {
906
907
  */
907
908
  function inferCppLiteralType(node) {
908
909
  switch (node.type) {
910
+ case 'initializer_list':
911
+ return inferCppBracedInitType(node);
909
912
  case 'number_literal': {
910
913
  const text = node.text;
911
914
  // Floating-point literals contain '.', 'e', 'E', or end with 'f'/'F'
@@ -934,6 +937,27 @@ function inferCppLiteralType(node) {
934
937
  return '';
935
938
  }
936
939
  }
940
/**
 * Infer a sentinel argument type for a braced initializer list node.
 *
 * The result has the form `<CPP_BRACED_INIT_TYPE_PREFIX><elementType>:<count>`:
 * - every element has the same inferred literal type: that type and the
 *   number of elements;
 * - the list is empty: `unknown:0`;
 * - element types differ: `unknown` and the number of elements;
 * - an element's type cannot be inferred, or the element is itself a braced
 *   list: `unknown` and the 1-based position of that element (scanning
 *   stops there).
 *
 * Punctuation tokens and `comment` nodes are skipped, so a comment inside
 * the braces does not turn an otherwise homogeneous list into `unknown`.
 *
 * @param node Syntax node exposing `childCount` and `child(i)`.
 * @returns The braced-init sentinel type string.
 */
function inferCppBracedInitType(node) {
    const elementTypes = [];
    for (let i = 0; i < node.childCount; i++) {
        const child = node.child(i);
        if (child === null)
            continue;
        const kind = child.type;
        if (kind === ',' || kind === '{' || kind === '}' || kind === 'comment')
            continue;
        const elementType = inferCppLiteralType(child);
        // '' means "not a recognized literal"; a nested braced list is not
        // modeled either, so both collapse to the unknown sentinel.
        if (elementType === '' || elementType.startsWith(CPP_BRACED_INIT_TYPE_PREFIX)) {
            return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${elementTypes.length + 1}`;
        }
        elementTypes.push(elementType);
    }
    if (elementTypes.length === 0)
        return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:0`;
    const first = elementTypes[0];
    return elementTypes.every((type) => type === first)
        ? `${CPP_BRACED_INIT_TYPE_PREFIX}${first}:${elementTypes.length}`
        : `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${elementTypes.length}`;
}
937
961
  /**
938
962
  * Look up the declared type of a variable by scanning sibling declarations
939
963
  * in the enclosing compound_statement (function body). Handles:
@@ -20,6 +20,8 @@
20
20
  * their own `ConversionRankFn` in the future.
21
21
  */
22
22
  import type { ParameterTypeClass } from '../../../../_shared/index.js';
23
+ export declare const CPP_BRACED_INIT_TYPE_PREFIX = "braced-init:";
24
+ export declare const CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES: readonly ["braced-init:"];
23
25
  /**
24
26
  * Return the conversion rank from `argType` to `paramType`.
25
27
  *
@@ -19,6 +19,7 @@
19
19
  * This function is intentionally C++-specific. Other languages may define
20
20
  * their own `ConversionRankFn` in the future.
21
21
  */
22
+ import { normalizeCppParamType } from './arity-metadata.js';
22
23
  import { hasCppUserDefinedConversion } from './user-defined-conversions.js';
23
24
  /** Set of normalized arithmetic types that support implicit conversion. */
24
25
  const ARITHMETIC = new Set(['int', 'double', 'char', 'bool']);
@@ -27,6 +28,22 @@ const INTEGRAL_PROMOTION = new Map([
27
28
  ['char', 'int'],
28
29
  ['bool', 'int'],
29
30
  ]);
31
/** Prefix marking an argument type string as a braced-init sentinel. */
export const CPP_BRACED_INIT_TYPE_PREFIX = 'braced-init:';
/** Argument-type prefixes that can only be matched through conversion ranking. */
export const CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES = [CPP_BRACED_INIT_TYPE_PREFIX];
/**
 * Container base names that a braced list may initialize, listed both with
 * and without the `std::` qualifier. Only containers whose element type is
 * their first template argument belong here (so no `map`-like types).
 */
const BRACED_INIT_CONTAINER_TYPES = new Set([
    'array',
    'deque',
    'forward_list',
    'list',
    'multiset',
    'set',
    'std::array',
    'std::deque',
    'std::forward_list',
    'std::list',
    'std::multiset',
    'std::set',
    'std::unordered_multiset',
    'std::unordered_set',
    'std::vector',
    'unordered_multiset',
    'unordered_set',
    'vector',
]);
30
47
  /**
31
48
  * Return the conversion rank from `argType` to `paramType`.
32
49
  *
@@ -36,6 +53,17 @@ const INTEGRAL_PROMOTION = new Map([
36
53
  * for mismatch.
37
54
  */
38
55
  export function cppConversionRank(argType, paramType, argTypeClass, paramTypeClass) {
56
+ const bracedInitType = parseBracedInitArgType(argType);
57
+ if (bracedInitType !== undefined) {
58
+ if (bracedInitType.elementType === 'unknown')
59
+ return Infinity;
60
+ if (bracedInitType.elementCount === 1) {
61
+ const scalarRank = cppConversionRank(bracedInitType.elementType, paramType, undefined, paramTypeClass);
62
+ if (isFinite(scalarRank))
63
+ return scalarRank;
64
+ }
65
+ return bracedInitConversionRank(paramType, bracedInitType, paramTypeClass);
66
+ }
39
67
  if (argType === paramType) {
40
68
  return exactShapeCompatible(argTypeClass, paramTypeClass) ? 0 : Infinity;
41
69
  }
@@ -57,6 +85,67 @@ export function cppConversionRank(argType, paramType, argTypeClass, paramTypeCla
57
85
  return 4;
58
86
  return Infinity;
59
87
  }
88
/**
 * Decode a braced-init sentinel argument type.
 *
 * The payload after `CPP_BRACED_INIT_TYPE_PREFIX` is `<elementType>:<count>`.
 * The count is whatever follows the last `:` and is recognized only when it
 * consists solely of digits and something precedes that colon. Element types
 * containing `::` are therefore preserved.
 *
 * @param argType Argument type string from the call site.
 * @returns `undefined` when `argType` is not a braced-init sentinel or has an
 *   empty payload; otherwise `{ elementType, elementCount }`, or just
 *   `{ elementType }` when no trailing count is present.
 */
function parseBracedInitArgType(argType) {
    const isSentinel = argType.startsWith(CPP_BRACED_INIT_TYPE_PREFIX);
    if (!isSentinel) {
        return undefined;
    }
    const payload = argType.slice(CPP_BRACED_INIT_TYPE_PREFIX.length);
    if (payload === '') {
        return undefined;
    }
    // Greedy prefix + digits-only suffix pins the match to the last colon.
    const withCount = /^([\s\S]+):(\d+)$/.exec(payload);
    if (withCount === null) {
        return { elementType: payload };
    }
    return {
        elementType: withCount[1],
        elementCount: Number(withCount[2]),
    };
}
106
/**
 * Rank the conversion from a braced initializer list to a parameter type.
 *
 * @param paramType Parameter type text.
 * @param argType Parsed braced-init descriptor (see `parseBracedInitArgType`).
 * @param paramTypeClass Optional parameter shape, forwarded to the
 *   element-type check.
 * @returns `0` for an `initializer_list` target, `4` for a target listed in
 *   `BRACED_INIT_CONTAINER_TYPES` — in both cases only when the element type
 *   converts to the target's value type — and `Infinity` otherwise.
 */
function bracedInitConversionRank(paramType, argType, paramTypeClass) {
    const targetBase = bracedInitTargetBase(paramType);
    let rankWhenElementsMatch;
    switch (targetBase) {
        case 'initializer_list':
        case 'std::initializer_list':
            rankWhenElementsMatch = 0;
            break;
        default:
            if (!BRACED_INIT_CONTAINER_TYPES.has(targetBase)) {
                // Not a type a braced list is known to initialize.
                return Infinity;
            }
            rankWhenElementsMatch = 4;
    }
    return bracedInitValueTypeMatches(paramType, argType, paramTypeClass)
        ? rankWhenElementsMatch
        : Infinity;
}
116
/**
 * Check whether a braced list's element type converts to the value type of
 * the target parameter.
 *
 * @param paramType Parameter type text.
 * @param argType Parsed braced-init descriptor; only `elementType` is read.
 * @param paramTypeClass Optional parameter shape used as a fallback source
 *   for the value type.
 * @returns `true` when a value type is known and `cppConversionRank` from the
 *   element type to it is finite; `false` otherwise.
 */
function bracedInitValueTypeMatches(paramType, argType, paramTypeClass) {
    const targetValueType = bracedInitTargetValueType(paramType, paramTypeClass);
    return (targetValueType !== undefined &&
        isFinite(cppConversionRank(argType.elementType, targetValueType)));
}
122
/**
 * Determine the value (element) type of a braced-init target parameter.
 *
 * The first template argument found in the type text wins. When the text
 * carries none, the first entry of `paramTypeClass.templateArguments` is
 * used instead.
 *
 * @param paramType Parameter type text.
 * @param paramTypeClass Optional parameter shape with `templateArguments`.
 * @returns The value type, or `undefined` when neither source provides one.
 */
function bracedInitTargetValueType(paramType, paramTypeClass) {
    const fromTypeText = firstTemplateArgument(paramType);
    if (fromTypeText !== undefined && fromTypeText !== null) {
        return fromTypeText;
    }
    return paramTypeClass?.templateArguments?.[0];
}
125
/**
 * Extract the first top-level template argument from a type string.
 *
 * Scanning starts at the first `<`. The argument ends at the first comma or
 * closing `>` that is not nested inside another template argument list or
 * inside `()`, `[]` or `{}`, so arguments such as `decltype(f(1, 2))` or
 * `void(int, int)` are kept whole. The `>` of a trailing-return arrow (`->`)
 * is not treated as a closing bracket.
 *
 * @param rawType Type text that may contain a template argument list.
 * @returns The first argument passed through `bracedInitTargetBase`, or
 *   `undefined` when there is no `<` or the list is never terminated.
 */
function firstTemplateArgument(rawType) {
    const open = rawType.indexOf('<');
    if (open < 0)
        return undefined;
    let angleDepth = 0;
    // Nesting level of (), [] and {} — delimiters inside these never count.
    let groupDepth = 0;
    for (let i = open + 1; i < rawType.length; i++) {
        const ch = rawType[i];
        if (ch === '(' || ch === '[' || ch === '{') {
            groupDepth++;
            continue;
        }
        if (ch === ')' || ch === ']' || ch === '}') {
            // Tolerate stray closers instead of going negative.
            if (groupDepth > 0)
                groupDepth--;
            continue;
        }
        if (groupDepth > 0)
            continue;
        if (ch === '<') {
            angleDepth++;
        }
        else if (ch === '>') {
            // `->` introduces a trailing return type; it is not a bracket.
            if (rawType[i - 1] === '-')
                continue;
            if (angleDepth === 0)
                return bracedInitTargetBase(rawType.slice(open + 1, i));
            angleDepth--;
        }
        else if (ch === ',' && angleDepth === 0) {
            return bracedInitTargetBase(rawType.slice(open + 1, i));
        }
    }
    // Ran off the end without finding a terminator.
    return undefined;
}
146
/**
 * Normalize a parameter type for braced-init target comparisons.
 *
 * Thin wrapper over `normalizeCppParamType`; the normalization rules
 * themselves live in that helper and are not visible here.
 *
 * @param paramType Parameter type text.
 * @returns Whatever `normalizeCppParamType` returns for `paramType`.
 */
function bracedInitTargetBase(paramType) {
    const normalized = normalizeCppParamType(paramType);
    return normalized;
}
60
149
  function isPointer(typeClass) {
61
150
  return typeClass?.indirection === 'pointer' && typeClass.pointerDepth > 0;
62
151
  }
@@ -27,7 +27,7 @@
27
27
  * declaration transparently.
28
28
  */
29
29
  import { isOverloadAmbiguousAfterNormalization, narrowOverloadCandidates, } from '../../scope-resolution/passes/overload-narrowing.js';
30
- import { cppConversionRank } from './conversion-rank.js';
30
+ import { CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES, cppConversionRank } from './conversion-rank.js';
31
31
  const inlineNamespaceRangesByFile = new Map();
32
32
  const inlineNamespaceScopeIds = new Set();
33
33
  function rangeKey(r) {
@@ -142,7 +142,12 @@ export function resolveCppQualifiedNamespaceMember(receiverName, memberName, par
142
142
  // can disambiguate via exact-type match and, when available, conversion-rank
143
143
  // scoring (`cppConversionRank`). Same-signature ambiguity is still detected
144
144
  // by `isOverloadAmbiguousAfterNormalization` below.
145
- const narrowed = narrowOverloadCandidates(allHits, callsite?.arity, callsite?.argumentTypes, callsite !== undefined ? { conversionRankFn: cppConversionRank } : undefined);
145
+ const narrowed = narrowOverloadCandidates(allHits, callsite?.arity, callsite?.argumentTypes, callsite !== undefined
146
+ ? {
147
+ conversionRankFn: cppConversionRank,
148
+ conversionOnlyArgTypePrefixes: CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES,
149
+ }
150
+ : undefined);
146
151
  if (narrowed.length === 1)
147
152
  return narrowed[0];
148
153
  if (narrowed.length === 0)
@@ -3,7 +3,7 @@ import { buildMro, defaultLinearize } from '../../scope-resolution/passes/mro.js
3
3
  import { isOverloadAmbiguousAfterNormalization, narrowOverloadCandidates, } from '../../scope-resolution/passes/overload-narrowing.js';
4
4
  import { isClassLike } from '../../scope-resolution/scope/walkers.js';
5
5
  import { cppConstraintCompatibility } from './constraint-filter.js';
6
- import { cppConversionRank } from './conversion-rank.js';
6
+ import { CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES, cppConversionRank } from './conversion-rank.js';
7
7
  const capturedByFile = new Map();
8
8
  let directParentsByDefId = new Map();
9
9
  let virtualEdges = new Set();
@@ -193,6 +193,7 @@ function chooseOverload(candidates, callsite) {
193
193
  const narrowed = narrowOverloadCandidates(candidates, callsite.arity, callsite.argumentTypes, {
194
194
  argumentTypeClasses: callsite.argumentTypeClasses,
195
195
  conversionRankFn: cppConversionRank,
196
+ conversionOnlyArgTypePrefixes: CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES,
196
197
  constraintCompatibility: cppConstraintCompatibility,
197
198
  });
198
199
  if (narrowed.length === 1)
@@ -3,7 +3,7 @@ import { SupportedLanguages } from '../../../../_shared/index.js';
3
3
  import { populateClassOwnedMembers, tagNamespacePrefixes, } from '../../scope-resolution/scope/walkers.js';
4
4
  import { cppProvider } from '../c-cpp.js';
5
5
  import { cppArityCompatibility } from './arity.js';
6
- import { cppConversionRank } from './conversion-rank.js';
6
+ import { CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES, cppConversionRank } from './conversion-rank.js';
7
7
  import { cppMergeBindings } from './merge-bindings.js';
8
8
  import { resolveCppImportTarget } from './import-target.js';
9
9
  import { scanCppHeaderFiles } from './header-scan.js';
@@ -205,6 +205,7 @@ export const cppScopeResolver = {
205
205
  // Disambiguates `f(int)` vs `f(double)` called with `f(2.5)` by scoring
206
206
  // each candidate's conversion cost; exact match wins over standard conversion.
207
207
  conversionRankFn: cppConversionRank,
208
+ conversionOnlyArgTypePrefixes: CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES,
208
209
  // Range-for element type inference: for (auto& user : users) → bind user to User
209
210
  populateRangeBindings: populateCppRangeBindings,
210
211
  // C++ method return-type bindings need to be visible from module scope
@@ -477,12 +477,19 @@ function parseJsonParameterTypeClassesCapture(cap) {
477
477
  if (typeof o.pointerDepth !== 'number' || !Number.isFinite(o.pointerDepth)) {
478
478
  return undefined;
479
479
  }
480
- out.push({
480
+ const shape = {
481
481
  base: o.base,
482
482
  cv: o.cv,
483
483
  indirection: o.indirection,
484
484
  pointerDepth: o.pointerDepth,
485
- });
485
+ };
486
+ if (Array.isArray(o.templateArguments)) {
487
+ if (!o.templateArguments.every((x) => typeof x === 'string')) {
488
+ return undefined;
489
+ }
490
+ shape.templateArguments = [...o.templateArguments];
491
+ }
492
+ out.push(shape);
486
493
  }
487
494
  return out;
488
495
  }
@@ -618,6 +618,15 @@ export interface ScopeResolver {
618
618
  * `cppConversionRank`; other languages define their own if needed.
619
619
  */
620
620
  readonly conversionRankFn?: ConversionRankFn;
621
+ /**
622
+ * Optional per-language argument-type prefixes for conversion-only
623
+ * argument sentinels. When ranking cannot find any viable candidate
624
+ * for a multi-overload set containing one of these sentinels, shared
625
+ * narrowing suppresses the ambiguous set instead of falling back to
626
+ * arity-only candidates. Languages without such sentinels leave this
627
+ * undefined.
628
+ */
629
+ readonly conversionOnlyArgTypePrefixes?: readonly string[];
621
630
  /**
622
631
  * Optional predicate to identify definitions with file-local linkage
623
632
  * (e.g. C `static` functions). When provided, `pickUniqueGlobalCallable`
@@ -49,6 +49,7 @@ export declare function emitFreeCallFallback(graph: KnowledgeGraph, scopes: Scop
49
49
  };
50
50
  }, callerParsed: ParsedFile, scopes: ScopeResolutionIndexes, parsedFiles: readonly ParsedFile[]) => readonly SymbolDefinition[] | undefined;
51
51
  readonly conversionRankFn?: ConversionRankFn;
52
+ readonly conversionOnlyArgTypePrefixes?: readonly string[];
52
53
  /** Optional per-language constraint hook threaded into
53
54
  * `narrowOverloadCandidates`. Drops candidates whose template
54
55
  * constraints (e.g. C++ `enable_if_t`, C++20 `requires`) provably
@@ -99,7 +100,7 @@ export declare function buildGlobalClassIndex(scopes: ScopeResolutionIndexes): R
99
100
  * order. Exported for unit testing — the `scopeDefsCache` equivalence is
100
101
  * exercised via synthetic stubs in `pick-unique-global-callable.test.ts`.
101
102
  */
102
- export declare function pickUniqueGlobalCallable(name: string, model: SemanticModel, globalCallablesBySimpleName: ReadonlyMap<string, readonly SymbolDefinition[]>, callerFilePath: string, isFileLocalDef?: (def: SymbolDefinition) => boolean, callArity?: number, isCallerVisible?: (candidate: SymbolDefinition) => boolean, callArgTypes?: readonly string[], callArgTypeClasses?: readonly ParameterTypeClass[], conversionRankFn?: ConversionRankFn, scopeDefsCache?: Map<string, readonly SymbolDefinition[]>): SymbolDefinition | undefined;
103
+ export declare function pickUniqueGlobalCallable(name: string, model: SemanticModel, globalCallablesBySimpleName: ReadonlyMap<string, readonly SymbolDefinition[]>, callerFilePath: string, isFileLocalDef?: (def: SymbolDefinition) => boolean, callArity?: number, isCallerVisible?: (candidate: SymbolDefinition) => boolean, callArgTypes?: readonly string[], callArgTypeClasses?: readonly ParameterTypeClass[], conversionRankFn?: ConversionRankFn, scopeDefsCache?: Map<string, readonly SymbolDefinition[]>, conversionOnlyArgTypePrefixes?: readonly string[]): SymbolDefinition | undefined;
103
104
  /** Find a unique workspace-wide class-like def by simple name, for a
104
105
  * constructor-form call `Type(...)` whose type lives outside the call
105
106
  * site's lexical bindings (a sibling/imported file). Returns the def
@@ -142,5 +143,6 @@ export declare function pickImplicitThisOverload(site: {
142
143
  readonly argumentTypeClasses?: readonly import('../../../../_shared/index.js').ParameterTypeClass[];
143
144
  }, scopes: ScopeResolutionIndexes, workspaceIndex: WorkspaceResolutionIndex, model: SemanticModel, hookCtx?: {
144
145
  readonly conversionRankFn?: ConversionRankFn;
146
+ readonly conversionOnlyArgTypePrefixes?: readonly string[];
145
147
  readonly constraintCompatibility?: ScopeResolver['constraintCompatibility'];
146
148
  }): SymbolDefinition | undefined;
@@ -89,6 +89,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
89
89
  if (fnDef === undefined) {
90
90
  fnDef = pickImplicitThisOverload(site, scopes, workspaceIndex, model, {
91
91
  conversionRankFn: options.conversionRankFn,
92
+ conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
92
93
  constraintCompatibility: options.constraintCompatibility,
93
94
  });
94
95
  fnDefFromImplicitThis = fnDef !== undefined;
@@ -112,6 +113,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
112
113
  const narrowed = narrowOverloadCandidates(allCallables, site.arity, site.argumentTypes, {
113
114
  argumentTypeClasses: site.argumentTypeClasses,
114
115
  conversionRankFn: options.conversionRankFn,
116
+ conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
115
117
  constraintCompatibility: options.constraintCompatibility,
116
118
  });
117
119
  if (narrowed.length === 1) {
@@ -190,6 +192,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
190
192
  const narrowed = narrowOverloadCandidates(ordinary, site.arity, site.argumentTypes, {
191
193
  argumentTypeClasses: site.argumentTypeClasses,
192
194
  conversionRankFn: options.conversionRankFn,
195
+ conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
193
196
  constraintCompatibility: options.constraintCompatibility,
194
197
  });
195
198
  if (narrowed.length === 1) {
@@ -240,6 +243,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
240
243
  const narrowed = narrowOverloadCandidates(merged, site.arity, site.argumentTypes, {
241
244
  argumentTypeClasses: site.argumentTypeClasses,
242
245
  conversionRankFn: options.conversionRankFn,
246
+ conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
243
247
  constraintCompatibility: options.constraintCompatibility,
244
248
  });
245
249
  if (narrowed.length === 1) {
@@ -285,7 +289,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
285
289
  callerScope: site.inScope,
286
290
  scopes,
287
291
  })
288
- : undefined, site.argumentTypes, site.argumentTypeClasses, options.conversionRankFn, scopeDefsCache);
292
+ : undefined, site.argumentTypes, site.argumentTypeClasses, options.conversionRankFn, scopeDefsCache, options.conversionOnlyArgTypePrefixes);
289
293
  }
290
294
  if (fnDef === undefined)
291
295
  continue;
@@ -442,7 +446,7 @@ export function buildGlobalClassIndex(scopes) {
442
446
  * order. Exported for unit testing — the `scopeDefsCache` equivalence is
443
447
  * exercised via synthetic stubs in `pick-unique-global-callable.test.ts`.
444
448
  */
445
- export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleName, callerFilePath, isFileLocalDef, callArity, isCallerVisible, callArgTypes, callArgTypeClasses, conversionRankFn, scopeDefsCache) {
449
+ export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleName, callerFilePath, isFileLocalDef, callArity, isCallerVisible, callArgTypes, callArgTypeClasses, conversionRankFn, scopeDefsCache, conversionOnlyArgTypePrefixes) {
446
450
  // The scope-index candidate list is a pure function of (name, callerFilePath):
447
451
  // the same-name bucket is fixed for the pass, the file-local filter depends
448
452
  // only on the candidate + callerFilePath, and the logical-key dedup is
@@ -504,6 +508,7 @@ export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleNam
504
508
  const narrowed = narrowOverloadCandidates(scopeDefs, callArity, callArgTypes, {
505
509
  argumentTypeClasses: callArgTypeClasses,
506
510
  conversionRankFn,
511
+ conversionOnlyArgTypePrefixes,
507
512
  });
508
513
  if (narrowed.length === 1)
509
514
  return narrowed[0];
@@ -545,6 +550,7 @@ export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleNam
545
550
  const narrowed = narrowOverloadCandidates(defs, callArity, callArgTypes, {
546
551
  argumentTypeClasses: callArgTypeClasses,
547
552
  conversionRankFn,
553
+ conversionOnlyArgTypePrefixes,
548
554
  });
549
555
  if (narrowed.length === 1)
550
556
  return narrowed[0];
@@ -687,6 +693,7 @@ export function pickImplicitThisOverload(site, scopes, workspaceIndex, model, ho
687
693
  const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
688
694
  argumentTypeClasses: site.argumentTypeClasses,
689
695
  conversionRankFn: hookCtx?.conversionRankFn,
696
+ conversionOnlyArgTypePrefixes: hookCtx?.conversionOnlyArgTypePrefixes,
690
697
  constraintCompatibility: hookCtx?.constraintCompatibility,
691
698
  });
692
699
  if (candidates.length !== 1)
@@ -69,6 +69,10 @@ export interface OverloadNarrowingHookCtx {
69
69
  /** Conversion-rank scoring fallback (step 4b). Engages when the
70
70
  * exact-type filter rejects every candidate. */
71
71
  readonly conversionRankFn?: ConversionRankFn;
72
+ /** Per-language argument-type prefixes whose conversion-rank failures
73
+ * should suppress genuinely ambiguous multi-overload sets instead of
74
+ * falling back to arity-only candidates. */
75
+ readonly conversionOnlyArgTypePrefixes?: readonly string[];
72
76
  /** Constraint filter (step 4c). Drops candidates whose template
73
77
  * guards (SFINAE `enable_if_t`, C++20 `requires`, future Rust
74
78
  * trait bounds, etc.) provably fail at the call site. Three-valued
@@ -106,6 +106,10 @@ export function narrowOverloadCandidates(overloads, argCount, argTypes, hookCtx)
106
106
  const ranked = rankByConversion(candidates, argTypes, hookCtx.conversionRankFn, hookCtx.argumentTypeClasses);
107
107
  if (ranked.length > 0)
108
108
  result = ranked;
109
+ else if (candidates.length > 1 &&
110
+ hasConversionOnlyArgType(argTypes, hookCtx.conversionOnlyArgTypePrefixes)) {
111
+ result = [];
112
+ }
109
113
  }
110
114
  }
111
115
  // Constraint filter (step 4c; Tier-A — SFINAE / `requires` clauses).
@@ -145,6 +149,11 @@ export function narrowOverloadCandidates(overloads, argCount, argTypes, hookCtx)
145
149
  }
146
150
  return result;
147
151
  }
152
/**
 * Report whether any call-site argument type is a conversion-only sentinel.
 *
 * An argument type counts when it starts with one of `prefixes`.
 *
 * @param argTypes Argument type strings from the call site. May be
 *   `undefined`, because callers declare their argument types as optional.
 * @param prefixes Per-language sentinel prefixes; `undefined` or empty means
 *   the language has none.
 * @returns `true` when at least one argument type starts with a prefix;
 *   `false` when either input is missing or empty, or nothing matches.
 */
function hasConversionOnlyArgType(argTypes, prefixes) {
    if (prefixes === undefined || prefixes.length === 0)
        return false;
    // Argument types are optional upstream; a missing list has no sentinels.
    if (argTypes === undefined || argTypes === null)
        return false;
    return argTypes.some((type) => 
    // Skip slots without an inferred type instead of throwing on them.
    typeof type === 'string' && prefixes.some((prefix) => type.startsWith(prefix)));
}
148
157
  function exactTypeSlotMatches(argType, paramType, argTypeClass, paramTypeClass) {
149
158
  if (argType !== paramType)
150
159
  return false;
@@ -47,7 +47,7 @@ import type { ResolutionOutcomeRecorder } from '../resolution-outcome.js';
47
47
  /** Subset of `ScopeResolver` consumed by this pass. Accepting the
48
48
  * subset rather than the full provider keeps tests and partial
49
49
  * refactors lighter — callers only need to populate what we read. */
50
- type ReceiverBoundProviderSubset = Pick<ScopeResolver, 'isSuperReceiver' | 'isSuperReceiverInContext' | 'fieldFallbackOnMethodLookup' | 'collapseMemberCallsByCallerTarget' | 'unwrapCollectionAccessor' | 'hoistTypeBindingsToModule' | 'resolveQualifiedReceiverMember' | 'resolveReceiverMember' | 'resolveThisViaEnclosingClass' | 'conversionRankFn' | 'constraintCompatibility' | 'isStaticOnly'>;
50
+ type ReceiverBoundProviderSubset = Pick<ScopeResolver, 'isSuperReceiver' | 'isSuperReceiverInContext' | 'fieldFallbackOnMethodLookup' | 'collapseMemberCallsByCallerTarget' | 'unwrapCollectionAccessor' | 'hoistTypeBindingsToModule' | 'resolveQualifiedReceiverMember' | 'resolveReceiverMember' | 'resolveThisViaEnclosingClass' | 'conversionRankFn' | 'conversionOnlyArgTypePrefixes' | 'constraintCompatibility' | 'isStaticOnly'>;
51
51
  export declare function emitReceiverBoundCalls(graph: KnowledgeGraph, scopes: ScopeResolutionIndexes, parsedFiles: readonly ParsedFile[], nodeLookup: GraphNodeLookup, handledSites: Set<string>, provider: ReceiverBoundProviderSubset, index: WorkspaceResolutionIndex, model: SemanticModel, options?: {
52
52
  readonly recordResolutionOutcome?: ResolutionOutcomeRecorder;
53
53
  }): number;
@@ -352,6 +352,7 @@ export function emitReceiverBoundCalls(graph, scopes, parsedFiles, nodeLookup, h
352
352
  const narrowed = narrowOverloadCandidates(methodOverloads, site.arity, site.argumentTypes, {
353
353
  argumentTypeClasses: site.argumentTypeClasses,
354
354
  conversionRankFn: provider.conversionRankFn,
355
+ conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
355
356
  constraintCompatibility: provider.constraintCompatibility,
356
357
  });
357
358
  if (isOverloadAmbiguousAfterNormalization(narrowed, site.arity)) {
@@ -867,6 +868,7 @@ function pickOverload(ownerId, memberName, site, model, provider) {
867
868
  const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
868
869
  argumentTypeClasses: site.argumentTypeClasses,
869
870
  conversionRankFn: provider.conversionRankFn,
871
+ conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
870
872
  constraintCompatibility: provider.constraintCompatibility,
871
873
  });
872
874
  // When narrowing leaves >1 candidate that share identical normalized
@@ -967,6 +969,7 @@ function pickFirstNonStaticOnly(ownerId, memberName, site, model, provider) {
967
969
  const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
968
970
  argumentTypeClasses: site.argumentTypeClasses,
969
971
  conversionRankFn: provider.conversionRankFn,
972
+ conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
970
973
  constraintCompatibility: provider.constraintCompatibility,
971
974
  });
972
975
  // Same ambiguity handling as `pickOverload`: when normalization
@@ -1001,6 +1004,7 @@ function recordReceiverOverloadSuppression(record, filePath, site, ownerId, memb
1001
1004
  const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
1002
1005
  argumentTypeClasses: site.argumentTypeClasses,
1003
1006
  conversionRankFn: provider.conversionRankFn,
1007
+ conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
1004
1008
  constraintCompatibility: provider.constraintCompatibility,
1005
1009
  });
1006
1010
  const reason = isOverloadAmbiguousAfterNormalization(candidates, site.arity)
@@ -446,6 +446,7 @@ export function runScopeResolution(input, provider) {
446
446
  isCallableVisibleFromCaller: provider.isCallableVisibleFromCaller,
447
447
  resolveAdlCandidates: provider.resolveAdlCandidates,
448
448
  conversionRankFn: provider.conversionRankFn,
449
+ conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
449
450
  constraintCompatibility: provider.constraintCompatibility,
450
451
  recordResolutionOutcome,
451
452
  });
@@ -1,4 +1,5 @@
1
1
  import { extractSimpleTypeName, extractVarName, resolveIterableElementType, methodToTypeArgPosition, } from './shared.js';
2
+ import { CPP_BRACED_INIT_TYPE_PREFIX } from '../languages/cpp/conversion-rank.js';
2
3
  const DECLARATION_NODE_TYPES = new Set(['declaration']);
3
4
  /** Smart pointer factory function names that create a typed object. */
4
5
  const SMART_PTR_FACTORIES = new Set(['make_shared', 'make_unique', 'make_shared_for_overwrite']);
@@ -447,6 +448,8 @@ const extractForLoopBinding = (node, { scopeEnv, declarationTypeNodes, scope })
447
448
  /** Infer the type of a literal AST node for C++ overload disambiguation. */
448
449
  const inferLiteralType = (node) => {
449
450
  switch (node.type) {
451
+ case 'initializer_list':
452
+ return inferBracedInitLiteralType(node);
450
453
  case 'number_literal': {
451
454
  const t = node.text;
452
455
  // Float suffixes
@@ -475,6 +478,24 @@ const inferLiteralType = (node) => {
475
478
  return undefined;
476
479
  }
477
480
  };
481
+ function inferBracedInitLiteralType(node) {
482
+ const elementTypes = [];
483
+ for (const child of node.children) {
484
+ if (child.type === ',' || child.type === '{' || child.type === '}')
485
+ continue;
486
+ const elementType = inferLiteralType(child);
487
+ if (elementType === undefined || elementType.startsWith(CPP_BRACED_INIT_TYPE_PREFIX)) {
488
+ return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${elementTypes.length + 1}`;
489
+ }
490
+ elementTypes.push(elementType);
491
+ }
492
+ if (elementTypes.length === 0)
493
+ return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:0`;
494
+ const first = elementTypes[0];
495
+ return elementTypes.every((type) => type === first)
496
+ ? `${CPP_BRACED_INIT_TYPE_PREFIX}${first}:${elementTypes.length}`
497
+ : `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${elementTypes.length}`;
498
+ }
478
499
  /** C++: detect constructor type from smart pointer factory calls (make_shared<Dog>()).
479
500
  * Extracts the template type argument as the constructor type for virtual dispatch. */
480
501
  const detectCppConstructorType = (node, classNames) => {
@@ -55,5 +55,22 @@ export interface StreamedCSVResult {
55
55
  * Stream all CSV data directly to disk files.
56
56
  * Iterates graph nodes exactly ONCE — routes each node to the right writer.
57
57
  * File contents are lazy-read from disk with a generous LRU cache.
58
+ *
59
+ * `onNodePhaseComplete` (optional, #2203 parallelism leg): fired exactly once,
60
+ * right after every node CSV is fully flushed to disk and BEFORE the
61
+ * relationship pass starts writing any `rel_*.csv`. It receives the finished
62
+ * node-file manifest so the caller can begin `COPY`-ing nodes while this
63
+ * function keeps generating relationship CSVs (the only single-writer-safe
64
+ * overlap — node `COPY` ‖ relationship emit). It is intentionally NOT awaited:
65
+ * the relationship pass proceeds concurrently with whatever the caller
66
+ * schedules. A synchronous throw from the callback is allowed and propagates out
67
+ * of this function (rejecting the returned promise) — it is raised before the
68
+ * relationship pass begins, so no `rel_*.csv` is written; `loadGraphToLbug` uses
69
+ * this to surface its PDG-manifest collision guard. The callback must NOT, however,
70
+ * schedule un-awaited async work that can reject unobserved. Absent ⇒ today's
71
+ * behavior, byte-for-byte.
58
72
  */
59
- export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
73
+ export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, onNodePhaseComplete?: (nodeFiles: Map<NodeTableName, {
74
+ csvPath: string;
75
+ rows: number;
76
+ }>) => void) => Promise<StreamedCSVResult>;
@@ -33,6 +33,14 @@ const orderedNodes = (graph, sorted) => sorted ? [...graph.iterNodes()].sort(byG
33
33
  const orderedRelationships = (graph, sorted) => sorted ? [...graph.iterRelationships()].sort(byGraphId) : graph.iterRelationships();
34
34
  /** Flush buffered rows to disk every N rows */
35
35
  const FLUSH_EVERY = 500;
36
+ /**
37
+ * Yield the event loop every N relationship rows during the emit pass (#2226 F4)
38
+ * so a concurrent node COPY (the overlap in loadGraphToLbug) and write-stream
39
+ * drain callbacks get scheduling time during long synchronous emit stretches.
40
+ * Scheduling-only — never changes row content or order (byte-identical). Tuning
41
+ * constant, not load-bearing.
42
+ */
43
+ const REL_YIELD_EVERY = 5000;
36
44
  // ============================================================================
37
45
  // CSV ESCAPE UTILITIES
38
46
  // ============================================================================
@@ -239,8 +247,22 @@ export const buildBasicBlockRow = (node) => [
239
247
  * Stream all CSV data directly to disk files.
240
248
  * Iterates graph nodes exactly ONCE — routes each node to the right writer.
241
249
  * File contents are lazy-read from disk with a generous LRU cache.
250
+ *
251
+ * `onNodePhaseComplete` (optional, #2203 parallelism leg): fired exactly once,
252
+ * right after every node CSV is fully flushed to disk and BEFORE the
253
+ * relationship pass starts writing any `rel_*.csv`. It receives the finished
254
+ * node-file manifest so the caller can begin `COPY`-ing nodes while this
255
+ * function keeps generating relationship CSVs (the only single-writer-safe
256
+ * overlap — node `COPY` ‖ relationship emit). It is intentionally NOT awaited:
257
+ * the relationship pass proceeds concurrently with whatever the caller
258
+ * schedules. A synchronous throw from the callback is allowed and propagates out
259
+ * of this function (rejecting the returned promise) — it is raised before the
260
+ * relationship pass begins, so no `rel_*.csv` is written; `loadGraphToLbug` uses
261
+ * this to surface its PDG-manifest collision guard. The callback must NOT, however,
262
+ * schedule un-awaited async work that can reject unobserved. Absent ⇒ today's
263
+ * behavior, byte-for-byte.
242
264
  */
243
- export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
265
+ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, onNodePhaseComplete) => {
244
266
  // Deterministic (id-sorted) node/relationship row order when enabled;
245
267
  // default off = today's graph-insertion order (byte-identical).
246
268
  const sortOutput = parseTruthyEnv(process.env.GITNEXUS_SORT_GRAPH_OUTPUT);
@@ -502,30 +524,11 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
502
524
  ...multiLangWriters.values(),
503
525
  ];
504
526
  await Promise.all(allWriters.map((w) => w.finish()));
505
- // --- Stream relationships directly to per-FROM→TO-label-pair files ---
506
- // (#2203 U2) Route every edge to its pair file in this single pass. The old
507
- // monolithic relations.csv and its line-by-line re-read + per-edge regex
508
- // re-split in loadGraphToLbug are gone, so the ~1M-edge set is written and
509
- // read once instead of twice. The router applies the SAME label-derivation +
510
- // validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
511
- // files are byte-identical (asserted by the differential test).
512
- const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
513
- try {
514
- for (const rel of orderedRelationships(graph, sortOutput)) {
515
- const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
516
- if (pending)
517
- await pending;
518
- }
519
- await relRouter.close();
520
- }
521
- catch (err) {
522
- relRouter.destroy();
523
- // Rethrow the real stream error (EMFILE / disk-full) rather than the generic
524
- // AbortError a pending drain-await rejects with — mirrors the retained
525
- // splitRelCsvByLabelPair's `throw streamError ?? err`.
526
- throw relRouter.lastError ?? err;
527
- }
528
- // Build result map — only include tables that have rows
527
+ // Build the node-file manifest now (all writers are flushed; `.rows` is
528
+ // final). Hoisted above the relationship pass so `onNodePhaseComplete` can
529
+ // hand the caller a complete node manifest to start COPY-ing while we keep
530
+ // generating relationship CSVs below (#2203 overlap). The same map is
531
+ // returned, so the result is unchanged when no callback is supplied.
529
532
  const nodeFiles = new Map();
530
533
  const tableMap = [
531
534
  ['File', fileWriter],
@@ -551,6 +554,38 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
551
554
  });
552
555
  }
553
556
  }
557
+ // Node CSVs are on disk; relationship CSVs have not been touched yet. Hand
558
+ // the manifest to the caller (not awaited — the rel pass runs concurrently).
559
+ onNodePhaseComplete?.(nodeFiles);
560
+ // --- Stream relationships directly to per-FROM→TO-label-pair files ---
561
+ // (#2203 U2) Route every edge to its pair file in this single pass. The old
562
+ // monolithic relations.csv — and its line-by-line re-read + per-edge regex
563
+ // re-split in loadGraphToLbug — are gone, so the ~1M-edge set is written and
564
+ // read once instead of twice. The router applies the SAME label-derivation +
565
+ // validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
566
+ // files are byte-identical (asserted by the differential test).
567
+ const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
568
+ try {
569
+ let emitted = 0;
570
+ for (const rel of orderedRelationships(graph, sortOutput)) {
571
+ const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
572
+ if (pending)
573
+ await pending;
574
+ // Periodically hand the event loop back so the overlapped node COPY and
575
+ // write-stream drains run instead of starving behind this synchronous
576
+ // loop (#2226 F4). No effect on emitted bytes — pure scheduling.
577
+ if (++emitted % REL_YIELD_EVERY === 0)
578
+ await new Promise((r) => setImmediate(r));
579
+ }
580
+ await relRouter.close();
581
+ }
582
+ catch (err) {
583
+ relRouter.destroy();
584
+ // Rethrow the real stream error (EMFILE / disk-full) rather than the generic
585
+ // AbortError a pending drain-await rejects with — mirrors the retained
586
+ // splitRelCsvByLabelPair's `throw streamError ?? err`.
587
+ throw relRouter.lastError ?? err;
588
+ }
554
589
  return {
555
590
  nodeFiles,
556
591
  relsByPair: relRouter.byPair,
@@ -1,5 +1,6 @@
1
1
  import lbug from '@ladybugdb/core';
2
2
  import { KnowledgeGraph } from '../graph/types.js';
3
+ import { NodeTableName } from './schema.js';
3
4
  import type { PdgEmitManifest } from './pdg-emit-sink.js';
4
5
  import type { CachedEmbedding } from '../embeddings/types.js';
5
6
  import { type ExtensionEnsureOptions } from './extension-loader.js';
@@ -54,6 +55,18 @@ export declare const withLbugDb: <T>(dbPath: string, operation: () => Promise<T>
54
55
  readOnly?: boolean;
55
56
  }) => Promise<T>;
56
57
  export type LbugProgressCallback = (message: string) => void;
58
+ /**
59
+ * Persist a KnowledgeGraph: stream CSVs, then bulk-COPY nodes (overlapped with
60
+ * relationship emit — see the body) and relationships.
61
+ *
62
+ * NOT TRANSACTIONAL (#2226). Each `COPY` commits independently and there is no
63
+ * surrounding transaction, so a failure partway through — a node `COPY` that
64
+ * throws at the FK barrier, a relationship `COPY` failure, or a `pdgEmitManifest`
65
+ * collision raised after node rows have already committed in the overlap path —
66
+ * leaves a partially-loaded DB. The caller surfaces the error; recovery is a
67
+ * `--force` re-analyze (a full rebuild), not a partial retry. Callers must not
68
+ * assume the DB is either fully loaded or untouched after a rejection.
69
+ */
57
70
  export declare const loadGraphToLbug: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: LbugProgressCallback,
58
71
  /**
59
72
  * Streamed PDG-emit manifest (#2202). When present (streaming was on, full
@@ -69,6 +82,8 @@ pdgEmitManifest?: PdgEmitManifest) => Promise<{
69
82
  skippedRels: number;
70
83
  warnings: string[];
71
84
  }>;
85
+ export declare const COPY_CSV_OPTS = "(HEADER=true, ESCAPE='\"', DELIM=',', QUOTE='\"', PARALLEL=false, auto_detect=false)";
86
+ export declare const getCopyQuery: (table: NodeTableName, filePath: string) => string;
72
87
  /**
73
88
  * Insert a single node to LadybugDB
74
89
  * @param label - Node type (File, Function, Class, etc.)
@@ -740,6 +740,60 @@ const doInitLbug = async (dbPath, readOnly = false) => {
740
740
  currentDbPath = dbPath;
741
741
  return { db, conn };
742
742
  };
743
+ /**
744
+ * Run a COPY, retrying once with IGNORE_ERRORS=true (which skips row-level
745
+ * errors) on first failure. On a second failure, hand the RAW retry error to
746
+ * `onError` — each call site formats + slices its own message (#2226 F5: node
747
+ * COPY slices to 200 chars and throws; relationship COPY slices to 80 and warns,
748
+ * so the helper must not pre-format and lose that distinction). `onError` may
749
+ * throw to propagate the failure.
750
+ */
751
+ const copyCsvWithRetry = async (targetConn, copyQuery, onError) => {
752
+ try {
753
+ await queryAndDrain(targetConn, copyQuery);
754
+ }
755
+ catch {
756
+ try {
757
+ const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
758
+ await queryAndDrain(targetConn, retryQuery);
759
+ }
760
+ catch (retryErr) {
761
+ onError(retryErr);
762
+ }
763
+ }
764
+ };
765
+ /**
766
+ * Bulk-COPY every node CSV sequentially on the single writable connection
767
+ * (LadybugDB allows one write txn at a time). Extracted from loadGraphToLbug so
768
+ * it can run either at the node-phase boundary — overlapping the relationship
769
+ * emit pass (#2203) — or after emit in the serial escape-hatch path. Each COPY
770
+ * keeps the IGNORE_ERRORS=true retry; a hard failure throws (no node rows ⇒ the
771
+ * relationship COPY would dangle on missing endpoints).
772
+ */
773
+ const copyNodeCSVs = async (targetConn, nodeFileEntries, log, totalSteps) => {
774
+ let stepsDone = 0;
775
+ for (const [table, { csvPath, rows }] of nodeFileEntries) {
776
+ stepsDone++;
777
+ log(`Loading nodes ${stepsDone}/${totalSteps}: ${table} (${rows.toLocaleString()} rows)`);
778
+ const copyQuery = getCopyQuery(table, normalizeCopyPath(csvPath));
779
+ await copyCsvWithRetry(targetConn, copyQuery, (retryErr) => {
780
+ const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
781
+ throw new Error(`COPY failed for ${table}: ${retryMsg.slice(0, 200)}`);
782
+ });
783
+ }
784
+ };
785
+ /**
786
+ * Persist a KnowledgeGraph: stream CSVs, then bulk-COPY nodes (overlapped with
787
+ * relationship emit — see the body) and relationships.
788
+ *
789
+ * NOT TRANSACTIONAL (#2226). Each `COPY` commits independently and there is no
790
+ * surrounding transaction, so a failure partway through — a node `COPY` that
791
+ * throws at the FK barrier, a relationship `COPY` failure, or a `pdgEmitManifest`
792
+ * collision raised after node rows have already committed in the overlap path —
793
+ * leaves a partially-loaded DB. The caller surfaces the error; recovery is a
794
+ * `--force` re-analyze (a full rebuild), not a partial retry. Callers must not
795
+ * assume the DB is either fully loaded or untouched after a rejection.
796
+ */
743
797
  export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress,
744
798
  /**
745
799
  * Streamed PDG-emit manifest (#2202). When present (streaming was on, full
@@ -761,31 +815,87 @@ pdgEmitManifest) => {
761
815
  // the gap that the DB-persistence path is un-timed today (the analyze
762
816
  // "emit" number is the scope-resolution emit bucket, not this COPY path).
763
817
  const PROF = process.env.PROF_LBUG_LOAD === '1';
818
+ // Escape hatch / differential oracle (#2203): force the legacy strictly-serial
819
+ // load order (emit everything, THEN COPY nodes, THEN COPY rels) instead of the
820
+ // default node-COPY ‖ rel-emit overlap. Lets an operator revert the behavior at
821
+ // runtime, and lets a test load the same graph both ways and assert identical
822
+ // persisted content.
823
+ const SERIAL = process.env.GITNEXUS_SERIAL_LBUG_LOAD === '1';
764
824
  const mark = () => (PROF ? process.hrtime.bigint() : 0n);
765
825
  const span = (a, b) => (Number(b - a) / 1e6).toFixed(1);
766
826
  const tStart = mark();
767
827
  const csvDir = resolveNativeSafeStorageDir(storagePath, 'csv');
768
- log('Streaming CSVs to disk...');
769
- const csvResult = await streamAllCSVsToDisk(graph, repoPath, csvDir);
770
- // Merge the streamed PDG-emit CSVs (#2202) into the COPY plan so the
771
- // BasicBlock node table + per-pair PDG edges (CFG / REACHING_DEF / CDG /
772
- // POST_DOMINATE / TAINTED / SANITIZES) load through the SAME node + per-pair
773
- // COPY loops as the structural CSVs. The graph held zero BasicBlocks when
774
- // streaming, so `streamAllCSVsToDisk` produced none of these the manifest
775
- // is the sole source and there is no double-COPY. Absent ⇒ no-op.
776
- if (pdgEmitManifest) {
828
+ // The single writable connection (LadybugDB is single-writer). Captured as a
829
+ // const so the node-COPY closure has a non-null reference — TS cannot narrow
830
+ // the reassignable module-level `conn` across the callback boundary.
831
+ const writeConn = conn;
832
+ const validTables = new Set(NODE_TABLES);
833
+ // Merge the streamed PDG-emit node CSVs (#2202) into a node-file map. Collision
834
+ // guard: a BasicBlock in the in-memory graph during a streamed run is an
835
+ // invariant violation (streamAllCSVsToDisk would also emit basicblock.csv), so
836
+ // fail loudly rather than drop rows (#2202 review #3). Runs at the node-phase
837
+ // boundary so the manifest BasicBlock table COPYs with the structural CSVs.
838
+ const mergeManifestNodeFiles = (nodeFilesMap) => {
839
+ if (!pdgEmitManifest)
840
+ return;
777
841
  for (const [table, meta] of pdgEmitManifest.nodeFiles) {
778
- // A collision means a BasicBlock leaked into the in-memory graph during a
779
- // streamed run (streamAllCSVsToDisk then emitted a structural basicblock.csv).
780
- // That is a streaming-invariant violation — fail loudly rather than
781
- // silently overwrite one CSV with the other and drop its rows (#2202 review #3).
782
- if (csvResult.nodeFiles.has(table)) {
842
+ if (nodeFilesMap.has(table)) {
783
843
  throw new Error(`Streaming PDG manifest collides with a structural node CSV for "${table}" — ` +
784
844
  `the in-memory graph should hold zero ${table} nodes when streaming. ` +
785
845
  `A ${table} node leaked into the graph during a streamed emit.`);
786
846
  }
787
- csvResult.nodeFiles.set(table, meta);
847
+ nodeFilesMap.set(table, meta);
788
848
  }
849
+ };
850
+ // Node COPY is the only DB write that can overlap relationship CSV emit: the
851
+ // rel pass writes new rel_*.csv files and never touches `conn`, while node COPY
852
+ // uses `conn` and never touches the rel files. We start node COPY at the
853
+ // node-phase boundary and let the rel pass run concurrently — the only
854
+ // single-writer-safe parallelism (#2203). The rel COPY still waits for node
855
+ // COPY (FK precondition), so the DB load order is unchanged.
856
+ let nodeCopyPromise;
857
+ let nodeCopyError;
858
+ const beginNodeCopy = (nodeFilesMap) => {
859
+ mergeManifestNodeFiles(nodeFilesMap);
860
+ const entries = [...nodeFilesMap.entries()];
861
+ // copyNodeCSVs logs node progress as step/total; it processes only node
862
+ // tables (the rel COPY has its own "Loading edges" progress line), so the
863
+ // denominator is the node-table count — not +1 reserving a rel step.
864
+ // .catch captures the failure so an overlapped (mid-emit) rejection cannot
865
+ // surface as an unhandled rejection; it is rethrown at the FK barrier below.
866
+ nodeCopyPromise = copyNodeCSVs(writeConn, entries, log, entries.length).catch((e) => {
867
+ nodeCopyError = e;
868
+ });
869
+ };
870
+ log('Streaming CSVs to disk...');
871
+ let csvResult;
872
+ try {
873
+ csvResult = SERIAL
874
+ ? await streamAllCSVsToDisk(graph, repoPath, csvDir)
875
+ : await streamAllCSVsToDisk(graph, repoPath, csvDir, beginNodeCopy);
876
+ }
877
+ catch (emitErr) {
878
+ // Relationship emit failed. In overlap mode a node COPY may be in flight —
879
+ // settle it (the .catch above means this never rejects) before rethrowing so
880
+ // it cannot leak as an unhandled rejection.
881
+ if (nodeCopyPromise)
882
+ await nodeCopyPromise;
883
+ // If node COPY ALSO failed, emitErr wins the throw — log the swallowed node
884
+ // error so a half-loaded DB isn't misattributed to the emit failure alone.
885
+ if (nodeCopyError) {
886
+ logger.warn({ err: nodeCopyError }, '[lbug-load] node COPY also failed while relationship emit was failing');
887
+ }
888
+ throw emitErr;
889
+ }
890
+ const tCsv = mark();
891
+ // Merge the streamed PDG-emit per-pair rel CSVs (#2202) into the COPY plan —
892
+ // collision-guarded. Done BEFORE node COPY so the serial escape hatch detects a
893
+ // manifest/structural pair collision before committing any node rows (legacy
894
+ // parity with the pre-overlap path), and the overlap path detects it as early
895
+ // as csvResult is available. When a manifest is present, streaming was on and
896
+ // the in-memory graph held zero BasicBlocks, so a structural collision means a
897
+ // streaming-invariant violation — fail loudly rather than load corrupt data.
898
+ if (pdgEmitManifest) {
789
899
  for (const [pairKey, meta] of pdgEmitManifest.relsByPair) {
790
900
  if (csvResult.relsByPair.has(pairKey)) {
791
901
  throw new Error(`Streaming PDG manifest collides with a structural relationship CSV for pair ` +
@@ -795,30 +905,17 @@ pdgEmitManifest) => {
795
905
  csvResult.totalValidRels += meta.rows;
796
906
  }
797
907
  }
798
- const tCsv = mark();
799
- const validTables = new Set(NODE_TABLES);
800
- // Bulk COPY all node CSVs (sequential — LadybugDB allows only one write txn at a time)
801
- const nodeFiles = [...csvResult.nodeFiles.entries()];
802
- const totalSteps = nodeFiles.length + 1; // +1 for relationships
803
- let stepsDone = 0;
804
- for (const [table, { csvPath, rows }] of nodeFiles) {
805
- stepsDone++;
806
- log(`Loading nodes ${stepsDone}/${totalSteps}: ${table} (${rows.toLocaleString()} rows)`);
807
- const normalizedPath = normalizeCopyPath(csvPath);
808
- const copyQuery = getCopyQuery(table, normalizedPath);
809
- try {
810
- await queryAndDrain(conn, copyQuery);
811
- }
812
- catch (err) {
813
- try {
814
- const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
815
- await queryAndDrain(conn, retryQuery);
816
- }
817
- catch (retryErr) {
818
- const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
819
- throw new Error(`COPY failed for ${table}: ${retryMsg.slice(0, 200)}`);
820
- }
821
- }
908
+ // Serial path: all CSVs are on disk and node COPY has not started — start it
909
+ // here so the barrier below blocks on it exactly as the legacy path did.
910
+ if (SERIAL)
911
+ beginNodeCopy(csvResult.nodeFiles);
912
+ // FK barrier: node rows must exist before the relationship COPY resolves their
913
+ // endpoints. In overlap mode most of node COPY was hidden behind rel emit, so
914
+ // this await is the *residual* node-COPY time (≈0 when fully overlapped).
915
+ if (nodeCopyPromise)
916
+ await nodeCopyPromise;
917
+ if (nodeCopyError) {
918
+ throw nodeCopyError instanceof Error ? nodeCopyError : new Error(String(nodeCopyError));
822
919
  }
823
920
  const tCopyNodes = mark();
824
921
  // Bulk COPY relationships. They were already routed to per-FROM→TO-label-pair
@@ -838,25 +935,17 @@ pdgEmitManifest) => {
838
935
  pairIdx++;
839
936
  const [fromLabel, toLabel] = pairKey.split('|');
840
937
  const normalizedPath = normalizeCopyPath(pairCsvPath);
938
+ // PARALLEL=false is load-bearing here too — see COPY_CSV_OPTS (#2203 / kuzudb/kuzu#5778).
841
939
  const copyQuery = `COPY ${REL_TABLE_NAME} FROM "${normalizedPath}" (from="${fromLabel}", to="${toLabel}", HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
842
940
  if (pairIdx % 5 === 0 || rows > 1000) {
843
941
  log(`Loading edges: ${pairIdx}/${relsByPair.size} types (${fromLabel} -> ${toLabel})`);
844
942
  }
845
- try {
846
- await queryAndDrain(conn, copyQuery);
847
- }
848
- catch (err) {
849
- try {
850
- const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
851
- await queryAndDrain(conn, retryQuery);
852
- }
853
- catch (retryErr) {
854
- const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
855
- warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${retryMsg.slice(0, 80)}`);
856
- failedPairEdges += rows;
857
- failedPairCsvPaths.add(pairCsvPath);
858
- }
859
- }
943
+ await copyCsvWithRetry(conn, copyQuery, (retryErr) => {
944
+ const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
945
+ warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${retryMsg.slice(0, 80)}`);
946
+ failedPairEdges += rows;
947
+ failedPairCsvPaths.add(pairCsvPath);
948
+ });
860
949
  // Only delete if not in failedPairCsvPaths (needed for fallback)
861
950
  if (!failedPairCsvPaths.has(pairCsvPath)) {
862
951
  try {
@@ -919,7 +1008,12 @@ pdgEmitManifest) => {
919
1008
  let totalNodeRows = 0;
920
1009
  for (const [, { rows }] of csvResult.nodeFiles)
921
1010
  totalNodeRows += rows;
922
- logger.warn(`[lbug-load prof] csv-emit=${span(tStart, tCsv)}ms ` +
1011
+ // `mode` records which load path ran. In overlap mode `csv-emit` is the wall
1012
+ // to streamAllCSVsToDisk's return (node COPY overlapped part of it) and
1013
+ // `copy-nodes` is the RESIDUAL node-COPY await after emit returned — it
1014
+ // trends to 0 as the overlap hides node COPY behind relationship emit. In
1015
+ // serial mode the buckets carry their legacy, disjoint meaning.
1016
+ logger.warn(`[lbug-load prof] mode=${SERIAL ? 'serial' : 'overlap'} csv-emit=${span(tStart, tCsv)}ms ` +
923
1017
  `copy-nodes=${span(tCsv, tCopyNodes)}ms copy-rels=${span(tCopyNodes, tCopyRels)}ms ` +
924
1018
  `fallback=${span(tCopyRels, tFallback)}ms total=${span(tStart, tEnd)}ms ` +
925
1019
  `(${totalNodeRows} nodes, ${insertedRels} rels)`);
@@ -930,7 +1024,18 @@ pdgEmitManifest) => {
930
1024
  // Source code content is full of backslashes which confuse the auto-detection.
931
1025
  // We MUST explicitly set ESCAPE='"' to use RFC 4180 escaping, and disable auto_detect to prevent
932
1026
  // LadybugDB from overriding our settings based on sample rows.
933
- const COPY_CSV_OPTS = `(HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
1027
+ //
1028
+ // PARALLEL=false IS LOAD-BEARING FOR CORRECTNESS — DO NOT FLIP IT (#2203).
1029
+ // LadybugDB's parallel CSV reader (Kuzu-derived; default PARALLEL=true) splits the
1030
+ // file into byte ranges parsed concurrently, and CANNOT determine line boundaries
1031
+ // when a quoted field contains an embedded newline — it errors with "Quoted newlines
1032
+ // are not supported in parallel CSV reader. Please specify PARALLEL=FALSE", or worse,
1033
+ // mis-parses silently (upstream kuzudb/kuzu#5778, still open). Our `content`/`text`
1034
+ // columns hold source code, so quoted multiline fields are guaranteed. PARALLEL=false
1035
+ // is therefore required, not conservative. The multiline-quoted round-trip in
1036
+ // test/integration/copy-parallel-invariant.test.ts fails loudly if this is ever flipped.
1037
+ // Exported so that test asserts the invariant statically as well.
1038
+ export const COPY_CSV_OPTS = `(HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
934
1039
  // Multi-language table names that were created with backticks in CODE_ELEMENT_BASE
935
1040
  // and must always be referenced with backticks in queries
936
1041
  const BACKTICK_TABLES = new Set([
@@ -996,7 +1101,7 @@ const TABLES_WITH_EXPORTED = new Set([
996
1101
  'Method',
997
1102
  'CodeElement',
998
1103
  ]);
999
- const getCopyQuery = (table, filePath) => {
1104
+ export const getCopyQuery = (table, filePath) => {
1000
1105
  const t = escapeTableName(table);
1001
1106
  if (table === 'File') {
1002
1107
  return `COPY ${t}(id, name, filePath, content) FROM "${filePath}" ${COPY_CSV_OPTS}`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.8-rc.46",
3
+ "version": "1.6.8-rc.48",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",