gitnexus 1.6.8-rc.46 → 1.6.8-rc.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/scope-resolution/symbol-definition.d.ts +2 -0
- package/dist/_shared/scope-resolution/symbol-definition.d.ts.map +1 -1
- package/dist/core/ingestion/languages/cpp/arity-metadata.js +35 -0
- package/dist/core/ingestion/languages/cpp/captures.js +24 -0
- package/dist/core/ingestion/languages/cpp/conversion-rank.d.ts +2 -0
- package/dist/core/ingestion/languages/cpp/conversion-rank.js +89 -0
- package/dist/core/ingestion/languages/cpp/inline-namespaces.js +7 -2
- package/dist/core/ingestion/languages/cpp/member-lookup.js +2 -1
- package/dist/core/ingestion/languages/cpp/scope-resolver.js +2 -1
- package/dist/core/ingestion/scope-extractor.js +9 -2
- package/dist/core/ingestion/scope-resolution/contract/scope-resolver.d.ts +9 -0
- package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.d.ts +3 -1
- package/dist/core/ingestion/scope-resolution/passes/free-call-fallback.js +9 -2
- package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.d.ts +4 -0
- package/dist/core/ingestion/scope-resolution/passes/overload-narrowing.js +9 -0
- package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.d.ts +1 -1
- package/dist/core/ingestion/scope-resolution/passes/receiver-bound-calls.js +4 -0
- package/dist/core/ingestion/scope-resolution/pipeline/run.js +1 -0
- package/dist/core/ingestion/type-extractors/c-cpp.js +21 -0
- package/dist/core/lbug/csv-generator.d.ts +18 -1
- package/dist/core/lbug/csv-generator.js +60 -25
- package/dist/core/lbug/lbug-adapter.d.ts +15 -0
- package/dist/core/lbug/lbug-adapter.js +162 -57
- package/package.json +1 -1
|
@@ -18,6 +18,8 @@ export interface ParameterTypeClass {
|
|
|
18
18
|
indirection: 'value' | 'lvalue-ref' | 'rvalue-ref' | 'pointer' | 'unknown';
|
|
19
19
|
/** Number of pointer markers when indirection is `pointer`; otherwise 0. */
|
|
20
20
|
pointerDepth: number;
|
|
21
|
+
/** Normalized top-level template arguments, when a language preserves them. */
|
|
22
|
+
templateArguments?: string[];
|
|
21
23
|
}
|
|
22
24
|
export interface SymbolDefinition {
|
|
23
25
|
nodeId: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"symbol-definition.d.ts","sourceRoot":"","sources":["../../src/scope-resolution/symbol-definition.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IACjC,wFAAwF;IACxF,IAAI,EAAE,MAAM,CAAC;IACb,8EAA8E;IAC9E,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,UAAU,GAAG,gBAAgB,GAAG,SAAS,CAAC;IACjE,4CAA4C;IAC5C,WAAW,EAAE,OAAO,GAAG,YAAY,GAAG,YAAY,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3E,4EAA4E;IAC5E,YAAY,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"symbol-definition.d.ts","sourceRoot":"","sources":["../../src/scope-resolution/symbol-definition.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IACjC,wFAAwF;IACxF,IAAI,EAAE,MAAM,CAAC;IACb,8EAA8E;IAC9E,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,UAAU,GAAG,gBAAgB,GAAG,SAAS,CAAC;IACjE,4CAA4C;IAC5C,WAAW,EAAE,OAAO,GAAG,YAAY,GAAG,YAAY,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3E,4EAA4E;IAC5E,YAAY,EAAE,MAAM,CAAC;IACrB,+EAA+E;IAC/E,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,SAAS,CAAC;IAChB;;+FAE2F;IAC3F,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;iHAC6G;IAC7G,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC;uFACmF;IACnF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B;4FACwF;IACxF,oBAAoB,CAAC,EAAE,kBAAkB,EAAE,CAAC;IAC5C,6EAA6E;IAC7E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gGAAgG;IAChG,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gGAAgG;IAChG,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;;;;;yEAKqE;IACrE,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B;;4DAEwD;IACxD,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;mEAE+D;IAC/D,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,4EAA4E;IAC5E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;iEAO6D;IAC7D,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B"}
|
|
@@ -173,6 +173,7 @@ export function classifyCppParameterType(rawType, declaratorText, fullParameterT
|
|
|
173
173
|
cv,
|
|
174
174
|
indirection,
|
|
175
175
|
pointerDepth,
|
|
176
|
+
...templateArgumentsFor(`${source} ${rawType} ${declaratorText ?? ''}`),
|
|
176
177
|
};
|
|
177
178
|
}
|
|
178
179
|
function unknownTypeClass(base) {
|
|
@@ -183,6 +184,40 @@ function unknownTypeClass(base) {
|
|
|
183
184
|
pointerDepth: 0,
|
|
184
185
|
};
|
|
185
186
|
}
|
|
187
|
+
/**
 * Build the optional `templateArguments` fragment for a parameter type class.
 *
 * @param rawType Type text to scan for a top-level template argument list.
 * @returns `{ templateArguments }` when at least one argument was parsed,
 *   otherwise an empty object so the result can be spread unconditionally.
 */
function templateArgumentsFor(rawType) {
    const parsed = parseTopLevelTemplateArguments(rawType);
    if (parsed === undefined) {
        return {};
    }
    return { templateArguments: parsed };
}
|
|
191
|
+
/**
 * Parse the arguments of the first template argument list found in `rawType`.
 *
 * Scanning starts at the first `<` and stops at its matching `>`. Arguments
 * are split on commas that sit directly inside that list; each non-empty
 * argument is passed through `normalizeCppParamType`.
 *
 * Text inside `()`, `[]` or `{}` is treated as opaque, so function-type
 * arguments such as `std::function<void(int, int)>` or parenthesised
 * expressions such as `std::array<int, (N > 2)>` are kept as one argument
 * instead of being split at the inner comma or closed at the inner `>`.
 *
 * @param rawType Type text that may contain a template argument list.
 * @returns The normalized arguments, or `undefined` when there is no `<`,
 *   the list is never closed, or every argument is empty.
 */
function parseTopLevelTemplateArguments(rawType) {
    const start = rawType.indexOf('<');
    if (start < 0)
        return undefined;
    const args = [];
    let angleDepth = 0;
    // Nesting level of (), [] and {} groups; delimiters inside them are ignored.
    let groupDepth = 0;
    let argStart = start + 1;
    const pushArgEndingAt = (end) => {
        const arg = rawType.slice(argStart, end).trim();
        if (arg.length > 0)
            args.push(normalizeCppParamType(arg));
    };
    for (let i = start + 1; i < rawType.length; i++) {
        const ch = rawType[i];
        if (ch === '(' || ch === '[' || ch === '{') {
            groupDepth++;
            continue;
        }
        if (ch === ')' || ch === ']' || ch === '}') {
            // A stray closer is ignored rather than driving the depth negative.
            if (groupDepth > 0)
                groupDepth--;
            continue;
        }
        if (groupDepth > 0)
            continue;
        if (ch === '<') {
            angleDepth++;
        }
        else if (ch === '>') {
            if (angleDepth === 0) {
                pushArgEndingAt(i);
                return args.length > 0 ? args : undefined;
            }
            angleDepth--;
        }
        else if (ch === ',' && angleDepth === 0) {
            pushArgEndingAt(i);
            argStart = i + 1;
        }
    }
    // The opening `<` was never matched by a closing `>`.
    return undefined;
}
|
|
186
221
|
function findFuncDeclarator(node) {
|
|
187
222
|
let decl = node.childForFieldName('declarator');
|
|
188
223
|
if (decl === null) {
|
|
@@ -11,6 +11,7 @@ import { markCppAdlSiteArgs, markCppAdlSiteNoAdl } from './adl.js';
|
|
|
11
11
|
import { markCppInlineNamespaceRange } from './inline-namespaces.js';
|
|
12
12
|
import { extractCppTemplateConstraints } from './constraint-extractor.js';
|
|
13
13
|
import { captureCppMemberLookupFacts } from './member-lookup.js';
|
|
14
|
+
import { CPP_BRACED_INIT_TYPE_PREFIX } from './conversion-rank.js';
|
|
14
15
|
export function emitCppScopeCaptures(sourceText, filePath, cachedTree) {
|
|
15
16
|
let tree = cachedTree;
|
|
16
17
|
if (tree === undefined) {
|
|
@@ -906,6 +907,8 @@ function unknownTypeClass(base) {
|
|
|
906
907
|
*/
|
|
907
908
|
function inferCppLiteralType(node) {
|
|
908
909
|
switch (node.type) {
|
|
910
|
+
case 'initializer_list':
|
|
911
|
+
return inferCppBracedInitType(node);
|
|
909
912
|
case 'number_literal': {
|
|
910
913
|
const text = node.text;
|
|
911
914
|
// Floating-point literals contain '.', 'e', 'E', or end with 'f'/'F'
|
|
@@ -934,6 +937,27 @@ function inferCppLiteralType(node) {
|
|
|
934
937
|
return '';
|
|
935
938
|
}
|
|
936
939
|
}
|
|
940
|
+
/**
 * Infer a sentinel type string for a braced initializer list node.
 *
 * The result has the form `<prefix><elementType>:<count>` where `<prefix>` is
 * `CPP_BRACED_INIT_TYPE_PREFIX`. `elementType` is the shared literal type when
 * every element infers to the same type, and `unknown` otherwise.
 *
 * Note: when an element cannot be inferred (or is itself a braced-init
 * sentinel) the function returns immediately, so the reported count is the
 * number of elements examined so far, not the total element count.
 *
 * @param node Syntax node whose children are the list elements and punctuation.
 * @returns The braced-init sentinel type string.
 */
function inferCppBracedInitType(node) {
    const inferredTypes = [];
    for (let index = 0; index < node.childCount; index += 1) {
        const element = node.child(index);
        if (element === null) {
            continue;
        }
        // Skip the list punctuation; only element expressions are typed.
        const kind = element.type;
        if (kind === ',' || kind === '{' || kind === '}') {
            continue;
        }
        const inferred = inferCppLiteralType(element);
        const unresolved = inferred === '' || inferred.startsWith(CPP_BRACED_INIT_TYPE_PREFIX);
        if (unresolved) {
            return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${inferredTypes.length + 1}`;
        }
        inferredTypes.push(inferred);
    }
    const total = inferredTypes.length;
    if (total === 0) {
        return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:0`;
    }
    const head = inferredTypes[0];
    const homogeneous = inferredTypes.every((candidate) => candidate === head);
    if (homogeneous) {
        return `${CPP_BRACED_INIT_TYPE_PREFIX}${head}:${total}`;
    }
    return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${total}`;
}
|
|
937
961
|
/**
|
|
938
962
|
* Look up the declared type of a variable by scanning sibling declarations
|
|
939
963
|
* in the enclosing compound_statement (function body). Handles:
|
|
@@ -20,6 +20,8 @@
|
|
|
20
20
|
* their own `ConversionRankFn` in the future.
|
|
21
21
|
*/
|
|
22
22
|
import type { ParameterTypeClass } from '../../../../_shared/index.js';
|
|
23
|
+
export declare const CPP_BRACED_INIT_TYPE_PREFIX = "braced-init:";
|
|
24
|
+
export declare const CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES: readonly ["braced-init:"];
|
|
23
25
|
/**
|
|
24
26
|
* Return the conversion rank from `argType` to `paramType`.
|
|
25
27
|
*
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
* This function is intentionally C++-specific. Other languages may define
|
|
20
20
|
* their own `ConversionRankFn` in the future.
|
|
21
21
|
*/
|
|
22
|
+
import { normalizeCppParamType } from './arity-metadata.js';
|
|
22
23
|
import { hasCppUserDefinedConversion } from './user-defined-conversions.js';
|
|
23
24
|
/** Set of normalized arithmetic types that support implicit conversion. */
|
|
24
25
|
const ARITHMETIC = new Set(['int', 'double', 'char', 'bool']);
|
|
@@ -27,6 +28,22 @@ const INTEGRAL_PROMOTION = new Map([
|
|
|
27
28
|
['char', 'int'],
|
|
28
29
|
['bool', 'int'],
|
|
29
30
|
]);
|
|
31
|
+
/** Prefix of the sentinel argument-type strings used for braced initializer lists. */
export const CPP_BRACED_INIT_TYPE_PREFIX = 'braced-init:';
/** Argument-type prefixes that can only be matched through conversion ranking. */
export const CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES = [CPP_BRACED_INIT_TYPE_PREFIX];
/**
 * Container type names accepted as braced-init targets, in both unqualified
 * and `std::`-qualified spelling. Sorting keeps the entries in plain
 * alphabetical order.
 */
const BRACED_INIT_CONTAINER_TYPES = new Set(['array', 'deque', 'list', 'set', 'unordered_set', 'vector']
    .flatMap((name) => [name, `std::${name}`])
    .sort());
|
|
30
47
|
/**
|
|
31
48
|
* Return the conversion rank from `argType` to `paramType`.
|
|
32
49
|
*
|
|
@@ -36,6 +53,17 @@ const INTEGRAL_PROMOTION = new Map([
|
|
|
36
53
|
* for mismatch.
|
|
37
54
|
*/
|
|
38
55
|
export function cppConversionRank(argType, paramType, argTypeClass, paramTypeClass) {
|
|
56
|
+
const bracedInitType = parseBracedInitArgType(argType);
|
|
57
|
+
if (bracedInitType !== undefined) {
|
|
58
|
+
if (bracedInitType.elementType === 'unknown')
|
|
59
|
+
return Infinity;
|
|
60
|
+
if (bracedInitType.elementCount === 1) {
|
|
61
|
+
const scalarRank = cppConversionRank(bracedInitType.elementType, paramType, undefined, paramTypeClass);
|
|
62
|
+
if (isFinite(scalarRank))
|
|
63
|
+
return scalarRank;
|
|
64
|
+
}
|
|
65
|
+
return bracedInitConversionRank(paramType, bracedInitType, paramTypeClass);
|
|
66
|
+
}
|
|
39
67
|
if (argType === paramType) {
|
|
40
68
|
return exactShapeCompatible(argTypeClass, paramTypeClass) ? 0 : Infinity;
|
|
41
69
|
}
|
|
@@ -57,6 +85,67 @@ export function cppConversionRank(argType, paramType, argTypeClass, paramTypeCla
|
|
|
57
85
|
return 4;
|
|
58
86
|
return Infinity;
|
|
59
87
|
}
|
|
88
|
+
/**
 * Decode a braced-init sentinel argument type.
 *
 * The expected shape is `<prefix><elementType>:<count>`. The count is taken
 * from the text after the last `:` and must consist of digits only; when it
 * is missing or malformed the whole payload is returned as the element type
 * and no `elementCount` is set.
 *
 * @param argType Argument type string to inspect.
 * @returns `{ elementType, elementCount? }`, or `undefined` when `argType`
 *   does not start with `CPP_BRACED_INIT_TYPE_PREFIX` or has an empty payload.
 */
function parseBracedInitArgType(argType) {
    if (!argType.startsWith(CPP_BRACED_INIT_TYPE_PREFIX)) {
        return undefined;
    }
    const body = argType.slice(CPP_BRACED_INIT_TYPE_PREFIX.length);
    if (body === '') {
        return undefined;
    }
    const colonAt = body.lastIndexOf(':');
    // A colon at index 0 (or none at all) leaves no element type before it.
    const countDigits = colonAt > 0 ? body.slice(colonAt + 1) : '';
    if (/^\d+$/.test(countDigits)) {
        return {
            elementType: body.slice(0, colonAt),
            elementCount: Number(countDigits),
        };
    }
    return { elementType: body };
}
|
|
106
|
+
/**
 * Rank the conversion from a braced initializer list to a parameter type.
 *
 * @param paramType Declared parameter type text.
 * @param argType Parsed braced-init descriptor (`elementType`, `elementCount`).
 * @param paramTypeClass Optional parameter type class carrying template arguments.
 * @returns `0` for an `initializer_list` target and `4` for one of the
 *   `BRACED_INIT_CONTAINER_TYPES`, provided the element type converts to the
 *   target's value type; `Infinity` in every other case.
 */
function bracedInitConversionRank(paramType, argType, paramTypeClass) {
    const base = bracedInitTargetBase(paramType);
    let rankOnMatch;
    if (base === 'initializer_list' || base === 'std::initializer_list') {
        rankOnMatch = 0;
    }
    else if (BRACED_INIT_CONTAINER_TYPES.has(base)) {
        rankOnMatch = 4;
    }
    else {
        return Infinity;
    }
    const elementsFit = bracedInitValueTypeMatches(paramType, argType, paramTypeClass);
    return elementsFit ? rankOnMatch : Infinity;
}
|
|
116
|
+
/**
 * Check whether the braced-init element type converts to the value type of
 * the target parameter.
 *
 * @param paramType Declared parameter type text.
 * @param argType Parsed braced-init descriptor; only `elementType` is read.
 * @param paramTypeClass Optional parameter type class carrying template arguments.
 * @returns `true` when a value type is known and `cppConversionRank` yields a
 *   finite rank for the element type; `false` otherwise.
 */
function bracedInitValueTypeMatches(paramType, argType, paramTypeClass) {
    const targetValueType = bracedInitTargetValueType(paramType, paramTypeClass);
    return (targetValueType !== undefined &&
        isFinite(cppConversionRank(argType.elementType, targetValueType)));
}
|
|
122
|
+
/**
 * Determine the value type of a braced-init target.
 *
 * The first template argument spelled in `paramType` wins; when the text has
 * none, the first entry of `paramTypeClass.templateArguments` is used.
 *
 * @returns The value type, or `undefined` when neither source provides one.
 */
function bracedInitTargetValueType(paramType, paramTypeClass) {
    const fromTypeText = firstTemplateArgument(paramType);
    if (fromTypeText !== undefined && fromTypeText !== null) {
        return fromTypeText;
    }
    return paramTypeClass?.templateArguments?.[0];
}
|
|
125
|
+
/**
 * Extract the first argument of the first template argument list in `rawType`.
 *
 * The argument ends at the first comma or closing `>` that sits directly
 * inside that list. Text inside `()`, `[]` or `{}` is treated as opaque, so a
 * function-type argument such as `void(*)(int, int)` is not cut at the comma
 * between its parameters.
 *
 * @param rawType Type text that may contain a template argument list.
 * @returns The argument passed through `bracedInitTargetBase`, or `undefined`
 *   when there is no `<` or the list is never terminated.
 */
function firstTemplateArgument(rawType) {
    const start = rawType.indexOf('<');
    if (start < 0)
        return undefined;
    let angleDepth = 0;
    // Nesting level of (), [] and {} groups; delimiters inside them are ignored.
    let groupDepth = 0;
    for (let i = start + 1; i < rawType.length; i++) {
        const ch = rawType[i];
        if (ch === '(' || ch === '[' || ch === '{') {
            groupDepth++;
            continue;
        }
        if (ch === ')' || ch === ']' || ch === '}') {
            // A stray closer is ignored rather than driving the depth negative.
            if (groupDepth > 0)
                groupDepth--;
            continue;
        }
        if (groupDepth > 0)
            continue;
        if (ch === '<') {
            angleDepth++;
        }
        else if (ch === '>') {
            if (angleDepth === 0)
                return bracedInitTargetBase(rawType.slice(start + 1, i));
            angleDepth--;
        }
        else if (ch === ',' && angleDepth === 0) {
            return bracedInitTargetBase(rawType.slice(start + 1, i));
        }
    }
    return undefined;
}
|
|
146
|
+
/**
 * Normalize a type string for comparison against braced-init target names.
 * Delegates entirely to `normalizeCppParamType`.
 */
function bracedInitTargetBase(paramType) {
    const normalized = normalizeCppParamType(paramType);
    return normalized;
}
|
|
60
149
|
function isPointer(typeClass) {
|
|
61
150
|
return typeClass?.indirection === 'pointer' && typeClass.pointerDepth > 0;
|
|
62
151
|
}
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
* declaration transparently.
|
|
28
28
|
*/
|
|
29
29
|
import { isOverloadAmbiguousAfterNormalization, narrowOverloadCandidates, } from '../../scope-resolution/passes/overload-narrowing.js';
|
|
30
|
-
import { cppConversionRank } from './conversion-rank.js';
|
|
30
|
+
import { CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES, cppConversionRank } from './conversion-rank.js';
|
|
31
31
|
const inlineNamespaceRangesByFile = new Map();
|
|
32
32
|
const inlineNamespaceScopeIds = new Set();
|
|
33
33
|
function rangeKey(r) {
|
|
@@ -142,7 +142,12 @@ export function resolveCppQualifiedNamespaceMember(receiverName, memberName, par
|
|
|
142
142
|
// can disambiguate via exact-type match and, when available, conversion-rank
|
|
143
143
|
// scoring (`cppConversionRank`). Same-signature ambiguity is still detected
|
|
144
144
|
// by `isOverloadAmbiguousAfterNormalization` below.
|
|
145
|
-
const narrowed = narrowOverloadCandidates(allHits, callsite?.arity, callsite?.argumentTypes, callsite !== undefined
|
|
145
|
+
const narrowed = narrowOverloadCandidates(allHits, callsite?.arity, callsite?.argumentTypes, callsite !== undefined
|
|
146
|
+
? {
|
|
147
|
+
conversionRankFn: cppConversionRank,
|
|
148
|
+
conversionOnlyArgTypePrefixes: CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES,
|
|
149
|
+
}
|
|
150
|
+
: undefined);
|
|
146
151
|
if (narrowed.length === 1)
|
|
147
152
|
return narrowed[0];
|
|
148
153
|
if (narrowed.length === 0)
|
|
@@ -3,7 +3,7 @@ import { buildMro, defaultLinearize } from '../../scope-resolution/passes/mro.js
|
|
|
3
3
|
import { isOverloadAmbiguousAfterNormalization, narrowOverloadCandidates, } from '../../scope-resolution/passes/overload-narrowing.js';
|
|
4
4
|
import { isClassLike } from '../../scope-resolution/scope/walkers.js';
|
|
5
5
|
import { cppConstraintCompatibility } from './constraint-filter.js';
|
|
6
|
-
import { cppConversionRank } from './conversion-rank.js';
|
|
6
|
+
import { CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES, cppConversionRank } from './conversion-rank.js';
|
|
7
7
|
const capturedByFile = new Map();
|
|
8
8
|
let directParentsByDefId = new Map();
|
|
9
9
|
let virtualEdges = new Set();
|
|
@@ -193,6 +193,7 @@ function chooseOverload(candidates, callsite) {
|
|
|
193
193
|
const narrowed = narrowOverloadCandidates(candidates, callsite.arity, callsite.argumentTypes, {
|
|
194
194
|
argumentTypeClasses: callsite.argumentTypeClasses,
|
|
195
195
|
conversionRankFn: cppConversionRank,
|
|
196
|
+
conversionOnlyArgTypePrefixes: CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES,
|
|
196
197
|
constraintCompatibility: cppConstraintCompatibility,
|
|
197
198
|
});
|
|
198
199
|
if (narrowed.length === 1)
|
|
@@ -3,7 +3,7 @@ import { SupportedLanguages } from '../../../../_shared/index.js';
|
|
|
3
3
|
import { populateClassOwnedMembers, tagNamespacePrefixes, } from '../../scope-resolution/scope/walkers.js';
|
|
4
4
|
import { cppProvider } from '../c-cpp.js';
|
|
5
5
|
import { cppArityCompatibility } from './arity.js';
|
|
6
|
-
import { cppConversionRank } from './conversion-rank.js';
|
|
6
|
+
import { CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES, cppConversionRank } from './conversion-rank.js';
|
|
7
7
|
import { cppMergeBindings } from './merge-bindings.js';
|
|
8
8
|
import { resolveCppImportTarget } from './import-target.js';
|
|
9
9
|
import { scanCppHeaderFiles } from './header-scan.js';
|
|
@@ -205,6 +205,7 @@ export const cppScopeResolver = {
|
|
|
205
205
|
// Disambiguates `f(int)` vs `f(double)` called with `f(2.5)` by scoring
|
|
206
206
|
// each candidate's conversion cost; exact match wins over standard conversion.
|
|
207
207
|
conversionRankFn: cppConversionRank,
|
|
208
|
+
conversionOnlyArgTypePrefixes: CPP_CONVERSION_ONLY_ARG_TYPE_PREFIXES,
|
|
208
209
|
// Range-for element type inference: for (auto& user : users) → bind user to User
|
|
209
210
|
populateRangeBindings: populateCppRangeBindings,
|
|
210
211
|
// C++ method return-type bindings need to be visible from module scope
|
|
@@ -477,12 +477,19 @@ function parseJsonParameterTypeClassesCapture(cap) {
|
|
|
477
477
|
if (typeof o.pointerDepth !== 'number' || !Number.isFinite(o.pointerDepth)) {
|
|
478
478
|
return undefined;
|
|
479
479
|
}
|
|
480
|
-
|
|
480
|
+
const shape = {
|
|
481
481
|
base: o.base,
|
|
482
482
|
cv: o.cv,
|
|
483
483
|
indirection: o.indirection,
|
|
484
484
|
pointerDepth: o.pointerDepth,
|
|
485
|
-
}
|
|
485
|
+
};
|
|
486
|
+
if (Array.isArray(o.templateArguments)) {
|
|
487
|
+
if (!o.templateArguments.every((x) => typeof x === 'string')) {
|
|
488
|
+
return undefined;
|
|
489
|
+
}
|
|
490
|
+
shape.templateArguments = [...o.templateArguments];
|
|
491
|
+
}
|
|
492
|
+
out.push(shape);
|
|
486
493
|
}
|
|
487
494
|
return out;
|
|
488
495
|
}
|
|
@@ -618,6 +618,15 @@ export interface ScopeResolver {
|
|
|
618
618
|
* `cppConversionRank`; other languages define their own if needed.
|
|
619
619
|
*/
|
|
620
620
|
readonly conversionRankFn?: ConversionRankFn;
|
|
621
|
+
/**
|
|
622
|
+
* Optional per-language argument-type prefixes for conversion-only
|
|
623
|
+
* argument sentinels. When ranking cannot find any viable candidate
|
|
624
|
+
* for a multi-overload set containing one of these sentinels, shared
|
|
625
|
+
* narrowing suppresses the ambiguous set instead of falling back to
|
|
626
|
+
* arity-only candidates. Languages without such sentinels leave this
|
|
627
|
+
* undefined.
|
|
628
|
+
*/
|
|
629
|
+
readonly conversionOnlyArgTypePrefixes?: readonly string[];
|
|
621
630
|
/**
|
|
622
631
|
* Optional predicate to identify definitions with file-local linkage
|
|
623
632
|
* (e.g. C `static` functions). When provided, `pickUniqueGlobalCallable`
|
|
@@ -49,6 +49,7 @@ export declare function emitFreeCallFallback(graph: KnowledgeGraph, scopes: Scop
|
|
|
49
49
|
};
|
|
50
50
|
}, callerParsed: ParsedFile, scopes: ScopeResolutionIndexes, parsedFiles: readonly ParsedFile[]) => readonly SymbolDefinition[] | undefined;
|
|
51
51
|
readonly conversionRankFn?: ConversionRankFn;
|
|
52
|
+
readonly conversionOnlyArgTypePrefixes?: readonly string[];
|
|
52
53
|
/** Optional per-language constraint hook threaded into
|
|
53
54
|
* `narrowOverloadCandidates`. Drops candidates whose template
|
|
54
55
|
* constraints (e.g. C++ `enable_if_t`, C++20 `requires`) provably
|
|
@@ -99,7 +100,7 @@ export declare function buildGlobalClassIndex(scopes: ScopeResolutionIndexes): R
|
|
|
99
100
|
* order. Exported for unit testing — the `scopeDefsCache` equivalence is
|
|
100
101
|
* exercised via synthetic stubs in `pick-unique-global-callable.test.ts`.
|
|
101
102
|
*/
|
|
102
|
-
export declare function pickUniqueGlobalCallable(name: string, model: SemanticModel, globalCallablesBySimpleName: ReadonlyMap<string, readonly SymbolDefinition[]>, callerFilePath: string, isFileLocalDef?: (def: SymbolDefinition) => boolean, callArity?: number, isCallerVisible?: (candidate: SymbolDefinition) => boolean, callArgTypes?: readonly string[], callArgTypeClasses?: readonly ParameterTypeClass[], conversionRankFn?: ConversionRankFn, scopeDefsCache?: Map<string, readonly SymbolDefinition[]
|
|
103
|
+
export declare function pickUniqueGlobalCallable(name: string, model: SemanticModel, globalCallablesBySimpleName: ReadonlyMap<string, readonly SymbolDefinition[]>, callerFilePath: string, isFileLocalDef?: (def: SymbolDefinition) => boolean, callArity?: number, isCallerVisible?: (candidate: SymbolDefinition) => boolean, callArgTypes?: readonly string[], callArgTypeClasses?: readonly ParameterTypeClass[], conversionRankFn?: ConversionRankFn, scopeDefsCache?: Map<string, readonly SymbolDefinition[]>, conversionOnlyArgTypePrefixes?: readonly string[]): SymbolDefinition | undefined;
|
|
103
104
|
/** Find a unique workspace-wide class-like def by simple name, for a
|
|
104
105
|
* constructor-form call `Type(...)` whose type lives outside the call
|
|
105
106
|
* site's lexical bindings (a sibling/imported file). Returns the def
|
|
@@ -142,5 +143,6 @@ export declare function pickImplicitThisOverload(site: {
|
|
|
142
143
|
readonly argumentTypeClasses?: readonly import('../../../../_shared/index.js').ParameterTypeClass[];
|
|
143
144
|
}, scopes: ScopeResolutionIndexes, workspaceIndex: WorkspaceResolutionIndex, model: SemanticModel, hookCtx?: {
|
|
144
145
|
readonly conversionRankFn?: ConversionRankFn;
|
|
146
|
+
readonly conversionOnlyArgTypePrefixes?: readonly string[];
|
|
145
147
|
readonly constraintCompatibility?: ScopeResolver['constraintCompatibility'];
|
|
146
148
|
}): SymbolDefinition | undefined;
|
|
@@ -89,6 +89,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
|
|
|
89
89
|
if (fnDef === undefined) {
|
|
90
90
|
fnDef = pickImplicitThisOverload(site, scopes, workspaceIndex, model, {
|
|
91
91
|
conversionRankFn: options.conversionRankFn,
|
|
92
|
+
conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
|
|
92
93
|
constraintCompatibility: options.constraintCompatibility,
|
|
93
94
|
});
|
|
94
95
|
fnDefFromImplicitThis = fnDef !== undefined;
|
|
@@ -112,6 +113,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
|
|
|
112
113
|
const narrowed = narrowOverloadCandidates(allCallables, site.arity, site.argumentTypes, {
|
|
113
114
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
114
115
|
conversionRankFn: options.conversionRankFn,
|
|
116
|
+
conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
|
|
115
117
|
constraintCompatibility: options.constraintCompatibility,
|
|
116
118
|
});
|
|
117
119
|
if (narrowed.length === 1) {
|
|
@@ -190,6 +192,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
|
|
|
190
192
|
const narrowed = narrowOverloadCandidates(ordinary, site.arity, site.argumentTypes, {
|
|
191
193
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
192
194
|
conversionRankFn: options.conversionRankFn,
|
|
195
|
+
conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
|
|
193
196
|
constraintCompatibility: options.constraintCompatibility,
|
|
194
197
|
});
|
|
195
198
|
if (narrowed.length === 1) {
|
|
@@ -240,6 +243,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
|
|
|
240
243
|
const narrowed = narrowOverloadCandidates(merged, site.arity, site.argumentTypes, {
|
|
241
244
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
242
245
|
conversionRankFn: options.conversionRankFn,
|
|
246
|
+
conversionOnlyArgTypePrefixes: options.conversionOnlyArgTypePrefixes,
|
|
243
247
|
constraintCompatibility: options.constraintCompatibility,
|
|
244
248
|
});
|
|
245
249
|
if (narrowed.length === 1) {
|
|
@@ -285,7 +289,7 @@ export function emitFreeCallFallback(graph, scopes, parsedFiles, nodeLookup, _re
|
|
|
285
289
|
callerScope: site.inScope,
|
|
286
290
|
scopes,
|
|
287
291
|
})
|
|
288
|
-
: undefined, site.argumentTypes, site.argumentTypeClasses, options.conversionRankFn, scopeDefsCache);
|
|
292
|
+
: undefined, site.argumentTypes, site.argumentTypeClasses, options.conversionRankFn, scopeDefsCache, options.conversionOnlyArgTypePrefixes);
|
|
289
293
|
}
|
|
290
294
|
if (fnDef === undefined)
|
|
291
295
|
continue;
|
|
@@ -442,7 +446,7 @@ export function buildGlobalClassIndex(scopes) {
|
|
|
442
446
|
* order. Exported for unit testing — the `scopeDefsCache` equivalence is
|
|
443
447
|
* exercised via synthetic stubs in `pick-unique-global-callable.test.ts`.
|
|
444
448
|
*/
|
|
445
|
-
export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleName, callerFilePath, isFileLocalDef, callArity, isCallerVisible, callArgTypes, callArgTypeClasses, conversionRankFn, scopeDefsCache) {
|
|
449
|
+
export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleName, callerFilePath, isFileLocalDef, callArity, isCallerVisible, callArgTypes, callArgTypeClasses, conversionRankFn, scopeDefsCache, conversionOnlyArgTypePrefixes) {
|
|
446
450
|
// The scope-index candidate list is a pure function of (name, callerFilePath):
|
|
447
451
|
// the same-name bucket is fixed for the pass, the file-local filter depends
|
|
448
452
|
// only on the candidate + callerFilePath, and the logical-key dedup is
|
|
@@ -504,6 +508,7 @@ export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleNam
|
|
|
504
508
|
const narrowed = narrowOverloadCandidates(scopeDefs, callArity, callArgTypes, {
|
|
505
509
|
argumentTypeClasses: callArgTypeClasses,
|
|
506
510
|
conversionRankFn,
|
|
511
|
+
conversionOnlyArgTypePrefixes,
|
|
507
512
|
});
|
|
508
513
|
if (narrowed.length === 1)
|
|
509
514
|
return narrowed[0];
|
|
@@ -545,6 +550,7 @@ export function pickUniqueGlobalCallable(name, model, globalCallablesBySimpleNam
|
|
|
545
550
|
const narrowed = narrowOverloadCandidates(defs, callArity, callArgTypes, {
|
|
546
551
|
argumentTypeClasses: callArgTypeClasses,
|
|
547
552
|
conversionRankFn,
|
|
553
|
+
conversionOnlyArgTypePrefixes,
|
|
548
554
|
});
|
|
549
555
|
if (narrowed.length === 1)
|
|
550
556
|
return narrowed[0];
|
|
@@ -687,6 +693,7 @@ export function pickImplicitThisOverload(site, scopes, workspaceIndex, model, ho
|
|
|
687
693
|
const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
|
|
688
694
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
689
695
|
conversionRankFn: hookCtx?.conversionRankFn,
|
|
696
|
+
conversionOnlyArgTypePrefixes: hookCtx?.conversionOnlyArgTypePrefixes,
|
|
690
697
|
constraintCompatibility: hookCtx?.constraintCompatibility,
|
|
691
698
|
});
|
|
692
699
|
if (candidates.length !== 1)
|
|
@@ -69,6 +69,10 @@ export interface OverloadNarrowingHookCtx {
|
|
|
69
69
|
/** Conversion-rank scoring fallback (step 4b). Engages when the
|
|
70
70
|
* exact-type filter rejects every candidate. */
|
|
71
71
|
readonly conversionRankFn?: ConversionRankFn;
|
|
72
|
+
/** Per-language argument-type prefixes whose conversion-rank failures
|
|
73
|
+
* should suppress genuinely ambiguous multi-overload sets instead of
|
|
74
|
+
* falling back to arity-only candidates. */
|
|
75
|
+
readonly conversionOnlyArgTypePrefixes?: readonly string[];
|
|
72
76
|
/** Constraint filter (step 4c). Drops candidates whose template
|
|
73
77
|
* guards (SFINAE `enable_if_t`, C++20 `requires`, future Rust
|
|
74
78
|
* trait bounds, etc.) provably fail at the call site. Three-valued
|
|
@@ -106,6 +106,10 @@ export function narrowOverloadCandidates(overloads, argCount, argTypes, hookCtx)
|
|
|
106
106
|
const ranked = rankByConversion(candidates, argTypes, hookCtx.conversionRankFn, hookCtx.argumentTypeClasses);
|
|
107
107
|
if (ranked.length > 0)
|
|
108
108
|
result = ranked;
|
|
109
|
+
else if (candidates.length > 1 &&
|
|
110
|
+
hasConversionOnlyArgType(argTypes, hookCtx.conversionOnlyArgTypePrefixes)) {
|
|
111
|
+
result = [];
|
|
112
|
+
}
|
|
109
113
|
}
|
|
110
114
|
}
|
|
111
115
|
// Constraint filter (step 4c; Tier-A — SFINAE / `requires` clauses).
|
|
@@ -145,6 +149,11 @@ export function narrowOverloadCandidates(overloads, argCount, argTypes, hookCtx)
|
|
|
145
149
|
}
|
|
146
150
|
return result;
|
|
147
151
|
}
|
|
152
|
+
/**
 * Report whether any argument type starts with one of the given prefixes.
 *
 * @param argTypes Argument type strings of the call site.
 * @param prefixes Conversion-only sentinel prefixes; may be `undefined`.
 * @returns `false` when `prefixes` is `undefined` or empty, otherwise whether
 *   at least one argument type begins with at least one prefix.
 */
function hasConversionOnlyArgType(argTypes, prefixes) {
    const noPrefixes = prefixes === undefined || prefixes.length === 0;
    if (noPrefixes) {
        return false;
    }
    const isConversionOnly = (argType) => prefixes.some((prefix) => argType.startsWith(prefix));
    return argTypes.some(isConversionOnly);
}
|
|
148
157
|
function exactTypeSlotMatches(argType, paramType, argTypeClass, paramTypeClass) {
|
|
149
158
|
if (argType !== paramType)
|
|
150
159
|
return false;
|
|
@@ -47,7 +47,7 @@ import type { ResolutionOutcomeRecorder } from '../resolution-outcome.js';
|
|
|
47
47
|
/** Subset of `ScopeResolver` consumed by this pass. Accepting the
|
|
48
48
|
* subset rather than the full provider keeps tests and partial
|
|
49
49
|
* refactors lighter — callers only need to populate what we read. */
|
|
50
|
-
type ReceiverBoundProviderSubset = Pick<ScopeResolver, 'isSuperReceiver' | 'isSuperReceiverInContext' | 'fieldFallbackOnMethodLookup' | 'collapseMemberCallsByCallerTarget' | 'unwrapCollectionAccessor' | 'hoistTypeBindingsToModule' | 'resolveQualifiedReceiverMember' | 'resolveReceiverMember' | 'resolveThisViaEnclosingClass' | 'conversionRankFn' | 'constraintCompatibility' | 'isStaticOnly'>;
|
|
50
|
+
type ReceiverBoundProviderSubset = Pick<ScopeResolver, 'isSuperReceiver' | 'isSuperReceiverInContext' | 'fieldFallbackOnMethodLookup' | 'collapseMemberCallsByCallerTarget' | 'unwrapCollectionAccessor' | 'hoistTypeBindingsToModule' | 'resolveQualifiedReceiverMember' | 'resolveReceiverMember' | 'resolveThisViaEnclosingClass' | 'conversionRankFn' | 'conversionOnlyArgTypePrefixes' | 'constraintCompatibility' | 'isStaticOnly'>;
|
|
51
51
|
export declare function emitReceiverBoundCalls(graph: KnowledgeGraph, scopes: ScopeResolutionIndexes, parsedFiles: readonly ParsedFile[], nodeLookup: GraphNodeLookup, handledSites: Set<string>, provider: ReceiverBoundProviderSubset, index: WorkspaceResolutionIndex, model: SemanticModel, options?: {
|
|
52
52
|
readonly recordResolutionOutcome?: ResolutionOutcomeRecorder;
|
|
53
53
|
}): number;
|
|
@@ -352,6 +352,7 @@ export function emitReceiverBoundCalls(graph, scopes, parsedFiles, nodeLookup, h
|
|
|
352
352
|
const narrowed = narrowOverloadCandidates(methodOverloads, site.arity, site.argumentTypes, {
|
|
353
353
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
354
354
|
conversionRankFn: provider.conversionRankFn,
|
|
355
|
+
conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
|
|
355
356
|
constraintCompatibility: provider.constraintCompatibility,
|
|
356
357
|
});
|
|
357
358
|
if (isOverloadAmbiguousAfterNormalization(narrowed, site.arity)) {
|
|
@@ -867,6 +868,7 @@ function pickOverload(ownerId, memberName, site, model, provider) {
|
|
|
867
868
|
const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
|
|
868
869
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
869
870
|
conversionRankFn: provider.conversionRankFn,
|
|
871
|
+
conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
|
|
870
872
|
constraintCompatibility: provider.constraintCompatibility,
|
|
871
873
|
});
|
|
872
874
|
// When narrowing leaves >1 candidate that share identical normalized
|
|
@@ -967,6 +969,7 @@ function pickFirstNonStaticOnly(ownerId, memberName, site, model, provider) {
|
|
|
967
969
|
const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
|
|
968
970
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
969
971
|
conversionRankFn: provider.conversionRankFn,
|
|
972
|
+
conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
|
|
970
973
|
constraintCompatibility: provider.constraintCompatibility,
|
|
971
974
|
});
|
|
972
975
|
// Same ambiguity handling as `pickOverload`: when normalization
|
|
@@ -1001,6 +1004,7 @@ function recordReceiverOverloadSuppression(record, filePath, site, ownerId, memb
|
|
|
1001
1004
|
const candidates = narrowOverloadCandidates(overloads, site.arity, site.argumentTypes, {
|
|
1002
1005
|
argumentTypeClasses: site.argumentTypeClasses,
|
|
1003
1006
|
conversionRankFn: provider.conversionRankFn,
|
|
1007
|
+
conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
|
|
1004
1008
|
constraintCompatibility: provider.constraintCompatibility,
|
|
1005
1009
|
});
|
|
1006
1010
|
const reason = isOverloadAmbiguousAfterNormalization(candidates, site.arity)
|
|
@@ -446,6 +446,7 @@ export function runScopeResolution(input, provider) {
|
|
|
446
446
|
isCallableVisibleFromCaller: provider.isCallableVisibleFromCaller,
|
|
447
447
|
resolveAdlCandidates: provider.resolveAdlCandidates,
|
|
448
448
|
conversionRankFn: provider.conversionRankFn,
|
|
449
|
+
conversionOnlyArgTypePrefixes: provider.conversionOnlyArgTypePrefixes,
|
|
449
450
|
constraintCompatibility: provider.constraintCompatibility,
|
|
450
451
|
recordResolutionOutcome,
|
|
451
452
|
});
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { extractSimpleTypeName, extractVarName, resolveIterableElementType, methodToTypeArgPosition, } from './shared.js';
|
|
2
|
+
import { CPP_BRACED_INIT_TYPE_PREFIX } from '../languages/cpp/conversion-rank.js';
|
|
2
3
|
const DECLARATION_NODE_TYPES = new Set(['declaration']);
|
|
3
4
|
/** Smart pointer factory function names that create a typed object. */
|
|
4
5
|
const SMART_PTR_FACTORIES = new Set(['make_shared', 'make_unique', 'make_shared_for_overwrite']);
|
|
@@ -447,6 +448,8 @@ const extractForLoopBinding = (node, { scopeEnv, declarationTypeNodes, scope })
|
|
|
447
448
|
/** Infer the type of a literal AST node for C++ overload disambiguation. */
|
|
448
449
|
const inferLiteralType = (node) => {
|
|
449
450
|
switch (node.type) {
|
|
451
|
+
case 'initializer_list':
|
|
452
|
+
return inferBracedInitLiteralType(node);
|
|
450
453
|
case 'number_literal': {
|
|
451
454
|
const t = node.text;
|
|
452
455
|
// Float suffixes
|
|
@@ -475,6 +478,24 @@ const inferLiteralType = (node) => {
|
|
|
475
478
|
return undefined;
|
|
476
479
|
}
|
|
477
480
|
};
|
|
481
|
+
/**
 * Build the synthetic type tag for a C++ braced-init-list (`{a, b, c}`) used
 * as a call argument, so overload narrowing can reason about it.
 *
 * The tag has the shape `${CPP_BRACED_INIT_TYPE_PREFIX}<elementType>:<count>`:
 *  - `<count>` is ALWAYS the total number of elements in the list, regardless
 *    of where (or whether) an un-inferable element occurs;
 *  - `<elementType>` is the literal type shared by every element, or the
 *    placeholder `unknown` when the list is empty, when the elements disagree,
 *    when any element's type cannot be inferred by `inferLiteralType`, or when
 *    an element is itself a braced-init-list (its inferred type carries the
 *    braced-init prefix).
 *
 * @param node - the `initializer_list` syntax node.
 * @returns the encoded braced-init type tag.
 */
function inferBracedInitLiteralType(node) {
    let elementCount = 0;
    let sharedType;
    let homogeneous = true;
    for (const child of node.children) {
        // Punctuation is not an element. `comment` children are skipped too so a
        // comment inside the braces is not counted as an (unknown) element.
        // NOTE(review): assumes the grammar surfaces comments as `comment`
        // nodes among `children` — confirm against the C++ grammar in use.
        if (child.type === ',' ||
            child.type === '{' ||
            child.type === '}' ||
            child.type === 'comment') {
            continue;
        }
        elementCount++;
        // Once the list is known to be heterogeneous/unknown we only need to
        // keep counting; further inference cannot change the outcome.
        if (!homogeneous)
            continue;
        const elementType = inferLiteralType(child);
        if (elementType === undefined || elementType.startsWith(CPP_BRACED_INIT_TYPE_PREFIX)) {
            // Un-inferable element, or a nested braced-init-list.
            homogeneous = false;
        }
        else if (sharedType === undefined) {
            sharedType = elementType;
        }
        else if (elementType !== sharedType) {
            homogeneous = false;
        }
    }
    if (elementCount === 0 || !homogeneous) {
        return `${CPP_BRACED_INIT_TYPE_PREFIX}unknown:${elementCount}`;
    }
    return `${CPP_BRACED_INIT_TYPE_PREFIX}${sharedType}:${elementCount}`;
}
|
|
478
499
|
/** C++: detect constructor type from smart pointer factory calls (make_shared<Dog>()).
|
|
479
500
|
* Extracts the template type argument as the constructor type for virtual dispatch. */
|
|
480
501
|
const detectCppConstructorType = (node, classNames) => {
|
|
@@ -55,5 +55,22 @@ export interface StreamedCSVResult {
|
|
|
55
55
|
* Stream all CSV data directly to disk files.
|
|
56
56
|
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
57
57
|
* File contents are lazy-read from disk with a generous LRU cache.
|
|
58
|
+
*
|
|
59
|
+
* `onNodePhaseComplete` (optional, #2203 parallelism leg): fired exactly once,
|
|
60
|
+
* right after every node CSV is fully flushed to disk and BEFORE the
|
|
61
|
+
* relationship pass starts writing any `rel_*.csv`. It receives the finished
|
|
62
|
+
* node-file manifest so the caller can begin `COPY`-ing nodes while this
|
|
63
|
+
* function keeps generating relationship CSVs (the only single-writer-safe
|
|
64
|
+
* overlap — node `COPY` ‖ relationship emit). It is intentionally NOT awaited:
|
|
65
|
+
* the relationship pass proceeds concurrently with whatever the caller
|
|
66
|
+
* schedules. A synchronous throw from the callback is allowed and propagates out
|
|
67
|
+
* of this function (rejecting the returned promise) — it is raised before the
|
|
68
|
+
* relationship pass begins, so no `rel_*.csv` is written; `loadGraphToLbug` uses
|
|
69
|
+
* this to surface its PDG-manifest collision guard. The callback must NOT, however,
|
|
70
|
+
* schedule un-awaited async work that can reject unobserved. Absent ⇒ today's
|
|
71
|
+
* behavior, byte-for-byte.
|
|
58
72
|
*/
|
|
59
|
-
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string
|
|
73
|
+
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, onNodePhaseComplete?: (nodeFiles: Map<NodeTableName, {
|
|
74
|
+
csvPath: string;
|
|
75
|
+
rows: number;
|
|
76
|
+
}>) => void) => Promise<StreamedCSVResult>;
|
|
@@ -33,6 +33,14 @@ const orderedNodes = (graph, sorted) => sorted ? [...graph.iterNodes()].sort(byG
|
|
|
33
33
|
const orderedRelationships = (graph, sorted) => sorted ? [...graph.iterRelationships()].sort(byGraphId) : graph.iterRelationships();
|
|
34
34
|
/** Flush buffered rows to disk every N rows */
|
|
35
35
|
const FLUSH_EVERY = 500;
|
|
36
|
+
/**
|
|
37
|
+
* Yield the event loop every N relationship rows during the emit pass (#2226 F4)
|
|
38
|
+
* so a concurrent node COPY (the overlap in loadGraphToLbug) and write-stream
|
|
39
|
+
* drain callbacks get scheduling time during long synchronous emit stretches.
|
|
40
|
+
* Scheduling-only — never changes row content or order (byte-identical). Tuning
|
|
41
|
+
* constant, not load-bearing.
|
|
42
|
+
*/
|
|
43
|
+
const REL_YIELD_EVERY = 5000;
|
|
36
44
|
// ============================================================================
|
|
37
45
|
// CSV ESCAPE UTILITIES
|
|
38
46
|
// ============================================================================
|
|
@@ -239,8 +247,22 @@ export const buildBasicBlockRow = (node) => [
|
|
|
239
247
|
* Stream all CSV data directly to disk files.
|
|
240
248
|
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
241
249
|
* File contents are lazy-read from disk with a generous LRU cache.
|
|
250
|
+
*
|
|
251
|
+
* `onNodePhaseComplete` (optional, #2203 parallelism leg): fired exactly once,
|
|
252
|
+
* right after every node CSV is fully flushed to disk and BEFORE the
|
|
253
|
+
* relationship pass starts writing any `rel_*.csv`. It receives the finished
|
|
254
|
+
* node-file manifest so the caller can begin `COPY`-ing nodes while this
|
|
255
|
+
* function keeps generating relationship CSVs (the only single-writer-safe
|
|
256
|
+
* overlap — node `COPY` ‖ relationship emit). It is intentionally NOT awaited:
|
|
257
|
+
* the relationship pass proceeds concurrently with whatever the caller
|
|
258
|
+
* schedules. A synchronous throw from the callback is allowed and propagates out
|
|
259
|
+
* of this function (rejecting the returned promise) — it is raised before the
|
|
260
|
+
* relationship pass begins, so no `rel_*.csv` is written; `loadGraphToLbug` uses
|
|
261
|
+
* this to surface its PDG-manifest collision guard. The callback must NOT, however,
|
|
262
|
+
* schedule un-awaited async work that can reject unobserved. Absent ⇒ today's
|
|
263
|
+
* behavior, byte-for-byte.
|
|
242
264
|
*/
|
|
243
|
-
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
265
|
+
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, onNodePhaseComplete) => {
|
|
244
266
|
// Deterministic (id-sorted) node/relationship row order when enabled;
|
|
245
267
|
// default off = today's graph-insertion order (byte-identical).
|
|
246
268
|
const sortOutput = parseTruthyEnv(process.env.GITNEXUS_SORT_GRAPH_OUTPUT);
|
|
@@ -502,30 +524,11 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
502
524
|
...multiLangWriters.values(),
|
|
503
525
|
];
|
|
504
526
|
await Promise.all(allWriters.map((w) => w.finish()));
|
|
505
|
-
//
|
|
506
|
-
//
|
|
507
|
-
//
|
|
508
|
-
//
|
|
509
|
-
//
|
|
510
|
-
// validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
|
|
511
|
-
// files are byte-identical (asserted by the differential test).
|
|
512
|
-
const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
|
|
513
|
-
try {
|
|
514
|
-
for (const rel of orderedRelationships(graph, sortOutput)) {
|
|
515
|
-
const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
|
|
516
|
-
if (pending)
|
|
517
|
-
await pending;
|
|
518
|
-
}
|
|
519
|
-
await relRouter.close();
|
|
520
|
-
}
|
|
521
|
-
catch (err) {
|
|
522
|
-
relRouter.destroy();
|
|
523
|
-
// Rethrow the real stream error (EMFILE / disk-full) rather than the generic
|
|
524
|
-
// AbortError a pending drain-await rejects with — mirrors the retained
|
|
525
|
-
// splitRelCsvByLabelPair's `throw streamError ?? err`.
|
|
526
|
-
throw relRouter.lastError ?? err;
|
|
527
|
-
}
|
|
528
|
-
// Build result map — only include tables that have rows
|
|
527
|
+
// Build the node-file manifest now (all writers are flushed; `.rows` is
|
|
528
|
+
// final). Hoisted above the relationship pass so `onNodePhaseComplete` can
|
|
529
|
+
// hand the caller a complete node manifest to start COPY-ing while we keep
|
|
530
|
+
// generating relationship CSVs below (#2203 overlap). The same map is
|
|
531
|
+
// returned, so the result is unchanged when no callback is supplied.
|
|
529
532
|
const nodeFiles = new Map();
|
|
530
533
|
const tableMap = [
|
|
531
534
|
['File', fileWriter],
|
|
@@ -551,6 +554,38 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
551
554
|
});
|
|
552
555
|
}
|
|
553
556
|
}
|
|
557
|
+
// Node CSVs are on disk; relationship CSVs have not been touched yet. Hand
|
|
558
|
+
// the manifest to the caller (not awaited — the rel pass runs concurrently).
|
|
559
|
+
onNodePhaseComplete?.(nodeFiles);
|
|
560
|
+
// --- Stream relationships directly to per-FROM→TO-label-pair files ---
|
|
561
|
+
// (#2203 U2) Route every edge to its pair file in this single pass. The old
|
|
562
|
+
// monolithic relations.csv — and its line-by-line re-read + per-edge regex
|
|
563
|
+
// re-split in loadGraphToLbug — are gone, so the ~1M-edge set is written and
|
|
564
|
+
// read once instead of twice. The router applies the SAME label-derivation +
|
|
565
|
+
// validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
|
|
566
|
+
// files are byte-identical (asserted by the differential test).
|
|
567
|
+
const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
|
|
568
|
+
try {
|
|
569
|
+
let emitted = 0;
|
|
570
|
+
for (const rel of orderedRelationships(graph, sortOutput)) {
|
|
571
|
+
const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
|
|
572
|
+
if (pending)
|
|
573
|
+
await pending;
|
|
574
|
+
// Periodically hand the event loop back so the overlapped node COPY and
|
|
575
|
+
// write-stream drains run instead of starving behind this synchronous
|
|
576
|
+
// loop (#2226 F4). No effect on emitted bytes — pure scheduling.
|
|
577
|
+
if (++emitted % REL_YIELD_EVERY === 0)
|
|
578
|
+
await new Promise((r) => setImmediate(r));
|
|
579
|
+
}
|
|
580
|
+
await relRouter.close();
|
|
581
|
+
}
|
|
582
|
+
catch (err) {
|
|
583
|
+
relRouter.destroy();
|
|
584
|
+
// Rethrow the real stream error (EMFILE / disk-full) rather than the generic
|
|
585
|
+
// AbortError a pending drain-await rejects with — mirrors the retained
|
|
586
|
+
// splitRelCsvByLabelPair's `throw streamError ?? err`.
|
|
587
|
+
throw relRouter.lastError ?? err;
|
|
588
|
+
}
|
|
554
589
|
return {
|
|
555
590
|
nodeFiles,
|
|
556
591
|
relsByPair: relRouter.byPair,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import lbug from '@ladybugdb/core';
|
|
2
2
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
3
|
+
import { NodeTableName } from './schema.js';
|
|
3
4
|
import type { PdgEmitManifest } from './pdg-emit-sink.js';
|
|
4
5
|
import type { CachedEmbedding } from '../embeddings/types.js';
|
|
5
6
|
import { type ExtensionEnsureOptions } from './extension-loader.js';
|
|
@@ -54,6 +55,18 @@ export declare const withLbugDb: <T>(dbPath: string, operation: () => Promise<T>
|
|
|
54
55
|
readOnly?: boolean;
|
|
55
56
|
}) => Promise<T>;
|
|
56
57
|
export type LbugProgressCallback = (message: string) => void;
|
|
58
|
+
/**
|
|
59
|
+
* Persist a KnowledgeGraph: stream CSVs, then bulk-COPY nodes (overlapped with
|
|
60
|
+
* relationship emit — see the body) and relationships.
|
|
61
|
+
*
|
|
62
|
+
* NOT TRANSACTIONAL (#2226). Each `COPY` commits independently and there is no
|
|
63
|
+
* surrounding transaction, so a failure partway through — a node `COPY` that
|
|
64
|
+
* throws at the FK barrier, a relationship `COPY` failure, or a `pdgEmitManifest`
|
|
65
|
+
* collision raised after node rows have already committed in the overlap path —
|
|
66
|
+
* leaves a partially-loaded DB. The caller surfaces the error; recovery is a
|
|
67
|
+
* `--force` re-analyze (a full rebuild), not a partial retry. Callers must not
|
|
68
|
+
* assume the DB is either fully loaded or untouched after a rejection.
|
|
69
|
+
*/
|
|
57
70
|
export declare const loadGraphToLbug: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: LbugProgressCallback,
|
|
58
71
|
/**
|
|
59
72
|
* Streamed PDG-emit manifest (#2202). When present (streaming was on, full
|
|
@@ -69,6 +82,8 @@ pdgEmitManifest?: PdgEmitManifest) => Promise<{
|
|
|
69
82
|
skippedRels: number;
|
|
70
83
|
warnings: string[];
|
|
71
84
|
}>;
|
|
85
|
+
export declare const COPY_CSV_OPTS = "(HEADER=true, ESCAPE='\"', DELIM=',', QUOTE='\"', PARALLEL=false, auto_detect=false)";
|
|
86
|
+
export declare const getCopyQuery: (table: NodeTableName, filePath: string) => string;
|
|
72
87
|
/**
|
|
73
88
|
* Insert a single node to LadybugDB
|
|
74
89
|
* @param label - Node type (File, Function, Class, etc.)
|
|
@@ -740,6 +740,60 @@ const doInitLbug = async (dbPath, readOnly = false) => {
|
|
|
740
740
|
currentDbPath = dbPath;
|
|
741
741
|
return { db, conn };
|
|
742
742
|
};
|
|
743
|
+
/**
 * Execute a COPY statement with a single lenient retry.
 *
 * The query is first run as given. If that attempt fails, its error is
 * discarded and the query is re-run with `IGNORE_ERRORS=true` spliced in after
 * `auto_detect=false` (which skips row-level errors). If the retry fails as
 * well, the RAW retry error is passed to `onError`; the helper deliberately
 * does no formatting so each call site can build and truncate its own message
 * (#2226 F5: node COPY slices to 200 chars and throws; relationship COPY
 * slices to 80 and warns). `onError` may throw to propagate the failure;
 * if it returns normally the helper resolves.
 */
const copyCsvWithRetry = async (targetConn, copyQuery, onError) => {
    let strictAttemptSucceeded = true;
    try {
        await queryAndDrain(targetConn, copyQuery);
    }
    catch {
        strictAttemptSucceeded = false;
    }
    if (strictAttemptSucceeded)
        return;
    try {
        const lenientQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
        await queryAndDrain(targetConn, lenientQuery);
    }
    catch (retryErr) {
        onError(retryErr);
    }
};
|
|
765
|
+
/**
 * Load every node CSV into the database, one COPY at a time, on the single
 * writable connection (LadybugDB allows one write txn at a time).
 *
 * Lives outside loadGraphToLbug so the same routine can be started either at
 * the node-phase boundary — overlapping the relationship emit pass (#2203) —
 * or after emit in the serial escape-hatch path.
 *
 * For each `[table, { csvPath, rows }]` entry a progress line
 * (`step/totalSteps`) is logged, then the COPY runs through
 * `copyCsvWithRetry`, keeping the IGNORE_ERRORS=true retry. If the retry also
 * fails an Error is thrown carrying the first 200 characters of the retry
 * message: without node rows the relationship COPY would dangle on missing
 * endpoints.
 */
const copyNodeCSVs = async (targetConn, nodeFileEntries, log, totalSteps) => {
    let position = 0;
    for (const [table, fileInfo] of nodeFileEntries) {
        const { csvPath, rows } = fileInfo;
        position += 1;
        log(`Loading nodes ${position}/${totalSteps}: ${table} (${rows.toLocaleString()} rows)`);
        // Hard-fail handler: a node table that cannot be loaded aborts the load.
        const failHard = (retryErr) => {
            const detail = retryErr instanceof Error ? retryErr.message : String(retryErr);
            throw new Error(`COPY failed for ${table}: ${detail.slice(0, 200)}`);
        };
        const statement = getCopyQuery(table, normalizeCopyPath(csvPath));
        await copyCsvWithRetry(targetConn, statement, failHard);
    }
};
|
|
785
|
+
/**
|
|
786
|
+
* Persist a KnowledgeGraph: stream CSVs, then bulk-COPY nodes (overlapped with
|
|
787
|
+
* relationship emit — see the body) and relationships.
|
|
788
|
+
*
|
|
789
|
+
* NOT TRANSACTIONAL (#2226). Each `COPY` commits independently and there is no
|
|
790
|
+
* surrounding transaction, so a failure partway through — a node `COPY` that
|
|
791
|
+
* throws at the FK barrier, a relationship `COPY` failure, or a `pdgEmitManifest`
|
|
792
|
+
* collision raised after node rows have already committed in the overlap path —
|
|
793
|
+
* leaves a partially-loaded DB. The caller surfaces the error; recovery is a
|
|
794
|
+
* `--force` re-analyze (a full rebuild), not a partial retry. Callers must not
|
|
795
|
+
* assume the DB is either fully loaded or untouched after a rejection.
|
|
796
|
+
*/
|
|
743
797
|
export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress,
|
|
744
798
|
/**
|
|
745
799
|
* Streamed PDG-emit manifest (#2202). When present (streaming was on, full
|
|
@@ -761,31 +815,87 @@ pdgEmitManifest) => {
|
|
|
761
815
|
// the gap that the DB-persistence path is un-timed today (the analyze
|
|
762
816
|
// "emit" number is the scope-resolution emit bucket, not this COPY path).
|
|
763
817
|
const PROF = process.env.PROF_LBUG_LOAD === '1';
|
|
818
|
+
// Escape hatch / differential oracle (#2203): force the legacy strictly-serial
|
|
819
|
+
// load order (emit everything, THEN COPY nodes, THEN COPY rels) instead of the
|
|
820
|
+
// default node-COPY ‖ rel-emit overlap. Lets an operator revert the behavior at
|
|
821
|
+
// runtime, and lets a test load the same graph both ways and assert identical
|
|
822
|
+
// persisted content.
|
|
823
|
+
const SERIAL = process.env.GITNEXUS_SERIAL_LBUG_LOAD === '1';
|
|
764
824
|
const mark = () => (PROF ? process.hrtime.bigint() : 0n);
|
|
765
825
|
const span = (a, b) => (Number(b - a) / 1e6).toFixed(1);
|
|
766
826
|
const tStart = mark();
|
|
767
827
|
const csvDir = resolveNativeSafeStorageDir(storagePath, 'csv');
|
|
768
|
-
|
|
769
|
-
const
|
|
770
|
-
//
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
//
|
|
774
|
-
//
|
|
775
|
-
//
|
|
776
|
-
|
|
828
|
+
// The single writable connection (LadybugDB is single-writer). Captured as a
|
|
829
|
+
// const so the node-COPY closure has a non-null reference — TS cannot narrow
|
|
830
|
+
// the reassignable module-level `conn` across the callback boundary.
|
|
831
|
+
const writeConn = conn;
|
|
832
|
+
const validTables = new Set(NODE_TABLES);
|
|
833
|
+
// Merge the streamed PDG-emit node CSVs (#2202) into a node-file map. Collision
|
|
834
|
+
// guard: a BasicBlock in the in-memory graph during a streamed run is an
|
|
835
|
+
// invariant violation (streamAllCSVsToDisk would also emit basicblock.csv), so
|
|
836
|
+
// fail loudly rather than drop rows (#2202 review #3). Runs at the node-phase
|
|
837
|
+
// boundary so the manifest BasicBlock table COPYs with the structural CSVs.
|
|
838
|
+
const mergeManifestNodeFiles = (nodeFilesMap) => {
|
|
839
|
+
if (!pdgEmitManifest)
|
|
840
|
+
return;
|
|
777
841
|
for (const [table, meta] of pdgEmitManifest.nodeFiles) {
|
|
778
|
-
|
|
779
|
-
// streamed run (streamAllCSVsToDisk then emitted a structural basicblock.csv).
|
|
780
|
-
// That is a streaming-invariant violation — fail loudly rather than
|
|
781
|
-
// silently overwrite one CSV with the other and drop its rows (#2202 review #3).
|
|
782
|
-
if (csvResult.nodeFiles.has(table)) {
|
|
842
|
+
if (nodeFilesMap.has(table)) {
|
|
783
843
|
throw new Error(`Streaming PDG manifest collides with a structural node CSV for "${table}" — ` +
|
|
784
844
|
`the in-memory graph should hold zero ${table} nodes when streaming. ` +
|
|
785
845
|
`A ${table} node leaked into the graph during a streamed emit.`);
|
|
786
846
|
}
|
|
787
|
-
|
|
847
|
+
nodeFilesMap.set(table, meta);
|
|
788
848
|
}
|
|
849
|
+
};
|
|
850
|
+
// Node COPY is the only DB write that can overlap relationship CSV emit: the
|
|
851
|
+
// rel pass writes new rel_*.csv files and never touches `conn`, while node COPY
|
|
852
|
+
// uses `conn` and never touches the rel files. We start node COPY at the
|
|
853
|
+
// node-phase boundary and let the rel pass run concurrently — the only
|
|
854
|
+
// single-writer-safe parallelism (#2203). The rel COPY still waits for node
|
|
855
|
+
// COPY (FK precondition), so the DB load order is unchanged.
|
|
856
|
+
let nodeCopyPromise;
|
|
857
|
+
let nodeCopyError;
|
|
858
|
+
const beginNodeCopy = (nodeFilesMap) => {
|
|
859
|
+
mergeManifestNodeFiles(nodeFilesMap);
|
|
860
|
+
const entries = [...nodeFilesMap.entries()];
|
|
861
|
+
// copyNodeCSVs logs node progress as step/total; it processes only node
|
|
862
|
+
// tables (the rel COPY has its own "Loading edges" progress line), so the
|
|
863
|
+
// denominator is the node-table count — not +1 reserving a rel step.
|
|
864
|
+
// .catch captures the failure so an overlapped (mid-emit) rejection cannot
|
|
865
|
+
// surface as an unhandled rejection; it is rethrown at the FK barrier below.
|
|
866
|
+
nodeCopyPromise = copyNodeCSVs(writeConn, entries, log, entries.length).catch((e) => {
|
|
867
|
+
nodeCopyError = e;
|
|
868
|
+
});
|
|
869
|
+
};
|
|
870
|
+
log('Streaming CSVs to disk...');
|
|
871
|
+
let csvResult;
|
|
872
|
+
try {
|
|
873
|
+
csvResult = SERIAL
|
|
874
|
+
? await streamAllCSVsToDisk(graph, repoPath, csvDir)
|
|
875
|
+
: await streamAllCSVsToDisk(graph, repoPath, csvDir, beginNodeCopy);
|
|
876
|
+
}
|
|
877
|
+
catch (emitErr) {
|
|
878
|
+
// Relationship emit failed. In overlap mode a node COPY may be in flight —
|
|
879
|
+
// settle it (the .catch above means this never rejects) before rethrowing so
|
|
880
|
+
// it cannot leak as an unhandled rejection.
|
|
881
|
+
if (nodeCopyPromise)
|
|
882
|
+
await nodeCopyPromise;
|
|
883
|
+
// If node COPY ALSO failed, emitErr wins the throw — log the swallowed node
|
|
884
|
+
// error so a half-loaded DB isn't misattributed to the emit failure alone.
|
|
885
|
+
if (nodeCopyError) {
|
|
886
|
+
logger.warn({ err: nodeCopyError }, '[lbug-load] node COPY also failed while relationship emit was failing');
|
|
887
|
+
}
|
|
888
|
+
throw emitErr;
|
|
889
|
+
}
|
|
890
|
+
const tCsv = mark();
|
|
891
|
+
// Merge the streamed PDG-emit per-pair rel CSVs (#2202) into the COPY plan —
|
|
892
|
+
// collision-guarded. Done BEFORE node COPY so the serial escape hatch detects a
|
|
893
|
+
// manifest/structural pair collision before committing any node rows (legacy
|
|
894
|
+
// parity with the pre-overlap path), and the overlap path detects it as early
|
|
895
|
+
// as csvResult is available. When a manifest is present, streaming was on and
|
|
896
|
+
// the in-memory graph held zero BasicBlocks, so a structural collision means a
|
|
897
|
+
// streaming-invariant violation — fail loudly rather than load corrupt data.
|
|
898
|
+
if (pdgEmitManifest) {
|
|
789
899
|
for (const [pairKey, meta] of pdgEmitManifest.relsByPair) {
|
|
790
900
|
if (csvResult.relsByPair.has(pairKey)) {
|
|
791
901
|
throw new Error(`Streaming PDG manifest collides with a structural relationship CSV for pair ` +
|
|
@@ -795,30 +905,17 @@ pdgEmitManifest) => {
|
|
|
795
905
|
csvResult.totalValidRels += meta.rows;
|
|
796
906
|
}
|
|
797
907
|
}
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
try {
|
|
810
|
-
await queryAndDrain(conn, copyQuery);
|
|
811
|
-
}
|
|
812
|
-
catch (err) {
|
|
813
|
-
try {
|
|
814
|
-
const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
|
|
815
|
-
await queryAndDrain(conn, retryQuery);
|
|
816
|
-
}
|
|
817
|
-
catch (retryErr) {
|
|
818
|
-
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
819
|
-
throw new Error(`COPY failed for ${table}: ${retryMsg.slice(0, 200)}`);
|
|
820
|
-
}
|
|
821
|
-
}
|
|
908
|
+
// Serial path: all CSVs are on disk and node COPY has not started — start it
|
|
909
|
+
// here so the barrier below blocks on it exactly as the legacy path did.
|
|
910
|
+
if (SERIAL)
|
|
911
|
+
beginNodeCopy(csvResult.nodeFiles);
|
|
912
|
+
// FK barrier: node rows must exist before the relationship COPY resolves their
|
|
913
|
+
// endpoints. In overlap mode most of node COPY was hidden behind rel emit, so
|
|
914
|
+
// this await is the *residual* node-COPY time (≈0 when fully overlapped).
|
|
915
|
+
if (nodeCopyPromise)
|
|
916
|
+
await nodeCopyPromise;
|
|
917
|
+
if (nodeCopyError) {
|
|
918
|
+
throw nodeCopyError instanceof Error ? nodeCopyError : new Error(String(nodeCopyError));
|
|
822
919
|
}
|
|
823
920
|
const tCopyNodes = mark();
|
|
824
921
|
// Bulk COPY relationships. They were already routed to per-FROM→TO-label-pair
|
|
@@ -838,25 +935,17 @@ pdgEmitManifest) => {
|
|
|
838
935
|
pairIdx++;
|
|
839
936
|
const [fromLabel, toLabel] = pairKey.split('|');
|
|
840
937
|
const normalizedPath = normalizeCopyPath(pairCsvPath);
|
|
938
|
+
// PARALLEL=false is load-bearing here too — see COPY_CSV_OPTS (#2203 / kuzudb/kuzu#5778).
|
|
841
939
|
const copyQuery = `COPY ${REL_TABLE_NAME} FROM "${normalizedPath}" (from="${fromLabel}", to="${toLabel}", HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
|
|
842
940
|
if (pairIdx % 5 === 0 || rows > 1000) {
|
|
843
941
|
log(`Loading edges: ${pairIdx}/${relsByPair.size} types (${fromLabel} -> ${toLabel})`);
|
|
844
942
|
}
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
await queryAndDrain(conn, retryQuery);
|
|
852
|
-
}
|
|
853
|
-
catch (retryErr) {
|
|
854
|
-
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
855
|
-
warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${retryMsg.slice(0, 80)}`);
|
|
856
|
-
failedPairEdges += rows;
|
|
857
|
-
failedPairCsvPaths.add(pairCsvPath);
|
|
858
|
-
}
|
|
859
|
-
}
|
|
943
|
+
await copyCsvWithRetry(conn, copyQuery, (retryErr) => {
|
|
944
|
+
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
945
|
+
warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${retryMsg.slice(0, 80)}`);
|
|
946
|
+
failedPairEdges += rows;
|
|
947
|
+
failedPairCsvPaths.add(pairCsvPath);
|
|
948
|
+
});
|
|
860
949
|
// Only delete if not in failedPairCsvPaths (needed for fallback)
|
|
861
950
|
if (!failedPairCsvPaths.has(pairCsvPath)) {
|
|
862
951
|
try {
|
|
@@ -919,7 +1008,12 @@ pdgEmitManifest) => {
|
|
|
919
1008
|
let totalNodeRows = 0;
|
|
920
1009
|
for (const [, { rows }] of csvResult.nodeFiles)
|
|
921
1010
|
totalNodeRows += rows;
|
|
922
|
-
|
|
1011
|
+
// `mode` records which load path ran. In overlap mode `csv-emit` is the wall
|
|
1012
|
+
// to streamAllCSVsToDisk's return (node COPY overlapped part of it) and
|
|
1013
|
+
// `copy-nodes` is the RESIDUAL node-COPY await after emit returned — it
|
|
1014
|
+
// trends to 0 as the overlap hides node COPY behind relationship emit. In
|
|
1015
|
+
// serial mode the buckets carry their legacy, disjoint meaning.
|
|
1016
|
+
logger.warn(`[lbug-load prof] mode=${SERIAL ? 'serial' : 'overlap'} csv-emit=${span(tStart, tCsv)}ms ` +
|
|
923
1017
|
`copy-nodes=${span(tCsv, tCopyNodes)}ms copy-rels=${span(tCopyNodes, tCopyRels)}ms ` +
|
|
924
1018
|
`fallback=${span(tCopyRels, tFallback)}ms total=${span(tStart, tEnd)}ms ` +
|
|
925
1019
|
`(${totalNodeRows} nodes, ${insertedRels} rels)`);
|
|
@@ -930,7 +1024,18 @@ pdgEmitManifest) => {
|
|
|
930
1024
|
// Source code content is full of backslashes which confuse the auto-detection.
|
|
931
1025
|
// We MUST explicitly set ESCAPE='"' to use RFC 4180 escaping, and disable auto_detect to prevent
|
|
932
1026
|
// LadybugDB from overriding our settings based on sample rows.
|
|
933
|
-
|
|
1027
|
+
//
|
|
1028
|
+
// PARALLEL=false IS LOAD-BEARING FOR CORRECTNESS — DO NOT FLIP IT (#2203).
|
|
1029
|
+
// LadybugDB's parallel CSV reader (Kuzu-derived; default PARALLEL=true) splits the
|
|
1030
|
+
// file into byte ranges parsed concurrently, and CANNOT determine line boundaries
|
|
1031
|
+
// when a quoted field contains an embedded newline — it errors with "Quoted newlines
|
|
1032
|
+
// are not supported in parallel CSV reader. Please specify PARALLEL=FALSE", or worse,
|
|
1033
|
+
// mis-parses silently (upstream kuzudb/kuzu#5778, still open). Our `content`/`text`
|
|
1034
|
+
// columns hold source code, so quoted multiline fields are guaranteed. PARALLEL=false
|
|
1035
|
+
// is therefore required, not conservative. The multiline-quoted round-trip in
|
|
1036
|
+
// test/integration/copy-parallel-invariant.test.ts fails loudly if this is ever flipped.
|
|
1037
|
+
// Exported so that test asserts the invariant statically as well.
|
|
1038
|
+
export const COPY_CSV_OPTS = `(HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
|
|
934
1039
|
// Multi-language table names that were created with backticks in CODE_ELEMENT_BASE
|
|
935
1040
|
// and must always be referenced with backticks in queries
|
|
936
1041
|
const BACKTICK_TABLES = new Set([
|
|
@@ -996,7 +1101,7 @@ const TABLES_WITH_EXPORTED = new Set([
|
|
|
996
1101
|
'Method',
|
|
997
1102
|
'CodeElement',
|
|
998
1103
|
]);
|
|
999
|
-
const getCopyQuery = (table, filePath) => {
|
|
1104
|
+
export const getCopyQuery = (table, filePath) => {
|
|
1000
1105
|
const t = escapeTableName(table);
|
|
1001
1106
|
if (table === 'File') {
|
|
1002
1107
|
return `COPY ${t}(id, name, filePath, content) FROM "${filePath}" ${COPY_CSV_OPTS}`;
|
package/package.json
CHANGED