gitnexus 1.6.8-rc.45 → 1.6.8-rc.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/language-detection.d.ts.map +1 -1
- package/dist/_shared/language-detection.js +11 -1
- package/dist/_shared/language-detection.js.map +1 -1
- package/dist/core/group/extractors/include-extractor.js +15 -6
- package/dist/core/ingestion/import-resolvers/utils.js +2 -0
- package/dist/core/ingestion/languages/c-cpp.js +3 -1
- package/dist/core/ingestion/languages/cpp/header-scan.d.ts +1 -1
- package/dist/core/ingestion/languages/cpp/header-scan.js +2 -2
- package/dist/core/lbug/csv-generator.d.ts +18 -1
- package/dist/core/lbug/csv-generator.js +60 -25
- package/dist/core/lbug/lbug-adapter.d.ts +15 -0
- package/dist/core/lbug/lbug-adapter.js +162 -57
- package/package.json +1 -1
- package/web/assets/{agent-D5J40fBb.js → agent-D3InsSLB.js} +1 -1
- package/web/assets/{architectureDiagram-UL44E2DR-CUVrwu-f.js → architectureDiagram-UL44E2DR-CgDGC1DL.js} +1 -1
- package/web/assets/{chunk-LCXTWHL2-DYctxkWH.js → chunk-LCXTWHL2-C2_vB0oy.js} +1 -1
- package/web/assets/{chunk-RG4AUYOV-Dai0blvV.js → chunk-RG4AUYOV-D84_4-Cg.js} +1 -1
- package/web/assets/{classDiagram-KGZ6W3CR-DGMIeRUV.js → classDiagram-KGZ6W3CR-CPYfTJpw.js} +1 -1
- package/web/assets/{classDiagram-v2-72OJOZXJ-DGMIeRUV.js → classDiagram-v2-72OJOZXJ-CPYfTJpw.js} +1 -1
- package/web/assets/{dagre-ND4H6XIP-DjIPJ0yu.js → dagre-ND4H6XIP-CVdjclRz.js} +1 -1
- package/web/assets/{diagram-3NCE3AQN-D-gExQR8.js → diagram-3NCE3AQN-DxbKpDxH.js} +1 -1
- package/web/assets/{diagram-GF46GFSD-B7qp9EBf.js → diagram-GF46GFSD-ZHL89iOK.js} +1 -1
- package/web/assets/{diagram-QXG6HAR7-CVD497Ig.js → diagram-QXG6HAR7-BQ3FK6K4.js} +1 -1
- package/web/assets/{diagram-WEQXMOUZ-BZgvAFQK.js → diagram-WEQXMOUZ-Daor0MfI.js} +1 -1
- package/web/assets/{erDiagram-L5TCEMPS-eFDAIcoP.js → erDiagram-L5TCEMPS-Cx5qGjOE.js} +1 -1
- package/web/assets/{flowDiagram-H6V6AXG4-mlFvvBcJ.js → flowDiagram-H6V6AXG4-DfF8zxfW.js} +1 -1
- package/web/assets/{index-80yfnWPU.js → index-BpnK_6O6.js} +6 -6
- package/web/assets/{infoDiagram-3YFTVSEB-A6e-DQS2.js → infoDiagram-3YFTVSEB-0TASL532.js} +1 -1
- package/web/assets/{ishikawaDiagram-BNXS4ZKH-CmzHpns0.js → ishikawaDiagram-BNXS4ZKH-B6oWSm73.js} +1 -1
- package/web/assets/{kanban-definition-75IXJCU3-BRr8hm0l.js → kanban-definition-75IXJCU3-YBs3SqPA.js} +1 -1
- package/web/assets/{mindmap-definition-2TDM6QVE-BmRh5T1l.js → mindmap-definition-2TDM6QVE-CezQz4B1.js} +1 -1
- package/web/assets/{pieDiagram-CU6KROY3-j4wKlimb.js → pieDiagram-CU6KROY3-CTCPF17p.js} +1 -1
- package/web/assets/{requirementDiagram-JXO7QTGE-bMkROmfn.js → requirementDiagram-JXO7QTGE-_TvUqq8o.js} +1 -1
- package/web/assets/{sequenceDiagram-VS2MUI6T-Dws3nv7I.js → sequenceDiagram-VS2MUI6T-HVEY6R-z.js} +1 -1
- package/web/assets/{stateDiagram-7D4R322I-DePQE9Ss.js → stateDiagram-7D4R322I-C1zan0lv.js} +1 -1
- package/web/assets/{stateDiagram-v2-36443NZ5-XwJow906.js → stateDiagram-v2-36443NZ5-DEXf5Sf7.js} +1 -1
- package/web/assets/{timeline-definition-O6YCAMPW-D8q1B9gj.js → timeline-definition-O6YCAMPW-B0fWGgTk.js} +1 -1
- package/web/assets/{vennDiagram-MWXL3ELB-C1wh9rL3.js → vennDiagram-MWXL3ELB-3LZreYcV.js} +1 -1
- package/web/assets/{wardleyDiagram-CUQ6CDDI-D4Y6EyeO.js → wardleyDiagram-CUQ6CDDI-B08rmgyx.js} +1 -1
- package/web/assets/{xychartDiagram-N2JHSOCM-CGE9TFhV.js → xychartDiagram-N2JHSOCM-CBVkhshc.js} +1 -1
- package/web/index.html +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"language-detection.d.ts","sourceRoot":"","sources":["../src/language-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"language-detection.d.ts","sourceRoot":"","sources":["../src/language-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAyDpD;;;;GAIG;AACH,eAAO,MAAM,uBAAuB,GAAI,UAAU,MAAM,KAAG,OACQ,CAAC;AAEpE;;;GAGG;AACH,eAAO,MAAM,uBAAuB,GAAI,UAAU,MAAM,KAAG,kBAAkB,GAAG,IAkB/E,CAAC;AAwDF;;;;GAIG;AACH,eAAO,MAAM,6BAA6B,GAAI,UAAU,MAAM,KAAG,MAUhE,CAAC"}
|
|
@@ -29,7 +29,17 @@ const EXTENSION_MAP = {
|
|
|
29
29
|
[SupportedLanguages.Python]: ['.py'],
|
|
30
30
|
[SupportedLanguages.Java]: ['.java'],
|
|
31
31
|
[SupportedLanguages.C]: ['.c'],
|
|
32
|
-
[SupportedLanguages.CPlusPlus]: [
|
|
32
|
+
[SupportedLanguages.CPlusPlus]: [
|
|
33
|
+
'.cpp',
|
|
34
|
+
'.cc',
|
|
35
|
+
'.cxx',
|
|
36
|
+
'.h',
|
|
37
|
+
'.hpp',
|
|
38
|
+
'.hxx',
|
|
39
|
+
'.hh',
|
|
40
|
+
'.cu',
|
|
41
|
+
'.cuh',
|
|
42
|
+
],
|
|
33
43
|
[SupportedLanguages.CSharp]: ['.cs'],
|
|
34
44
|
[SupportedLanguages.Go]: ['.go'],
|
|
35
45
|
[SupportedLanguages.Ruby]: ['.rb', '.rake', '.gemspec'],
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"language-detection.js","sourceRoot":"","sources":["../src/language-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAEpD,6DAA6D;AAC7D,MAAM,wBAAwB,GAAG,IAAI,GAAG,CAAC;IACvC,UAAU;IACV,SAAS;IACT,WAAW;IACX,aAAa;IACb,UAAU;CACX,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,aAAa,GAAkD;IACnE,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;IAChE,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;IAChE,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC;IACpC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;IACpC,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC;IAC9B,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE,
|
|
1
|
+
{"version":3,"file":"language-detection.js","sourceRoot":"","sources":["../src/language-detection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAEpD,6DAA6D;AAC7D,MAAM,wBAAwB,GAAG,IAAI,GAAG,CAAC;IACvC,UAAU;IACV,SAAS;IACT,WAAW;IACX,aAAa;IACb,UAAU;CACX,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,aAAa,GAAkD;IACnE,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;IAChE,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;IAChE,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC;IACpC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;IACpC,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC;IAC9B,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAC9B,MAAM;QACN,KAAK;QACL,MAAM;QACN,IAAI;QACJ,MAAM;QACN,MAAM;QACN,KAAK;QACL,KAAK;QACL,MAAM;KACP;IACD,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC;IACpC,CAAC,kBAAkB,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC;IAChC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,UAAU,CAAC;IACvD,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC;IAClC,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC;IAChF,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,CAAC;IAC5C,CAAC,kBAAkB,CAAC,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC;IACtC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;IACpC,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC;IAClC,CAAC,kBAAkB,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;CACP,CAAC,CAAC,wBAAwB;AAEnF,kFAAkF;AAClF,MAAM,SAAS,GAAG,IAAI,GAAG,EAA8B,CAAC;AACxD,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,aAAa,CAGpD,EAAE,CAAC;IACJ,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC,QAAgB,EAAW,EAAE,CACnE,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;AAEpE;;;GAGG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC,QAAgB,EAA6B,EAAE;IACrF,IAAI,uBAAuB,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnD,qCA
AqC;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC1C,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;QACjB,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QAClD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,IAAI,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC;IACtC,CAAC;IAED,qDAAqD;IACrD,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,QAAQ,CAAC;IACvD,IAAI,wBAAwB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3C,OAAO,kBAAkB,CAAC,IAAI,CAAC;IACjC,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC,CAAC;AAEF;;;;;GAKG;AACH,MAAM,UAAU,GAAuC;IACrD,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE,YAAY;IAC7C,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE,YAAY;IAC7C,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,QAAQ;IACrC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,MAAM;IACjC,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,GAAG;IAC3B,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE,KAAK;IACrC,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,QAAQ;IACrC,CAAC,kBAAkB,CAAC,EAAE,CAAC,EAAE,IAAI;IAC7B,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,MAAM;IACjC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,MAAM;IACjC,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,KAAK;IAC/B,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE,QAAQ;IACrC,CAAC,kBAAkB,CAAC,KAAK,CAAC,EAAE,OAAO;IACnC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,MAAM;IACjC,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,YAAY;IACtC,CAAC,kBAAkB,CAAC,KAAK,CAAC,EAAE,OAAO;CACS,CAAC,CAAC,wBAAwB;AAExE,qEAAqE;AACrE,MAAM,oBAAoB,GAA2B;IACnD,IAAI,EAAE,MAAM;IACZ,IAAI,EAAE,MAAM;IACZ,GAAG,EAAE,MAAM;IACX,EAAE,EAAE,UAAU;IACd,GAAG,EAAE,UAAU;IACf,IAAI,EAAE,QAAQ;IACd,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,KAAK;IACV,IAAI,EAAE,KAAK;IACX,IAAI,EAAE,KAAK;IACX,EAAE,EAAE,MAAM;IACV,IAAI,EAAE,MAAM;IACZ,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,KAAK;IACV,IAAI,EAAE,MAAM;IACZ,GAAG,EAAE,KAAK;IACV,UAAU,EAAE,QAAQ;CACrB,CAAC;AAEF,oEAAoE;AACpE,MAAM,sBAAsB,GAA2B;IACrD,QAAQ,EAAE,UAAU;IACpB,UAAU,EAAE,QAAQ;CACrB,CAAC;AAEF;;;;GAIG;AACH,MAAM,CAAC,MAAM,6BAA6B,GAAG,CAAC,QAAgB,EAAU,EAAE;IACxE,IAAI,uBAAuB,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAEvD,MAAM,IAAI,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAC/C,IAAI,IAAI;QAAE,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,GAA
G,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,CAAC;IACrD,IAAI,GAAG,IAAI,GAAG,IAAI,oBAAoB;QAAE,OAAO,oBAAoB,CAAC,GAAG,CAAC,CAAC;IACzE,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;IACjD,IAAI,QAAQ,IAAI,sBAAsB;QAAE,OAAO,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAChF,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC"}
|
|
@@ -27,7 +27,7 @@ import { logger } from '../../logger.js';
|
|
|
27
27
|
/**
|
|
28
28
|
* Cross-repo C/C++ `#include` dependency extractor.
|
|
29
29
|
*
|
|
30
|
-
* **Provider side:** registers every `.h/.hpp/.hxx/.hh` file in the repo
|
|
30
|
+
* **Provider side:** registers every `.h/.hpp/.hxx/.hh/.cuh` file in the repo
|
|
31
31
|
* as a provider contract with `include::<relative-path>`.
|
|
32
32
|
*
|
|
33
33
|
* **Consumer side:** parses all C/C++ source/header files for `#include "…"`
|
|
@@ -40,12 +40,19 @@ import { logger } from '../../logger.js';
|
|
|
40
40
|
* exact contract-id equality in `runExactMatch`.
|
|
41
41
|
*/
|
|
42
42
|
// ---------- constants ----------
|
|
43
|
-
const HEADER_EXTENSIONS = new Set(['.h', '.hpp', '.hxx', '.hh']);
|
|
44
|
-
// Source = headers (provider-eligible) ∪ implementation files (.c/.cpp/.cc/.cxx).
|
|
43
|
+
const HEADER_EXTENSIONS = new Set(['.h', '.hpp', '.hxx', '.hh', '.cuh']);
|
|
44
|
+
// Source = headers (provider-eligible) ∪ implementation files (.c/.cpp/.cc/.cxx/.cu).
|
|
45
45
|
// Spread keeps the subset relationship explicit so a future contributor adding
|
|
46
46
|
// a new header extension to HEADER_EXTENSIONS does not have to remember to
|
|
47
47
|
// also add it here.
|
|
48
|
-
const SOURCE_EXTENSIONS = new Set([
|
|
48
|
+
const SOURCE_EXTENSIONS = new Set([
|
|
49
|
+
...HEADER_EXTENSIONS,
|
|
50
|
+
'.c',
|
|
51
|
+
'.cpp',
|
|
52
|
+
'.cc',
|
|
53
|
+
'.cxx',
|
|
54
|
+
'.cu',
|
|
55
|
+
]);
|
|
49
56
|
const INCLUDE_QUERY_SRC = '(preproc_include path: (_) @import.source) @import';
|
|
50
57
|
/**
|
|
51
58
|
* Well-known C/C++ standard library headers that can appear in `#include "…"`
|
|
@@ -259,6 +266,8 @@ function getLanguageForFile(filePath) {
|
|
|
259
266
|
case '.hpp':
|
|
260
267
|
case '.hxx':
|
|
261
268
|
case '.hh':
|
|
269
|
+
case '.cu':
|
|
270
|
+
case '.cuh':
|
|
262
271
|
return Cpp;
|
|
263
272
|
default:
|
|
264
273
|
return null;
|
|
@@ -281,7 +290,7 @@ function getLanguageForFile(filePath) {
|
|
|
281
290
|
function isLocalInclude(cleaned, suffixIndex) {
|
|
282
291
|
const candidates = [cleaned];
|
|
283
292
|
if (!/\.[a-zA-Z0-9]+$/.test(cleaned)) {
|
|
284
|
-
for (const ext of
|
|
293
|
+
for (const ext of HEADER_EXTENSIONS)
|
|
285
294
|
candidates.push(cleaned + ext);
|
|
286
295
|
}
|
|
287
296
|
for (const c of candidates) {
|
|
@@ -386,7 +395,7 @@ export class IncludeExtractor {
|
|
|
386
395
|
async extractProvidersGraph(db, repoPath) {
|
|
387
396
|
try {
|
|
388
397
|
const rows = await db(`MATCH (f:File)
|
|
389
|
-
WHERE f.filePath =~ '.*\\\\.(h|hpp|hxx|hh)$'
|
|
398
|
+
WHERE f.filePath =~ '.*\\\\.(h|hpp|hxx|hh|cuh)$'
|
|
390
399
|
RETURN f.filePath AS filePath, f.id AS fileId`);
|
|
391
400
|
// gitnexus analyze stores absolute paths in the File.filePath column.
|
|
392
401
|
// Provider contract IDs MUST be repo-relative — otherwise the consumer
|
|
@@ -375,7 +375,9 @@ export const cProvider = defineLanguage({
|
|
|
375
375
|
});
|
|
376
376
|
export const cppProvider = defineLanguage({
|
|
377
377
|
id: SupportedLanguages.CPlusPlus,
|
|
378
|
-
|
|
378
|
+
// CUDA files route through tree-sitter-cpp as a conservative C++-subset parser:
|
|
379
|
+
// definitions still extract, but CUDA launch syntax (`<<< >>>`) is not modeled as calls.
|
|
380
|
+
extensions: ['.cpp', '.cc', '.cxx', '.h', '.hpp', '.hxx', '.hh', '.cu', '.cuh'],
|
|
379
381
|
entryPointPatterns: [
|
|
380
382
|
/^main$/,
|
|
381
383
|
/^init_/,
|
|
@@ -3,6 +3,6 @@
|
|
|
3
3
|
* Used by `loadResolutionConfig` so the C++ resolver can resolve `#include`
|
|
4
4
|
* targets that live in header files.
|
|
5
5
|
*
|
|
6
|
-
* Scans for: .h, .hpp, .hxx, .hh
|
|
6
|
+
* Scans for: .h, .hpp, .hxx, .hh, .cuh
|
|
7
7
|
*/
|
|
8
8
|
export declare function scanCppHeaderFiles(repoPath: string): ReadonlySet<string>;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import { readdirSync } from 'fs';
|
|
2
2
|
import { join, relative } from 'path';
|
|
3
3
|
/** C++ header extensions to scan for in the workspace. */
|
|
4
|
-
const HEADER_EXTENSIONS = new Set(['.h', '.hpp', '.hxx', '.hh']);
|
|
4
|
+
const HEADER_EXTENSIONS = new Set(['.h', '.hpp', '.hxx', '.hh', '.cuh']);
|
|
5
5
|
/**
|
|
6
6
|
* Walk `repoPath` recursively and return relative paths of all C++ header files.
|
|
7
7
|
* Used by `loadResolutionConfig` so the C++ resolver can resolve `#include`
|
|
8
8
|
* targets that live in header files.
|
|
9
9
|
*
|
|
10
|
-
* Scans for: .h, .hpp, .hxx, .hh
|
|
10
|
+
* Scans for: .h, .hpp, .hxx, .hh, .cuh
|
|
11
11
|
*/
|
|
12
12
|
export function scanCppHeaderFiles(repoPath) {
|
|
13
13
|
const headers = new Set();
|
|
@@ -55,5 +55,22 @@ export interface StreamedCSVResult {
|
|
|
55
55
|
* Stream all CSV data directly to disk files.
|
|
56
56
|
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
57
57
|
* File contents are lazy-read from disk with a generous LRU cache.
|
|
58
|
+
*
|
|
59
|
+
* `onNodePhaseComplete` (optional, #2203 parallelism leg): fired exactly once,
|
|
60
|
+
* right after every node CSV is fully flushed to disk and BEFORE the
|
|
61
|
+
* relationship pass starts writing any `rel_*.csv`. It receives the finished
|
|
62
|
+
* node-file manifest so the caller can begin `COPY`-ing nodes while this
|
|
63
|
+
* function keeps generating relationship CSVs (the only single-writer-safe
|
|
64
|
+
* overlap — node `COPY` ‖ relationship emit). It is intentionally NOT awaited:
|
|
65
|
+
* the relationship pass proceeds concurrently with whatever the caller
|
|
66
|
+
* schedules. A synchronous throw from the callback is allowed and propagates out
|
|
67
|
+
* of this function (rejecting the returned promise) — it is raised before the
|
|
68
|
+
* relationship pass begins, so no `rel_*.csv` is written; `loadGraphToLbug` uses
|
|
69
|
+
* this to surface its PDG-manifest collision guard. The callback must NOT, however,
|
|
70
|
+
* schedule un-awaited async work that can reject unobserved. Absent ⇒ today's
|
|
71
|
+
* behavior, byte-for-byte.
|
|
58
72
|
*/
|
|
59
|
-
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string
|
|
73
|
+
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, onNodePhaseComplete?: (nodeFiles: Map<NodeTableName, {
|
|
74
|
+
csvPath: string;
|
|
75
|
+
rows: number;
|
|
76
|
+
}>) => void) => Promise<StreamedCSVResult>;
|
|
@@ -33,6 +33,14 @@ const orderedNodes = (graph, sorted) => sorted ? [...graph.iterNodes()].sort(byG
|
|
|
33
33
|
const orderedRelationships = (graph, sorted) => sorted ? [...graph.iterRelationships()].sort(byGraphId) : graph.iterRelationships();
|
|
34
34
|
/** Flush buffered rows to disk every N rows */
|
|
35
35
|
const FLUSH_EVERY = 500;
|
|
36
|
+
/**
|
|
37
|
+
* Yield the event loop every N relationship rows during the emit pass (#2226 F4)
|
|
38
|
+
* so a concurrent node COPY (the overlap in loadGraphToLbug) and write-stream
|
|
39
|
+
* drain callbacks get scheduling time during long synchronous emit stretches.
|
|
40
|
+
* Scheduling-only — never changes row content or order (byte-identical). Tuning
|
|
41
|
+
* constant, not load-bearing.
|
|
42
|
+
*/
|
|
43
|
+
const REL_YIELD_EVERY = 5000;
|
|
36
44
|
// ============================================================================
|
|
37
45
|
// CSV ESCAPE UTILITIES
|
|
38
46
|
// ============================================================================
|
|
@@ -239,8 +247,22 @@ export const buildBasicBlockRow = (node) => [
|
|
|
239
247
|
* Stream all CSV data directly to disk files.
|
|
240
248
|
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
241
249
|
* File contents are lazy-read from disk with a generous LRU cache.
|
|
250
|
+
*
|
|
251
|
+
* `onNodePhaseComplete` (optional, #2203 parallelism leg): fired exactly once,
|
|
252
|
+
* right after every node CSV is fully flushed to disk and BEFORE the
|
|
253
|
+
* relationship pass starts writing any `rel_*.csv`. It receives the finished
|
|
254
|
+
* node-file manifest so the caller can begin `COPY`-ing nodes while this
|
|
255
|
+
* function keeps generating relationship CSVs (the only single-writer-safe
|
|
256
|
+
* overlap — node `COPY` ‖ relationship emit). It is intentionally NOT awaited:
|
|
257
|
+
* the relationship pass proceeds concurrently with whatever the caller
|
|
258
|
+
* schedules. A synchronous throw from the callback is allowed and propagates out
|
|
259
|
+
* of this function (rejecting the returned promise) — it is raised before the
|
|
260
|
+
* relationship pass begins, so no `rel_*.csv` is written; `loadGraphToLbug` uses
|
|
261
|
+
* this to surface its PDG-manifest collision guard. The callback must NOT, however,
|
|
262
|
+
* schedule un-awaited async work that can reject unobserved. Absent ⇒ today's
|
|
263
|
+
* behavior, byte-for-byte.
|
|
242
264
|
*/
|
|
243
|
-
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
265
|
+
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, onNodePhaseComplete) => {
|
|
244
266
|
// Deterministic (id-sorted) node/relationship row order when enabled;
|
|
245
267
|
// default off = today's graph-insertion order (byte-identical).
|
|
246
268
|
const sortOutput = parseTruthyEnv(process.env.GITNEXUS_SORT_GRAPH_OUTPUT);
|
|
@@ -502,30 +524,11 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
502
524
|
...multiLangWriters.values(),
|
|
503
525
|
];
|
|
504
526
|
await Promise.all(allWriters.map((w) => w.finish()));
|
|
505
|
-
//
|
|
506
|
-
//
|
|
507
|
-
//
|
|
508
|
-
//
|
|
509
|
-
//
|
|
510
|
-
// validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
|
|
511
|
-
// files are byte-identical (asserted by the differential test).
|
|
512
|
-
const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
|
|
513
|
-
try {
|
|
514
|
-
for (const rel of orderedRelationships(graph, sortOutput)) {
|
|
515
|
-
const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
|
|
516
|
-
if (pending)
|
|
517
|
-
await pending;
|
|
518
|
-
}
|
|
519
|
-
await relRouter.close();
|
|
520
|
-
}
|
|
521
|
-
catch (err) {
|
|
522
|
-
relRouter.destroy();
|
|
523
|
-
// Rethrow the real stream error (EMFILE / disk-full) rather than the generic
|
|
524
|
-
// AbortError a pending drain-await rejects with — mirrors the retained
|
|
525
|
-
// splitRelCsvByLabelPair's `throw streamError ?? err`.
|
|
526
|
-
throw relRouter.lastError ?? err;
|
|
527
|
-
}
|
|
528
|
-
// Build result map — only include tables that have rows
|
|
527
|
+
// Build the node-file manifest now (all writers are flushed; `.rows` is
|
|
528
|
+
// final). Hoisted above the relationship pass so `onNodePhaseComplete` can
|
|
529
|
+
// hand the caller a complete node manifest to start COPY-ing while we keep
|
|
530
|
+
// generating relationship CSVs below (#2203 overlap). The same map is
|
|
531
|
+
// returned, so the result is unchanged when no callback is supplied.
|
|
529
532
|
const nodeFiles = new Map();
|
|
530
533
|
const tableMap = [
|
|
531
534
|
['File', fileWriter],
|
|
@@ -551,6 +554,38 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
551
554
|
});
|
|
552
555
|
}
|
|
553
556
|
}
|
|
557
|
+
// Node CSVs are on disk; relationship CSVs have not been touched yet. Hand
|
|
558
|
+
// the manifest to the caller (not awaited — the rel pass runs concurrently).
|
|
559
|
+
onNodePhaseComplete?.(nodeFiles);
|
|
560
|
+
// --- Stream relationships directly to per-FROM→TO-label-pair files ---
|
|
561
|
+
// (#2203 U2) Route every edge to its pair file in this single pass. The old
|
|
562
|
+
// monolithic relations.csv — and its line-by-line re-read + per-edge regex
|
|
563
|
+
// re-split in loadGraphToLbug — are gone, so the ~1M-edge set is written and
|
|
564
|
+
// read once instead of twice. The router applies the SAME label-derivation +
|
|
565
|
+
// validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
|
|
566
|
+
// files are byte-identical (asserted by the differential test).
|
|
567
|
+
const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
|
|
568
|
+
try {
|
|
569
|
+
let emitted = 0;
|
|
570
|
+
for (const rel of orderedRelationships(graph, sortOutput)) {
|
|
571
|
+
const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
|
|
572
|
+
if (pending)
|
|
573
|
+
await pending;
|
|
574
|
+
// Periodically hand the event loop back so the overlapped node COPY and
|
|
575
|
+
// write-stream drains run instead of starving behind this synchronous
|
|
576
|
+
// loop (#2226 F4). No effect on emitted bytes — pure scheduling.
|
|
577
|
+
if (++emitted % REL_YIELD_EVERY === 0)
|
|
578
|
+
await new Promise((r) => setImmediate(r));
|
|
579
|
+
}
|
|
580
|
+
await relRouter.close();
|
|
581
|
+
}
|
|
582
|
+
catch (err) {
|
|
583
|
+
relRouter.destroy();
|
|
584
|
+
// Rethrow the real stream error (EMFILE / disk-full) rather than the generic
|
|
585
|
+
// AbortError a pending drain-await rejects with — mirrors the retained
|
|
586
|
+
// splitRelCsvByLabelPair's `throw streamError ?? err`.
|
|
587
|
+
throw relRouter.lastError ?? err;
|
|
588
|
+
}
|
|
554
589
|
return {
|
|
555
590
|
nodeFiles,
|
|
556
591
|
relsByPair: relRouter.byPair,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import lbug from '@ladybugdb/core';
|
|
2
2
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
3
|
+
import { NodeTableName } from './schema.js';
|
|
3
4
|
import type { PdgEmitManifest } from './pdg-emit-sink.js';
|
|
4
5
|
import type { CachedEmbedding } from '../embeddings/types.js';
|
|
5
6
|
import { type ExtensionEnsureOptions } from './extension-loader.js';
|
|
@@ -54,6 +55,18 @@ export declare const withLbugDb: <T>(dbPath: string, operation: () => Promise<T>
|
|
|
54
55
|
readOnly?: boolean;
|
|
55
56
|
}) => Promise<T>;
|
|
56
57
|
export type LbugProgressCallback = (message: string) => void;
|
|
58
|
+
/**
|
|
59
|
+
* Persist a KnowledgeGraph: stream CSVs, then bulk-COPY nodes (overlapped with
|
|
60
|
+
* relationship emit — see the body) and relationships.
|
|
61
|
+
*
|
|
62
|
+
* NOT TRANSACTIONAL (#2226). Each `COPY` commits independently and there is no
|
|
63
|
+
* surrounding transaction, so a failure partway through — a node `COPY` that
|
|
64
|
+
* throws at the FK barrier, a relationship `COPY` failure, or a `pdgEmitManifest`
|
|
65
|
+
* collision raised after node rows have already committed in the overlap path —
|
|
66
|
+
* leaves a partially-loaded DB. The caller surfaces the error; recovery is a
|
|
67
|
+
* `--force` re-analyze (a full rebuild), not a partial retry. Callers must not
|
|
68
|
+
* assume the DB is either fully loaded or untouched after a rejection.
|
|
69
|
+
*/
|
|
57
70
|
export declare const loadGraphToLbug: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: LbugProgressCallback,
|
|
58
71
|
/**
|
|
59
72
|
* Streamed PDG-emit manifest (#2202). When present (streaming was on, full
|
|
@@ -69,6 +82,8 @@ pdgEmitManifest?: PdgEmitManifest) => Promise<{
|
|
|
69
82
|
skippedRels: number;
|
|
70
83
|
warnings: string[];
|
|
71
84
|
}>;
|
|
85
|
+
export declare const COPY_CSV_OPTS = "(HEADER=true, ESCAPE='\"', DELIM=',', QUOTE='\"', PARALLEL=false, auto_detect=false)";
|
|
86
|
+
export declare const getCopyQuery: (table: NodeTableName, filePath: string) => string;
|
|
72
87
|
/**
|
|
73
88
|
* Insert a single node to LadybugDB
|
|
74
89
|
* @param label - Node type (File, Function, Class, etc.)
|
|
@@ -740,6 +740,60 @@ const doInitLbug = async (dbPath, readOnly = false) => {
|
|
|
740
740
|
currentDbPath = dbPath;
|
|
741
741
|
return { db, conn };
|
|
742
742
|
};
|
|
743
|
+
/**
|
|
744
|
+
* Run a COPY, retrying once with IGNORE_ERRORS=true (which skips row-level
|
|
745
|
+
* errors) on first failure. On a second failure, hand the RAW retry error to
|
|
746
|
+
* `onError` — each call site formats + slices its own message (#2226 F5: node
|
|
747
|
+
* COPY slices to 200 chars and throws; relationship COPY slices to 80 and warns,
|
|
748
|
+
* so the helper must not pre-format and lose that distinction). `onError` may
|
|
749
|
+
* throw to propagate the failure.
|
|
750
|
+
*/
|
|
751
|
+
const copyCsvWithRetry = async (targetConn, copyQuery, onError) => {
|
|
752
|
+
try {
|
|
753
|
+
await queryAndDrain(targetConn, copyQuery);
|
|
754
|
+
}
|
|
755
|
+
catch {
|
|
756
|
+
try {
|
|
757
|
+
const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
|
|
758
|
+
await queryAndDrain(targetConn, retryQuery);
|
|
759
|
+
}
|
|
760
|
+
catch (retryErr) {
|
|
761
|
+
onError(retryErr);
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
};
|
|
765
|
+
/**
|
|
766
|
+
* Bulk-COPY every node CSV sequentially on the single writable connection
|
|
767
|
+
* (LadybugDB allows one write txn at a time). Extracted from loadGraphToLbug so
|
|
768
|
+
* it can run either at the node-phase boundary — overlapping the relationship
|
|
769
|
+
* emit pass (#2203) — or after emit in the serial escape-hatch path. Each COPY
|
|
770
|
+
* keeps the IGNORE_ERRORS=true retry; a hard failure throws (no node rows ⇒ the
|
|
771
|
+
* relationship COPY would dangle on missing endpoints).
|
|
772
|
+
*/
|
|
773
|
+
const copyNodeCSVs = async (targetConn, nodeFileEntries, log, totalSteps) => {
|
|
774
|
+
let stepsDone = 0;
|
|
775
|
+
for (const [table, { csvPath, rows }] of nodeFileEntries) {
|
|
776
|
+
stepsDone++;
|
|
777
|
+
log(`Loading nodes ${stepsDone}/${totalSteps}: ${table} (${rows.toLocaleString()} rows)`);
|
|
778
|
+
const copyQuery = getCopyQuery(table, normalizeCopyPath(csvPath));
|
|
779
|
+
await copyCsvWithRetry(targetConn, copyQuery, (retryErr) => {
|
|
780
|
+
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
781
|
+
throw new Error(`COPY failed for ${table}: ${retryMsg.slice(0, 200)}`);
|
|
782
|
+
});
|
|
783
|
+
}
|
|
784
|
+
};
|
|
785
|
+
/**
|
|
786
|
+
* Persist a KnowledgeGraph: stream CSVs, then bulk-COPY nodes (overlapped with
|
|
787
|
+
* relationship emit — see the body) and relationships.
|
|
788
|
+
*
|
|
789
|
+
* NOT TRANSACTIONAL (#2226). Each `COPY` commits independently and there is no
|
|
790
|
+
* surrounding transaction, so a failure partway through — a node `COPY` that
|
|
791
|
+
* throws at the FK barrier, a relationship `COPY` failure, or a `pdgEmitManifest`
|
|
792
|
+
* collision raised after node rows have already committed in the overlap path —
|
|
793
|
+
* leaves a partially-loaded DB. The caller surfaces the error; recovery is a
|
|
794
|
+
* `--force` re-analyze (a full rebuild), not a partial retry. Callers must not
|
|
795
|
+
* assume the DB is either fully loaded or untouched after a rejection.
|
|
796
|
+
*/
|
|
743
797
|
export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress,
|
|
744
798
|
/**
|
|
745
799
|
* Streamed PDG-emit manifest (#2202). When present (streaming was on, full
|
|
@@ -761,31 +815,87 @@ pdgEmitManifest) => {
|
|
|
761
815
|
// the gap that the DB-persistence path is un-timed today (the analyze
|
|
762
816
|
// "emit" number is the scope-resolution emit bucket, not this COPY path).
|
|
763
817
|
const PROF = process.env.PROF_LBUG_LOAD === '1';
|
|
818
|
+
// Escape hatch / differential oracle (#2203): force the legacy strictly-serial
|
|
819
|
+
// load order (emit everything, THEN COPY nodes, THEN COPY rels) instead of the
|
|
820
|
+
// default node-COPY ‖ rel-emit overlap. Lets an operator revert the behavior at
|
|
821
|
+
// runtime, and lets a test load the same graph both ways and assert identical
|
|
822
|
+
// persisted content.
|
|
823
|
+
const SERIAL = process.env.GITNEXUS_SERIAL_LBUG_LOAD === '1';
|
|
764
824
|
const mark = () => (PROF ? process.hrtime.bigint() : 0n);
|
|
765
825
|
const span = (a, b) => (Number(b - a) / 1e6).toFixed(1);
|
|
766
826
|
const tStart = mark();
|
|
767
827
|
const csvDir = resolveNativeSafeStorageDir(storagePath, 'csv');
|
|
768
|
-
|
|
769
|
-
const
|
|
770
|
-
//
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
//
|
|
774
|
-
//
|
|
775
|
-
//
|
|
776
|
-
|
|
828
|
+
// The single writable connection (LadybugDB is single-writer). Captured as a
|
|
829
|
+
// const so the node-COPY closure has a non-null reference — TS cannot narrow
|
|
830
|
+
// the reassignable module-level `conn` across the callback boundary.
|
|
831
|
+
const writeConn = conn;
|
|
832
|
+
const validTables = new Set(NODE_TABLES);
|
|
833
|
+
// Merge the streamed PDG-emit node CSVs (#2202) into a node-file map. Collision
|
|
834
|
+
// guard: a BasicBlock in the in-memory graph during a streamed run is an
|
|
835
|
+
// invariant violation (streamAllCSVsToDisk would also emit basicblock.csv), so
|
|
836
|
+
// fail loudly rather than drop rows (#2202 review #3). Runs at the node-phase
|
|
837
|
+
// boundary so the manifest BasicBlock table COPYs with the structural CSVs.
|
|
838
|
+
const mergeManifestNodeFiles = (nodeFilesMap) => {
|
|
839
|
+
if (!pdgEmitManifest)
|
|
840
|
+
return;
|
|
777
841
|
for (const [table, meta] of pdgEmitManifest.nodeFiles) {
|
|
778
|
-
|
|
779
|
-
// streamed run (streamAllCSVsToDisk then emitted a structural basicblock.csv).
|
|
780
|
-
// That is a streaming-invariant violation — fail loudly rather than
|
|
781
|
-
// silently overwrite one CSV with the other and drop its rows (#2202 review #3).
|
|
782
|
-
if (csvResult.nodeFiles.has(table)) {
|
|
842
|
+
if (nodeFilesMap.has(table)) {
|
|
783
843
|
throw new Error(`Streaming PDG manifest collides with a structural node CSV for "${table}" — ` +
|
|
784
844
|
`the in-memory graph should hold zero ${table} nodes when streaming. ` +
|
|
785
845
|
`A ${table} node leaked into the graph during a streamed emit.`);
|
|
786
846
|
}
|
|
787
|
-
|
|
847
|
+
nodeFilesMap.set(table, meta);
|
|
788
848
|
}
|
|
849
|
+
};
|
|
850
|
+
// Node COPY is the only DB write that can overlap relationship CSV emit: the
|
|
851
|
+
// rel pass writes new rel_*.csv files and never touches `conn`, while node COPY
|
|
852
|
+
// uses `conn` and never touches the rel files. We start node COPY at the
|
|
853
|
+
// node-phase boundary and let the rel pass run concurrently — the only
|
|
854
|
+
// single-writer-safe parallelism (#2203). The rel COPY still waits for node
|
|
855
|
+
// COPY (FK precondition), so the DB load order is unchanged.
|
|
856
|
+
let nodeCopyPromise;
|
|
857
|
+
let nodeCopyError;
|
|
858
|
+
const beginNodeCopy = (nodeFilesMap) => {
|
|
859
|
+
mergeManifestNodeFiles(nodeFilesMap);
|
|
860
|
+
const entries = [...nodeFilesMap.entries()];
|
|
861
|
+
// copyNodeCSVs logs node progress as step/total; it processes only node
|
|
862
|
+
// tables (the rel COPY has its own "Loading edges" progress line), so the
|
|
863
|
+
// denominator is the node-table count — not +1 reserving a rel step.
|
|
864
|
+
// .catch captures the failure so an overlapped (mid-emit) rejection cannot
|
|
865
|
+
// surface as an unhandled rejection; it is rethrown at the FK barrier below.
|
|
866
|
+
nodeCopyPromise = copyNodeCSVs(writeConn, entries, log, entries.length).catch((e) => {
|
|
867
|
+
nodeCopyError = e;
|
|
868
|
+
});
|
|
869
|
+
};
|
|
870
|
+
log('Streaming CSVs to disk...');
|
|
871
|
+
let csvResult;
|
|
872
|
+
try {
|
|
873
|
+
csvResult = SERIAL
|
|
874
|
+
? await streamAllCSVsToDisk(graph, repoPath, csvDir)
|
|
875
|
+
: await streamAllCSVsToDisk(graph, repoPath, csvDir, beginNodeCopy);
|
|
876
|
+
}
|
|
877
|
+
catch (emitErr) {
|
|
878
|
+
// Relationship emit failed. In overlap mode a node COPY may be in flight —
|
|
879
|
+
// settle it (the .catch above means this never rejects) before rethrowing so
|
|
880
|
+
// it cannot leak as an unhandled rejection.
|
|
881
|
+
if (nodeCopyPromise)
|
|
882
|
+
await nodeCopyPromise;
|
|
883
|
+
// If node COPY ALSO failed, emitErr wins the throw — log the swallowed node
|
|
884
|
+
// error so a half-loaded DB isn't misattributed to the emit failure alone.
|
|
885
|
+
if (nodeCopyError) {
|
|
886
|
+
logger.warn({ err: nodeCopyError }, '[lbug-load] node COPY also failed while relationship emit was failing');
|
|
887
|
+
}
|
|
888
|
+
throw emitErr;
|
|
889
|
+
}
|
|
890
|
+
const tCsv = mark();
|
|
891
|
+
// Merge the streamed PDG-emit per-pair rel CSVs (#2202) into the COPY plan —
|
|
892
|
+
// collision-guarded. Done BEFORE node COPY so the serial escape hatch detects a
|
|
893
|
+
// manifest/structural pair collision before committing any node rows (legacy
|
|
894
|
+
// parity with the pre-overlap path), and the overlap path detects it as early
|
|
895
|
+
// as csvResult is available. When a manifest is present, streaming was on and
|
|
896
|
+
// the in-memory graph held zero BasicBlocks, so a structural collision means a
|
|
897
|
+
// streaming-invariant violation — fail loudly rather than load corrupt data.
|
|
898
|
+
if (pdgEmitManifest) {
|
|
789
899
|
for (const [pairKey, meta] of pdgEmitManifest.relsByPair) {
|
|
790
900
|
if (csvResult.relsByPair.has(pairKey)) {
|
|
791
901
|
throw new Error(`Streaming PDG manifest collides with a structural relationship CSV for pair ` +
|
|
@@ -795,30 +905,17 @@ pdgEmitManifest) => {
|
|
|
795
905
|
csvResult.totalValidRels += meta.rows;
|
|
796
906
|
}
|
|
797
907
|
}
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
try {
|
|
810
|
-
await queryAndDrain(conn, copyQuery);
|
|
811
|
-
}
|
|
812
|
-
catch (err) {
|
|
813
|
-
try {
|
|
814
|
-
const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
|
|
815
|
-
await queryAndDrain(conn, retryQuery);
|
|
816
|
-
}
|
|
817
|
-
catch (retryErr) {
|
|
818
|
-
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
819
|
-
throw new Error(`COPY failed for ${table}: ${retryMsg.slice(0, 200)}`);
|
|
820
|
-
}
|
|
821
|
-
}
|
|
908
|
+
// Serial path: all CSVs are on disk and node COPY has not started — start it
|
|
909
|
+
// here so the barrier below blocks on it exactly as the legacy path did.
|
|
910
|
+
if (SERIAL)
|
|
911
|
+
beginNodeCopy(csvResult.nodeFiles);
|
|
912
|
+
// FK barrier: node rows must exist before the relationship COPY resolves their
|
|
913
|
+
// endpoints. In overlap mode most of node COPY was hidden behind rel emit, so
|
|
914
|
+
// this await is the *residual* node-COPY time (≈0 when fully overlapped).
|
|
915
|
+
if (nodeCopyPromise)
|
|
916
|
+
await nodeCopyPromise;
|
|
917
|
+
if (nodeCopyError) {
|
|
918
|
+
throw nodeCopyError instanceof Error ? nodeCopyError : new Error(String(nodeCopyError));
|
|
822
919
|
}
|
|
823
920
|
const tCopyNodes = mark();
|
|
824
921
|
// Bulk COPY relationships. They were already routed to per-FROM→TO-label-pair
|
|
@@ -838,25 +935,17 @@ pdgEmitManifest) => {
|
|
|
838
935
|
pairIdx++;
|
|
839
936
|
const [fromLabel, toLabel] = pairKey.split('|');
|
|
840
937
|
const normalizedPath = normalizeCopyPath(pairCsvPath);
|
|
938
|
+
// PARALLEL=false is load-bearing here too — see COPY_CSV_OPTS (#2203 / kuzudb/kuzu#5778).
|
|
841
939
|
const copyQuery = `COPY ${REL_TABLE_NAME} FROM "${normalizedPath}" (from="${fromLabel}", to="${toLabel}", HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
|
|
842
940
|
if (pairIdx % 5 === 0 || rows > 1000) {
|
|
843
941
|
log(`Loading edges: ${pairIdx}/${relsByPair.size} types (${fromLabel} -> ${toLabel})`);
|
|
844
942
|
}
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
await queryAndDrain(conn, retryQuery);
|
|
852
|
-
}
|
|
853
|
-
catch (retryErr) {
|
|
854
|
-
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
855
|
-
warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${retryMsg.slice(0, 80)}`);
|
|
856
|
-
failedPairEdges += rows;
|
|
857
|
-
failedPairCsvPaths.add(pairCsvPath);
|
|
858
|
-
}
|
|
859
|
-
}
|
|
943
|
+
await copyCsvWithRetry(conn, copyQuery, (retryErr) => {
|
|
944
|
+
const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr);
|
|
945
|
+
warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${retryMsg.slice(0, 80)}`);
|
|
946
|
+
failedPairEdges += rows;
|
|
947
|
+
failedPairCsvPaths.add(pairCsvPath);
|
|
948
|
+
});
|
|
860
949
|
// Only delete if not in failedPairCsvPaths (needed for fallback)
|
|
861
950
|
if (!failedPairCsvPaths.has(pairCsvPath)) {
|
|
862
951
|
try {
|
|
@@ -919,7 +1008,12 @@ pdgEmitManifest) => {
|
|
|
919
1008
|
let totalNodeRows = 0;
|
|
920
1009
|
for (const [, { rows }] of csvResult.nodeFiles)
|
|
921
1010
|
totalNodeRows += rows;
|
|
922
|
-
|
|
1011
|
+
// `mode` records which load path ran. In overlap mode `csv-emit` is the wall-clock time
|
|
1012
|
+
// to streamAllCSVsToDisk's return (node COPY overlapped part of it) and
|
|
1013
|
+
// `copy-nodes` is the RESIDUAL node-COPY await after emit returned — it
|
|
1014
|
+
// trends to 0 as the overlap hides node COPY behind relationship emit. In
|
|
1015
|
+
// serial mode the buckets carry their legacy, disjoint meaning.
|
|
1016
|
+
logger.warn(`[lbug-load prof] mode=${SERIAL ? 'serial' : 'overlap'} csv-emit=${span(tStart, tCsv)}ms ` +
|
|
923
1017
|
`copy-nodes=${span(tCsv, tCopyNodes)}ms copy-rels=${span(tCopyNodes, tCopyRels)}ms ` +
|
|
924
1018
|
`fallback=${span(tCopyRels, tFallback)}ms total=${span(tStart, tEnd)}ms ` +
|
|
925
1019
|
`(${totalNodeRows} nodes, ${insertedRels} rels)`);
|
|
@@ -930,7 +1024,18 @@ pdgEmitManifest) => {
|
|
|
930
1024
|
// Source code content is full of backslashes which confuse the auto-detection.
|
|
931
1025
|
// We MUST explicitly set ESCAPE='"' to use RFC 4180 escaping, and disable auto_detect to prevent
|
|
932
1026
|
// LadybugDB from overriding our settings based on sample rows.
|
|
933
|
-
|
|
1027
|
+
//
|
|
1028
|
+
// PARALLEL=false IS LOAD-BEARING FOR CORRECTNESS — DO NOT FLIP IT (#2203).
|
|
1029
|
+
// LadybugDB's parallel CSV reader (Kuzu-derived; default PARALLEL=true) splits the
|
|
1030
|
+
// file into byte ranges parsed concurrently, and CANNOT determine line boundaries
|
|
1031
|
+
// when a quoted field contains an embedded newline — it errors with "Quoted newlines
|
|
1032
|
+
// are not supported in parallel CSV reader. Please specify PARALLEL=FALSE", or worse,
|
|
1033
|
+
// mis-parses silently (upstream kuzudb/kuzu#5778, still open). Our `content`/`text`
|
|
1034
|
+
// columns hold source code, so quoted multiline fields are guaranteed. PARALLEL=false
|
|
1035
|
+
// is therefore required, not conservative. The multiline-quoted round-trip in
|
|
1036
|
+
// test/integration/copy-parallel-invariant.test.ts fails loudly if this is ever flipped.
|
|
1037
|
+
// Exported so that the same test can also assert the invariant statically.
|
|
1038
|
+
// Parenthesised CSV option clause; getCopyQuery appends it verbatim after the
// quoted file path when it builds a COPY statement (e.g. for the File table).
export const COPY_CSV_OPTS = `(HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
|
|
934
1039
|
// Multi-language table names that were created with backticks in CODE_ELEMENT_BASE
|
|
935
1040
|
// and must always be referenced with backticks in queries
|
|
936
1041
|
const BACKTICK_TABLES = new Set([
|
|
@@ -996,7 +1101,7 @@ const TABLES_WITH_EXPORTED = new Set([
|
|
|
996
1101
|
'Method',
|
|
997
1102
|
'CodeElement',
|
|
998
1103
|
]);
|
|
999
|
-
const getCopyQuery = (table, filePath) => {
|
|
1104
|
+
export const getCopyQuery = (table, filePath) => {
|
|
1000
1105
|
const t = escapeTableName(table);
|
|
1001
1106
|
if (table === 'File') {
|
|
1002
1107
|
return `COPY ${t}(id, name, filePath, content) FROM "${filePath}" ${COPY_CSV_OPTS}`;
|
package/package.json
CHANGED