gitnexus 1.6.1 → 1.6.2-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +73 -0
  2. package/dist/cli/analyze.js +23 -1
  3. package/dist/core/embeddings/embedder.js +5 -0
  4. package/dist/core/embeddings/embedding-pipeline.d.ts +12 -3
  5. package/dist/core/embeddings/embedding-pipeline.js +79 -29
  6. package/dist/core/group/extractors/grpc-extractor.d.ts +1 -1
  7. package/dist/core/group/extractors/grpc-extractor.js +28 -13
  8. package/dist/core/group/extractors/http-route-extractor.js +35 -5
  9. package/dist/core/group/extractors/manifest-extractor.js +66 -9
  10. package/dist/core/group/sync.js +49 -1
  11. package/dist/core/ingestion/language-provider.d.ts +24 -5
  12. package/dist/core/ingestion/languages/c-cpp.js +2 -2
  13. package/dist/core/ingestion/languages/dart.d.ts +1 -1
  14. package/dist/core/ingestion/languages/dart.js +2 -2
  15. package/dist/core/ingestion/languages/go.d.ts +1 -1
  16. package/dist/core/ingestion/languages/go.js +2 -2
  17. package/dist/core/ingestion/languages/ruby.js +1 -1
  18. package/dist/core/ingestion/languages/swift.d.ts +1 -1
  19. package/dist/core/ingestion/languages/swift.js +2 -2
  20. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +36 -1
  21. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +143 -5
  22. package/dist/core/lbug/csv-generator.js +7 -4
  23. package/dist/core/lbug/lbug-adapter.d.ts +38 -0
  24. package/dist/core/lbug/lbug-adapter.js +189 -65
  25. package/dist/core/lbug/schema.d.ts +7 -0
  26. package/dist/core/lbug/schema.js +9 -1
  27. package/dist/core/run-analyze.js +18 -4
  28. package/dist/mcp/core/embedder.js +5 -0
  29. package/dist/server/api.js +9 -1
  30. package/package.json +6 -4
  31. package/scripts/build-tree-sitter-proto.cjs +82 -0
  32. package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
  33. package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
  34. package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
  35. package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
  36. package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
  37. package/vendor/tree-sitter-proto/package.json +1 -7
@@ -21,8 +21,25 @@ import type { SyntaxNode } from './utils/ast-helpers.js';
21
21
  import type { NodeLabel } from '../../_shared/index.js';
22
22
  /** Tree-sitter query captures: capture name → AST node (or undefined if not captured). */
23
23
  export type CaptureMap = Record<string, SyntaxNode | undefined>;
24
- /** How a language handles imports — determines wildcard synthesis behavior. */
25
- export type ImportSemantics = 'named' | 'wildcard' | 'namespace';
24
+ /**
25
+ * How a language handles imports — determines wildcard synthesis behavior.
26
+ *
27
+ * Import resolution is a graph-traversal policy with multiple distinct strategies,
28
+ * analogous to MRO for method resolution. Each tag picks a strategy:
29
+ *
30
+ * | Tag | Mechanism | Traversal | Languages |
31
+ * |-----------------------|------------------------------------------------|---------------------|--------------------------------------------|
32
+ * | `named` | Per-symbol imports | None (use-site) | JS/TS, Java, C#, Rust, PHP, Kotlin, Vue |
33
+ * | `wildcard-transitive` | Textual paste, symbols chain through files | BFS closure | C, C++ (future: Obj-C, Fortran, Nim) |
34
+ * | `wildcard-leaf` | Whole public API, single hop | None (direct only) | Go, Ruby, Swift, Dart |
35
+ * | `namespace` | Qualified handle; symbols resolved at call site| None at import | Python |
36
+ * | `explicit-reexport` | Opt-in per-symbol re-export (SCAFFOLD) | Topological DAG | (future: TS `export *`, Rust `pub use`) |
37
+ *
38
+ * The `explicit-reexport` tag is a compile-time scaffold; no provider claims it yet.
39
+ * It falls through to `wildcard-leaf` behavior in synthesis so today's TS/Rust
40
+ * handling is unchanged. A future PR will implement the DAG walk for `export *`.
41
+ */
42
+ export type ImportSemantics = 'named' | 'wildcard-transitive' | 'wildcard-leaf' | 'namespace' | 'explicit-reexport';
26
43
  /**
27
44
  * Everything a language needs to provide.
28
45
  * Required fields must be explicitly set; optional fields have defaults
@@ -51,10 +68,12 @@ interface LanguageProviderConfig {
51
68
  /** Named binding extraction from import statements.
52
69
  * Default: undefined (language uses wildcard/whole-module imports). */
53
70
  readonly namedBindingExtractor?: NamedBindingExtractorFn;
54
- /** How this language handles imports.
71
+ /** How this language handles imports. See `ImportSemantics` for the full taxonomy.
55
72
  * - 'named': per-symbol imports (JS/TS, Java, C#, Rust, PHP, Kotlin)
56
- * - 'wildcard': whole-module imports, needs synthesis (Go, Ruby, C/C++, Swift)
57
- * - 'namespace': namespace imports, needs moduleAliasMap (Python)
73
+ * - 'wildcard-transitive': textual-include closure; imports chain through files (C, C++)
74
+ * - 'wildcard-leaf': whole-module single-hop imports; no transitive chaining (Go, Ruby, Swift, Dart)
75
+ * - 'namespace': qualified namespace imports, needs moduleAliasMap (Python)
76
+ * - 'explicit-reexport': opt-in per-symbol re-export (scaffold; no provider uses yet)
58
77
  * Default: 'named'. */
59
78
  readonly importSemantics?: ImportSemantics;
60
79
  /** Language-specific transformation of raw import path text before resolution.
@@ -293,7 +293,7 @@ export const cProvider = defineLanguage({
293
293
  typeConfig: cCppConfig,
294
294
  exportChecker: cCppExportChecker,
295
295
  importResolver: resolveCImport,
296
- importSemantics: 'wildcard',
296
+ importSemantics: 'wildcard-transitive',
297
297
  fieldExtractor: createFieldExtractor(cFieldConfig),
298
298
  methodExtractor: createMethodExtractor({
299
299
  ...cMethodConfig,
@@ -310,7 +310,7 @@ export const cppProvider = defineLanguage({
310
310
  typeConfig: cCppConfig,
311
311
  exportChecker: cCppExportChecker,
312
312
  importResolver: resolveCppImport,
313
- importSemantics: 'wildcard',
313
+ importSemantics: 'wildcard-transitive',
314
314
  mroStrategy: 'leftmost-base',
315
315
  fieldExtractor: createFieldExtractor(cppFieldConfig),
316
316
  methodExtractor: createMethodExtractor({
@@ -2,7 +2,7 @@
2
2
  * Dart Language Provider
3
3
  *
4
4
  * Dart traits:
5
- * - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
5
+ * - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
6
6
  * - exportChecker: public if no leading underscore
7
7
  * - Dart SDK imports (dart:*) and external packages are skipped
8
8
  * - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body
@@ -2,7 +2,7 @@
2
2
  * Dart Language Provider
3
3
  *
4
4
  * Dart traits:
5
- * - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
5
+ * - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
6
6
  * - exportChecker: public if no leading underscore
7
7
  * - Dart SDK imports (dart:*) and external packages are skipped
8
8
  * - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body
@@ -83,7 +83,7 @@ export const dartProvider = defineLanguage({
83
83
  typeConfig: dartConfig,
84
84
  exportChecker: dartExportChecker,
85
85
  importResolver: resolveDartImport,
86
- importSemantics: 'wildcard',
86
+ importSemantics: 'wildcard-leaf',
87
87
  fieldExtractor: createFieldExtractor(dartFieldConfig),
88
88
  methodExtractor: createMethodExtractor(dartMethodConfig),
89
89
  classExtractor: createClassExtractor({
@@ -5,7 +5,7 @@
5
5
  * LanguageProvider, following the Strategy pattern used by the pipeline.
6
6
  *
7
7
  * Key Go traits:
8
- * - importSemantics: 'wildcard' (Go imports entire packages)
8
+ * - importSemantics: 'wildcard-leaf' (Go imports entire packages)
9
9
  * - callRouter: present (Go method calls may need routing)
10
10
  */
11
11
  export declare const goProvider: import("../language-provider.js").LanguageProvider;
@@ -5,7 +5,7 @@
5
5
  * LanguageProvider, following the Strategy pattern used by the pipeline.
6
6
  *
7
7
  * Key Go traits:
8
- * - importSemantics: 'wildcard' (Go imports entire packages)
8
+ * - importSemantics: 'wildcard-leaf' (Go imports entire packages)
9
9
  * - callRouter: present (Go method calls may need routing)
10
10
  */
11
11
  import { SupportedLanguages } from '../../../_shared/index.js';
@@ -26,7 +26,7 @@ export const goProvider = defineLanguage({
26
26
  typeConfig: goConfig,
27
27
  exportChecker: goExportChecker,
28
28
  importResolver: resolveGoImport,
29
- importSemantics: 'wildcard',
29
+ importSemantics: 'wildcard-leaf',
30
30
  fieldExtractor: createFieldExtractor(goFieldConfig),
31
31
  methodExtractor: createMethodExtractor(goMethodConfig),
32
32
  classExtractor: createClassExtractor({
@@ -99,7 +99,7 @@ export const rubyProvider = defineLanguage({
99
99
  exportChecker: rubyExportChecker,
100
100
  importResolver: resolveRubyImport,
101
101
  callRouter: routeRubyCall,
102
- importSemantics: 'wildcard',
102
+ importSemantics: 'wildcard-leaf',
103
103
  resolveEnclosingOwner(node) {
104
104
  // Ruby singleton_class (class << self) should resolve to the enclosing
105
105
  // class or module for owner/container resolution (HAS_METHOD edges, class IDs).
@@ -5,7 +5,7 @@
5
5
  * LanguageProvider, following the Strategy pattern used by the pipeline.
6
6
  *
7
7
  * Key Swift traits:
8
- * - importSemantics: 'wildcard' (Swift imports entire modules)
8
+ * - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
9
9
  * - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
10
10
  * - implicitImportWirer: all files in the same SPM target see each other
11
11
  */
@@ -5,7 +5,7 @@
5
5
  * LanguageProvider, following the Strategy pattern used by the pipeline.
6
6
  *
7
7
  * Key Swift traits:
8
- * - importSemantics: 'wildcard' (Swift imports entire modules)
8
+ * - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
9
9
  * - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
10
10
  * - implicitImportWirer: all files in the same SPM target see each other
11
11
  */
@@ -221,7 +221,7 @@ export const swiftProvider = defineLanguage({
221
221
  typeConfig: swiftConfig,
222
222
  exportChecker: swiftExportChecker,
223
223
  importResolver: resolveSwiftImport,
224
- importSemantics: 'wildcard',
224
+ importSemantics: 'wildcard-leaf',
225
225
  heritageDefaultEdge: 'IMPLEMENTS',
226
226
  fieldExtractor: createFieldExtractor(swiftFieldConfig),
227
227
  methodExtractor: createMethodExtractor({
@@ -14,12 +14,47 @@
14
14
  */
15
15
  import type { KnowledgeGraph } from '../../graph/types.js';
16
16
  import type { createResolutionContext } from '../model/resolution-context.js';
17
- import { SupportedLanguages } from '../../../_shared/index.js';
17
+ import type { SupportedLanguages } from '../../../_shared/index.js';
18
18
  /** Check if a language uses wildcard (whole-module) import semantics. */
19
19
  export declare function isWildcardImportLanguage(lang: SupportedLanguages): boolean;
20
20
  /** Check if a language needs synthesis before call resolution.
21
21
  * True for wildcard-import languages AND namespace-import languages (Python). */
22
22
  export declare function needsSynthesis(lang: SupportedLanguages): boolean;
23
+ /**
24
+ * Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
25
+ *
26
+ * Textual-include languages chain symbols through files: if `dict.c` includes
27
+ * `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
28
+ * all three files. This helper walks the include graph (combining both the
29
+ * ingestion-context `importMap` and the graph-level IMPORTS edges) until the
30
+ * closure is stable.
31
+ *
32
+ * **Order matters.** The returned `Set` preserves iteration order (insertion
33
+ * order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
34
+ * on a first-seen-wins basis, so this closure's ordering determines which
35
+ * declaration wins when multiple headers export the same name (e.g. overloaded
36
+ * free functions like `write_audit()` vs `write_audit(const char*)` in
37
+ * different headers). We therefore:
38
+ * 1. Seed the closure with direct imports in declaration order (matches the
39
+ * order of `#include` directives in the source file).
40
+ * 2. Use FIFO / true BFS (head-index dequeue) for transitive expansion, so
41
+ * closer headers are seen before deeper ones.
42
+ *
43
+ * Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
44
+ * header includes, which are valid C/C++ when paired with `#pragma once` or
45
+ * include guards.
46
+ *
47
+ * Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
48
+ * prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
49
+ * where one translation unit can transitively reach tens of thousands of
50
+ * headers. Partial closures still yield useful bindings for the cluster of
51
+ * headers closest to the importer, which is what overload resolution and
52
+ * cross-file call resolution care about.
53
+ *
54
+ * Queue implementation: uses a head-index over a growing array (O(1) dequeue)
55
+ * instead of `Array.prototype.shift()` (O(n)) so deep chains stay linear.
56
+ */
57
+ export declare function expandTransitiveIncludeClosure(directImports: Iterable<string>, importMap: ReadonlyMap<string, ReadonlySet<string>>, graphImports: ReadonlyMap<string, ReadonlySet<string>>): Set<string>;
23
58
  /**
24
59
  * Synthesize namedImportMap entries for languages with whole-module imports.
25
60
  *
@@ -34,9 +34,26 @@ const IMPORTABLE_SYMBOL_LABELS = new Set([
34
34
  /** Max synthetic bindings per importing file — prevents memory bloat
35
35
  * for C/C++ files that include many large headers. */
36
36
  const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
37
+ /** Max files allowed in a single transitive include closure. Guards against
38
+ * OOM on pathological C/C++ codebases (boost, Linux kernel-style monoheaders)
39
+ * where a single translation unit can transitively reach many thousands of
40
+ * headers. When the cap is hit, BFS expansion stops early — the file still
41
+ * synthesizes bindings from the partial closure rather than failing. */
42
+ const MAX_TRANSITIVE_CLOSURE_SIZE = 5000;
43
+ /** Import semantics tags whose languages need synthesis of whole-module imports.
44
+ * `wildcard-transitive` (C/C++) and `wildcard-leaf` (Go, Ruby, Swift, Dart) are
45
+ * the file-based wildcard strategies. `explicit-reexport` is a scaffold tag —
46
+ * no provider uses it yet, but it goes through the same leaf-style synthesis
47
+ * path today because a re-exporter is still an importer; only the extra DAG
48
+ * walk to surface re-exported symbols is missing (future work). */
49
+ const WILDCARD_SEMANTICS = new Set([
50
+ 'wildcard-transitive',
51
+ 'wildcard-leaf',
52
+ 'explicit-reexport',
53
+ ]);
37
54
  /** Languages with whole-module import semantics (derived from providers at module load). */
38
55
  const WILDCARD_LANGUAGES = new Set(Object.values(providers)
39
- .filter((p) => p.importSemantics === 'wildcard')
56
+ .filter((p) => WILDCARD_SEMANTICS.has(p.importSemantics))
40
57
  .map((p) => p.id));
41
58
  /** Languages that need binding synthesis before call resolution. */
42
59
  const SYNTHESIS_LANGUAGES = new Set(Object.values(providers)
@@ -51,6 +68,82 @@ export function isWildcardImportLanguage(lang) {
51
68
  export function needsSynthesis(lang) {
52
69
  return SYNTHESIS_LANGUAGES.has(lang);
53
70
  }
71
+ // ── Strategy implementations ───────────────────────────────────────────────
72
+ /**
73
+ * Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
74
+ *
75
+ * Textual-include languages chain symbols through files: if `dict.c` includes
76
+ * `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
77
+ * all three files. This helper walks the include graph (combining both the
78
+ * ingestion-context `importMap` and the graph-level IMPORTS edges) until the
79
+ * closure is stable.
80
+ *
81
+ * **Order matters.** The returned `Set` preserves iteration order (insertion
82
+ * order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
83
+ * on a first-seen-wins basis, so this closure's ordering determines which
84
+ * declaration wins when multiple headers export the same name (e.g. overloaded
85
+ * free functions like `write_audit()` vs `write_audit(const char*)` in
86
+ * different headers). We therefore:
87
+ * 1. Seed the closure with direct imports in declaration order (matches the
88
+ * order of `#include` directives in the source file).
89
+ * 2. Use FIFO / true BFS (head-index dequeue) for transitive expansion, so
90
+ * closer headers are seen before deeper ones.
91
+ *
92
+ * Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
93
+ * header includes, which are valid C/C++ when paired with `#pragma once` or
94
+ * include guards.
95
+ *
96
+ * Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
97
+ * prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
98
+ * where one translation unit can transitively reach tens of thousands of
99
+ * headers. Partial closures still yield useful bindings for the cluster of
100
+ * headers closest to the importer, which is what overload resolution and
101
+ * cross-file call resolution care about.
102
+ *
103
+ * Queue implementation: uses a head-index over a growing array (O(1) dequeue)
104
+ * instead of `Array.prototype.shift()` (O(n)) so deep chains stay linear.
105
+ */
106
+ export function expandTransitiveIncludeClosure(directImports, importMap, graphImports) {
107
+ const closure = new Set();
108
+ const queue = [];
109
+ let head = 0; // O(1) dequeue: advance the head index instead of shift()-ing.
110
+ const tryEnqueue = (file) => {
111
+ if (closure.has(file))
112
+ return true;
113
+ if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
114
+ return false;
115
+ closure.add(file);
116
+ queue.push(file);
117
+ return true;
118
+ };
119
+ // Seed direct imports in declaration order (see JSDoc on order-sensitivity).
120
+ for (const f of directImports) {
121
+ if (!tryEnqueue(f))
122
+ break;
123
+ }
124
+ // True BFS for transitive reach: head-index FIFO preserves the "closer
125
+ // headers first" ordering that overload resolution depends on.
126
+ while (head < queue.length) {
127
+ if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
128
+ break;
129
+ const file = queue[head++];
130
+ const nested = importMap.get(file);
131
+ if (nested) {
132
+ for (const n of nested) {
133
+ if (!tryEnqueue(n))
134
+ break;
135
+ }
136
+ }
137
+ const nestedGraph = graphImports.get(file);
138
+ if (nestedGraph) {
139
+ for (const n of nestedGraph) {
140
+ if (!tryEnqueue(n))
141
+ break;
142
+ }
143
+ }
144
+ }
145
+ return closure;
146
+ }
54
147
  // ── Main synthesis function ────────────────────────────────────────────────
55
148
  /**
56
149
  * Synthesize namedImportMap entries for languages with whole-module imports.
@@ -133,16 +226,61 @@ export function synthesizeWildcardImportBindings(graph, ctx) {
133
226
  }
134
227
  }
135
228
  };
136
- // Synthesize from ctx.importMap (Ruby, C/C++, Swift file-based imports)
229
+ /**
230
+ * Dispatch wildcard synthesis by the file's language provider strategy.
231
+ *
232
+ * Strategy tags (see `ImportSemantics`):
233
+ * - `wildcard-transitive`: expand the include closure first (C/C++ #include
234
+ * chains — e.g. `dict.c` → `server.h` → `dict.h` so `dictFind` resolves
235
+ * across header chains)
236
+ * - `wildcard-leaf`: synthesize from direct imports only (Go, Ruby, Swift, Dart)
237
+ * - `explicit-reexport`: scaffold tag; falls through to leaf behavior.
238
+ * TODO(#821): implement re-export DAG walk for TS `export *` / Rust
239
+ * `pub use`. The leaf fallthrough preserves today's TS/Rust behavior
240
+ * (their direct imports still synthesize correctly); only the extra
241
+ * re-export DAG walk for barrel-file correctness is missing.
242
+ * - `namespace` / `named`: no-op here (namespace handled in Loop 3 below,
243
+ * named needs no synthesis).
244
+ *
245
+ * Used by both Loop 1 (ctx.importMap) and Loop 2 (graphImports) so a future
246
+ * transitive-import language whose edges arrive via graphImports gets closure
247
+ * expansion consistently regardless of edge source.
248
+ */
249
+ const dispatchSynthesis = (filePath, importedFiles, provider) => {
250
+ switch (provider.importSemantics) {
251
+ case 'wildcard-transitive':
252
+ synthesizeForFile(filePath, expandTransitiveIncludeClosure(importedFiles, ctx.importMap, graphImports));
253
+ return;
254
+ case 'wildcard-leaf':
255
+ case 'explicit-reexport':
256
+ synthesizeForFile(filePath, importedFiles);
257
+ return;
258
+ case 'namespace':
259
+ case 'named':
260
+ return;
261
+ default: {
262
+ const _exhaustive = provider.importSemantics;
263
+ void _exhaustive;
264
+ }
265
+ }
266
+ };
267
+ // Loop 1: synthesize from ctx.importMap (Ruby, C/C++, Swift, Dart file-based imports).
137
268
  for (const [filePath, importedFiles] of ctx.importMap) {
138
269
  const lang = getLanguageFromFilename(filePath);
139
270
  if (!lang || !isWildcardImportLanguage(lang))
140
271
  continue;
141
- synthesizeForFile(filePath, importedFiles);
272
+ const provider = getProviderForFile(filePath);
273
+ if (!provider)
274
+ continue;
275
+ dispatchSynthesis(filePath, importedFiles, provider);
142
276
  }
143
- // Synthesize from graph IMPORTS edges (Go and other wildcard-import languages)
277
+ // Loop 2: synthesize from graph IMPORTS edges (Go and other wildcard-import
278
+ // languages whose edges live in the graph rather than ctx.importMap).
144
279
  for (const [filePath, importedFiles] of graphImports) {
145
- synthesizeForFile(filePath, importedFiles);
280
+ const provider = getProviderForFile(filePath);
281
+ if (!provider)
282
+ continue;
283
+ dispatchSynthesis(filePath, importedFiles, provider);
146
284
  }
147
285
  // Build Python module-alias maps for namespace-import languages.
148
286
  // `import models` in app.py → moduleAliasMap['app.py']['models'] = 'models.py'
@@ -246,14 +246,17 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
246
246
  Interface: interfaceWriter,
247
247
  CodeElement: codeElemWriter,
248
248
  };
249
- const seenFileIds = new Set();
249
+ // Deduplicate all node types — the pipeline can produce duplicate IDs across
250
+ // all symbol types (Class, Method, Function, etc.), not just File nodes.
251
+ // A single Set covering every label prevents PK violations on COPY.
252
+ const seenNodeIds = new Set();
250
253
  // --- SINGLE PASS over all nodes ---
251
254
  for (const node of graph.iterNodes()) {
255
+ if (seenNodeIds.has(node.id))
256
+ continue;
257
+ seenNodeIds.add(node.id);
252
258
  switch (node.label) {
253
259
  case 'File': {
254
- if (seenFileIds.has(node.id))
255
- break;
256
- seenFileIds.add(node.id);
257
260
  const content = await extractContent(node, contentCache);
258
261
  await fileWriter.addRow([
259
262
  escapeCSVField(node.id),
@@ -1,5 +1,33 @@
1
1
  import lbug from '@ladybugdb/core';
2
2
  import { KnowledgeGraph } from '../graph/types.js';
3
+ /** Factory for creating WriteStreams — injectable for testing. */
4
+ export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
5
+ /** Result of splitting the relationship CSV into per-label-pair files. */
6
+ export interface RelCsvSplitResult {
7
+ relHeader: string;
8
+ relsByPairMeta: Map<string, {
9
+ csvPath: string;
10
+ rows: number;
11
+ }>;
12
+ pairWriteStreams: Map<string, import('fs').WriteStream>;
13
+ skippedRels: number;
14
+ totalValidRels: number;
15
+ }
16
+ /**
17
+ * Split a relationship CSV into per-label-pair files on disk.
18
+ *
19
+ * Streams the CSV line-by-line, routing each relationship to a file named
20
+ * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
21
+ * drain listener per stream at a time, and readline resumes only when ALL
22
+ * backpressured streams have drained.
23
+ *
24
+ * @param csvPath Path to the combined relationship CSV
25
+ * @param csvDir Directory to write per-pair CSV files
26
+ * @param validTables Set of valid node table names
27
+ * @param getNodeLabel Function to extract the label from a node ID
28
+ * @param wsFactory Optional WriteStream factory (defaults to fs.createWriteStream)
29
+ */
30
+ export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
3
31
  /** Expose the current Database for pool adapter reuse in tests. */
4
32
  export declare const getDatabase: () => lbug.Database | null;
5
33
  /**
@@ -70,8 +98,18 @@ export declare const loadCachedEmbeddings: () => Promise<{
70
98
  embeddings: Array<{
71
99
  nodeId: string;
72
100
  embedding: number[];
101
+ contentHash?: string;
73
102
  }>;
74
103
  }>;
104
+ /**
105
+ * Fetch existing embedding hashes from CodeEmbedding table for incremental embedding.
106
+ * Returns a Map<nodeId, contentHash> suitable for passing to `runEmbeddingPipeline`.
107
+ * Handles legacy DBs without the `contentHash` column (all rows treated as stale with empty hash).
108
+ * Returns undefined if the CodeEmbedding table does not exist.
109
+ *
110
+ * @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
111
+ */
112
+ export declare const fetchExistingEmbeddingHashes: (execQuery: (cypher: string) => Promise<any[]>) => Promise<Map<string, string> | undefined>;
75
113
  export declare const closeLbug: () => Promise<void>;
76
114
  export declare const isLbugReady: () => boolean;
77
115
  /**