npm - gitnexus - Versions diffs - 1.6.1 → 1.6.2-rc.10 - Mend

gitnexus 1.6.1 → 1.6.2-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +73 -0
package/dist/cli/analyze.js +23 -1
package/dist/core/embeddings/embedder.js +5 -0
package/dist/core/embeddings/embedding-pipeline.d.ts +12 -3
package/dist/core/embeddings/embedding-pipeline.js +79 -29
package/dist/core/group/extractors/grpc-extractor.d.ts +1 -1
package/dist/core/group/extractors/grpc-extractor.js +28 -13
package/dist/core/group/extractors/http-route-extractor.js +35 -5
package/dist/core/group/extractors/manifest-extractor.js +66 -9
package/dist/core/group/sync.js +49 -1
package/dist/core/ingestion/language-provider.d.ts +24 -5
package/dist/core/ingestion/languages/c-cpp.js +2 -2
package/dist/core/ingestion/languages/dart.d.ts +1 -1
package/dist/core/ingestion/languages/dart.js +2 -2
package/dist/core/ingestion/languages/go.d.ts +1 -1
package/dist/core/ingestion/languages/go.js +2 -2
package/dist/core/ingestion/languages/ruby.js +1 -1
package/dist/core/ingestion/languages/swift.d.ts +1 -1
package/dist/core/ingestion/languages/swift.js +2 -2
package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +36 -1
package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +143 -5
package/dist/core/lbug/csv-generator.js +7 -4
package/dist/core/lbug/lbug-adapter.d.ts +38 -0
package/dist/core/lbug/lbug-adapter.js +189 -65
package/dist/core/lbug/schema.d.ts +7 -0
package/dist/core/lbug/schema.js +9 -1
package/dist/core/run-analyze.js +18 -4
package/dist/mcp/core/embedder.js +5 -0
package/dist/server/api.js +9 -1
package/package.json +6 -4
package/scripts/build-tree-sitter-proto.cjs +82 -0
package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
package/vendor/tree-sitter-proto/package.json +1 -7

package/dist/core/ingestion/language-provider.d.ts CHANGED Viewed

@@ -21,8 +21,25 @@ import type { SyntaxNode } from './utils/ast-helpers.js';
 import type { NodeLabel } from '../../_shared/index.js';
 /** Tree-sitter query captures: capture name → AST node (or undefined if not captured). */
 export type CaptureMap = Record<string, SyntaxNode | undefined>;
-/** How a language handles imports — determines wildcard synthesis behavior. */
-export type ImportSemantics = 'named' | 'wildcard' | 'namespace';
+/**
+ * How a language handles imports — determines wildcard synthesis behavior.
+ *
+ * Import resolution is a graph-traversal policy with multiple distinct strategies,
+ * analogous to MRO for method resolution. Each tag picks a strategy:
+ *
+ * | Tag                   | Mechanism                                      | Traversal           | Languages                                  |
+ * |-----------------------|------------------------------------------------|---------------------|--------------------------------------------|
+ * | `named`               | Per-symbol imports                             | None (use-site)     | JS/TS, Java, C#, Rust, PHP, Kotlin, Vue    |
+ * | `wildcard-transitive` | Textual paste, symbols chain through files     | BFS closure         | C, C++ (future: Obj-C, Fortran, Nim)       |
+ * | `wildcard-leaf`       | Whole public API, single hop                   | None (direct only)  | Go, Ruby, Swift, Dart                      |
+ * | `namespace`           | Qualified handle; symbols resolved at call site| None at import      | Python                                     |
+ * | `explicit-reexport`   | Opt-in per-symbol re-export (SCAFFOLD)         | Topological DAG     | (future: TS `export *`, Rust `pub use`)    |
+ *
+ * The `explicit-reexport` tag is a compile-time scaffold; no provider claims it
+ * yet, so today's TS/Rust handling (both are still tagged `named`, see the table
+ * above) is unchanged. In synthesis the tag currently shares the `wildcard-leaf`
+ * code path. A future PR will implement the DAG walk for `export *`.
+ */
+export type ImportSemantics = 'named' | 'wildcard-transitive' | 'wildcard-leaf' | 'namespace' | 'explicit-reexport';
 /**
  * Everything a language needs to provide.
  * Required fields must be explicitly set; optional fields have defaults
@@ -51,10 +68,12 @@ interface LanguageProviderConfig {
     /** Named binding extraction from import statements.
      *  Default: undefined (language uses wildcard/whole-module imports). */
     readonly namedBindingExtractor?: NamedBindingExtractorFn;
-    /** How this language handles imports.
+    /** How this language handles imports. See `ImportSemantics` for the full taxonomy.
      *  - 'named': per-symbol imports (JS/TS, Java, C#, Rust, PHP, Kotlin)
-     *  - 'wildcard': whole-module imports, needs synthesis (Go, Ruby, C/C++, Swift)
-     *  - 'namespace': namespace imports, needs moduleAliasMap (Python)
+     *  - 'wildcard-transitive': textual-include closure; imports chain through files (C, C++)
+     *  - 'wildcard-leaf': whole-module single-hop imports; no transitive chaining (Go, Ruby, Swift, Dart)
+     *  - 'namespace': qualified namespace imports, needs moduleAliasMap (Python)
+     *  - 'explicit-reexport': opt-in per-symbol re-export (scaffold; no provider uses yet)
      *  Default: 'named'. */
     readonly importSemantics?: ImportSemantics;
     /** Language-specific transformation of raw import path text before resolution.

package/dist/core/ingestion/languages/c-cpp.js CHANGED Viewed

@@ -293,7 +293,7 @@ export const cProvider = defineLanguage({
     typeConfig: cCppConfig,
     exportChecker: cCppExportChecker,
     importResolver: resolveCImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-transitive',
     fieldExtractor: createFieldExtractor(cFieldConfig),
     methodExtractor: createMethodExtractor({
         ...cMethodConfig,
@@ -310,7 +310,7 @@ export const cppProvider = defineLanguage({
     typeConfig: cCppConfig,
     exportChecker: cCppExportChecker,
     importResolver: resolveCppImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-transitive',
     mroStrategy: 'leftmost-base',
     fieldExtractor: createFieldExtractor(cppFieldConfig),
     methodExtractor: createMethodExtractor({

package/dist/core/ingestion/languages/dart.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * Dart Language Provider
  *
  * Dart traits:
- *   - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
+ *   - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
  *   - exportChecker: public if no leading underscore
  *   - Dart SDK imports (dart:*) and external packages are skipped
  *   - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body

package/dist/core/ingestion/languages/dart.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Dart Language Provider
  *
  * Dart traits:
- *   - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
+ *   - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
  *   - exportChecker: public if no leading underscore
  *   - Dart SDK imports (dart:*) and external packages are skipped
  *   - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body
@@ -83,7 +83,7 @@ export const dartProvider = defineLanguage({
     typeConfig: dartConfig,
     exportChecker: dartExportChecker,
     importResolver: resolveDartImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     fieldExtractor: createFieldExtractor(dartFieldConfig),
     methodExtractor: createMethodExtractor(dartMethodConfig),
     classExtractor: createClassExtractor({

package/dist/core/ingestion/languages/go.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Go traits:
- *   - importSemantics: 'wildcard' (Go imports entire packages)
+ *   - importSemantics: 'wildcard-leaf' (Go imports entire packages)
  *   - callRouter: present (Go method calls may need routing)
  */
 export declare const goProvider: import("../language-provider.js").LanguageProvider;

package/dist/core/ingestion/languages/go.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Go traits:
- *   - importSemantics: 'wildcard' (Go imports entire packages)
+ *   - importSemantics: 'wildcard-leaf' (Go imports entire packages)
  *   - callRouter: present (Go method calls may need routing)
  */
 import { SupportedLanguages } from '../../../_shared/index.js';
@@ -26,7 +26,7 @@ export const goProvider = defineLanguage({
     typeConfig: goConfig,
     exportChecker: goExportChecker,
     importResolver: resolveGoImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     fieldExtractor: createFieldExtractor(goFieldConfig),
     methodExtractor: createMethodExtractor(goMethodConfig),
     classExtractor: createClassExtractor({

package/dist/core/ingestion/languages/ruby.js CHANGED Viewed

@@ -99,7 +99,7 @@ export const rubyProvider = defineLanguage({
     exportChecker: rubyExportChecker,
     importResolver: resolveRubyImport,
     callRouter: routeRubyCall,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     resolveEnclosingOwner(node) {
         // Ruby singleton_class (class << self) should resolve to the enclosing
         // class or module for owner/container resolution (HAS_METHOD edges, class IDs).

package/dist/core/ingestion/languages/swift.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Swift traits:
- *   - importSemantics: 'wildcard' (Swift imports entire modules)
+ *   - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
  *   - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
  *   - implicitImportWirer: all files in the same SPM target see each other
  */

package/dist/core/ingestion/languages/swift.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Swift traits:
- *   - importSemantics: 'wildcard' (Swift imports entire modules)
+ *   - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
  *   - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
  *   - implicitImportWirer: all files in the same SPM target see each other
  */
@@ -221,7 +221,7 @@ export const swiftProvider = defineLanguage({
     typeConfig: swiftConfig,
     exportChecker: swiftExportChecker,
     importResolver: resolveSwiftImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     heritageDefaultEdge: 'IMPLEMENTS',
     fieldExtractor: createFieldExtractor(swiftFieldConfig),
     methodExtractor: createMethodExtractor({

package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts CHANGED Viewed

@@ -14,12 +14,47 @@
  */
 import type { KnowledgeGraph } from '../../graph/types.js';
 import type { createResolutionContext } from '../model/resolution-context.js';
-import { SupportedLanguages } from '../../../_shared/index.js';
+import type { SupportedLanguages } from '../../../_shared/index.js';
 /** Check if a language uses wildcard (whole-module) import semantics. */
 export declare function isWildcardImportLanguage(lang: SupportedLanguages): boolean;
 /** Check if a language needs synthesis before call resolution.
  *  True for wildcard-import languages AND namespace-import languages (Python). */
 export declare function needsSynthesis(lang: SupportedLanguages): boolean;
+/**
+ * Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
+ *
+ * Textual-include languages chain symbols through files: if `dict.c` includes
+ * `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
+ * all three files. This helper walks the include graph (combining both the
+ * ingestion-context `importMap` and the graph-level IMPORTS edges) until the
+ * closure is stable or the size cap is reached.
+ *
+ * **Order matters.** The returned `Set` preserves iteration order (insertion
+ * order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
+ * on a first-seen-wins basis, so this closure's ordering determines which
+ * declaration wins when multiple headers export the same name (e.g. overloaded
+ * free functions like `write_audit()` vs `write_audit(const char*)` in
+ * different headers). We therefore:
+ *   1. Seed the closure with direct imports in declaration order (matches the
+ *      order of `#include` directives in the source file).
+ *   2. Expand transitively in FIFO / true BFS order, so closer headers are
+ *      seen before deeper ones.
+ *
+ * Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
+ * header includes, which are valid C/C++ when paired with `#pragma once` or
+ * include guards.
+ *
+ * Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
+ * prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
+ * where one translation unit can transitively reach tens of thousands of
+ * headers. Partial closures still yield useful bindings for the cluster of
+ * headers closest to the importer, which is what overload resolution and
+ * cross-file call resolution care about.
+ *
+ * Queue implementation: the FIFO is a head-index over a growing array (O(1)
+ * dequeue) rather than `Array.prototype.shift()` (O(n)), so deep chains stay
+ * linear.
+ *
+ * @param directImports Files imported directly by the importing file; seeded
+ *   into the closure first, in iteration order.
+ * @param importMap File → directly imported files, from the ingestion context.
+ * @param graphImports File → directly imported files, from graph IMPORTS edges.
+ * @returns The closure as a `Set` in insertion (BFS discovery) order. It holds
+ *   the direct imports themselves plus every file reached through them.
+ */
+export declare function expandTransitiveIncludeClosure(directImports: Iterable<string>, importMap: ReadonlyMap<string, ReadonlySet<string>>, graphImports: ReadonlyMap<string, ReadonlySet<string>>): Set<string>;
 /**
  * Synthesize namedImportMap entries for languages with whole-module imports.
  *

package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js CHANGED Viewed

@@ -34,9 +34,26 @@ const IMPORTABLE_SYMBOL_LABELS = new Set([
 /** Max synthetic bindings per importing file — prevents memory bloat
  *  for C/C++ files that include many large headers. */
 const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
+/** Max files allowed in a single transitive include closure. Guards against
+ *  OOM on pathological C/C++ codebases (boost, Linux kernel-style monoheaders)
+ *  where a single translation unit can transitively reach many thousands of
+ *  headers. When the cap is hit, BFS expansion stops early — the file still
+ *  synthesizes bindings from the partial closure rather than failing. */
+const MAX_TRANSITIVE_CLOSURE_SIZE = 5000;
+/** Import semantics tags whose languages need synthesis of whole-module imports.
+ *  `wildcard-transitive` (C/C++) and `wildcard-leaf` (Go, Ruby, Swift, Dart) are
+ *  the file-based wildcard strategies. `explicit-reexport` is a scaffold tag —
+ *  no provider uses it yet, but it goes through the same leaf-style synthesis
+ *  path today because a re-exporter is still an importer; only the extra DAG
+ *  walk to surface re-exported symbols is missing (future work). */
+const WILDCARD_SEMANTICS = new Set([
+    'wildcard-transitive',
+    'wildcard-leaf',
+    'explicit-reexport',
+]);
 /** Languages with whole-module import semantics (derived from providers at module load). */
 const WILDCARD_LANGUAGES = new Set(Object.values(providers)
-    .filter((p) => p.importSemantics === 'wildcard')
+    .filter((p) => WILDCARD_SEMANTICS.has(p.importSemantics))
     .map((p) => p.id));
 /** Languages that need binding synthesis before call resolution. */
 const SYNTHESIS_LANGUAGES = new Set(Object.values(providers)
@@ -51,6 +68,82 @@ export function isWildcardImportLanguage(lang) {
 export function needsSynthesis(lang) {
     return SYNTHESIS_LANGUAGES.has(lang);
 }
+// ── Strategy implementations ───────────────────────────────────────────────
+/**
+ * Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
+ *
+ * Textual-include languages chain symbols through files: if `dict.c` includes
+ * `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
+ * all three files. This helper walks the include graph (combining both the
+ * ingestion-context `importMap` and the graph-level IMPORTS edges) until the
+ * closure is stable or the size cap is reached.
+ *
+ * **Order matters.** The returned `Set` preserves iteration order (insertion
+ * order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
+ * on a first-seen-wins basis, so this closure's ordering determines which
+ * declaration wins when multiple headers export the same name (e.g. overloaded
+ * free functions like `write_audit()` vs `write_audit(const char*)` in
+ * different headers). We therefore:
+ *   1. Seed the closure with direct imports in declaration order (matches the
+ *      order of `#include` directives in the source file).
+ *   2. Expand transitively in FIFO / true BFS order, so closer headers are
+ *      seen before deeper ones. For each dequeued file, its `importMap`
+ *      entries are visited before its `graphImports` entries.
+ *
+ * Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
+ * header includes, which are valid C/C++ when paired with `#pragma once` or
+ * include guards.
+ *
+ * Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
+ * prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
+ * where one translation unit can transitively reach tens of thousands of
+ * headers. Partial closures still yield useful bindings for the cluster of
+ * headers closest to the importer, which is what overload resolution and
+ * cross-file call resolution care about.
+ *
+ * Queue implementation: the FIFO is a head-index over a growing array (O(1)
+ * dequeue) rather than `Array.prototype.shift()` (O(n)), so deep chains stay
+ * linear.
+ *
+ * NOTE(review): this helper is not told which file is the importer, so nothing
+ * here excludes it. If an include cycle leads back to the importing file, that
+ * file ends up in its own closure — confirm the caller tolerates this.
+ *
+ * @param directImports Files imported directly by the importing file; seeded
+ *   into the closure first, in iteration order.
+ * @param importMap File → directly imported files, from the ingestion context.
+ * @param graphImports File → directly imported files, from graph IMPORTS edges.
+ * @returns The closure as a `Set` in insertion (BFS discovery) order. It holds
+ *   the direct imports themselves plus every file reached through them.
+ */
+export function expandTransitiveIncludeClosure(directImports, importMap, graphImports) {
+    const closure = new Set();
+    const queue = [];
+    let head = 0; // O(1) dequeue: advance the head index instead of shift()-ing.
+    const tryEnqueue = (file) => {
+        if (closure.has(file))
+            return true;
+        if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
+            return false;
+        closure.add(file);
+        queue.push(file);
+        return true;
+    };
+    // Seed direct imports in declaration order (see JSDoc on order-sensitivity).
+    for (const f of directImports) {
+        if (!tryEnqueue(f))
+            break;
+    }
+    // True BFS for transitive reach: head-index FIFO preserves the "closer
+    // headers first" ordering that overload resolution depends on.
+    while (head < queue.length) {
+        if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
+            break;
+        const file = queue[head++];
+        const nested = importMap.get(file);
+        if (nested) {
+            for (const n of nested) {
+                if (!tryEnqueue(n))
+                    break;
+            }
+        }
+        const nestedGraph = graphImports.get(file);
+        if (nestedGraph) {
+            for (const n of nestedGraph) {
+                if (!tryEnqueue(n))
+                    break;
+            }
+        }
+    }
+    return closure;
+}
 // ── Main synthesis function ────────────────────────────────────────────────
 /**
  * Synthesize namedImportMap entries for languages with whole-module imports.
@@ -133,16 +226,61 @@ export function synthesizeWildcardImportBindings(graph, ctx) {
             }
         }
     };
-    // Synthesize from ctx.importMap (Ruby, C/C++, Swift file-based imports)
+    /**
+     * Dispatch wildcard synthesis by the file's language provider strategy.
+     *
+     * Strategy tags (see `ImportSemantics`):
+     *   - `wildcard-transitive`: expand the include closure first (C/C++ #include
+     *     chains — e.g. `dict.c` → `server.h` → `dict.h` so `dictFind` resolves
+     *     across header chains), then synthesize from the expanded set
+     *   - `wildcard-leaf`: synthesize from direct imports only (Go, Ruby, Swift, Dart)
+     *   - `explicit-reexport`: scaffold tag; shares the leaf branch.
+     *     TODO(#821): implement re-export DAG walk for TS `export *` / Rust
+     *     `pub use`. Per the `ImportSemantics` docs no provider uses this tag
+     *     yet (TS and Rust are still tagged `named`), so this branch changes
+     *     nothing today. Once a provider adopts the tag, its direct imports
+     *     will synthesize like `wildcard-leaf`; only the extra re-export DAG
+     *     walk for barrel-file correctness is missing.
+     *   - `namespace` / `named`: no-op here (namespace handled in Loop 3 below,
+     *     named needs no synthesis).
+     *
+     * Any other value reaches the `default` branch, which does nothing at
+     * runtime: `_exhaustive` is assigned and immediately discarded (presumably
+     * what remains of a compile-time exhaustiveness check in the TypeScript
+     * source — confirm).
+     *
+     * Used by both Loop 1 (ctx.importMap) and Loop 2 (graphImports) so a future
+     * transitive-import language whose edges arrive via graphImports gets closure
+     * expansion consistently regardless of edge source.
+     *
+     * @param filePath Path of the importing file.
+     * @param importedFiles Files that `filePath` imports directly.
+     * @param provider Language provider for `filePath`; its `importSemantics`
+     *   selects the strategy.
+     */
+    const dispatchSynthesis = (filePath, importedFiles, provider) => {
+        switch (provider.importSemantics) {
+            case 'wildcard-transitive':
+                synthesizeForFile(filePath, expandTransitiveIncludeClosure(importedFiles, ctx.importMap, graphImports));
+                return;
+            case 'wildcard-leaf':
+            case 'explicit-reexport':
+                synthesizeForFile(filePath, importedFiles);
+                return;
+            case 'namespace':
+            case 'named':
+                return;
+            default: {
+                const _exhaustive = provider.importSemantics;
+                void _exhaustive;
+            }
+        }
+    };
+    // Loop 1: synthesize from ctx.importMap (Ruby, C/C++, Swift, Dart file-based imports).
     for (const [filePath, importedFiles] of ctx.importMap) {
         const lang = getLanguageFromFilename(filePath);
         if (!lang || !isWildcardImportLanguage(lang))
             continue;
-        synthesizeForFile(filePath, importedFiles);
+        const provider = getProviderForFile(filePath);
+        if (!provider)
+            continue;
+        dispatchSynthesis(filePath, importedFiles, provider);
     }
-    // Synthesize from graph IMPORTS edges (Go and other wildcard-import languages)
+    // Loop 2: synthesize from graph IMPORTS edges (Go and other wildcard-import
+    // languages whose edges live in the graph rather than ctx.importMap).
     for (const [filePath, importedFiles] of graphImports) {
-        synthesizeForFile(filePath, importedFiles);
+        const provider = getProviderForFile(filePath);
+        if (!provider)
+            continue;
+        dispatchSynthesis(filePath, importedFiles, provider);
     }
     // Build Python module-alias maps for namespace-import languages.
     // `import models` in app.py → moduleAliasMap['app.py']['models'] = 'models.py'

package/dist/core/lbug/csv-generator.js CHANGED Viewed

@@ -246,14 +246,17 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
         Interface: interfaceWriter,
         CodeElement: codeElemWriter,
     };
-    const seenFileIds = new Set();
+    // Deduplicate all node types — the pipeline can produce duplicate IDs across
+    // all symbol types (Class, Method, Function, etc.), not just File nodes.
+    // A single Set covering every label prevents PK violations on COPY.
+    const seenNodeIds = new Set();
     // --- SINGLE PASS over all nodes ---
     for (const node of graph.iterNodes()) {
+        if (seenNodeIds.has(node.id))
+            continue;
+        seenNodeIds.add(node.id);
         switch (node.label) {
             case 'File': {
-                if (seenFileIds.has(node.id))
-                    break;
-                seenFileIds.add(node.id);
                 const content = await extractContent(node, contentCache);
                 await fileWriter.addRow([
                     escapeCSVField(node.id),

package/dist/core/lbug/lbug-adapter.d.ts CHANGED Viewed

@@ -1,5 +1,33 @@
 import lbug from '@ladybugdb/core';
 import { KnowledgeGraph } from '../graph/types.js';
+/** Factory for creating WriteStreams — injectable for testing. */
+export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
+/** Result of splitting the relationship CSV into per-label-pair files. */
+export interface RelCsvSplitResult {
+    relHeader: string;
+    relsByPairMeta: Map<string, {
+        csvPath: string;
+        rows: number;
+    }>;
+    pairWriteStreams: Map<string, import('fs').WriteStream>;
+    skippedRels: number;
+    totalValidRels: number;
+}
+/**
+ * Split a relationship CSV into per-label-pair files on disk.
+ *
+ * Streams the CSV line-by-line, routing each relationship to a file named
+ * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
+ * drain listener per stream at a time, and readline resumes only when ALL
+ * backpressured streams have drained.
+ *
+ * @param csvPath       Path to the combined relationship CSV
+ * @param csvDir        Directory to write per-pair CSV files
+ * @param validTables   Set of valid node table names
+ * @param getNodeLabel  Function to extract the label from a node ID
+ * @param wsFactory     Optional WriteStream factory (defaults to fs.createWriteStream)
+ */
+export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
 /** Expose the current Database for pool adapter reuse in tests. */
 export declare const getDatabase: () => lbug.Database | null;
 /**
@@ -70,8 +98,18 @@ export declare const loadCachedEmbeddings: () => Promise<{
     embeddings: Array<{
         nodeId: string;
         embedding: number[];
+        contentHash?: string;
     }>;
 }>;
+/**
+ * Fetch existing embedding hashes from CodeEmbedding table for incremental embedding.
+ * Returns a Map<nodeId, contentHash> suitable for passing to `runEmbeddingPipeline`.
+ * Handles legacy DBs without the `contentHash` column (all rows treated as stale with empty hash).
+ * Returns undefined if the CodeEmbedding table does not exist.
+ *
+ * @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
+ */
+export declare const fetchExistingEmbeddingHashes: (execQuery: (cypher: string) => Promise<any[]>) => Promise<Map<string, string> | undefined>;
 export declare const closeLbug: () => Promise<void>;
 export declare const isLbugReady: () => boolean;
 /**