npm - gitnexus - Versions diffs - 1.6.1 → 1.6.2-rc.2 - Mend

gitnexus 1.6.1 → 1.6.2-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +73 -0
package/dist/cli/analyze.js +23 -1
package/dist/core/embeddings/embedding-pipeline.js +2 -2
package/dist/core/group/extractors/grpc-extractor.d.ts +1 -1
package/dist/core/group/extractors/grpc-extractor.js +28 -13
package/dist/core/group/extractors/http-route-extractor.js +35 -5
package/dist/core/group/extractors/manifest-extractor.js +46 -4
package/dist/core/ingestion/language-provider.d.ts +24 -5
package/dist/core/ingestion/languages/c-cpp.js +2 -2
package/dist/core/ingestion/languages/dart.d.ts +1 -1
package/dist/core/ingestion/languages/dart.js +2 -2
package/dist/core/ingestion/languages/go.d.ts +1 -1
package/dist/core/ingestion/languages/go.js +2 -2
package/dist/core/ingestion/languages/ruby.js +1 -1
package/dist/core/ingestion/languages/swift.d.ts +1 -1
package/dist/core/ingestion/languages/swift.js +2 -2
package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +36 -1
package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +143 -5
package/dist/core/lbug/csv-generator.js +7 -4
package/dist/core/lbug/lbug-adapter.d.ts +28 -0
package/dist/core/lbug/lbug-adapter.js +125 -62
package/dist/core/run-analyze.js +1 -1
package/dist/server/api.js +22 -1
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -234,6 +234,79 @@ Installed automatically by both `gitnexus analyze` (per-repo) and `gitnexus setu
 - Node.js >= 18
 - Git repository (uses git for commit tracking)
+## Release candidates
+Stable releases publish to the default `latest` dist-tag. When a pull request
+with non-documentation changes merges into `main`, an automated workflow also
+publishes a prerelease build under the `rc` dist-tag, so early adopters can
+try in-flight fixes without waiting for the next stable cut. (Docs-only
+merges are skipped.)
+```bash
+# Try the latest release candidate (pre-stable — may change at any time)
+npm install -g gitnexus@rc
+# — or —
+npx gitnexus@rc analyze
+```
+Release-candidate versions follow the standard semver prerelease format
+`X.Y.Z-rc.N`, where `X.Y.Z` is the next stable target (bumped from the
+current `latest` by patch by default; `minor` or `major` when kicking off a
+bigger cycle) and `N` increments per published rc. Example sequence:
+`1.6.2-rc.1`, `1.6.2-rc.2`, …, then once `1.6.2` ships stable,
+`1.6.3-rc.1`. See the [Releases page](https://github.com/abhigyanpatwari/GitNexus/releases)
+for the full list; stable `latest` is unaffected.
+## Troubleshooting
+### `Cannot destructure property 'package' of 'node.target' as it is null`
+This crash was caused by a dependency URL format that is incompatible with
+certain npm/arborist versions ([npm/cli#8126](https://github.com/npm/cli/issues/8126)).
+It is fixed in **gitnexus v1.6.2+**. Upgrade to the latest version:
+```bash
+npx gitnexus@latest analyze          # always uses the newest release
+# — or —
+npm install -g gitnexus@latest       # upgrade a global install
+```
+If you still hit npm install issues after upgrading, these generic workarounds
+may help:
+```bash
+npm install -g npm@latest            # update npm itself
+npm cache clean --force              # clear a possibly corrupt cache
+```
+### Installation fails with native module errors
+Some optional language grammars (Dart, Kotlin, Swift) require native compilation. If they fail, GitNexus still works — those languages will be skipped.
+If `npm install -g gitnexus` fails on native modules:
+```bash
+# Ensure build tools are available (Linux/macOS)
+# Ubuntu/Debian: sudo apt install python3 make g++
+# macOS: xcode-select --install
+# Retry installation
+npm install -g gitnexus
+```
+### Analysis runs out of memory
+For very large repositories:
+```bash
+# Increase Node.js heap size
+NODE_OPTIONS="--max-old-space-size=16384" npx gitnexus analyze
+# Exclude large directories
+echo "vendor/" >> .gitnexusignore
+echo "dist/" >> .gitnexusignore
+```
 ## Privacy
 - All processing happens locally on your machine

package/dist/cli/analyze.js CHANGED Viewed

@@ -232,7 +232,7 @@ export const analyzeCommand = async (inputPath, options) => {
         bar.stop();
         const msg = err.message || String(err);
         console.error(`\n  Analysis failed: ${msg}\n`);
-        // Provide helpful guidance for known large-repo failure modes
+        // Provide helpful guidance for known failure modes
         if (msg.includes('Maximum call stack size exceeded') ||
             msg.includes('call stack') ||
             msg.includes('Map maximum size') ||
@@ -248,6 +248,28 @@ export const analyzeCommand = async (inputPath, options) => {
             console.error('    3. Increase stack size: NODE_OPTIONS="--stack-size=4096"');
             console.error('');
         }
+        else if (msg.includes('ERESOLVE') || msg.includes('Could not resolve dependency')) {
+            // Note: the original arborist "Cannot destructure property 'package' of
+            // 'node.target'" crash happens inside npm *before* gitnexus code runs,
+            // so it can't be caught here.  This branch handles dependency-resolution
+            // errors that surface at runtime (e.g. dynamic require failures).
+            console.error('  This looks like an npm dependency resolution issue.');
+            console.error('  Suggestions:');
+            console.error('    1. Clear the npm cache:    npm cache clean --force');
+            console.error('    2. Update npm:             npm install -g npm@latest');
+            console.error('    3. Reinstall gitnexus:     npm install -g gitnexus@latest');
+            console.error('    4. Or try npx directly:    npx gitnexus@latest analyze');
+            console.error('');
+        }
+        else if (msg.includes('MODULE_NOT_FOUND') ||
+            msg.includes('Cannot find module') ||
+            msg.includes('ERR_MODULE_NOT_FOUND')) {
+            console.error('  A required module could not be loaded. The installation may be corrupt.');
+            console.error('  Suggestions:');
+            console.error('    1. Reinstall:   npm install -g gitnexus@latest');
+            console.error('    2. Clear cache: npm cache clean --force && npx gitnexus@latest analyze');
+            console.error('');
+        }
         process.exitCode = 1;
         return;
     }

package/dist/core/embeddings/embedding-pipeline.js CHANGED Viewed

@@ -67,8 +67,8 @@ const queryEmbeddableNodes = async (executeQuery) => {
  * that occurs when UPDATEing nodes with large content fields
  */
 const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
-    // INSERT into separate embedding table - much more memory efficient!
-    const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
+    // MERGE instead of CREATE — idempotent, handles concurrent analyzes and partial prior runs
+    const cypher = `MERGE (e:CodeEmbedding {nodeId: $nodeId}) SET e.embedding = $embedding`;
     const paramsList = updates.map((u) => ({ nodeId: u.id, embedding: u.embedding }));
     await executeWithReusedStatement(cypher, paramsList);
 };

package/dist/core/group/extractors/grpc-extractor.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export interface ProtoServiceInfo {
     protoPath: string;
 }
 export declare function buildProtoMap(repoPath: string): Promise<Map<string, ProtoServiceInfo[]>>;
-export declare function resolveProtoConflict(_serviceName: string, sourceFilePath: string, candidates: ProtoServiceInfo[]): ProtoServiceInfo | null;
+export declare function resolveProtoConflict(serviceName: string, sourceFilePath: string, candidates: ProtoServiceInfo[]): ProtoServiceInfo | null;
 export declare function serviceContractId(pkg: string, serviceName: string): string;
 export declare class GrpcExtractor implements ContractExtractor {
     type: "grpc";

package/dist/core/group/extractors/grpc-extractor.js CHANGED Viewed

@@ -263,23 +263,31 @@ export async function buildProtoMap(repoPath) {
     const { servicesByName } = await buildProtoContext(repoPath);
     return servicesByName;
 }
-export function resolveProtoConflict(_serviceName, sourceFilePath, candidates) {
+export function resolveProtoConflict(serviceName, sourceFilePath, candidates) {
     if (candidates.length === 0)
         return null;
     if (candidates.length === 1)
         return candidates[0];
     const sourceDir = normalizeProtoPath(path.dirname(sourceFilePath));
-    let best = candidates[0];
-    let bestScore = -1;
-    for (const c of candidates) {
+    const scored = candidates.map((c) => {
         const protoDir = normalizeProtoPath(path.dirname(c.protoPath));
-        const sharedRun = longestSharedSegmentRun(sourceDir, protoDir);
-        if (sharedRun > bestScore) {
-            bestScore = sharedRun;
-            best = c;
-        }
+        return { candidate: c, score: longestSharedSegmentRun(sourceDir, protoDir) };
+    });
+    let maxScore = -1;
+    for (const s of scored) {
+        if (s.score > maxScore)
+            maxScore = s.score;
     }
-    return best;
+    const winners = scored.filter((s) => s.score === maxScore);
+    // Path heuristic cannot uniquely identify a winner — refuse to guess.
+    // Ties (including all-zero ties) would otherwise silently merge unrelated
+    // services under a fabricated package-qualified contract id.
+    if (winners.length !== 1) {
+        const paths = candidates.map((c) => c.protoPath).join(', ');
+        console.warn(`[grpc-extractor] Ambiguous proto resolution for service "${serviceName}" from ${sourceFilePath}: ${winners.length} candidates tied at score ${maxScore} among [${paths}] — skipping canonical contract`);
+        return null;
+    }
+    return winners[0].candidate;
 }
 export function serviceContractId(pkg, serviceName) {
     const prefix = pkg ? `${pkg}.${serviceName}` : serviceName;
@@ -339,7 +347,9 @@ export class GrpcExtractor {
                 continue;
             }
             for (const d of detections) {
-                out.push(this.detectionToContract(d, rel, protoMap));
+                const contract = this.detectionToContract(d, rel, protoMap);
+                if (contract)
+                    out.push(contract);
             }
         }
         return this.dedupe(out);
@@ -352,8 +362,13 @@ export class GrpcExtractor {
      * based on whether the proto map had an entry.
      */
     detectionToContract(d, filePath, protoMap) {
-        const candidates = protoMap.get(d.serviceName);
-        const proto = resolveProtoConflict(d.serviceName, filePath, candidates ?? []);
+        const candidates = protoMap.get(d.serviceName) ?? [];
+        const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
+        // If there were proto candidates but resolution was ambiguous, skip
+        // contract emission rather than fabricating a package-qualified id from
+        // an arbitrary candidate. resolveProtoConflict already warned.
+        if (candidates.length > 0 && proto === null)
+            return null;
         const pkg = proto?.package ?? '';
         const cid = d.methodName
             ? contractId(pkg, d.serviceName, d.methodName)

package/dist/core/group/extractors/http-route-extractor.js CHANGED Viewed

@@ -214,7 +214,29 @@ export class HttpRouteExtractor {
             const providerDetections = detections.filter((d) => d.role === 'provider');
             let handlerName = null;
             const normalizedRoute = normalizeHttpPath(routePath);
-            const match = providerDetections.find((d) => normalizeHttpPath(d.path) === normalizedRoute);
+            // Candidates share the same normalized path. When multiple
+            // detections at the same path exist (e.g. GET + POST /api/orders
+            // in one router), a blind `.find()` silently returned the first
+            // verb — attaching the wrong handler and, when method was not
+            // already pinned by the route reason, the wrong method too.
+            // Disambiguate by method when we know it; refuse to guess when
+            // we don't.
+            const candidates = providerDetections.filter((d) => normalizeHttpPath(d.path) === normalizedRoute);
+            let match;
+            const ambiguousCandidates = !method && candidates.length > 1;
+            if (method) {
+                match = candidates.find((d) => d.method === method);
+            }
+            else if (candidates.length === 1) {
+                match = candidates[0];
+            }
+            // else: multiple candidates + unknown method → leave match
+            // undefined so handlerName stays null and skip symbol
+            // enrichment below, keeping the file-basename fallback instead
+            // of letting pickSymbolUid silently pick the first Function /
+            // Method in the file (which reintroduces the mis-attribution
+            // we were trying to avoid). Method stays at the conservative
+            // 'GET' default set below.
             if (match) {
                 if (!method)
                     method = match.method;
@@ -228,7 +250,7 @@ export class HttpRouteExtractor {
             let symbolName = path.basename(filePath) || 'handler';
             let symPath = filePath;
             const fileId = row.fileId ?? row[0];
-            if (fileId) {
+            if (fileId && !ambiguousCandidates) {
                 try {
                     const syms = await db(CONTAINS_QUERY, { fileId });
                     if (syms.length > 0) {
@@ -308,9 +330,17 @@ export class HttpRouteExtractor {
             // Prefer the plugin's detected method if we can find a matching
             // fetch/axios call in the same file.
             const detections = filePath ? getDetections(filePath) : [];
-            const inferred = detections.find((d) => d.role === 'consumer' && normalizeConsumerPath(d.path) === pathNorm);
-            if (inferred)
-                method = inferred.method;
+            // Symmetric to the provider path: if multiple consumer calls in
+            // the same file share the same normalized path (e.g. a GET
+            // fetch AND a POST fetch to `/api/orders`), `.find()` silently
+            // picked the first verb and keyed the contract id on the wrong
+            // method. With no upstream method signal here, refuse to guess
+            // when candidates are ambiguous — leave `method` at its
+            // conservative 'GET' default.
+            const consumerCandidates = detections.filter((d) => d.role === 'consumer' && normalizeConsumerPath(d.path) === pathNorm);
+            if (consumerCandidates.length === 1) {
+                method = consumerCandidates[0].method;
+            }
             const cid = contractIdFor(method, pathNorm);
             let symbolUid = '';
             let symbolName = 'fetch';

package/dist/core/group/extractors/manifest-extractor.js CHANGED Viewed

@@ -16,6 +16,34 @@ function normalizeRoutePath(raw) {
         return '/';
     return collapsed.replace(/\/+$/, '');
 }
+/**
+ * Split a manifest HTTP contract into its optional `METHOD::` prefix and
+ * its path portion.
+ *
+ * `buildContractId` recommends the explicit-method form `GET::/api/orders`
+ * in group.yaml; if we hand that raw string to `normalizeRoutePath` we get
+ * `/GET::/api/orders`, which can never match `Route.name = "/api/orders"`
+ * in the graph. This helper extracts the path portion so the Cypher
+ * lookup uses the canonical route name.
+ *
+ * The method prefix regex mirrors `buildContractId` (line ~251) for
+ * symmetry: case-insensitive `[A-Za-z]+` followed by `::`. The captured
+ * method is upper-cased for downstream use; method-constrained matching
+ * against `HANDLES_ROUTE` is a future enhancement (not yet wired).
+ *
+ * Edge cases:
+ *  - `"::/api/orders"` — empty method portion, no alpha prefix match, so
+ *    the whole string is treated as a bare path (matches buildContractId
+ *    which also requires `[A-Za-z]+`).
+ *  - `"GET::"` — method with empty path, returns `{ method: 'GET', path: '' }`;
+ *    `normalizeRoutePath('')` resolves to `/` for caller.
+ */
+function parseHttpContract(raw) {
+    const match = raw.match(/^([A-Za-z]+)::/);
+    if (!match)
+        return { method: null, path: raw };
+    return { method: match[1].toUpperCase(), path: raw.slice(match[0].length) };
+}
 /**
  * Stable synthetic symbolUid for a manifest-declared contract whose target
  * symbol could not be resolved against the per-repo graph (resolveSymbol
@@ -111,7 +139,15 @@ export class ManifestExtractor {
                 // core/ingestion/pipeline.ts ensureSlash + generateId('Route', ...)).
                 // Normalize the manifest contract the same way so a user-written
                 // "/api/orders" matches "api/orders" in the graph.
-                const normalized = normalizeRoutePath(link.contract);
+                //
+                // The contract may also use the explicit-method form "GET::/api/orders"
+                // recommended by buildContractId. Strip the METHOD:: prefix before
+                // normalizing — otherwise `normalizeRoutePath('GET::/api/orders')`
+                // returns `/GET::/api/orders` and never matches Route.name. The
+                // captured method is not yet used to constrain the Cypher query
+                // (method-aware HANDLES_ROUTE matching is a future enhancement).
+                const parsed = parseHttpContract(link.contract);
+                const normalized = normalizeRoutePath(parsed.path);
                 rows = await executor(`MATCH (handler)-[r:CodeRelation {type: 'HANDLES_ROUTE'}]->(route:Route)
            WHERE route.name = $normalized
            RETURN handler.id AS uid, handler.name AS name, handler.filePath AS filePath
@@ -214,9 +250,15 @@ export class ManifestExtractor {
     buildContractId(type, contract) {
         switch (type) {
             case 'http': {
-                if (/^[A-Za-z]+::/.test(contract))
-                    return `http::${contract}`;
-                return `http::*::${contract}`;
+                // Canonicalize method casing and path separators so logically
+                // equivalent inputs (`get::/api/orders` vs `GET::/api/orders`,
+                // or trailing-slash variants) produce the same contractId and
+                // matching `manifestSymbolUid` fallback. Without this, raw
+                // user casing leaks into cross-impact join keys and fragments
+                // matches across repos.
+                const { method, path: rawPath } = parseHttpContract(contract);
+                const normalizedPath = normalizeRoutePath(rawPath);
+                return method ? `http::${method}::${normalizedPath}` : `http::*::${normalizedPath}`;
             }
             case 'grpc':
                 return `grpc::${contract}`;

package/dist/core/ingestion/language-provider.d.ts CHANGED Viewed

@@ -21,8 +21,25 @@ import type { SyntaxNode } from './utils/ast-helpers.js';
 import type { NodeLabel } from '../../_shared/index.js';
 /** Tree-sitter query captures: capture name → AST node (or undefined if not captured). */
 export type CaptureMap = Record<string, SyntaxNode | undefined>;
-/** How a language handles imports — determines wildcard synthesis behavior. */
-export type ImportSemantics = 'named' | 'wildcard' | 'namespace';
+/**
+ * How a language handles imports — determines wildcard synthesis behavior.
+ *
+ * Import resolution is a graph-traversal policy with multiple distinct strategies,
+ * analogous to MRO for method resolution. Each tag picks a strategy:
+ *
+ * | Tag                   | Mechanism                                      | Traversal           | Languages                                  |
+ * |-----------------------|------------------------------------------------|---------------------|--------------------------------------------|
+ * | `named`               | Per-symbol imports                             | None (use-site)     | JS/TS, Java, C#, Rust, PHP, Kotlin, Vue    |
+ * | `wildcard-transitive` | Textual paste, symbols chain through files     | BFS closure         | C, C++ (future: Obj-C, Fortran, Nim)       |
+ * | `wildcard-leaf`       | Whole public API, single hop                   | None (direct only)  | Go, Ruby, Swift, Dart                      |
+ * | `namespace`           | Qualified handle; symbols resolved at call site| None at import      | Python                                     |
+ * | `explicit-reexport`   | Opt-in per-symbol re-export (SCAFFOLD)         | Topological DAG     | (future: TS `export *`, Rust `pub use`)    |
+ *
+ * The `explicit-reexport` tag is a compile-time scaffold; no provider claims it yet.
+ * It falls through to `wildcard-leaf` behavior in synthesis so today's TS/Rust
+ * handling is unchanged. A future PR will implement the DAG walk for `export *`.
+ */
+export type ImportSemantics = 'named' | 'wildcard-transitive' | 'wildcard-leaf' | 'namespace' | 'explicit-reexport';
 /**
  * Everything a language needs to provide.
  * Required fields must be explicitly set; optional fields have defaults
@@ -51,10 +68,12 @@ interface LanguageProviderConfig {
     /** Named binding extraction from import statements.
      *  Default: undefined (language uses wildcard/whole-module imports). */
     readonly namedBindingExtractor?: NamedBindingExtractorFn;
-    /** How this language handles imports.
+    /** How this language handles imports. See `ImportSemantics` for the full taxonomy.
      *  - 'named': per-symbol imports (JS/TS, Java, C#, Rust, PHP, Kotlin)
-     *  - 'wildcard': whole-module imports, needs synthesis (Go, Ruby, C/C++, Swift)
-     *  - 'namespace': namespace imports, needs moduleAliasMap (Python)
+     *  - 'wildcard-transitive': textual-include closure; imports chain through files (C, C++)
+     *  - 'wildcard-leaf': whole-module single-hop imports; no transitive chaining (Go, Ruby, Swift, Dart)
+     *  - 'namespace': qualified namespace imports, needs moduleAliasMap (Python)
+     *  - 'explicit-reexport': opt-in per-symbol re-export (scaffold; no provider uses yet)
      *  Default: 'named'. */
     readonly importSemantics?: ImportSemantics;
     /** Language-specific transformation of raw import path text before resolution.

package/dist/core/ingestion/languages/c-cpp.js CHANGED Viewed

@@ -293,7 +293,7 @@ export const cProvider = defineLanguage({
     typeConfig: cCppConfig,
     exportChecker: cCppExportChecker,
     importResolver: resolveCImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-transitive',
     fieldExtractor: createFieldExtractor(cFieldConfig),
     methodExtractor: createMethodExtractor({
         ...cMethodConfig,
@@ -310,7 +310,7 @@ export const cppProvider = defineLanguage({
     typeConfig: cCppConfig,
     exportChecker: cCppExportChecker,
     importResolver: resolveCppImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-transitive',
     mroStrategy: 'leftmost-base',
     fieldExtractor: createFieldExtractor(cppFieldConfig),
     methodExtractor: createMethodExtractor({

package/dist/core/ingestion/languages/dart.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * Dart Language Provider
  *
  * Dart traits:
- *   - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
+ *   - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
  *   - exportChecker: public if no leading underscore
  *   - Dart SDK imports (dart:*) and external packages are skipped
  *   - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body

package/dist/core/ingestion/languages/dart.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Dart Language Provider
  *
  * Dart traits:
- *   - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
+ *   - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
  *   - exportChecker: public if no leading underscore
  *   - Dart SDK imports (dart:*) and external packages are skipped
  *   - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body
@@ -83,7 +83,7 @@ export const dartProvider = defineLanguage({
     typeConfig: dartConfig,
     exportChecker: dartExportChecker,
     importResolver: resolveDartImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     fieldExtractor: createFieldExtractor(dartFieldConfig),
     methodExtractor: createMethodExtractor(dartMethodConfig),
     classExtractor: createClassExtractor({

package/dist/core/ingestion/languages/go.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Go traits:
- *   - importSemantics: 'wildcard' (Go imports entire packages)
+ *   - importSemantics: 'wildcard-leaf' (Go imports entire packages)
  *   - callRouter: present (Go method calls may need routing)
  */
 export declare const goProvider: import("../language-provider.js").LanguageProvider;

package/dist/core/ingestion/languages/go.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Go traits:
- *   - importSemantics: 'wildcard' (Go imports entire packages)
+ *   - importSemantics: 'wildcard-leaf' (Go imports entire packages)
  *   - callRouter: present (Go method calls may need routing)
  */
 import { SupportedLanguages } from '../../../_shared/index.js';
@@ -26,7 +26,7 @@ export const goProvider = defineLanguage({
     typeConfig: goConfig,
     exportChecker: goExportChecker,
     importResolver: resolveGoImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     fieldExtractor: createFieldExtractor(goFieldConfig),
     methodExtractor: createMethodExtractor(goMethodConfig),
     classExtractor: createClassExtractor({

package/dist/core/ingestion/languages/ruby.js CHANGED Viewed

@@ -99,7 +99,7 @@ export const rubyProvider = defineLanguage({
     exportChecker: rubyExportChecker,
     importResolver: resolveRubyImport,
     callRouter: routeRubyCall,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     resolveEnclosingOwner(node) {
         // Ruby singleton_class (class << self) should resolve to the enclosing
         // class or module for owner/container resolution (HAS_METHOD edges, class IDs).

package/dist/core/ingestion/languages/swift.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Swift traits:
- *   - importSemantics: 'wildcard' (Swift imports entire modules)
+ *   - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
  *   - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
  *   - implicitImportWirer: all files in the same SPM target see each other
  */

package/dist/core/ingestion/languages/swift.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * LanguageProvider, following the Strategy pattern used by the pipeline.
  *
  * Key Swift traits:
- *   - importSemantics: 'wildcard' (Swift imports entire modules)
+ *   - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
  *   - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
  *   - implicitImportWirer: all files in the same SPM target see each other
  */
@@ -221,7 +221,7 @@ export const swiftProvider = defineLanguage({
     typeConfig: swiftConfig,
     exportChecker: swiftExportChecker,
     importResolver: resolveSwiftImport,
-    importSemantics: 'wildcard',
+    importSemantics: 'wildcard-leaf',
     heritageDefaultEdge: 'IMPLEMENTS',
     fieldExtractor: createFieldExtractor(swiftFieldConfig),
     methodExtractor: createMethodExtractor({

package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts CHANGED Viewed

@@ -14,12 +14,47 @@
  */
 import type { KnowledgeGraph } from '../../graph/types.js';
 import type { createResolutionContext } from '../model/resolution-context.js';
-import { SupportedLanguages } from '../../../_shared/index.js';
+import type { SupportedLanguages } from '../../../_shared/index.js';
 /** Check if a language uses wildcard (whole-module) import semantics. */
 export declare function isWildcardImportLanguage(lang: SupportedLanguages): boolean;
 /** Check if a language needs synthesis before call resolution.
  *  True for wildcard-import languages AND namespace-import languages (Python). */
 export declare function needsSynthesis(lang: SupportedLanguages): boolean;
+/**
+ * Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
+ *
+ * Textual-include languages chain symbols through files: if `dict.c` includes
+ * `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
+ * all three files. This helper walks the include graph (combining both the
+ * ingestion-context `importMap` and the graph-level IMPORTS edges) until the
+ * closure is stable.
+ *
+ * **Order matters.** The returned `Set` preserves iteration order (insertion
+ * order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
+ * on a first-seen-wins basis, so this closure's ordering determines which
+ * declaration wins when multiple headers export the same name (e.g. overloaded
+ * free functions like `write_audit()` vs `write_audit(const char*)` in
+ * different headers). We therefore:
+ *   1. Seed the closure with direct imports in declaration order (matches the
+ *      order of `#include` directives in the source file).
+ *   2. Use FIFO / true BFS (`queue.shift()`) for transitive expansion, so
+ *      closer headers are seen before deeper ones.
+ *
+ * Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
+ * header includes, which are valid C/C++ when paired with `#pragma once` or
+ * include guards.
+ *
+ * Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
+ * prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
+ * where one translation unit can transitively reach tens of thousands of
+ * headers. Partial closures still yield useful bindings for the cluster of
+ * headers closest to the importer, which is what overload resolution and
+ * cross-file call resolution care about.
+ *
+ * Queue implementation: uses a head-index over a growing array (O(1) dequeue)
+ * instead of `Array.prototype.shift()` (O(n)) so deep chains stay linear.
+ */
+export declare function expandTransitiveIncludeClosure(directImports: Iterable<string>, importMap: ReadonlyMap<string, ReadonlySet<string>>, graphImports: ReadonlyMap<string, ReadonlySet<string>>): Set<string>;
 /**
  * Synthesize namedImportMap entries for languages with whole-module imports.
  *

package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js CHANGED Viewed

@@ -34,9 +34,26 @@ const IMPORTABLE_SYMBOL_LABELS = new Set([
 /** Max synthetic bindings per importing file — prevents memory bloat
  *  for C/C++ files that include many large headers. */
 const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
+/** Max files allowed in a single transitive include closure. Guards against
+ *  OOM on pathological C/C++ codebases (boost, Linux kernel-style monoheaders)
+ *  where a single translation unit can transitively reach many thousands of
+ *  headers. When the cap is hit, BFS expansion stops early — the file still
+ *  synthesizes bindings from the partial closure rather than failing. */
+const MAX_TRANSITIVE_CLOSURE_SIZE = 5000;
+/** Import semantics tags whose languages need synthesis of whole-module imports.
+ *  `wildcard-transitive` (C/C++) and `wildcard-leaf` (Go, Ruby, Swift, Dart) are
+ *  the file-based wildcard strategies. `explicit-reexport` is a scaffold tag —
+ *  no provider uses it yet, but it goes through the same leaf-style synthesis
+ *  path today because a re-exporter is still an importer; only the extra DAG
+ *  walk to surface re-exported symbols is missing (future work). */
+const WILDCARD_SEMANTICS = new Set([
+    'wildcard-transitive',
+    'wildcard-leaf',
+    'explicit-reexport',
+]);
 /** Languages with whole-module import semantics (derived from providers at module load). */
 const WILDCARD_LANGUAGES = new Set(Object.values(providers)
-    .filter((p) => p.importSemantics === 'wildcard')
+    .filter((p) => WILDCARD_SEMANTICS.has(p.importSemantics))
     .map((p) => p.id));
 /** Languages that need binding synthesis before call resolution. */
 const SYNTHESIS_LANGUAGES = new Set(Object.values(providers)
@@ -51,6 +68,82 @@ export function isWildcardImportLanguage(lang) {
 export function needsSynthesis(lang) {
     return SYNTHESIS_LANGUAGES.has(lang);
 }
+// ── Strategy implementations ───────────────────────────────────────────────
+/**
+ * Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
+ *
+ * Textual-include languages chain symbols through files: if `dict.c` includes
+ * `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
+ * all three files. This helper walks the include graph (combining both the
+ * ingestion-context `importMap` and the graph-level IMPORTS edges) until the
+ * closure is stable.
+ *
+ * **Order matters.** The returned `Set` preserves iteration order (insertion
+ * order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
+ * on a first-seen-wins basis, so this closure's ordering determines which
+ * declaration wins when multiple headers export the same name (e.g. overloaded
+ * free functions like `write_audit()` vs `write_audit(const char*)` in
+ * different headers). We therefore:
+ *   1. Seed the closure with direct imports in declaration order (matches the
+ *      order of `#include` directives in the source file).
+ *   2. Use FIFO / true BFS (head-index dequeue) for transitive expansion, so
+ *      closer headers are seen before deeper ones.
+ *
+ * Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
+ * header includes, which are valid C/C++ when paired with `#pragma once` or
+ * include guards.
+ *
+ * Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
+ * prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
+ * where one translation unit can transitively reach tens of thousands of
+ * headers. Partial closures still yield useful bindings for the cluster of
+ * headers closest to the importer, which is what overload resolution and
+ * cross-file call resolution care about.
+ *
+ * Queue implementation: uses a head-index over a growing array (O(1) dequeue)
+ * instead of `Array.prototype.shift()` (O(n)) so deep chains stay linear.
+ */
+export function expandTransitiveIncludeClosure(directImports, importMap, graphImports) {
+    const closure = new Set();
+    const queue = [];
+    let head = 0; // O(1) dequeue: advance the head index instead of shift()-ing.
+    const tryEnqueue = (file) => {
+        if (closure.has(file))
+            return true;
+        if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
+            return false;
+        closure.add(file);
+        queue.push(file);
+        return true;
+    };
+    // Seed direct imports in declaration order (see JSDoc on order-sensitivity).
+    for (const f of directImports) {
+        if (!tryEnqueue(f))
+            break;
+    }
+    // True BFS for transitive reach: head-index FIFO preserves the "closer
+    // headers first" ordering that overload resolution depends on.
+    while (head < queue.length) {
+        if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
+            break;
+        const file = queue[head++];
+        const nested = importMap.get(file);
+        if (nested) {
+            for (const n of nested) {
+                if (!tryEnqueue(n))
+                    break;
+            }
+        }
+        const nestedGraph = graphImports.get(file);
+        if (nestedGraph) {
+            for (const n of nestedGraph) {
+                if (!tryEnqueue(n))
+                    break;
+            }
+        }
+    }
+    return closure;
+}
 // ── Main synthesis function ────────────────────────────────────────────────
 /**
  * Synthesize namedImportMap entries for languages with whole-module imports.
@@ -133,16 +226,61 @@ export function synthesizeWildcardImportBindings(graph, ctx) {
             }
         }
     };
-    // Synthesize from ctx.importMap (Ruby, C/C++, Swift file-based imports)
+    /**
+     * Dispatch wildcard synthesis by the file's language provider strategy.
+     *
+     * Strategy tags (see `ImportSemantics`):
+     *   - `wildcard-transitive`: expand the include closure first (C/C++ #include
+     *     chains — e.g. `dict.c` → `server.h` → `dict.h` so `dictFind` resolves
+     *     across header chains)
+     *   - `wildcard-leaf`: synthesize from direct imports only (Go, Ruby, Swift, Dart)
+     *   - `explicit-reexport`: scaffold tag; falls through to leaf behavior.
+     *     TODO(#821): implement re-export DAG walk for TS `export *` / Rust
+     *     `pub use`. The leaf fallthrough preserves today's TS/Rust behavior
+     *     (their direct imports still synthesize correctly); only the extra
+     *     re-export DAG walk for barrel-file correctness is missing.
+     *   - `namespace` / `named`: no-op here (namespace handled in Loop 3 below,
+     *     named needs no synthesis).
+     *
+     * Used by both Loop 1 (ctx.importMap) and Loop 2 (graphImports) so a future
+     * transitive-import language whose edges arrive via graphImports gets closure
+     * expansion consistently regardless of edge source.
+     */
+    const dispatchSynthesis = (filePath, importedFiles, provider) => {
+        switch (provider.importSemantics) {
+            case 'wildcard-transitive':
+                synthesizeForFile(filePath, expandTransitiveIncludeClosure(importedFiles, ctx.importMap, graphImports));
+                return;
+            case 'wildcard-leaf':
+            case 'explicit-reexport':
+                synthesizeForFile(filePath, importedFiles);
+                return;
+            case 'namespace':
+            case 'named':
+                return;
+            default: {
+                const _exhaustive = provider.importSemantics;
+                void _exhaustive;
+            }
+        }
+    };
+    // Loop 1: synthesize from ctx.importMap (Ruby, C/C++, Swift, Dart file-based imports).
     for (const [filePath, importedFiles] of ctx.importMap) {
         const lang = getLanguageFromFilename(filePath);
         if (!lang || !isWildcardImportLanguage(lang))
             continue;
-        synthesizeForFile(filePath, importedFiles);
+        const provider = getProviderForFile(filePath);
+        if (!provider)
+            continue;
+        dispatchSynthesis(filePath, importedFiles, provider);
     }
-    // Synthesize from graph IMPORTS edges (Go and other wildcard-import languages)
+    // Loop 2: synthesize from graph IMPORTS edges (Go and other wildcard-import
+    // languages whose edges live in the graph rather than ctx.importMap).
     for (const [filePath, importedFiles] of graphImports) {
-        synthesizeForFile(filePath, importedFiles);
+        const provider = getProviderForFile(filePath);
+        if (!provider)
+            continue;
+        dispatchSynthesis(filePath, importedFiles, provider);
     }
     // Build Python module-alias maps for namespace-import languages.
     // `import models` in app.py → moduleAliasMap['app.py']['models'] = 'models.py'

package/dist/core/lbug/csv-generator.js CHANGED Viewed

@@ -246,14 +246,17 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
         Interface: interfaceWriter,
         CodeElement: codeElemWriter,
     };
-    const seenFileIds = new Set();
+    // Deduplicate all node types — the pipeline can produce duplicate IDs across
+    // all symbol types (Class, Method, Function, etc.), not just File nodes.
+    // A single Set covering every label prevents PK violations on COPY.
+    const seenNodeIds = new Set();
     // --- SINGLE PASS over all nodes ---
     for (const node of graph.iterNodes()) {
+        if (seenNodeIds.has(node.id))
+            continue;
+        seenNodeIds.add(node.id);
         switch (node.label) {
             case 'File': {
-                if (seenFileIds.has(node.id))
-                    break;
-                seenFileIds.add(node.id);
                 const content = await extractContent(node, contentCache);
                 await fileWriter.addRow([
                     escapeCSVField(node.id),

package/dist/core/lbug/lbug-adapter.d.ts CHANGED Viewed

@@ -1,5 +1,33 @@
 import lbug from '@ladybugdb/core';
 import { KnowledgeGraph } from '../graph/types.js';
+/** Factory for creating WriteStreams — injectable for testing. */
+export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
+/** Result of splitting the relationship CSV into per-label-pair files. */
+export interface RelCsvSplitResult {
+    relHeader: string;
+    relsByPairMeta: Map<string, {
+        csvPath: string;
+        rows: number;
+    }>;
+    pairWriteStreams: Map<string, import('fs').WriteStream>;
+    skippedRels: number;
+    totalValidRels: number;
+}
+/**
+ * Split a relationship CSV into per-label-pair files on disk.
+ *
+ * Streams the CSV line-by-line, routing each relationship to a file named
+ * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
+ * drain listener per stream at a time, and readline resumes only when ALL
+ * backpressured streams have drained.
+ *
+ * @param csvPath       Path to the combined relationship CSV
+ * @param csvDir        Directory to write per-pair CSV files
+ * @param validTables   Set of valid node table names
+ * @param getNodeLabel  Function to extract the label from a node ID
+ * @param wsFactory     Optional WriteStream factory (defaults to fs.createWriteStream)
+ */
+export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
 /** Expose the current Database for pool adapter reuse in tests. */
 export declare const getDatabase: () => lbug.Database | null;
 /**

package/dist/core/lbug/lbug-adapter.js CHANGED Viewed

@@ -5,6 +5,122 @@ import path from 'path';
 import lbug from '@ladybugdb/core';
 import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, } from './schema.js';
 import { streamAllCSVsToDisk } from './csv-generator.js';
+/**
+ * Split a relationship CSV into per-label-pair files on disk.
+ *
+ * Streams the CSV line-by-line, routing each relationship to a file named
+ * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
+ * drain listener per stream at a time, and readline resumes only when ALL
+ * backpressured streams have drained.
+ *
+ * @param csvPath       Path to the combined relationship CSV
+ * @param csvDir        Directory to write per-pair CSV files
+ * @param validTables   Set of valid node table names
+ * @param getNodeLabel  Function to extract the label from a node ID
+ * @param wsFactory     Optional WriteStream factory (defaults to fs.createWriteStream)
+ */
+export const splitRelCsvByLabelPair = async (csvPath, csvDir, validTables, getNodeLabel, wsFactory = (p) => createWriteStream(p, 'utf-8')) => {
+    let relHeader = '';
+    const relsByPairMeta = new Map();
+    const pairWriteStreams = new Map();
+    let skippedRels = 0;
+    let totalValidRels = 0;
+    await new Promise((resolve, reject) => {
+        const inputStream = createReadStream(csvPath, 'utf-8');
+        const rl = createInterface({
+            input: inputStream,
+            crlfDelay: Infinity,
+        });
+        // Track which streams are already waiting for drain to prevent
+        // listener accumulation. rl.pause() is not synchronous — buffered
+        // line events continue firing after pause(), and without this guard
+        // each line targeting the same pairKey would add another drain listener.
+        const waitingForDrain = new Set();
+        let settled = false;
+        const cleanup = (err) => {
+            if (settled)
+                return;
+            settled = true;
+            try {
+                rl.close();
+            }
+            catch { }
+            try {
+                inputStream.destroy();
+            }
+            catch { }
+            for (const ws of pairWriteStreams.values()) {
+                try {
+                    ws.destroy();
+                }
+                catch { }
+            }
+            reject(err);
+        };
+        let isFirst = true;
+        rl.on('line', (line) => {
+            if (isFirst) {
+                relHeader = line;
+                isFirst = false;
+                return;
+            }
+            if (!line.trim())
+                return;
+            const match = line.match(/"([^"]*)","([^"]*)"/);
+            if (!match) {
+                skippedRels++;
+                return;
+            }
+            const fromLabel = getNodeLabel(match[1]);
+            const toLabel = getNodeLabel(match[2]);
+            if (!validTables.has(fromLabel) || !validTables.has(toLabel)) {
+                skippedRels++;
+                return;
+            }
+            const pairKey = `${fromLabel}|${toLabel}`;
+            let ws = pairWriteStreams.get(pairKey);
+            if (!ws) {
+                const pairCsvPath = path.join(csvDir, `rel_${fromLabel}_${toLabel}.csv`);
+                ws = wsFactory(pairCsvPath);
+                // If any per-pair WriteStream errors (disk full, EMFILE, etc.),
+                // tear down everything and reject the Promise. Without this handler,
+                // a stream error while rl is paused waiting for drain would cause
+                // the drain callback to never fire and the Promise to hang forever.
+                ws.on('error', cleanup);
+                ws.write(relHeader + '\n');
+                pairWriteStreams.set(pairKey, ws);
+                relsByPairMeta.set(pairKey, { csvPath: pairCsvPath, rows: 0 });
+            }
+            const ok = ws.write(line + '\n');
+            relsByPairMeta.get(pairKey).rows++;
+            totalValidRels++;
+            // Handle backpressure: pause reading when the write buffer is full,
+            // resume when the stream drains. Prevents unbounded memory growth
+            // on repos with millions of relationships.
+            // Guard with waitingForDrain to ensure only one drain listener is
+            // registered per stream at a time — rl.pause() doesn't stop buffered
+            // line events immediately. Only resume when ALL streams have drained
+            // to avoid writing into still-full streams.
+            if (!ok && !waitingForDrain.has(pairKey)) {
+                waitingForDrain.add(pairKey);
+                rl.pause();
+                ws.once('drain', () => {
+                    waitingForDrain.delete(pairKey);
+                    if (waitingForDrain.size === 0)
+                        rl.resume();
+                });
+            }
+        });
+        rl.on('close', () => {
+            if (!settled) {
+                settled = true;
+                resolve();
+            }
+        });
+        rl.on('error', cleanup);
+    });
+    return { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels };
+};
 let db = null;
 let conn = null;
 let currentDbPath = null;
@@ -215,69 +331,16 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
         }
     }
     // Bulk COPY relationships — split by FROM→TO label pair (LadybugDB requires it)
-    // Stream-read the relation CSV line by line and write directly to per-pair
-    // temp files on disk. This avoids accumulating potentially millions of CSV
-    // lines in memory which could exceed V8 Map or array limits on large repos.
-    let relHeader = '';
-    const relsByPairMeta = new Map();
-    const pairWriteStreams = new Map();
-    let skippedRels = 0;
-    let totalValidRels = 0;
-    await new Promise((resolve, reject) => {
-        const rl = createInterface({
-            input: createReadStream(csvResult.relCsvPath, 'utf-8'),
-            crlfDelay: Infinity,
-        });
-        let isFirst = true;
-        rl.on('line', (line) => {
-            if (isFirst) {
-                relHeader = line;
-                isFirst = false;
-                return;
-            }
-            if (!line.trim())
-                return;
-            const match = line.match(/"([^"]*)","([^"]*)"/);
-            if (!match) {
-                skippedRels++;
-                return;
-            }
-            const fromLabel = getNodeLabel(match[1]);
-            const toLabel = getNodeLabel(match[2]);
-            if (!validTables.has(fromLabel) || !validTables.has(toLabel)) {
-                skippedRels++;
-                return;
-            }
-            const pairKey = `${fromLabel}|${toLabel}`;
-            let ws = pairWriteStreams.get(pairKey);
-            if (!ws) {
-                const pairCsvPath = path.join(csvDir, `rel_${fromLabel}_${toLabel}.csv`);
-                ws = createWriteStream(pairCsvPath, 'utf-8');
-                ws.write(relHeader + '\n');
-                pairWriteStreams.set(pairKey, ws);
-                relsByPairMeta.set(pairKey, { csvPath: pairCsvPath, rows: 0 });
-            }
-            const ok = ws.write(line + '\n');
-            relsByPairMeta.get(pairKey).rows++;
-            totalValidRels++;
-            // Handle backpressure: pause reading when the write buffer is full,
-            // resume when the stream drains. Prevents unbounded memory growth
-            // on repos with millions of relationships.
-            if (!ok) {
-                rl.pause();
-                ws.once('drain', () => rl.resume());
-            }
-        });
-        rl.on('close', resolve);
-        rl.on('error', (err) => {
-            // Destroy all open write streams to avoid resource leaks
-            for (const ws of pairWriteStreams.values())
-                ws.destroy();
-            reject(err);
-        });
-    });
+    const { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels } = await splitRelCsvByLabelPair(csvResult.relCsvPath, csvDir, validTables, getNodeLabel);
     // Close all per-pair write streams before COPY
-    await Promise.all(Array.from(pairWriteStreams.values()).map((ws) => new Promise((resolve, reject) => ws.end((err) => (err ? reject(err) : resolve())))));
+    await Promise.all(Array.from(pairWriteStreams.values()).map((ws) => new Promise((resolve, reject) => {
+        const onError = (err) => reject(err);
+        ws.on('error', onError);
+        ws.end(() => {
+            ws.removeListener('error', onError);
+            resolve();
+        });
+    })));
     const insertedRels = totalValidRels;
     const warnings = [];
     if (insertedRels > 0) {

package/dist/core/run-analyze.js CHANGED Viewed

@@ -149,7 +149,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
                     const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH);
                     const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding }));
                     try {
-                        await executeWithReusedStatement(`CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, paramsList);
+                        await executeWithReusedStatement(`MERGE (e:CodeEmbedding {nodeId: $nodeId}) SET e.embedding = $embedding`, paramsList);
                     }
                     catch {
                         /* some may fail if node was removed, that's fine */

package/dist/server/api.js CHANGED Viewed

@@ -1277,6 +1277,26 @@ export const createServer = async (port, host = '127.0.0.1') => {
                     const lbugPath = path.join(entry.storagePath, 'lbug');
                     await withLbugDb(lbugPath, async () => {
                         const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js');
+                        // Skip nodes that already have embeddings — Kuzu forbids SET on vector-indexed properties.
+                        let skipNodeIds;
+                        try {
+                            const rows = await executeQuery('MATCH (e:CodeEmbedding) RETURN e.nodeId AS nodeId');
+                            if (rows && rows.length > 0) {
+                                skipNodeIds = new Set(rows.map((r) => r.nodeId ?? r[0]).filter(Boolean));
+                                console.log(`[embed] ${skipNodeIds.size} nodes already embedded — skipping in incremental run`);
+                            }
+                        }
+                        catch (err) {
+                            // Swallow only "table does not exist" — let real connection errors propagate.
+                            // Log so ops can see this path fire if Kuzu ever changes error wording.
+                            const msg = err?.message ?? '';
+                            if (msg.includes('does not exist') || msg.includes('not found')) {
+                                console.log(`[embed] CodeEmbedding table not yet present — full embedding run (${msg})`);
+                            }
+                            else {
+                                throw err;
+                            }
+                        }
                         await runEmbeddingPipeline(executeQuery, executeWithReusedStatement, (p) => {
                             embedJobManager.updateJob(job.id, {
                                 progress: {
@@ -1293,7 +1313,8 @@ export const createServer = async (port, host = '127.0.0.1') => {
                                                     : `${p.phase} (${p.percent}%)`,
                                 },
                             });
-                        });
+                        }, {}, // config: use defaults (runEmbeddingPipeline signature: executeQuery, executeWithReusedStatement, onProgress, config, skipNodeIds)
+                        skipNodeIds);
                     });
                     clearTimeout(embedTimeout);
                     releaseRepoLock(repoLockPath);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "gitnexus",
-  "version": "1.6.1",
+  "version": "1.6.2-rc.2",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",
@@ -84,7 +84,7 @@
     "uuid": "^13.0.0"
   },
   "optionalDependencies": {
-    "tree-sitter-dart": "https://github.com/UserNobody14/tree-sitter-dart/archive/80e23c07b64494f7e21090bb3450223ef0b192f4.tar.gz",
+    "tree-sitter-dart": "git+https://github.com/UserNobody14/tree-sitter-dart.git#80e23c07b64494f7e21090bb3450223ef0b192f4",
     "tree-sitter-kotlin": "^0.3.8",
     "tree-sitter-proto": "file:./vendor/tree-sitter-proto",
     "tree-sitter-swift": "^0.6.0"