npm - gitnexus - Versions diffs - 1.6.1 → 1.6.2-rc.10 - Mend

gitnexus 1.6.1 → 1.6.2-rc.10

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (37)

package/README.md +73 -0
package/dist/cli/analyze.js +23 -1
package/dist/core/embeddings/embedder.js +5 -0
package/dist/core/embeddings/embedding-pipeline.d.ts +12 -3
package/dist/core/embeddings/embedding-pipeline.js +79 -29
package/dist/core/group/extractors/grpc-extractor.d.ts +1 -1
package/dist/core/group/extractors/grpc-extractor.js +28 -13
package/dist/core/group/extractors/http-route-extractor.js +35 -5
package/dist/core/group/extractors/manifest-extractor.js +66 -9
package/dist/core/group/sync.js +49 -1
package/dist/core/ingestion/language-provider.d.ts +24 -5
package/dist/core/ingestion/languages/c-cpp.js +2 -2
package/dist/core/ingestion/languages/dart.d.ts +1 -1
package/dist/core/ingestion/languages/dart.js +2 -2
package/dist/core/ingestion/languages/go.d.ts +1 -1
package/dist/core/ingestion/languages/go.js +2 -2
package/dist/core/ingestion/languages/ruby.js +1 -1
package/dist/core/ingestion/languages/swift.d.ts +1 -1
package/dist/core/ingestion/languages/swift.js +2 -2
package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +36 -1
package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +143 -5
package/dist/core/lbug/csv-generator.js +7 -4
package/dist/core/lbug/lbug-adapter.d.ts +38 -0
package/dist/core/lbug/lbug-adapter.js +189 -65
package/dist/core/lbug/schema.d.ts +7 -0
package/dist/core/lbug/schema.js +9 -1
package/dist/core/run-analyze.js +18 -4
package/dist/mcp/core/embedder.js +5 -0
package/dist/server/api.js +9 -1
package/package.json +6 -4
package/scripts/build-tree-sitter-proto.cjs +82 -0
package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
package/vendor/tree-sitter-proto/package.json +1 -7

package/README.md CHANGED Viewed

@@ -234,6 +234,79 @@ Installed automatically by both `gitnexus analyze` (per-repo) and `gitnexus setu
 - Node.js >= 18
 - Git repository (uses git for commit tracking)
+## Release candidates
+Stable releases publish to the default `latest` dist-tag. When a pull request
+with non-documentation changes merges into `main`, an automated workflow also
+publishes a prerelease build under the `rc` dist-tag, so early adopters can
+try in-flight fixes without waiting for the next stable cut. (Docs-only
+merges are skipped.)
+```bash
+# Try the latest release candidate (pre-stable — may change at any time)
+npm install -g gitnexus@rc
+# — or —
+npx gitnexus@rc analyze
+```
+Release-candidate versions follow the standard semver prerelease format
+`X.Y.Z-rc.N`, where `X.Y.Z` is the next stable target (bumped from the
+current `latest` by patch by default; `minor` or `major` when kicking off a
+bigger cycle) and `N` increments per published rc. Example sequence:
+`1.6.2-rc.1`, `1.6.2-rc.2`, …, then once `1.6.2` ships stable,
+`1.6.3-rc.1`. See the [Releases page](https://github.com/abhigyanpatwari/GitNexus/releases)
+for the full list; stable `latest` is unaffected.
+## Troubleshooting
+### `Cannot destructure property 'package' of 'node.target' as it is null`
+This crash was caused by a dependency URL format that is incompatible with
+certain npm/arborist versions ([npm/cli#8126](https://github.com/npm/cli/issues/8126)).
+It is fixed in **gitnexus v1.6.2+**. Upgrade to the latest version:
+```bash
+npx gitnexus@latest analyze          # always uses the newest release
+# — or —
+npm install -g gitnexus@latest       # upgrade a global install
+```
+If you still hit npm install issues after upgrading, these generic workarounds
+may help:
+```bash
+npm install -g npm@latest            # update npm itself
+npm cache clean --force              # clear a possibly corrupt cache
+```
+### Installation fails with native module errors
+Some optional language grammars (Dart, Kotlin, Swift) require native compilation. If they fail, GitNexus still works — those languages will be skipped.
+If `npm install -g gitnexus` fails on native modules:
+```bash
+# Ensure build tools are available (Linux/macOS)
+# Ubuntu/Debian: sudo apt install python3 make g++
+# macOS: xcode-select --install
+# Retry installation
+npm install -g gitnexus
+```
+### Analysis runs out of memory
+For very large repositories:
+```bash
+# Increase Node.js heap size
+NODE_OPTIONS="--max-old-space-size=16384" npx gitnexus analyze
+# Exclude large directories
+echo "vendor/" >> .gitnexusignore
+echo "dist/" >> .gitnexusignore
+```
 ## Privacy
 - All processing happens locally on your machine

package/dist/cli/analyze.js CHANGED Viewed

@@ -232,7 +232,7 @@ export const analyzeCommand = async (inputPath, options) => {
         bar.stop();
         const msg = err.message || String(err);
         console.error(`\n  Analysis failed: ${msg}\n`);
-        // Provide helpful guidance for known large-repo failure modes
+        // Provide helpful guidance for known failure modes
         if (msg.includes('Maximum call stack size exceeded') ||
             msg.includes('call stack') ||
             msg.includes('Map maximum size') ||
@@ -248,6 +248,28 @@ export const analyzeCommand = async (inputPath, options) => {
             console.error('    3. Increase stack size: NODE_OPTIONS="--stack-size=4096"');
             console.error('');
         }
+        else if (msg.includes('ERESOLVE') || msg.includes('Could not resolve dependency')) {
+            // Note: the original arborist "Cannot destructure property 'package' of
+            // 'node.target'" crash happens inside npm *before* gitnexus code runs,
+            // so it can't be caught here.  This branch handles dependency-resolution
+            // errors that surface at runtime (e.g. dynamic require failures).
+            console.error('  This looks like an npm dependency resolution issue.');
+            console.error('  Suggestions:');
+            console.error('    1. Clear the npm cache:    npm cache clean --force');
+            console.error('    2. Update npm:             npm install -g npm@latest');
+            console.error('    3. Reinstall gitnexus:     npm install -g gitnexus@latest');
+            console.error('    4. Or try npx directly:    npx gitnexus@latest analyze');
+            console.error('');
+        }
+        else if (msg.includes('MODULE_NOT_FOUND') ||
+            msg.includes('Cannot find module') ||
+            msg.includes('ERR_MODULE_NOT_FOUND')) {
+            console.error('  A required module could not be loaded. The installation may be corrupt.');
+            console.error('  Suggestions:');
+            console.error('    1. Reinstall:   npm install -g gitnexus@latest');
+            console.error('    2. Clear cache: npm cache clean --force && npx gitnexus@latest analyze');
+            console.error('');
+        }
         process.exitCode = 1;
         return;
     }

package/dist/core/embeddings/embedder.js CHANGED Viewed

@@ -131,6 +131,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
         try {
             // Configure transformers.js environment
             env.allowLocalModels = false;
+            // Default cache to user-writable location. transformers.js defaults to
+            // ./node_modules/.cache inside its own install dir, which is unwritable
+            // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
+            // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
+            env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
             const isDev = process.env.NODE_ENV === 'development';
             if (isDev) {
                 console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);

package/dist/core/embeddings/embedding-pipeline.d.ts CHANGED Viewed

@@ -8,7 +8,14 @@
  * 4. Update LadybugDB with embeddings
  * 5. Create vector index for semantic search
  */
-import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
+import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult } from './types.js';
+/**
+ * Compute a stable content fingerprint for an embeddable node.
+ * Used to detect when the underlying text has changed so stale vectors
+ * can be replaced (DELETE-then-INSERT, the Kuzu-sanctioned pattern for
+ * vector-indexed rows).
+ */
+export declare const contentHashForNode: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
 /**
  * Progress callback type
  */
@@ -20,9 +27,11 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
  * @param onProgress - Callback for progress updates
  * @param config - Optional configuration override
- * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
+ * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
+ *        Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
+ *        and re-embedded; nodes not in the map are embedded fresh.
  */
-export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
+export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, existingEmbeddings?: Map<string, string>) => Promise<void>;
 /**
  * Perform semantic search using the vector index
  *

package/dist/core/embeddings/embedding-pipeline.js CHANGED Viewed

@@ -8,10 +8,23 @@
  * 4. Update LadybugDB with embeddings
  * 5. Create vector index for semantic search
  */
+import { createHash } from 'crypto';
 import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady, } from './embedder.js';
-import { generateBatchEmbeddingTexts } from './text-generator.js';
+import { generateEmbeddingText, generateBatchEmbeddingTexts } from './text-generator.js';
 import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
+import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, } from '../lbug/schema.js';
+import { loadVectorExtension } from '../lbug/lbug-adapter.js';
 const isDev = process.env.NODE_ENV === 'development';
+/**
+ * Compute a stable content fingerprint for an embeddable node.
+ * Used to detect when the underlying text has changed so stale vectors
+ * can be replaced (DELETE-then-INSERT, the Kuzu-sanctioned pattern for
+ * vector-indexed rows).
+ */
+export const contentHashForNode = (node, config = {}) => {
+    const text = generateEmbeddingText(node, config);
+    return createHash('sha1').update(text).digest('hex');
+};
 /**
  * Query all embeddable nodes from LadybugDB
  * Uses table-specific queries (File has different schema than code elements)
@@ -67,34 +80,26 @@ const queryEmbeddableNodes = async (executeQuery) => {
  * that occurs when UPDATEing nodes with large content fields
  */
 const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
-    // INSERT into separate embedding table - much more memory efficient!
-    const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
-    const paramsList = updates.map((u) => ({ nodeId: u.id, embedding: u.embedding }));
+    // MERGE instead of CREATE — idempotent, handles concurrent analyzes and partial prior runs
+    const cypher = `MERGE (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) SET e.embedding = $embedding, e.contentHash = $contentHash`;
+    const paramsList = updates.map((u) => ({
+        nodeId: u.id,
+        embedding: u.embedding,
+        contentHash: u.contentHash,
+    }));
     await executeWithReusedStatement(cypher, paramsList);
 };
 /**
  * Create the vector index for semantic search
- * Now indexes the separate CodeEmbedding table
+ * Now indexes the separate CodeEmbedding table.
+ * Delegates extension loading to lbug-adapter's loadVectorExtension(),
+ * which owns the VECTOR extension lifecycle and state tracking.
  */
-let vectorExtensionLoaded = false;
 const createVectorIndex = async (executeQuery) => {
-    // LadybugDB v0.15+ requires explicit VECTOR extension loading (once per session)
-    if (!vectorExtensionLoaded) {
-        try {
-            await executeQuery('INSTALL VECTOR');
-            await executeQuery('LOAD EXTENSION VECTOR');
-            vectorExtensionLoaded = true;
-        }
-        catch {
-            // Extension may already be loaded — CREATE_VECTOR_INDEX will fail clearly if not
-            vectorExtensionLoaded = true;
-        }
-    }
-    const cypher = `
-    CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
-  `;
+    // Delegate to the adapter which tracks loaded state and handles DB reconnect resets
+    await loadVectorExtension();
     try {
-        await executeQuery(cypher);
+        await executeQuery(CREATE_VECTOR_INDEX_QUERY);
     }
     catch (error) {
         // Index might already exist
@@ -110,9 +115,11 @@ const createVectorIndex = async (executeQuery) => {
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
  * @param onProgress - Callback for progress updates
  * @param config - Optional configuration override
- * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
+ * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
+ *        Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
+ *        and re-embedded; nodes not in the map are embedded fresh.
  */
-export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
+export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, existingEmbeddings) => {
     const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
     try {
         // Phase 1: Load embedding model
@@ -141,12 +148,50 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
         }
         // Phase 2: Query embeddable nodes
         let nodes = await queryEmbeddableNodes(executeQuery);
-        // Incremental mode: filter out nodes that already have embeddings
-        if (skipNodeIds && skipNodeIds.size > 0) {
+        // Incremental mode: compare content hashes, delete stale rows, skip fresh ones.
+        // Computed hashes for stale nodes are cached so batchInsertEmbeddings can reuse them
+        // (avoids double computation).
+        const computedStaleHashes = new Map();
+        if (existingEmbeddings && existingEmbeddings.size > 0) {
             const beforeCount = nodes.length;
-            nodes = nodes.filter((n) => !skipNodeIds.has(n.id));
+            const staleNodeIds = [];
+            nodes = nodes.filter((n) => {
+                const existingHash = existingEmbeddings.get(n.id);
+                if (existingHash === undefined) {
+                    // New node — needs embedding
+                    return true;
+                }
+                const currentHash = contentHashForNode(n, finalConfig);
+                if (currentHash !== existingHash) {
+                    // Content changed — cache hash for reuse during insert, mark for DELETE + re-embed
+                    computedStaleHashes.set(n.id, currentHash);
+                    staleNodeIds.push(n.id);
+                    return true;
+                }
+                // Hash matches — skip (fresh); no need to cache hash for skipped nodes
+                return false;
+            });
+            // DELETE stale embedding rows so they can be re-inserted
+            // (Kuzu forbids SET on vector-indexed properties; DELETE-then-INSERT is the sanctioned pattern)
+            if (staleNodeIds.length > 0) {
+                if (isDev) {
+                    console.log(`🔄 Deleting ${staleNodeIds.length} stale embedding rows for re-embed`);
+                }
+                try {
+                    await executeWithReusedStatement(`MATCH (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) DELETE e`, staleNodeIds.map((nodeId) => ({ nodeId })));
+                }
+                catch (err) {
+                    // "does not exist" = rows already gone — safe to proceed.
+                    // All other errors risk vector-index corruption (Kuzu requires DELETE-before-INSERT
+                    // for vector-indexed properties) — propagate so the pipeline aborts cleanly.
+                    const msg = err instanceof Error ? err.message : String(err);
+                    if (!msg.includes('does not exist')) {
+                        throw new Error(`[embed] Failed to delete stale embedding rows — aborting to prevent vector-index corruption: ${msg}`);
+                    }
+                }
+            }
             if (isDev) {
-                console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
+                console.log(`📦 Incremental embeddings: ${beforeCount} total, ${existingEmbeddings.size} cached, ${staleNodeIds.length} stale, ${nodes.length} to embed`);
             }
         }
         const totalNodes = nodes.length;
@@ -154,6 +199,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
             console.log(`📊 Found ${totalNodes} embeddable nodes`);
         }
         if (totalNodes === 0) {
+            // Ensure the vector index exists even when no new nodes need embedding.
+            // A prior crash or first-time incremental run may have left CodeEmbedding
+            // rows without ever reaching index creation.
+            await createVectorIndex(executeQuery);
             onProgress({
                 phase: 'ready',
                 percent: 100,
@@ -186,6 +235,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
             const updates = batch.map((node, i) => ({
                 id: node.id,
                 embedding: embeddingToArray(embeddings[i]),
+                contentHash: computedStaleHashes.get(node.id) ?? contentHashForNode(node, finalConfig),
             }));
             await batchInsertEmbeddings(executeWithReusedStatement, updates);
             processedNodes += batch.length;
@@ -256,7 +306,7 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
     const queryVecStr = `[${queryVec.join(',')}]`;
     // Query the vector index on CodeEmbedding to get nodeIds and distances
     const vectorQuery = `
-    CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
+    CALL QUERY_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}',
       CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${k})
     YIELD node AS emb, distance
     WITH emb, distance

package/dist/core/group/extractors/grpc-extractor.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export interface ProtoServiceInfo {
     protoPath: string;
 }
 export declare function buildProtoMap(repoPath: string): Promise<Map<string, ProtoServiceInfo[]>>;
-export declare function resolveProtoConflict(_serviceName: string, sourceFilePath: string, candidates: ProtoServiceInfo[]): ProtoServiceInfo | null;
+export declare function resolveProtoConflict(serviceName: string, sourceFilePath: string, candidates: ProtoServiceInfo[]): ProtoServiceInfo | null;
 export declare function serviceContractId(pkg: string, serviceName: string): string;
 export declare class GrpcExtractor implements ContractExtractor {
     type: "grpc";

package/dist/core/group/extractors/grpc-extractor.js CHANGED Viewed

@@ -263,23 +263,31 @@ export async function buildProtoMap(repoPath) {
     const { servicesByName } = await buildProtoContext(repoPath);
     return servicesByName;
 }
-export function resolveProtoConflict(_serviceName, sourceFilePath, candidates) {
+export function resolveProtoConflict(serviceName, sourceFilePath, candidates) {
     if (candidates.length === 0)
         return null;
     if (candidates.length === 1)
         return candidates[0];
     const sourceDir = normalizeProtoPath(path.dirname(sourceFilePath));
-    let best = candidates[0];
-    let bestScore = -1;
-    for (const c of candidates) {
+    const scored = candidates.map((c) => {
         const protoDir = normalizeProtoPath(path.dirname(c.protoPath));
-        const sharedRun = longestSharedSegmentRun(sourceDir, protoDir);
-        if (sharedRun > bestScore) {
-            bestScore = sharedRun;
-            best = c;
-        }
+        return { candidate: c, score: longestSharedSegmentRun(sourceDir, protoDir) };
+    });
+    let maxScore = -1;
+    for (const s of scored) {
+        if (s.score > maxScore)
+            maxScore = s.score;
     }
-    return best;
+    const winners = scored.filter((s) => s.score === maxScore);
+    // Path heuristic cannot uniquely identify a winner — refuse to guess.
+    // Ties (including all-zero ties) would otherwise silently merge unrelated
+    // services under a fabricated package-qualified contract id.
+    if (winners.length !== 1) {
+        const paths = candidates.map((c) => c.protoPath).join(', ');
+        console.warn(`[grpc-extractor] Ambiguous proto resolution for service "${serviceName}" from ${sourceFilePath}: ${winners.length} candidates tied at score ${maxScore} among [${paths}] — skipping canonical contract`);
+        return null;
+    }
+    return winners[0].candidate;
 }
 export function serviceContractId(pkg, serviceName) {
     const prefix = pkg ? `${pkg}.${serviceName}` : serviceName;
@@ -339,7 +347,9 @@ export class GrpcExtractor {
                 continue;
             }
             for (const d of detections) {
-                out.push(this.detectionToContract(d, rel, protoMap));
+                const contract = this.detectionToContract(d, rel, protoMap);
+                if (contract)
+                    out.push(contract);
             }
         }
         return this.dedupe(out);
@@ -352,8 +362,13 @@ export class GrpcExtractor {
      * based on whether the proto map had an entry.
      */
     detectionToContract(d, filePath, protoMap) {
-        const candidates = protoMap.get(d.serviceName);
-        const proto = resolveProtoConflict(d.serviceName, filePath, candidates ?? []);
+        const candidates = protoMap.get(d.serviceName) ?? [];
+        const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
+        // If there were proto candidates but resolution was ambiguous, skip
+        // contract emission rather than fabricating a package-qualified id from
+        // an arbitrary candidate. resolveProtoConflict already warned.
+        if (candidates.length > 0 && proto === null)
+            return null;
         const pkg = proto?.package ?? '';
         const cid = d.methodName
             ? contractId(pkg, d.serviceName, d.methodName)

package/dist/core/group/extractors/http-route-extractor.js CHANGED Viewed

@@ -214,7 +214,29 @@ export class HttpRouteExtractor {
             const providerDetections = detections.filter((d) => d.role === 'provider');
             let handlerName = null;
             const normalizedRoute = normalizeHttpPath(routePath);
-            const match = providerDetections.find((d) => normalizeHttpPath(d.path) === normalizedRoute);
+            // Candidates share the same normalized path. When multiple
+            // detections at the same path exist (e.g. GET + POST /api/orders
+            // in one router), a blind `.find()` silently returned the first
+            // verb — attaching the wrong handler and, when method was not
+            // already pinned by the route reason, the wrong method too.
+            // Disambiguate by method when we know it; refuse to guess when
+            // we don't.
+            const candidates = providerDetections.filter((d) => normalizeHttpPath(d.path) === normalizedRoute);
+            let match;
+            const ambiguousCandidates = !method && candidates.length > 1;
+            if (method) {
+                match = candidates.find((d) => d.method === method);
+            }
+            else if (candidates.length === 1) {
+                match = candidates[0];
+            }
+            // else: multiple candidates + unknown method → leave match
+            // undefined so handlerName stays null and skip symbol
+            // enrichment below, keeping the file-basename fallback instead
+            // of letting pickSymbolUid silently pick the first Function /
+            // Method in the file (which reintroduces the mis-attribution
+            // we were trying to avoid). Method stays at the conservative
+            // 'GET' default set below.
             if (match) {
                 if (!method)
                     method = match.method;
@@ -228,7 +250,7 @@ export class HttpRouteExtractor {
             let symbolName = path.basename(filePath) || 'handler';
             let symPath = filePath;
             const fileId = row.fileId ?? row[0];
-            if (fileId) {
+            if (fileId && !ambiguousCandidates) {
                 try {
                     const syms = await db(CONTAINS_QUERY, { fileId });
                     if (syms.length > 0) {
@@ -308,9 +330,17 @@ export class HttpRouteExtractor {
             // Prefer the plugin's detected method if we can find a matching
             // fetch/axios call in the same file.
             const detections = filePath ? getDetections(filePath) : [];
-            const inferred = detections.find((d) => d.role === 'consumer' && normalizeConsumerPath(d.path) === pathNorm);
-            if (inferred)
-                method = inferred.method;
+            // Symmetric to the provider path: if multiple consumer calls in
+            // the same file share the same normalized path (e.g. a GET
+            // fetch AND a POST fetch to `/api/orders`), `.find()` silently
+            // picked the first verb and keyed the contract id on the wrong
+            // method. With no upstream method signal here, refuse to guess
+            // when candidates are ambiguous — leave `method` at its
+            // conservative 'GET' default.
+            const consumerCandidates = detections.filter((d) => d.role === 'consumer' && normalizeConsumerPath(d.path) === pathNorm);
+            if (consumerCandidates.length === 1) {
+                method = consumerCandidates[0].method;
+            }
             const cid = contractIdFor(method, pathNorm);
             let symbolUid = '';
             let symbolName = 'fetch';

package/dist/core/group/extractors/manifest-extractor.js CHANGED Viewed

@@ -16,6 +16,34 @@ function normalizeRoutePath(raw) {
         return '/';
     return collapsed.replace(/\/+$/, '');
 }
+/**
+ * Split a manifest HTTP contract into its optional `METHOD::` prefix and
+ * its path portion.
+ *
+ * `buildContractId` recommends the explicit-method form `GET::/api/orders`
+ * in group.yaml; if we hand that raw string to `normalizeRoutePath` we get
+ * `/GET::/api/orders`, which can never match `Route.name = "/api/orders"`
+ * in the graph. This helper extracts the path portion so the Cypher
+ * lookup uses the canonical route name.
+ *
+ * The method prefix regex mirrors `buildContractId` (line ~251) for
+ * symmetry: case-insensitive `[A-Za-z]+` followed by `::`. The captured
+ * method is upper-cased for downstream use; method-constrained matching
+ * against `HANDLES_ROUTE` is a future enhancement (not yet wired).
+ *
+ * Edge cases:
+ *  - `"::/api/orders"` — empty method portion, no alpha prefix match, so
+ *    the whole string is treated as a bare path (matches buildContractId
+ *    which also requires `[A-Za-z]+`).
+ *  - `"GET::"` — method with empty path, returns `{ method: 'GET', path: '' }`;
+ *    `normalizeRoutePath('')` resolves to `/` for caller.
+ */
+function parseHttpContract(raw) {
+    const match = raw.match(/^([A-Za-z]+)::/);
+    if (!match)
+        return { method: null, path: raw };
+    return { method: match[1].toUpperCase(), path: raw.slice(match[0].length) };
+}
 /**
  * Stable synthetic symbolUid for a manifest-declared contract whose target
  * symbol could not be resolved against the per-repo graph (resolveSymbol
@@ -40,14 +68,29 @@ export function manifestSymbolUid(repo, contractId) {
 }
 export class ManifestExtractor {
     async extractFromManifest(links, dbExecutors) {
-        const contracts = [];
-        const crossLinks = [];
-        for (const link of links) {
+        const resolveCache = new Map();
+        const resolveOnce = (repo, link) => {
+            const key = `${repo}\u0000${link.type}\u0000${link.contract}`;
+            let pending = resolveCache.get(key);
+            if (!pending) {
+                pending = this.resolveSymbol(repo, link, dbExecutors);
+                resolveCache.set(key, pending);
+            }
+            return pending;
+        };
+        const perLink = await Promise.all(links.map(async (link) => {
             const contractId = this.buildContractId(link.type, link.contract);
             const providerRepo = link.role === 'provider' ? link.from : link.to;
             const consumerRepo = link.role === 'provider' ? link.to : link.from;
-            const providerSymbol = await this.resolveSymbol(providerRepo, link, dbExecutors);
-            const consumerSymbol = await this.resolveSymbol(consumerRepo, link, dbExecutors);
+            const [providerSymbol, consumerSymbol] = await Promise.all([
+                resolveOnce(providerRepo, link),
+                resolveOnce(consumerRepo, link),
+            ]);
+            return { link, contractId, providerRepo, consumerRepo, providerSymbol, consumerSymbol };
+        }));
+        const contracts = [];
+        const crossLinks = [];
+        for (const { link, contractId, providerRepo, consumerRepo, providerSymbol, consumerSymbol, } of perLink) {
             const providerRef = providerSymbol || { filePath: '', name: link.contract };
             const consumerRef = consumerSymbol || { filePath: '', name: link.contract };
             // When the resolver finds a real graph symbol we keep its uid, otherwise
@@ -111,7 +154,15 @@ export class ManifestExtractor {
                 // core/ingestion/pipeline.ts ensureSlash + generateId('Route', ...)).
                 // Normalize the manifest contract the same way so a user-written
                 // "/api/orders" matches "api/orders" in the graph.
-                const normalized = normalizeRoutePath(link.contract);
+                //
+                // The contract may also use the explicit-method form "GET::/api/orders"
+                // recommended by buildContractId. Strip the METHOD:: prefix before
+                // normalizing — otherwise `normalizeRoutePath('GET::/api/orders')`
+                // returns `/GET::/api/orders` and never matches Route.name. The
+                // captured method is not yet used to constrain the Cypher query
+                // (method-aware HANDLES_ROUTE matching is a future enhancement).
+                const parsed = parseHttpContract(link.contract);
+                const normalized = normalizeRoutePath(parsed.path);
                 rows = await executor(`MATCH (handler)-[r:CodeRelation {type: 'HANDLES_ROUTE'}]->(route:Route)
            WHERE route.name = $normalized
            RETURN handler.id AS uid, handler.name AS name, handler.filePath AS filePath
@@ -214,9 +265,15 @@ export class ManifestExtractor {
     buildContractId(type, contract) {
         switch (type) {
             case 'http': {
-                if (/^[A-Za-z]+::/.test(contract))
-                    return `http::${contract}`;
-                return `http::*::${contract}`;
+                // Canonicalize method casing and path separators so logically
+                // equivalent inputs (`get::/api/orders` vs `GET::/api/orders`,
+                // or trailing-slash variants) produce the same contractId and
+                // matching `manifestSymbolUid` fallback. Without this, raw
+                // user casing leaks into cross-impact join keys and fragments
+                // matches across repos.
+                const { method, path: rawPath } = parseHttpContract(contract);
+                const normalizedPath = normalizeRoutePath(rawPath);
+                return method ? `http::${method}::${normalizedPath}` : `http::*::${normalizedPath}`;
             }
             case 'grpc':
                 return `grpc::${contract}`;

package/dist/core/group/sync.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { readRegistry } from '../../storage/repo-manager.js';
 import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
 import { GrpcExtractor } from './extractors/grpc-extractor.js';
 import { TopicExtractor } from './extractors/topic-extractor.js';
+import { ManifestExtractor } from './extractors/manifest-extractor.js';
 import { runExactMatch } from './matching.js';
 import { detectServiceBoundaries, assignService } from './service-boundary-detector.js';
 import { writeContractRegistry } from './storage.js';
@@ -34,10 +35,28 @@ function defaultResolveHandle(allEntries) {
         };
     };
 }
+/**
+ * Dedupe cross-links sharing the same consumer endpoint (`from`), provider endpoint (`to`), `type` and `contractId`; the first occurrence wins, so the caller controls precedence (e.g., pass manifest links first).
+ * @param links - Iterable of cross-links; each must expose `from.repo`, `from.symbolUid`, `to.repo`, `to.symbolUid`, `type` and `contractId`. It is only read, never mutated.
+ * @returns A new array holding the first-seen link objects (same references, not copies) in their original relative order.
+ */
+function dedupeCrossLinks(links) {
+    const seen = new Set(); // identity keys of the links already emitted
+    const out = [];
+    for (const link of links) {
+        const key = `${link.from.repo}::${link.from.symbolUid}|${link.to.repo}::${link.to.symbolUid}|${link.type}|${link.contractId}`; // NOTE(review): fields are joined with '::' and '|' unescaped — values containing these separators could in theory collide; confirm ids never do
+        if (seen.has(key)) // same endpoints, type and contract as an earlier link: drop this later occurrence
+            continue;
+        seen.add(key);
+        out.push(link);
+    }
+    return out;
+}
 export async function syncGroup(config, opts) {
     const missingRepos = [];
     const repoSnapshots = {};
     let autoContracts = [];
+    let manifestCrossLinks = [];
     let dbExecutors;
     const eo = opts?.extractorOverride;
     if (eo && eo.length === 0) {
@@ -124,8 +143,37 @@ export async function syncGroup(config, opts) {
             }
         }
     }
+    // Process manifest links declared in group.yaml.
+    // ManifestExtractor is fully implemented but was never wired into this
+    // pipeline — config.links were parsed and validated but silently dropped.
+    // Placed after the DB try/finally: resolveSymbol falls back to synthetic
+    // UIDs when dbExecutors is undefined or a pool is closed, so cross-links
+    // are always generated regardless of whether real DB executors are available.
+    if (config.links.length > 0) {
+        // Warn about dangling links that reference repos not declared in config.repos.
+        // They still generate cross-links via synthetic UIDs (determinism is preserved),
+        // but the operator probably meant something that now silently does nothing useful.
+        const knownRepos = new Set(Object.keys(config.repos));
+        for (const link of config.links) {
+            const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r));
+            if (dangling.length > 0) {
+                console.warn(`[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`);
+            }
+        }
+        const manifestEx = new ManifestExtractor();
+        const manifestResult = await manifestEx.extractFromManifest(config.links, dbExecutors);
+        autoContracts.push(...manifestResult.contracts);
+        manifestCrossLinks = manifestResult.crossLinks;
+        if (opts?.verbose) {
+            console.log(`  manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`);
+        }
+    }
     const { matched, unmatched } = runExactMatch(autoContracts);
-    const crossLinks = matched;
+    // Dedupe cross-links. Manifest contracts participate in runExactMatch, so a
+    // manifest-declared link can also emit a matchType:'exact' CrossLink with the
+    // same endpoints. Prefer the manifest version — it reflects operator intent
+    // and carries matchType:'manifest' which downstream consumers may rely on.
+    const crossLinks = dedupeCrossLinks([...manifestCrossLinks, ...matched]);
     const allContracts = autoContracts;
     const registry = {
         version: 1,