gitnexus 1.6.1 → 1.6.2-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +73 -0
  2. package/dist/cli/analyze.js +23 -1
  3. package/dist/core/embeddings/embedder.js +5 -0
  4. package/dist/core/embeddings/embedding-pipeline.d.ts +12 -3
  5. package/dist/core/embeddings/embedding-pipeline.js +79 -29
  6. package/dist/core/group/extractors/grpc-extractor.d.ts +1 -1
  7. package/dist/core/group/extractors/grpc-extractor.js +28 -13
  8. package/dist/core/group/extractors/http-route-extractor.js +35 -5
  9. package/dist/core/group/extractors/manifest-extractor.js +66 -9
  10. package/dist/core/group/sync.js +49 -1
  11. package/dist/core/ingestion/language-provider.d.ts +24 -5
  12. package/dist/core/ingestion/languages/c-cpp.js +2 -2
  13. package/dist/core/ingestion/languages/dart.d.ts +1 -1
  14. package/dist/core/ingestion/languages/dart.js +2 -2
  15. package/dist/core/ingestion/languages/go.d.ts +1 -1
  16. package/dist/core/ingestion/languages/go.js +2 -2
  17. package/dist/core/ingestion/languages/ruby.js +1 -1
  18. package/dist/core/ingestion/languages/swift.d.ts +1 -1
  19. package/dist/core/ingestion/languages/swift.js +2 -2
  20. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +36 -1
  21. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +143 -5
  22. package/dist/core/lbug/csv-generator.js +7 -4
  23. package/dist/core/lbug/lbug-adapter.d.ts +38 -0
  24. package/dist/core/lbug/lbug-adapter.js +189 -65
  25. package/dist/core/lbug/schema.d.ts +7 -0
  26. package/dist/core/lbug/schema.js +9 -1
  27. package/dist/core/run-analyze.js +18 -4
  28. package/dist/mcp/core/embedder.js +5 -0
  29. package/dist/server/api.js +9 -1
  30. package/package.json +6 -4
  31. package/scripts/build-tree-sitter-proto.cjs +82 -0
  32. package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
  33. package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
  34. package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
  35. package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
  36. package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
  37. package/vendor/tree-sitter-proto/package.json +1 -7
package/README.md CHANGED
@@ -234,6 +234,79 @@ Installed automatically by both `gitnexus analyze` (per-repo) and `gitnexus setu
234
234
  - Node.js >= 18
235
235
  - Git repository (uses git for commit tracking)
236
236
 
237
+ ## Release candidates
238
+
239
+ Stable releases publish to the default `latest` dist-tag. When a pull request
240
+ with non-documentation changes merges into `main`, an automated workflow also
241
+ publishes a prerelease build under the `rc` dist-tag, so early adopters can
242
+ try in-flight fixes without waiting for the next stable cut. (Docs-only
243
+ merges are skipped.)
244
+
245
+ ```bash
246
+ # Try the latest release candidate (pre-stable — may change at any time)
247
+ npm install -g gitnexus@rc
248
+ # — or —
249
+ npx gitnexus@rc analyze
250
+ ```
251
+
252
+ Release-candidate versions follow the standard semver prerelease format
253
+ `X.Y.Z-rc.N`, where `X.Y.Z` is the next stable target (bumped from the
254
+ current `latest` by patch by default; `minor` or `major` when kicking off a
255
+ bigger cycle) and `N` increments per published rc. Example sequence:
256
+ `1.6.2-rc.1`, `1.6.2-rc.2`, …, then once `1.6.2` ships stable,
257
+ `1.6.3-rc.1`. See the [Releases page](https://github.com/abhigyanpatwari/GitNexus/releases)
258
+ for the full list; stable `latest` is unaffected.
259
+
260
+ ## Troubleshooting
261
+
262
+ ### `Cannot destructure property 'package' of 'node.target' as it is null`
263
+
264
+ This crash was caused by a dependency URL format that is incompatible with
265
+ certain npm/arborist versions ([npm/cli#8126](https://github.com/npm/cli/issues/8126)).
266
+ It is fixed in **gitnexus v1.6.2+**. Upgrade to the latest version:
267
+
268
+ ```bash
269
+ npx gitnexus@latest analyze # always uses the newest release
270
+ # — or —
271
+ npm install -g gitnexus@latest # upgrade a global install
272
+ ```
273
+
274
+ If you still hit npm install issues after upgrading, these generic workarounds
275
+ may help:
276
+
277
+ ```bash
278
+ npm install -g npm@latest # update npm itself
279
+ npm cache clean --force # clear a possibly corrupt cache
280
+ ```
281
+
282
+ ### Installation fails with native module errors
283
+
284
+ Some optional language grammars (Dart, Kotlin, Swift) require native compilation. If they fail, GitNexus still works — those languages will be skipped.
285
+
286
+ If `npm install -g gitnexus` fails on native modules:
287
+
288
+ ```bash
289
+ # Ensure build tools are available (Linux/macOS)
290
+ # Ubuntu/Debian: sudo apt install python3 make g++
291
+ # macOS: xcode-select --install
292
+
293
+ # Retry installation
294
+ npm install -g gitnexus
295
+ ```
296
+
297
+ ### Analysis runs out of memory
298
+
299
+ For very large repositories:
300
+
301
+ ```bash
302
+ # Increase Node.js heap size
303
+ NODE_OPTIONS="--max-old-space-size=16384" npx gitnexus analyze
304
+
305
+ # Exclude large directories
306
+ echo "vendor/" >> .gitnexusignore
307
+ echo "dist/" >> .gitnexusignore
308
+ ```
309
+
237
310
  ## Privacy
238
311
 
239
312
  - All processing happens locally on your machine
@@ -232,7 +232,7 @@ export const analyzeCommand = async (inputPath, options) => {
232
232
  bar.stop();
233
233
  const msg = err.message || String(err);
234
234
  console.error(`\n Analysis failed: ${msg}\n`);
235
- // Provide helpful guidance for known large-repo failure modes
235
+ // Provide helpful guidance for known failure modes
236
236
  if (msg.includes('Maximum call stack size exceeded') ||
237
237
  msg.includes('call stack') ||
238
238
  msg.includes('Map maximum size') ||
@@ -248,6 +248,28 @@ export const analyzeCommand = async (inputPath, options) => {
248
248
  console.error(' 3. Increase stack size: NODE_OPTIONS="--stack-size=4096"');
249
249
  console.error('');
250
250
  }
251
+ else if (msg.includes('ERESOLVE') || msg.includes('Could not resolve dependency')) {
252
+ // Note: the original arborist "Cannot destructure property 'package' of
253
+ // 'node.target'" crash happens inside npm *before* gitnexus code runs,
254
+ // so it can't be caught here. This branch handles dependency-resolution
255
+ // errors that surface at runtime (e.g. dynamic require failures).
256
+ console.error(' This looks like an npm dependency resolution issue.');
257
+ console.error(' Suggestions:');
258
+ console.error(' 1. Clear the npm cache: npm cache clean --force');
259
+ console.error(' 2. Update npm: npm install -g npm@latest');
260
+ console.error(' 3. Reinstall gitnexus: npm install -g gitnexus@latest');
261
+ console.error(' 4. Or try npx directly: npx gitnexus@latest analyze');
262
+ console.error('');
263
+ }
264
+ else if (msg.includes('MODULE_NOT_FOUND') ||
265
+ msg.includes('Cannot find module') ||
266
+ msg.includes('ERR_MODULE_NOT_FOUND')) {
267
+ console.error(' A required module could not be loaded. The installation may be corrupt.');
268
+ console.error(' Suggestions:');
269
+ console.error(' 1. Reinstall: npm install -g gitnexus@latest');
270
+ console.error(' 2. Clear cache: npm cache clean --force && npx gitnexus@latest analyze');
271
+ console.error('');
272
+ }
251
273
  process.exitCode = 1;
252
274
  return;
253
275
  }
@@ -131,6 +131,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
131
131
  try {
132
132
  // Configure transformers.js environment
133
133
  env.allowLocalModels = false;
134
+ // Default cache to user-writable location. transformers.js defaults to
135
+ // ./node_modules/.cache inside its own install dir, which is unwritable
136
+ // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
137
+ // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
138
+ env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
134
139
  const isDev = process.env.NODE_ENV === 'development';
135
140
  if (isDev) {
136
141
  console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
@@ -8,7 +8,14 @@
8
8
  * 4. Update LadybugDB with embeddings
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
- import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
11
+ import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult } from './types.js';
12
+ /**
13
+ * Compute a stable content fingerprint for an embeddable node.
14
+ * Used to detect when the underlying text has changed so stale vectors
15
+ * can be replaced (DELETE-then-INSERT, the Kuzu-sanctioned pattern for
16
+ * vector-indexed rows).
17
+ */
18
+ export declare const contentHashForNode: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
12
19
  /**
13
20
  * Progress callback type
14
21
  */
@@ -20,9 +27,11 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
20
27
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
21
28
  * @param onProgress - Callback for progress updates
22
29
  * @param config - Optional configuration override
23
- * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
30
+ * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
31
+ * Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
32
+ * and re-embedded; nodes not in the map are embedded fresh.
24
33
  */
25
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
34
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, existingEmbeddings?: Map<string, string>) => Promise<void>;
26
35
  /**
27
36
  * Perform semantic search using the vector index
28
37
  *
@@ -8,10 +8,23 @@
8
8
  * 4. Update LadybugDB with embeddings
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
+ import { createHash } from 'crypto';
11
12
  import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady, } from './embedder.js';
12
- import { generateBatchEmbeddingTexts } from './text-generator.js';
13
+ import { generateEmbeddingText, generateBatchEmbeddingTexts } from './text-generator.js';
13
14
  import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
15
+ import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, } from '../lbug/schema.js';
16
+ import { loadVectorExtension } from '../lbug/lbug-adapter.js';
14
17
  const isDev = process.env.NODE_ENV === 'development';
18
+ /**
19
+ * Compute a stable content fingerprint for an embeddable node.
20
+ * Used to detect when the underlying text has changed so stale vectors
21
+ * can be replaced (DELETE-then-INSERT, the Kuzu-sanctioned pattern for
22
+ * vector-indexed rows).
23
+ */
24
+ export const contentHashForNode = (node, config = {}) => {
25
+ const text = generateEmbeddingText(node, config);
26
+ return createHash('sha1').update(text).digest('hex');
27
+ };
15
28
  /**
16
29
  * Query all embeddable nodes from LadybugDB
17
30
  * Uses table-specific queries (File has different schema than code elements)
@@ -67,34 +80,26 @@ const queryEmbeddableNodes = async (executeQuery) => {
67
80
  * that occurs when UPDATEing nodes with large content fields
68
81
  */
69
82
  const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
70
- // INSERT into separate embedding table - much more memory efficient!
71
- const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
72
- const paramsList = updates.map((u) => ({ nodeId: u.id, embedding: u.embedding }));
83
+ // MERGE instead of CREATE — idempotent; handles concurrent analyze runs and partial prior runs
84
+ const cypher = `MERGE (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) SET e.embedding = $embedding, e.contentHash = $contentHash`;
85
+ const paramsList = updates.map((u) => ({
86
+ nodeId: u.id,
87
+ embedding: u.embedding,
88
+ contentHash: u.contentHash,
89
+ }));
73
90
  await executeWithReusedStatement(cypher, paramsList);
74
91
  };
75
92
  /**
76
93
  * Create the vector index for semantic search
77
- * Now indexes the separate CodeEmbedding table
94
+ * Now indexes the separate CodeEmbedding table.
95
+ * Delegates extension loading to lbug-adapter's loadVectorExtension(),
96
+ * which owns the VECTOR extension lifecycle and state tracking.
78
97
  */
79
- let vectorExtensionLoaded = false;
80
98
  const createVectorIndex = async (executeQuery) => {
81
- // LadybugDB v0.15+ requires explicit VECTOR extension loading (once per session)
82
- if (!vectorExtensionLoaded) {
83
- try {
84
- await executeQuery('INSTALL VECTOR');
85
- await executeQuery('LOAD EXTENSION VECTOR');
86
- vectorExtensionLoaded = true;
87
- }
88
- catch {
89
- // Extension may already be loaded — CREATE_VECTOR_INDEX will fail clearly if not
90
- vectorExtensionLoaded = true;
91
- }
92
- }
93
- const cypher = `
94
- CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
95
- `;
99
+ // Delegate to the adapter which tracks loaded state and handles DB reconnect resets
100
+ await loadVectorExtension();
96
101
  try {
97
- await executeQuery(cypher);
102
+ await executeQuery(CREATE_VECTOR_INDEX_QUERY);
98
103
  }
99
104
  catch (error) {
100
105
  // Index might already exist
@@ -110,9 +115,11 @@ const createVectorIndex = async (executeQuery) => {
110
115
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
111
116
  * @param onProgress - Callback for progress updates
112
117
  * @param config - Optional configuration override
113
- * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
118
+ * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
119
+ * Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
120
+ * and re-embedded; nodes not in the map are embedded fresh.
114
121
  */
115
- export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
122
+ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, existingEmbeddings) => {
116
123
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
117
124
  try {
118
125
  // Phase 1: Load embedding model
@@ -141,12 +148,50 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
141
148
  }
142
149
  // Phase 2: Query embeddable nodes
143
150
  let nodes = await queryEmbeddableNodes(executeQuery);
144
- // Incremental mode: filter out nodes that already have embeddings
145
- if (skipNodeIds && skipNodeIds.size > 0) {
151
+ // Incremental mode: compare content hashes, delete stale rows, skip fresh ones.
152
+ // Computed hashes for stale nodes are cached so batchInsertEmbeddings can reuse them
153
+ // (avoids double computation).
154
+ const computedStaleHashes = new Map();
155
+ if (existingEmbeddings && existingEmbeddings.size > 0) {
146
156
  const beforeCount = nodes.length;
147
- nodes = nodes.filter((n) => !skipNodeIds.has(n.id));
157
+ const staleNodeIds = [];
158
+ nodes = nodes.filter((n) => {
159
+ const existingHash = existingEmbeddings.get(n.id);
160
+ if (existingHash === undefined) {
161
+ // New node — needs embedding
162
+ return true;
163
+ }
164
+ const currentHash = contentHashForNode(n, finalConfig);
165
+ if (currentHash !== existingHash) {
166
+ // Content changed — cache hash for reuse during insert, mark for DELETE + re-embed
167
+ computedStaleHashes.set(n.id, currentHash);
168
+ staleNodeIds.push(n.id);
169
+ return true;
170
+ }
171
+ // Hash matches — skip (fresh); no need to cache hash for skipped nodes
172
+ return false;
173
+ });
174
+ // DELETE stale embedding rows so they can be re-inserted
175
+ // (Kuzu forbids SET on vector-indexed properties; DELETE-then-INSERT is the sanctioned pattern)
176
+ if (staleNodeIds.length > 0) {
177
+ if (isDev) {
178
+ console.log(`🔄 Deleting ${staleNodeIds.length} stale embedding rows for re-embed`);
179
+ }
180
+ try {
181
+ await executeWithReusedStatement(`MATCH (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) DELETE e`, staleNodeIds.map((nodeId) => ({ nodeId })));
182
+ }
183
+ catch (err) {
184
+ // "does not exist" = rows already gone — safe to proceed.
185
+ // All other errors risk vector-index corruption (Kuzu requires DELETE-before-INSERT
186
+ // for vector-indexed properties) — propagate so the pipeline aborts cleanly.
187
+ const msg = err instanceof Error ? err.message : String(err);
188
+ if (!msg.includes('does not exist')) {
189
+ throw new Error(`[embed] Failed to delete stale embedding rows — aborting to prevent vector-index corruption: ${msg}`);
190
+ }
191
+ }
192
+ }
148
193
  if (isDev) {
149
- console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
194
+ console.log(`📦 Incremental embeddings: ${beforeCount} total, ${existingEmbeddings.size} cached, ${staleNodeIds.length} stale, ${nodes.length} to embed`);
150
195
  }
151
196
  }
152
197
  const totalNodes = nodes.length;
@@ -154,6 +199,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
154
199
  console.log(`📊 Found ${totalNodes} embeddable nodes`);
155
200
  }
156
201
  if (totalNodes === 0) {
202
+ // Ensure the vector index exists even when no new nodes need embedding.
203
+ // A prior crash or first-time incremental run may have left CodeEmbedding
204
+ // rows without ever reaching index creation.
205
+ await createVectorIndex(executeQuery);
157
206
  onProgress({
158
207
  phase: 'ready',
159
208
  percent: 100,
@@ -186,6 +235,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
186
235
  const updates = batch.map((node, i) => ({
187
236
  id: node.id,
188
237
  embedding: embeddingToArray(embeddings[i]),
238
+ contentHash: computedStaleHashes.get(node.id) ?? contentHashForNode(node, finalConfig),
189
239
  }));
190
240
  await batchInsertEmbeddings(executeWithReusedStatement, updates);
191
241
  processedNodes += batch.length;
@@ -256,7 +306,7 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
256
306
  const queryVecStr = `[${queryVec.join(',')}]`;
257
307
  // Query the vector index on CodeEmbedding to get nodeIds and distances
258
308
  const vectorQuery = `
259
- CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
309
+ CALL QUERY_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}',
260
310
  CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${k})
261
311
  YIELD node AS emb, distance
262
312
  WITH emb, distance
@@ -7,7 +7,7 @@ export interface ProtoServiceInfo {
7
7
  protoPath: string;
8
8
  }
9
9
  export declare function buildProtoMap(repoPath: string): Promise<Map<string, ProtoServiceInfo[]>>;
10
- export declare function resolveProtoConflict(_serviceName: string, sourceFilePath: string, candidates: ProtoServiceInfo[]): ProtoServiceInfo | null;
10
+ export declare function resolveProtoConflict(serviceName: string, sourceFilePath: string, candidates: ProtoServiceInfo[]): ProtoServiceInfo | null;
11
11
  export declare function serviceContractId(pkg: string, serviceName: string): string;
12
12
  export declare class GrpcExtractor implements ContractExtractor {
13
13
  type: "grpc";
@@ -263,23 +263,31 @@ export async function buildProtoMap(repoPath) {
263
263
  const { servicesByName } = await buildProtoContext(repoPath);
264
264
  return servicesByName;
265
265
  }
266
- export function resolveProtoConflict(_serviceName, sourceFilePath, candidates) {
266
+ export function resolveProtoConflict(serviceName, sourceFilePath, candidates) {
267
267
  if (candidates.length === 0)
268
268
  return null;
269
269
  if (candidates.length === 1)
270
270
  return candidates[0];
271
271
  const sourceDir = normalizeProtoPath(path.dirname(sourceFilePath));
272
- let best = candidates[0];
273
- let bestScore = -1;
274
- for (const c of candidates) {
272
+ const scored = candidates.map((c) => {
275
273
  const protoDir = normalizeProtoPath(path.dirname(c.protoPath));
276
- const sharedRun = longestSharedSegmentRun(sourceDir, protoDir);
277
- if (sharedRun > bestScore) {
278
- bestScore = sharedRun;
279
- best = c;
280
- }
274
+ return { candidate: c, score: longestSharedSegmentRun(sourceDir, protoDir) };
275
+ });
276
+ let maxScore = -1;
277
+ for (const s of scored) {
278
+ if (s.score > maxScore)
279
+ maxScore = s.score;
281
280
  }
282
- return best;
281
+ const winners = scored.filter((s) => s.score === maxScore);
282
+ // Path heuristic cannot uniquely identify a winner — refuse to guess.
283
+ // Ties (including all-zero ties) would otherwise silently merge unrelated
284
+ // services under a fabricated package-qualified contract id.
285
+ if (winners.length !== 1) {
286
+ const paths = candidates.map((c) => c.protoPath).join(', ');
287
+ console.warn(`[grpc-extractor] Ambiguous proto resolution for service "${serviceName}" from ${sourceFilePath}: ${winners.length} candidates tied at score ${maxScore} among [${paths}] — skipping canonical contract`);
288
+ return null;
289
+ }
290
+ return winners[0].candidate;
283
291
  }
284
292
  export function serviceContractId(pkg, serviceName) {
285
293
  const prefix = pkg ? `${pkg}.${serviceName}` : serviceName;
@@ -339,7 +347,9 @@ export class GrpcExtractor {
339
347
  continue;
340
348
  }
341
349
  for (const d of detections) {
342
- out.push(this.detectionToContract(d, rel, protoMap));
350
+ const contract = this.detectionToContract(d, rel, protoMap);
351
+ if (contract)
352
+ out.push(contract);
343
353
  }
344
354
  }
345
355
  return this.dedupe(out);
@@ -352,8 +362,13 @@ export class GrpcExtractor {
352
362
  * based on whether the proto map had an entry.
353
363
  */
354
364
  detectionToContract(d, filePath, protoMap) {
355
- const candidates = protoMap.get(d.serviceName);
356
- const proto = resolveProtoConflict(d.serviceName, filePath, candidates ?? []);
365
+ const candidates = protoMap.get(d.serviceName) ?? [];
366
+ const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
367
+ // If there were proto candidates but resolution was ambiguous, skip
368
+ // contract emission rather than fabricating a package-qualified id from
369
+ // an arbitrary candidate. resolveProtoConflict already warned.
370
+ if (candidates.length > 0 && proto === null)
371
+ return null;
357
372
  const pkg = proto?.package ?? '';
358
373
  const cid = d.methodName
359
374
  ? contractId(pkg, d.serviceName, d.methodName)
@@ -214,7 +214,29 @@ export class HttpRouteExtractor {
214
214
  const providerDetections = detections.filter((d) => d.role === 'provider');
215
215
  let handlerName = null;
216
216
  const normalizedRoute = normalizeHttpPath(routePath);
217
- const match = providerDetections.find((d) => normalizeHttpPath(d.path) === normalizedRoute);
217
+ // Candidates share the same normalized path. When multiple
218
+ // detections at the same path exist (e.g. GET + POST /api/orders
219
+ // in one router), a blind `.find()` silently returned the first
220
+ // verb — attaching the wrong handler and, when method was not
221
+ // already pinned by the route reason, the wrong method too.
222
+ // Disambiguate by method when we know it; refuse to guess when
223
+ // we don't.
224
+ const candidates = providerDetections.filter((d) => normalizeHttpPath(d.path) === normalizedRoute);
225
+ let match;
226
+ const ambiguousCandidates = !method && candidates.length > 1;
227
+ if (method) {
228
+ match = candidates.find((d) => d.method === method);
229
+ }
230
+ else if (candidates.length === 1) {
231
+ match = candidates[0];
232
+ }
233
+ // else: multiple candidates + unknown method → leave match
234
+ // undefined so handlerName stays null and skip symbol
235
+ // enrichment below, keeping the file-basename fallback instead
236
+ // of letting pickSymbolUid silently pick the first Function /
237
+ // Method in the file (which reintroduces the mis-attribution
238
+ // we were trying to avoid). Method stays at the conservative
239
+ // 'GET' default set below.
218
240
  if (match) {
219
241
  if (!method)
220
242
  method = match.method;
@@ -228,7 +250,7 @@ export class HttpRouteExtractor {
228
250
  let symbolName = path.basename(filePath) || 'handler';
229
251
  let symPath = filePath;
230
252
  const fileId = row.fileId ?? row[0];
231
- if (fileId) {
253
+ if (fileId && !ambiguousCandidates) {
232
254
  try {
233
255
  const syms = await db(CONTAINS_QUERY, { fileId });
234
256
  if (syms.length > 0) {
@@ -308,9 +330,17 @@ export class HttpRouteExtractor {
308
330
  // Prefer the plugin's detected method if we can find a matching
309
331
  // fetch/axios call in the same file.
310
332
  const detections = filePath ? getDetections(filePath) : [];
311
- const inferred = detections.find((d) => d.role === 'consumer' && normalizeConsumerPath(d.path) === pathNorm);
312
- if (inferred)
313
- method = inferred.method;
333
+ // Symmetric to the provider path: if multiple consumer calls in
334
+ // the same file share the same normalized path (e.g. a GET
335
+ // fetch AND a POST fetch to `/api/orders`), `.find()` silently
336
+ // picked the first verb and keyed the contract id on the wrong
337
+ // method. With no upstream method signal here, refuse to guess
338
+ // when candidates are ambiguous — leave `method` at its
339
+ // conservative 'GET' default.
340
+ const consumerCandidates = detections.filter((d) => d.role === 'consumer' && normalizeConsumerPath(d.path) === pathNorm);
341
+ if (consumerCandidates.length === 1) {
342
+ method = consumerCandidates[0].method;
343
+ }
314
344
  const cid = contractIdFor(method, pathNorm);
315
345
  let symbolUid = '';
316
346
  let symbolName = 'fetch';
@@ -16,6 +16,34 @@ function normalizeRoutePath(raw) {
16
16
  return '/';
17
17
  return collapsed.replace(/\/+$/, '');
18
18
  }
19
+ /**
20
+ * Split a manifest HTTP contract into its optional `METHOD::` prefix and
21
+ * its path portion.
22
+ *
23
+ * `buildContractId` recommends the explicit-method form `GET::/api/orders`
24
+ * in group.yaml; if we hand that raw string to `normalizeRoutePath` we get
25
+ * `/GET::/api/orders`, which can never match `Route.name = "/api/orders"`
26
+ * in the graph. This helper extracts the path portion so the Cypher
27
+ * lookup uses the canonical route name.
28
+ *
29
+ * The method prefix regex mirrors `buildContractId` (line ~251) for
30
+ * symmetry: case-insensitive `[A-Za-z]+` followed by `::`. The captured
31
+ * method is upper-cased for downstream use; method-constrained matching
32
+ * against `HANDLES_ROUTE` is a future enhancement (not yet wired).
33
+ *
34
+ * Edge cases:
35
+ * - `"::/api/orders"` — empty method portion, no alpha prefix match, so
36
+ * the whole string is treated as a bare path (matches buildContractId
37
+ * which also requires `[A-Za-z]+`).
38
+ * - `"GET::"` — method with empty path, returns `{ method: 'GET', path: '' }`;
39
+ * `normalizeRoutePath('')` resolves to `/` for caller.
40
+ */
41
+ function parseHttpContract(raw) {
42
+ const match = raw.match(/^([A-Za-z]+)::/);
43
+ if (!match)
44
+ return { method: null, path: raw };
45
+ return { method: match[1].toUpperCase(), path: raw.slice(match[0].length) };
46
+ }
19
47
  /**
20
48
  * Stable synthetic symbolUid for a manifest-declared contract whose target
21
49
  * symbol could not be resolved against the per-repo graph (resolveSymbol
@@ -40,14 +68,29 @@ export function manifestSymbolUid(repo, contractId) {
40
68
  }
41
69
  export class ManifestExtractor {
42
70
  async extractFromManifest(links, dbExecutors) {
43
- const contracts = [];
44
- const crossLinks = [];
45
- for (const link of links) {
71
+ const resolveCache = new Map();
72
+ const resolveOnce = (repo, link) => {
73
+ const key = `${repo}\u0000${link.type}\u0000${link.contract}`;
74
+ let pending = resolveCache.get(key);
75
+ if (!pending) {
76
+ pending = this.resolveSymbol(repo, link, dbExecutors);
77
+ resolveCache.set(key, pending);
78
+ }
79
+ return pending;
80
+ };
81
+ const perLink = await Promise.all(links.map(async (link) => {
46
82
  const contractId = this.buildContractId(link.type, link.contract);
47
83
  const providerRepo = link.role === 'provider' ? link.from : link.to;
48
84
  const consumerRepo = link.role === 'provider' ? link.to : link.from;
49
- const providerSymbol = await this.resolveSymbol(providerRepo, link, dbExecutors);
50
- const consumerSymbol = await this.resolveSymbol(consumerRepo, link, dbExecutors);
85
+ const [providerSymbol, consumerSymbol] = await Promise.all([
86
+ resolveOnce(providerRepo, link),
87
+ resolveOnce(consumerRepo, link),
88
+ ]);
89
+ return { link, contractId, providerRepo, consumerRepo, providerSymbol, consumerSymbol };
90
+ }));
91
+ const contracts = [];
92
+ const crossLinks = [];
93
+ for (const { link, contractId, providerRepo, consumerRepo, providerSymbol, consumerSymbol, } of perLink) {
51
94
  const providerRef = providerSymbol || { filePath: '', name: link.contract };
52
95
  const consumerRef = consumerSymbol || { filePath: '', name: link.contract };
53
96
  // When the resolver finds a real graph symbol we keep its uid, otherwise
@@ -111,7 +154,15 @@ export class ManifestExtractor {
111
154
  // core/ingestion/pipeline.ts ensureSlash + generateId('Route', ...)).
112
155
  // Normalize the manifest contract the same way so a user-written
113
156
  // "/api/orders" matches "api/orders" in the graph.
114
- const normalized = normalizeRoutePath(link.contract);
157
+ //
158
+ // The contract may also use the explicit-method form "GET::/api/orders"
159
+ // recommended by buildContractId. Strip the METHOD:: prefix before
160
+ // normalizing — otherwise `normalizeRoutePath('GET::/api/orders')`
161
+ // returns `/GET::/api/orders` and never matches Route.name. The
162
+ // captured method is not yet used to constrain the Cypher query
163
+ // (method-aware HANDLES_ROUTE matching is a future enhancement).
164
+ const parsed = parseHttpContract(link.contract);
165
+ const normalized = normalizeRoutePath(parsed.path);
115
166
  rows = await executor(`MATCH (handler)-[r:CodeRelation {type: 'HANDLES_ROUTE'}]->(route:Route)
116
167
  WHERE route.name = $normalized
117
168
  RETURN handler.id AS uid, handler.name AS name, handler.filePath AS filePath
@@ -214,9 +265,15 @@ export class ManifestExtractor {
214
265
  buildContractId(type, contract) {
215
266
  switch (type) {
216
267
  case 'http': {
217
- if (/^[A-Za-z]+::/.test(contract))
218
- return `http::${contract}`;
219
- return `http::*::${contract}`;
268
+ // Canonicalize method casing and path separators so logically
269
+ // equivalent inputs (`get::/api/orders` vs `GET::/api/orders`,
270
+ // or trailing-slash variants) produce the same contractId and
271
+ // matching `manifestSymbolUid` fallback. Without this, raw
272
+ // user casing leaks into cross-impact join keys and fragments
273
+ // matches across repos.
274
+ const { method, path: rawPath } = parseHttpContract(contract);
275
+ const normalizedPath = normalizeRoutePath(rawPath);
276
+ return method ? `http::${method}::${normalizedPath}` : `http::*::${normalizedPath}`;
220
277
  }
221
278
  case 'grpc':
222
279
  return `grpc::${contract}`;
@@ -6,6 +6,7 @@ import { readRegistry } from '../../storage/repo-manager.js';
6
6
  import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
7
7
  import { GrpcExtractor } from './extractors/grpc-extractor.js';
8
8
  import { TopicExtractor } from './extractors/topic-extractor.js';
9
+ import { ManifestExtractor } from './extractors/manifest-extractor.js';
9
10
  import { runExactMatch } from './matching.js';
10
11
  import { detectServiceBoundaries, assignService } from './service-boundary-detector.js';
11
12
  import { writeContractRegistry } from './storage.js';
@@ -34,10 +35,28 @@ function defaultResolveHandle(allEntries) {
34
35
  };
35
36
  };
36
37
  }
38
+ /**
39
+ * Dedupe cross-links that point from the same consumer endpoint to the same
40
+ * provider endpoint for the same contract. Preserves first-seen order so the
41
+ * caller controls precedence (e.g., pass manifest links first).
42
+ */
43
+ function dedupeCrossLinks(links) {
44
+ const seen = new Set();
45
+ const out = [];
46
+ for (const link of links) {
47
+ const key = `${link.from.repo}::${link.from.symbolUid}|${link.to.repo}::${link.to.symbolUid}|${link.type}|${link.contractId}`;
48
+ if (seen.has(key))
49
+ continue;
50
+ seen.add(key);
51
+ out.push(link);
52
+ }
53
+ return out;
54
+ }
37
55
  export async function syncGroup(config, opts) {
38
56
  const missingRepos = [];
39
57
  const repoSnapshots = {};
40
58
  let autoContracts = [];
59
+ let manifestCrossLinks = [];
41
60
  let dbExecutors;
42
61
  const eo = opts?.extractorOverride;
43
62
  if (eo && eo.length === 0) {
@@ -124,8 +143,37 @@ export async function syncGroup(config, opts) {
124
143
  }
125
144
  }
126
145
  }
146
+ // Process manifest links declared in group.yaml.
147
+ // ManifestExtractor is fully implemented but was never wired into this
148
+ // pipeline — config.links were parsed and validated but silently dropped.
149
+ // Placed after the DB try/finally: resolveSymbol falls back to synthetic
150
+ // UIDs when dbExecutors is undefined or a pool is closed, so cross-links
151
+ // are always generated regardless of whether real DB executors are available.
152
+ if (config.links.length > 0) {
153
+ // Warn about dangling links that reference repos not declared in config.repos.
154
+ // They still generate cross-links via synthetic UIDs (determinism is preserved),
155
+ // but the operator probably meant something that now silently does nothing useful.
156
+ const knownRepos = new Set(Object.keys(config.repos));
157
+ for (const link of config.links) {
158
+ const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r));
159
+ if (dangling.length > 0) {
160
+ console.warn(`[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`);
161
+ }
162
+ }
163
+ const manifestEx = new ManifestExtractor();
164
+ const manifestResult = await manifestEx.extractFromManifest(config.links, dbExecutors);
165
+ autoContracts.push(...manifestResult.contracts);
166
+ manifestCrossLinks = manifestResult.crossLinks;
167
+ if (opts?.verbose) {
168
+ console.log(` manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`);
169
+ }
170
+ }
127
171
  const { matched, unmatched } = runExactMatch(autoContracts);
128
- const crossLinks = matched;
172
+ // Dedupe cross-links. Manifest contracts participate in runExactMatch, so a
173
+ // manifest-declared link can also emit a matchType:'exact' CrossLink with the
174
+ // same endpoints. Prefer the manifest version — it reflects operator intent
175
+ // and carries matchType:'manifest' which downstream consumers may rely on.
176
+ const crossLinks = dedupeCrossLinks([...manifestCrossLinks, ...matched]);
129
177
  const allContracts = autoContracts;
130
178
  const registry = {
131
179
  version: 1,