gitnexus 1.6.2-rc.1 → 1.6.2-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,8 +67,8 @@ const queryEmbeddableNodes = async (executeQuery) => {
67
67
  * that occurs when UPDATEing nodes with large content fields
68
68
  */
69
69
  const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
70
- // INSERT into separate embedding table - much more memory efficient!
71
- const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
70
+ // MERGE instead of CREATE — idempotent; handles concurrent analyze runs and partial prior runs
71
+ const cypher = `MERGE (e:CodeEmbedding {nodeId: $nodeId}) SET e.embedding = $embedding`;
72
72
  const paramsList = updates.map((u) => ({ nodeId: u.id, embedding: u.embedding }));
73
73
  await executeWithReusedStatement(cypher, paramsList);
74
74
  };
@@ -246,14 +246,17 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
246
246
  Interface: interfaceWriter,
247
247
  CodeElement: codeElemWriter,
248
248
  };
249
- const seenFileIds = new Set();
249
+ // Deduplicate all node types — the pipeline can produce duplicate IDs across
250
+ // all symbol types (Class, Method, Function, etc.), not just File nodes.
251
+ // A single Set covering every label prevents PK violations on COPY.
252
+ const seenNodeIds = new Set();
250
253
  // --- SINGLE PASS over all nodes ---
251
254
  for (const node of graph.iterNodes()) {
255
+ if (seenNodeIds.has(node.id))
256
+ continue;
257
+ seenNodeIds.add(node.id);
252
258
  switch (node.label) {
253
259
  case 'File': {
254
- if (seenFileIds.has(node.id))
255
- break;
256
- seenFileIds.add(node.id);
257
260
  const content = await extractContent(node, contentCache);
258
261
  await fileWriter.addRow([
259
262
  escapeCSVField(node.id),
@@ -149,7 +149,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
149
149
  const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH);
150
150
  const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding }));
151
151
  try {
152
- await executeWithReusedStatement(`CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, paramsList);
152
+ await executeWithReusedStatement(`MERGE (e:CodeEmbedding {nodeId: $nodeId}) SET e.embedding = $embedding`, paramsList);
153
153
  }
154
154
  catch {
155
155
  /* some may fail if node was removed, that's fine */
@@ -1277,6 +1277,26 @@ export const createServer = async (port, host = '127.0.0.1') => {
1277
1277
  const lbugPath = path.join(entry.storagePath, 'lbug');
1278
1278
  await withLbugDb(lbugPath, async () => {
1279
1279
  const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js');
1280
+ // Skip nodes that already have embeddings — Kuzu forbids SET on vector-indexed properties.
1281
+ let skipNodeIds;
1282
+ try {
1283
+ const rows = await executeQuery('MATCH (e:CodeEmbedding) RETURN e.nodeId AS nodeId');
1284
+ if (rows && rows.length > 0) {
1285
+ skipNodeIds = new Set(rows.map((r) => r.nodeId ?? r[0]).filter(Boolean));
1286
+ console.log(`[embed] ${skipNodeIds.size} nodes already embedded — skipping in incremental run`);
1287
+ }
1288
+ }
1289
+ catch (err) {
1290
+ // Swallow only "table does not exist" — let real connection errors propagate.
1291
+ // Log so ops can see this path fire if Kuzu ever changes error wording.
1292
+ const msg = err?.message ?? '';
1293
+ if (msg.includes('does not exist') || msg.includes('not found')) {
1294
+ console.log(`[embed] CodeEmbedding table not yet present — full embedding run (${msg})`);
1295
+ }
1296
+ else {
1297
+ throw err;
1298
+ }
1299
+ }
1280
1300
  await runEmbeddingPipeline(executeQuery, executeWithReusedStatement, (p) => {
1281
1301
  embedJobManager.updateJob(job.id, {
1282
1302
  progress: {
@@ -1293,7 +1313,8 @@ export const createServer = async (port, host = '127.0.0.1') => {
1293
1313
  : `${p.phase} (${p.percent}%)`,
1294
1314
  },
1295
1315
  });
1296
- });
1316
+ }, {}, // config: use defaults (runEmbeddingPipeline signature: executeQuery, executeWithReusedStatement, onProgress, config, skipNodeIds)
1317
+ skipNodeIds);
1297
1318
  });
1298
1319
  clearTimeout(embedTimeout);
1299
1320
  releaseRepoLock(repoLockPath);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.2-rc.1",
3
+ "version": "1.6.2-rc.2",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",