@codragraph/cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +2 -2
  2. package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
  3. package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
  4. package/dist/_shared/cgdb/schema-constants.js +67 -0
  5. package/dist/_shared/cgdb/schema-constants.js.map +1 -0
  6. package/dist/_shared/index.d.ts +2 -2
  7. package/dist/_shared/index.js +1 -1
  8. package/dist/cli/analyze.js +3 -3
  9. package/dist/cli/graphstore.js +21 -21
  10. package/dist/cli/index-repo.js +3 -3
  11. package/dist/cli/wiki.js +3 -3
  12. package/dist/core/augmentation/engine.js +7 -7
  13. package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
  14. package/dist/core/cgdb/cgdb-adapter.js +1320 -0
  15. package/dist/core/cgdb/content-read.d.ts +46 -0
  16. package/dist/core/cgdb/content-read.js +64 -0
  17. package/dist/core/cgdb/csv-generator.d.ts +29 -0
  18. package/dist/core/cgdb/csv-generator.js +492 -0
  19. package/dist/core/cgdb/pool-adapter.d.ts +93 -0
  20. package/dist/core/cgdb/pool-adapter.js +550 -0
  21. package/dist/core/cgdb/schema.d.ts +62 -0
  22. package/dist/core/cgdb/schema.js +502 -0
  23. package/dist/core/embeddings/embedding-pipeline.js +4 -4
  24. package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
  25. package/dist/core/graphstore/cgdb-row-source.js +141 -0
  26. package/dist/core/graphstore/index.d.ts +1 -1
  27. package/dist/core/graphstore/index.js +3 -3
  28. package/dist/core/group/bridge-db.d.ts +2 -2
  29. package/dist/core/group/bridge-db.js +18 -18
  30. package/dist/core/group/bridge-schema.d.ts +4 -4
  31. package/dist/core/group/bridge-schema.js +4 -4
  32. package/dist/core/group/cross-impact.js +3 -3
  33. package/dist/core/group/sync.js +4 -4
  34. package/dist/core/run-analyze.js +24 -24
  35. package/dist/core/search/bm25-index.d.ts +3 -3
  36. package/dist/core/search/bm25-index.js +9 -9
  37. package/dist/core/search/hybrid-search.js +2 -2
  38. package/dist/core/wiki/generator.d.ts +2 -2
  39. package/dist/core/wiki/generator.js +4 -4
  40. package/dist/core/wiki/graph-queries.d.ts +2 -2
  41. package/dist/core/wiki/graph-queries.js +5 -5
  42. package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
  43. package/dist/mcp/core/cgdb-adapter.js +5 -0
  44. package/dist/mcp/core/embedder.js +1 -1
  45. package/dist/mcp/local/local-backend.d.ts +2 -2
  46. package/dist/mcp/local/local-backend.js +15 -15
  47. package/dist/mcp/server.js +3 -3
  48. package/dist/mcp/tools.js +1 -1
  49. package/dist/server/analyze-worker.js +2 -2
  50. package/dist/server/api.js +31 -31
  51. package/dist/storage/repo-manager.d.ts +4 -4
  52. package/dist/storage/repo-manager.js +5 -5
  53. package/hooks/claude/codragraph-hook.cjs +4 -4
  54. package/package.json +3 -3
  55. package/scripts/build.js +8 -9
  56. package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
  57. package/vendor/tree-sitter-proto/src/node-types.json +1 -1
@@ -0,0 +1,1320 @@
1
+ import fs from 'fs/promises';
2
+ import { createReadStream, createWriteStream } from 'fs';
3
+ import { createInterface } from 'readline';
4
+ import { once } from 'events';
5
+ import { finished } from 'stream/promises';
6
+ import path from 'path';
7
+ import cgdb from '@ladybugdb/core';
8
+ import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, STALE_HASH_SENTINEL, } from './schema.js';
9
+ import { streamAllCSVsToDisk } from './csv-generator.js';
10
/**
 * Split a combined relationship CSV into one CSV file per (fromLabel, toLabel) pair.
 *
 * The input is streamed line by line. The first line is remembered as the
 * header and re-emitted at the top of every per-pair file, which is named
 * `rel_{fromLabel}_{toLabel}.csv` inside `csvDir`. Rows whose first two quoted
 * fields cannot be parsed, or whose labels are not in `validTables`, are
 * counted as skipped. Blank lines are ignored without being counted.
 *
 * The returned write streams are still OPEN; the caller must `end()` them and
 * wait for them to finish before reading the files.
 *
 * @param csvPath Path to the combined relationship CSV
 * @param csvDir Directory to write per-pair CSV files
 * @param validTables Set of valid node table names
 * @param getNodeLabel Function to extract the label from a node ID
 * @param wsFactory Optional WriteStream factory (defaults to fs.createWriteStream)
 * @returns `{ relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels }`
 * @throws The first stream error observed (input or any output stream); all
 *         streams are destroyed before the error is rethrown.
 */
export const splitRelCsvByLabelPair = async (csvPath, csvDir, validTables, getNodeLabel, wsFactory = (p) => createWriteStream(p, 'utf-8')) => {
    let relHeader = '';
    const relsByPairMeta = new Map();
    const pairWriteStreams = new Map();
    let skippedRels = 0;
    let totalValidRels = 0;
    const inputStream = createReadStream(csvPath, 'utf-8');
    const rl = createInterface({ input: inputStream, crlfDelay: Infinity });
    // One AbortController cancels every pending `once(ws, 'drain')` wait as
    // soon as any stream (input or output) reports an error.
    const abortOnError = new AbortController();
    let streamError = null;
    const markStreamError = (err) => {
        streamError ??= err;
        abortOnError.abort(err);
    };
    // Input failures (ENOENT, EIO, ...) must abort too. Closing readline ends
    // the async iterator even on runtimes where readline does not forward the
    // input's 'error' event, so the loop below can never hang on a dead input.
    inputStream.on('error', (err) => {
        markStreamError(err);
        rl.close();
    });
    try {
        // readline's async iterator serializes line delivery with our awaits,
        // so at most one write stream is in backpressure at a time.
        let isFirst = true;
        for await (const line of rl) {
            if (streamError)
                throw streamError;
            if (isFirst) {
                relHeader = line;
                isFirst = false;
                continue;
            }
            if (!line.trim())
                continue;
            // First two adjacent quoted fields are the from-id and to-id.
            const match = line.match(/"([^"]*)","([^"]*)"/);
            if (!match) {
                skippedRels++;
                continue;
            }
            const fromLabel = getNodeLabel(match[1]);
            const toLabel = getNodeLabel(match[2]);
            if (!validTables.has(fromLabel) || !validTables.has(toLabel)) {
                skippedRels++;
                continue;
            }
            const pairKey = `${fromLabel}|${toLabel}`;
            let ws = pairWriteStreams.get(pairKey);
            if (!ws) {
                // Lazily open the per-pair file and seed it with the header.
                const pairCsvPath = path.join(csvDir, `rel_${fromLabel}_${toLabel}.csv`);
                ws = wsFactory(pairCsvPath);
                ws.on('error', markStreamError);
                pairWriteStreams.set(pairKey, ws);
                relsByPairMeta.set(pairKey, { csvPath: pairCsvPath, rows: 0 });
                if (!ws.write(relHeader + '\n')) {
                    await once(ws, 'drain', { signal: abortOnError.signal });
                }
            }
            if (!ws.write(line + '\n')) {
                await once(ws, 'drain', { signal: abortOnError.signal });
            }
            relsByPairMeta.get(pairKey).rows++;
            totalValidRels++;
        }
        // An error may have arrived after the last line was delivered.
        if (streamError)
            throw streamError;
    }
    catch (err) {
        // Tear down everything so no fd is left dangling. Prefer the recorded
        // stream error over the AbortError it triggered (more actionable).
        for (const ws of pairWriteStreams.values())
            ws.destroy();
        inputStream.destroy();
        throw streamError ?? err;
    }
    finally {
        // Wait until the input stream has fully closed (fd released) on both
        // the success and the error path; its own failure is already handled.
        await finished(inputStream).catch(() => { });
    }
    return { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels };
};
108
// Module-level singleton state for the one active database session.
// `db` / `conn` hold the open Database and Connection, `currentDbPath` is the
// path they were opened for; all three are null while nothing is open.
let db = null;
let conn = null;
let currentDbPath = null;
// Extension-loaded flags; reset to false whenever the connection is torn down.
let ftsLoaded = false;
let vectorExtensionLoaded = false;
/**
 * Per-process memo of FTS indexes already ensured on the current connection.
 *
 * The first `ensureFTSIndex` call for a `(tableName, indexName)` pair pays the
 * `CALL CREATE_FTS_INDEX` round-trip (~440 ms even when the index already
 * exists on disk); later calls within the same CLI/MCP session become a Set
 * lookup. `closeCgdb` clears it so a re-init starts from scratch.
 *
 * Entries are keyed as `${tableName}:${indexName}`.
 */
const ensuredFTSIndexes = new Set();
124
/**
 * Tell a schema-level failure (missing table / column / property) apart from a
 * transient or connection error. Used by the legacy-DB fallback logic.
 *
 * @param msg Error message text to classify
 * @returns true when the message reports a missing schema element
 */
const isMissingColumnOrTableError = (msg) => {
    if (msg.includes('does not exist')) {
        return true;
    }
    // Kuzu-style wording: "(table|column|property) ... not found". Requiring
    // one of those nouns keeps "connection not found" / "key not found" out.
    const schemaNotFound = /(table|column|property).*not found/i;
    return schemaNotFound.test(msg);
};
132
/**
 * Return the module-level Database handle (null when none is open).
 * Exposed so tests can reuse it with the pool adapter.
 */
export const getDatabase = () => {
    return db;
};
134
// Tail of the promise chain that serializes every operation touching the
// module-level cgdb globals, so the active DB cannot be switched while an
// operation is still running.
let sessionLock = Promise.resolve();
/** Maximum number of attempts when the database reports BUSY / lock-held. */
const DB_LOCK_RETRY_ATTEMPTS = 3;
/** Base delay in ms between BUSY retries; scaled by the attempt number. */
const DB_LOCK_RETRY_DELAY_MS = 500;
141
/**
 * Report whether an error looks like "another process holds the LadybugDB
 * file lock" (e.g. `codragraph analyze` or `codragraph serve` running
 * concurrently).
 *
 * @param err Any thrown value; non-Error values are stringified
 * @returns true when the lower-cased message contains a busy/lock marker
 */
export const isDbBusyError = (err) => {
    const rawMessage = err instanceof Error ? err.message : String(err);
    const lowered = rawMessage.toLowerCase();
    const busyMarkers = ['busy', 'lock', 'already in use', 'could not set lock'];
    return busyMarkers.some((marker) => lowered.includes(marker));
};
153
/**
 * Run `operation` exclusively: it starts only after every previously queued
 * operation has released the session lock, and releases the lock itself when
 * it settles (successfully or not).
 *
 * @param operation Async callback to run while holding the lock
 * @returns Whatever `operation` resolves to; its rejection propagates
 */
const runWithSessionLock = async (operation) => {
    const predecessor = sessionLock;
    let releaseLock = null;
    // Publish our own gate first so later callers queue up behind us.
    sessionLock = new Promise((resolve) => {
        releaseLock = resolve;
    });
    await predecessor;
    let outcome;
    try {
        outcome = await operation();
    }
    finally {
        releaseLock?.();
    }
    return outcome;
};
167
/** Convert every backslash in a path to a forward slash for use inside COPY statements. */
const normalizeCopyPath = (filePath) => filePath.split('\\').join('/');
168
/**
 * Open (or reuse) the database at `dbPath` while holding the session lock.
 *
 * @param dbPath Path of the database file
 * @returns The `{ db, conn }` pair produced by `ensureCgdbInitialized`
 */
export const initCgdb = async (dbPath) => {
    const handles = await runWithSessionLock(() => ensureCgdbInitialized(dbPath));
    return handles;
};
171
/**
 * Run `operation` against the database at `dbPath` under the session lock, so
 * no other request can switch the active DB while the callback executes.
 *
 * When the failure looks like a busy/locked database (see `isDbBusyError`),
 * the stale handles are closed and the whole attempt is retried, up to
 * DB_LOCK_RETRY_ATTEMPTS attempts in total, sleeping
 * DB_LOCK_RETRY_DELAY_MS * attempt milliseconds between attempts.
 *
 * @param dbPath Path of the database to operate on
 * @param operation Callback to run once the database is initialized
 * @returns The callback's result
 * @throws The callback's / initialization error when it is not a busy error
 *         or when the retries are exhausted
 */
export const withCgdbDb = async (dbPath, operation) => {
    // Best-effort teardown of the module-level handles; always run under the
    // session lock so no concurrent operation sees a half-reset state.
    const dropStaleHandles = async () => {
        try {
            if (conn)
                await conn.close();
        }
        catch {
            /* best-effort */
        }
        try {
            if (db)
                await db.close();
        }
        catch {
            /* best-effort */
        }
        conn = null;
        db = null;
        currentDbPath = null;
        ftsLoaded = false;
        vectorExtensionLoaded = false;
    };
    const initThenRun = async () => {
        await ensureCgdbInitialized(dbPath);
        return operation();
    };
    let lastError;
    let attempt = 1;
    while (attempt <= DB_LOCK_RETRY_ATTEMPTS) {
        try {
            return await runWithSessionLock(initThenRun);
        }
        catch (err) {
            lastError = err;
            const retriesExhausted = attempt === DB_LOCK_RETRY_ATTEMPTS;
            if (retriesExhausted || !isDbBusyError(err)) {
                throw err;
            }
            await runWithSessionLock(dropStaleHandles);
            // Back off outside the lock so other callers are not blocked.
            const backoffMs = DB_LOCK_RETRY_DELAY_MS * attempt;
            await new Promise((resolve) => setTimeout(resolve, backoffMs));
        }
        attempt += 1;
    }
    // Unreachable: every iteration either returns or throws. Kept so the
    // function never falls through with an undefined result.
    throw lastError;
};
224
/**
 * Make sure the module-level connection points at `dbPath`, (re)initializing
 * through `doInitCgdb` unless a connection for that exact path is already open.
 *
 * @param dbPath Path of the database file
 * @returns The current `{ db, conn }` pair
 */
const ensureCgdbInitialized = async (dbPath) => {
    const alreadyOpenForPath = Boolean(conn) && currentDbPath === dbPath;
    if (!alreadyOpenForPath) {
        await doInitCgdb(dbPath);
    }
    return { db, conn };
};
231
/**
 * Open the database at `dbPath` and make it the module-level session.
 *
 * Steps: close any previously open handles, remove a stale symlink or
 * old-style directory sitting at `dbPath`, create the parent directory, open
 * a Database + Connection, run every statement in SCHEMA_QUERIES (ignoring
 * "already exists"), then load the vector extension.
 *
 * @param dbPath Path of the single-file database
 * @returns The freshly opened `{ db, conn }` pair
 * @throws When the stale-path cleanup fails for any reason other than the
 *         path not existing (including the "Refusing to delete" guard), or
 *         when opening the database / loading the extension fails
 */
const doInitCgdb = async (dbPath) => {
    // Different database requested — close the old one first
    if (conn || db) {
        try {
            if (conn)
                await conn.close();
        }
        catch {
            /* best-effort */
        }
        try {
            if (db)
                await db.close();
        }
        catch {
            /* best-effort */
        }
        conn = null;
        db = null;
        currentDbPath = null;
        ftsLoaded = false;
        vectorExtensionLoaded = false;
        // The FTS memo describes the connection we just closed; keeping it
        // would make index checks for the next database be skipped.
        ensuredFTSIndexes.clear();
    }
    // The database is stored as a single file (not a directory). An existing
    // regular file is assumed to be a valid database and left untouched;
    // stale symlinks and old-style directories are removed.
    try {
        const stat = await fs.lstat(dbPath);
        if (stat.isSymbolicLink()) {
            // Never follow symlinks — just remove the link itself
            await fs.unlink(dbPath);
        }
        else if (stat.isDirectory()) {
            // Verify path is within expected storage directory before deleting
            const realPath = await fs.realpath(dbPath);
            const realParent = await fs.realpath(path.dirname(dbPath));
            if (!realPath.startsWith(realParent + path.sep) && realPath !== realParent) {
                throw new Error(`Refusing to delete ${dbPath}: resolved path ${realPath} is outside storage directory`);
            }
            // Old-style directory database or empty leftover - remove it
            await fs.rm(dbPath, { recursive: true, force: true });
        }
    }
    catch (err) {
        // Only "path does not exist" is expected here (a brand-new database).
        // Anything else — notably the refusal above — must reach the caller
        // instead of being silently discarded.
        if (err?.code !== 'ENOENT') {
            throw err;
        }
    }
    // Ensure parent directory exists
    const parentDir = path.dirname(dbPath);
    await fs.mkdir(parentDir, { recursive: true });
    db = new cgdb.Database(dbPath);
    conn = trackConnection(new cgdb.Connection(db));
    for (const schemaQuery of SCHEMA_QUERIES) {
        try {
            await conn.query(schemaQuery);
        }
        catch (err) {
            // Only ignore "already exists" errors - log everything else
            const msg = err instanceof Error ? err.message : String(err);
            if (!msg.includes('already exists')) {
                console.warn(`⚠️ Schema creation warning: ${msg.slice(0, 120)}`);
            }
        }
    }
    // Load VECTOR extension for semantic search support
    await loadVectorExtension();
    // Publish the path last so a failed init is retried on the next call.
    currentDbPath = dbPath;
    return { db, conn };
};
297
/**
 * Bulk-load a graph into the currently open database.
 *
 * The graph is first streamed to CSV files under `<storagePath>/csv`; node
 * tables are then loaded with COPY one after another, the relationship CSV is
 * split per FROM→TO label pair and each pair is COPY-ed, and pairs whose COPY
 * fails even with IGNORE_ERRORS fall back to row-by-row inserts. All CSV
 * files and the csv directory are removed on a best-effort basis at the end.
 *
 * @param graph Graph to persist (passed through to `streamAllCSVsToDisk`)
 * @param repoPath Repository path (passed through to `streamAllCSVsToDisk`)
 * @param storagePath Directory under which the temporary `csv` dir is created
 * @param onProgress Optional callback receiving human-readable progress text
 * @param options Optional settings; `options.compress` is forwarded to the CSV writer
 * @returns `{ success: true, insertedRels, skippedRels, warnings }`
 * @throws When no connection is open, or when a node-table COPY fails twice
 */
export const loadGraphToCgdb = async (graph, repoPath, storagePath, onProgress, options) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    const report = onProgress || (() => { });
    const csvDir = path.join(storagePath, 'csv');
    const describeError = (e) => (e instanceof Error ? e.message : String(e));
    // Delete a file, ignoring every failure (cleanup is best-effort).
    const removeQuietly = async (target) => {
        try {
            await fs.unlink(target);
        }
        catch { }
    };
    // Run a COPY; on failure run it once more with IGNORE_ERRORS=true.
    // Only the error of the second attempt reaches the caller.
    const copyWithLenientRetry = async (strictQuery) => {
        try {
            await conn.query(strictQuery);
        }
        catch (_strictErr) {
            const lenientQuery = strictQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)');
            await conn.query(lenientQuery);
        }
    };
    report('Streaming CSVs to disk...');
    const csvResult = await streamAllCSVsToDisk(graph, repoPath, csvDir, options?.compress);
    const validTables = new Set(NODE_TABLES);
    const getNodeLabel = (nodeId) => {
        if (nodeId.startsWith('comm_'))
            return 'Community';
        if (nodeId.startsWith('proc_'))
            return 'Process';
        return nodeId.split(':')[0];
    };
    // Node tables: strictly sequential COPY (one write transaction at a time).
    const nodeFiles = [...csvResult.nodeFiles.entries()];
    const totalSteps = nodeFiles.length + 1; // +1 for relationships
    for (let idx = 0; idx < nodeFiles.length; idx++) {
        const [table, { csvPath, rows }] = nodeFiles[idx];
        const stepNumber = idx + 1;
        report(`Loading nodes ${stepNumber}/${totalSteps}: ${table} (${rows.toLocaleString()} rows)`);
        const copyQuery = getCopyQuery(table, normalizeCopyPath(csvPath));
        try {
            await copyWithLenientRetry(copyQuery);
        }
        catch (retryErr) {
            throw new Error(`COPY failed for ${table}: ${describeError(retryErr).slice(0, 200)}`);
        }
    }
    // Relationships: COPY needs one file per FROM→TO label pair.
    const split = await splitRelCsvByLabelPair(csvResult.relCsvPath, csvDir, validTables, getNodeLabel);
    const { relHeader, relsByPairMeta, pairWriteStreams, skippedRels } = split;
    // Flush and close every per-pair stream before COPY reads the files.
    const pendingCloses = [];
    for (const ws of pairWriteStreams.values()) {
        ws.end();
        pendingCloses.push(finished(ws));
    }
    await Promise.all(pendingCloses);
    const insertedRels = split.totalValidRels;
    const warnings = [];
    if (insertedRels > 0) {
        report(`Loading edges: ${insertedRels.toLocaleString()} across ${relsByPairMeta.size} types`);
        const failedPairCsvPaths = new Set();
        let failedPairEdges = 0;
        let pairIdx = 0;
        for (const [pairKey, pairMeta] of relsByPairMeta) {
            pairIdx += 1;
            const { csvPath: pairCsvPath, rows } = pairMeta;
            const [fromLabel, toLabel] = pairKey.split('|');
            const normalizedPath = normalizeCopyPath(pairCsvPath);
            const copyQuery = `COPY ${REL_TABLE_NAME} FROM "${normalizedPath}" (from="${fromLabel}", to="${toLabel}", HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
            if (rows > 1000 || pairIdx % 5 === 0) {
                report(`Loading edges: ${pairIdx}/${relsByPairMeta.size} types (${fromLabel} -> ${toLabel})`);
            }
            try {
                await copyWithLenientRetry(copyQuery);
            }
            catch (retryErr) {
                warnings.push(`${fromLabel}->${toLabel} (${rows} edges): ${describeError(retryErr).slice(0, 80)}`);
                failedPairEdges += rows;
                failedPairCsvPaths.add(pairCsvPath);
            }
            // Failed pairs keep their CSV: the fallback below still reads it.
            if (!failedPairCsvPaths.has(pairCsvPath)) {
                await removeQuietly(pairCsvPath);
            }
        }
        if (failedPairCsvPaths.size > 0) {
            report(`Inserting ${failedPairEdges} edges individually (missing schema pairs)`);
            // Merge the data rows of every failed file under a single header.
            const allLines = [relHeader];
            for (const failedPath of failedPairCsvPaths) {
                try {
                    const content = await fs.readFile(failedPath, 'utf-8');
                    const dataRows = content.split('\n').slice(1);
                    for (const row of dataRows) {
                        if (row.trim())
                            allLines.push(row);
                    }
                }
                catch { }
                await removeQuietly(failedPath);
            }
            if (allLines.length > 1) {
                await fallbackRelationshipInserts(allLines, validTables, getNodeLabel);
            }
        }
    }
    // Cleanup all CSVs, then whatever else is left in the directory.
    await removeQuietly(csvResult.relCsvPath);
    for (const [, { csvPath }] of csvResult.nodeFiles) {
        await removeQuietly(csvPath);
    }
    try {
        const leftovers = await fs.readdir(csvDir);
        for (const entry of leftovers) {
            await removeQuietly(path.join(csvDir, entry));
        }
    }
    catch { }
    try {
        await fs.rmdir(csvDir);
    }
    catch { }
    return { success: true, insertedRels, skippedRels, warnings };
};
435
// CSV options shared by every node-table COPY statement.
// The engine's default ESCAPE is a backslash, but our CSVs follow RFC 4180
// ("" for a literal quote) and source-code content is full of backslashes, so
// ESCAPE='"' is set explicitly. auto_detect is disabled so sample rows cannot
// override these settings.
const COPY_CSV_OPTS = `(HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
// Multi-language node tables that were created with backticks in
// CODE_ELEMENT_BASE and therefore must be backtick-quoted in every query.
const BACKTICK_TABLES = new Set([
    'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace',
    'Trait', 'Impl', 'TypeAlias', 'Const', 'Static', 'Property',
    'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module',
]);
462
/** Wrap a table name in backticks when it is one of BACKTICK_TABLES; return it unchanged otherwise. */
const escapeTableName = (table) => {
    if (!BACKTICK_TABLES.has(table)) {
        return table;
    }
    return `\`${table}\``;
};
465
/**
 * Fallback: insert relationships one at a time when bulk COPY failed.
 *
 * `validRelLines[0]` is the CSV header and is skipped. Each remaining line is
 * parsed as `"from","to","type",confidence,"reason",step`; rows that do not
 * parse, reference unknown tables, or fail to insert are skipped silently
 * (this path is best-effort by design).
 *
 * @param validRelLines CSV lines, header first
 * @param validTables Set of valid node table names
 * @param getNodeLabel Function to extract the label from a node ID
 */
const fallbackRelationshipInserts = async (validRelLines, validTables, getNodeLabel) => {
    if (!conn)
        return;
    // Hoisted out of the loop: both are identical for every row.
    const relRowPattern = /"([^"]*)","([^"]*)","([^"]*)",([0-9.]+),"([^"]*)",([0-9-]+)/;
    const esc = (s) => s.replace(/'/g, "''").replace(/\\/g, '\\\\').replace(/\n/g, '\\n').replace(/\r/g, '\\r');
    for (let i = 1; i < validRelLines.length; i++) {
        try {
            const match = validRelLines[i].match(relRowPattern);
            if (!match)
                continue;
            const [, fromId, toId, relType, confidenceStr, reason, stepStr] = match;
            const fromLabel = getNodeLabel(fromId);
            const toLabel = getNodeLabel(toId);
            if (!validTables.has(fromLabel) || !validTables.has(toLabel))
                continue;
            // Default only when the field is unparsable: a genuine confidence
            // of 0 must stay 0 (`|| 1.0` silently promoted it to 1.0).
            const parsedConfidence = Number.parseFloat(confidenceStr);
            const confidence = Number.isNaN(parsedConfidence) ? 1.0 : parsedConfidence;
            const parsedStep = Number.parseInt(stepStr, 10);
            const step = Number.isNaN(parsedStep) ? 0 : parsedStep;
            await conn.query(`
                MATCH (a:${escapeTableName(fromLabel)} {id: '${esc(fromId)}' }),
                      (b:${escapeTableName(toLabel)} {id: '${esc(toId)}' })
                CREATE (a)-[:${REL_TABLE_NAME} {type: '${esc(relType)}', confidence: ${confidence}, reason: '${esc(reason)}', step: ${step}}]->(b)
            `);
        }
        catch {
            // skip — a single bad row must not abort the remaining inserts
        }
    }
};
497
/** Node tables whose schema includes an `isExported` column (the TypeScript/JS-native types). */
const TABLES_WITH_EXPORTED = new Set(['Function', 'Class', 'Interface', 'Method', 'CodeElement']);
505
/**
 * Build the COPY statement that loads one node table from a CSV file.
 *
 * The column list must match the CSV layout of that table. Per RFC 0001
 * Phase 2, every content-bearing table lists `contentEncoding` right after
 * `content`; tables without a content column (Folder, Community, Process,
 * Route, Tool) do not.
 *
 * @param table Node table name (unescaped)
 * @param filePath CSV path, already normalized for COPY
 * @returns The complete COPY statement
 */
const getCopyQuery = (table, filePath) => {
    // Tables with a bespoke column layout.
    const bespokeColumns = new Map([
        ['File', 'id, name, filePath, content, contentEncoding'],
        ['Folder', 'id, name, filePath'],
        ['Community', 'id, label, heuristicLabel, keywords, description, enrichedBy, cohesion, symbolCount'],
        ['Process', 'id, label, heuristicLabel, processType, stepCount, communities, entryPointId, terminalId'],
        ['Section', 'id, name, filePath, startLine, endLine, level, content, contentEncoding, description'],
        ['Route', 'id, name, filePath, responseKeys, errorKeys, middleware'],
        ['Tool', 'id, name, filePath, description'],
        ['Method', 'id, name, filePath, startLine, endLine, isExported, content, contentEncoding, description, parameterCount, returnType'],
    ]);
    let columns = bespokeColumns.get(table);
    if (columns === undefined) {
        // TypeScript/JS code element tables have isExported; the
        // multi-language tables (Struct, Impl, Trait, Macro, etc.) do not.
        columns = TABLES_WITH_EXPORTED.has(table)
            ? 'id, name, filePath, startLine, endLine, isExported, content, contentEncoding, description'
            : 'id, name, filePath, startLine, endLine, content, contentEncoding, description';
    }
    return `COPY ${escapeTableName(table)}(${columns}) FROM "${filePath}" ${COPY_CSV_OPTS}`;
};
542
/**
 * Insert a single node with a CREATE statement.
 *
 * When `dbPath` is given, a temporary Database/Connection is opened for this
 * one query and closed afterwards; otherwise the module-level connection is
 * used. Failures are logged with console.error and reported as `false`.
 *
 * @param label - Node type (File, Function, Class, etc.)
 * @param properties - Node properties
 * @param dbPath - Path to the database (optional if already initialized)
 * @returns true when the node was inserted, false on any failure or when no
 *          connection is available
 * @throws When neither `dbPath` nor an initialized module-level database exists
 */
export const insertNodeToCgdb = async (label, properties, dbPath) => {
    if (!dbPath && !db) {
        throw new Error('LadybugDB not initialized. Provide dbPath or call initCgdb first.');
    }
    try {
        const escapeValue = (v) => {
            if (v === null || v === undefined)
                return 'NULL';
            if (typeof v === 'number')
                return String(v);
            // Escape backslashes first (for Windows paths), then single quotes
            return `'${String(v).replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/\n/g, '\\n').replace(/\r/g, '\\r')}'`;
        };
        // Numeric fields are interpolated unquoted, so coerce them to a finite
        // number; missing, zero, or non-numeric input falls back to the default
        // and arbitrary text can never reach the query.
        const numberOr = (v, fallback) => {
            const n = Number(v);
            return Number.isFinite(n) && n !== 0 ? n : fallback;
        };
        const t = escapeTableName(label);
        const id = escapeValue(properties.id);
        const name = escapeValue(properties.name);
        const filePath = escapeValue(properties.filePath);
        const content = escapeValue(properties.content || '');
        const startLine = numberOr(properties.startLine, 0);
        const endLine = numberOr(properties.endLine, 0);
        const descPart = properties.description
            ? `, description: ${escapeValue(properties.description)}`
            : '';
        // Build the CREATE statement for this node type
        let query;
        if (label === 'File') {
            query = `CREATE (n:File {id: ${id}, name: ${name}, filePath: ${filePath}, content: ${content}})`;
        }
        else if (label === 'Folder') {
            query = `CREATE (n:Folder {id: ${id}, name: ${name}, filePath: ${filePath}})`;
        }
        else if (label === 'Section') {
            query = `CREATE (n:Section {id: ${id}, name: ${name}, filePath: ${filePath}, startLine: ${startLine}, endLine: ${endLine}, level: ${numberOr(properties.level, 1)}, content: ${content}${descPart}})`;
        }
        else if (TABLES_WITH_EXPORTED.has(label)) {
            query = `CREATE (n:${t} {id: ${id}, name: ${name}, filePath: ${filePath}, startLine: ${startLine}, endLine: ${endLine}, isExported: ${!!properties.isExported}, content: ${content}${descPart}})`;
        }
        else {
            // Multi-language tables (Struct, Impl, Trait, Macro, etc.) — no isExported
            query = `CREATE (n:${t} {id: ${id}, name: ${name}, filePath: ${filePath}, startLine: ${startLine}, endLine: ${endLine}, content: ${content}${descPart}})`;
        }
        // Use per-query connection if dbPath provided (avoids lock conflicts)
        if (dbPath) {
            const tempDb = new cgdb.Database(dbPath);
            let tempConn = null;
            try {
                // Created inside the try so tempDb is still closed when the
                // Connection constructor itself throws.
                tempConn = trackConnection(new cgdb.Connection(tempDb));
                const qr = await tempConn.query(query);
                await closeQueryResult(qr);
                return true;
            }
            finally {
                try {
                    if (tempConn)
                        await tempConn.close();
                }
                catch { }
                try {
                    await tempDb.close();
                }
                catch { }
            }
        }
        if (conn) {
            // Use existing persistent connection (when called from analyze)
            await conn.query(query);
            return true;
        }
        return false;
    }
    catch (e) {
        // Node may already exist or other error. Non-Error throwables have no
        // `.message`, so stringify them instead of logging `undefined`.
        console.error(`Failed to insert ${label} node:`, e instanceof Error ? e.message : String(e));
        return false;
    }
};
624
/**
 * Batch-upsert nodes through one temporary Database/Connection.
 *
 * Each node is written with MERGE ... SET so duplicates are updated rather
 * than rejected. A failing node is counted and skipped; nothing is logged
 * here (the original code notes that console.error corrupts MCP JSON-RPC).
 *
 * @param nodes - Array of {label, properties} to insert
 * @param dbPath - Path to the database
 * @returns Object with `inserted` and `failed` counts
 * @throws When the temporary database or connection cannot be opened
 */
export const batchInsertNodesToCgdb = async (nodes, dbPath) => {
    if (nodes.length === 0)
        return { inserted: 0, failed: 0 };
    const escapeValue = (v) => {
        if (v === null || v === undefined)
            return 'NULL';
        if (typeof v === 'number')
            return String(v);
        // Escape backslashes first (for Windows paths), then single quotes, then newlines
        return `'${String(v).replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/\n/g, '\\n').replace(/\r/g, '\\r')}'`;
    };
    // Numeric fields are interpolated unquoted, so coerce them to a finite
    // number; missing, zero, or non-numeric input falls back to the default.
    const numberOr = (v, fallback) => {
        const n = Number(v);
        return Number.isFinite(n) && n !== 0 ? n : fallback;
    };
    // Build the MERGE statement for one node.
    const buildMergeQuery = (label, properties) => {
        const t = escapeTableName(label);
        const id = escapeValue(properties.id);
        const name = escapeValue(properties.name);
        const filePath = escapeValue(properties.filePath);
        const content = escapeValue(properties.content || '');
        const startLine = numberOr(properties.startLine, 0);
        const endLine = numberOr(properties.endLine, 0);
        const descPart = properties.description
            ? `, n.description = ${escapeValue(properties.description)}`
            : '';
        if (label === 'File') {
            return `MERGE (n:File {id: ${id}}) SET n.name = ${name}, n.filePath = ${filePath}, n.content = ${content}`;
        }
        if (label === 'Folder') {
            return `MERGE (n:Folder {id: ${id}}) SET n.name = ${name}, n.filePath = ${filePath}`;
        }
        if (label === 'Section') {
            return `MERGE (n:Section {id: ${id}}) SET n.name = ${name}, n.filePath = ${filePath}, n.startLine = ${startLine}, n.endLine = ${endLine}, n.level = ${numberOr(properties.level, 1)}, n.content = ${content}${descPart}`;
        }
        if (TABLES_WITH_EXPORTED.has(label)) {
            return `MERGE (n:${t} {id: ${id}}) SET n.name = ${name}, n.filePath = ${filePath}, n.startLine = ${startLine}, n.endLine = ${endLine}, n.isExported = ${!!properties.isExported}, n.content = ${content}${descPart}`;
        }
        // Multi-language tables — no isExported
        return `MERGE (n:${t} {id: ${id}}) SET n.name = ${name}, n.filePath = ${filePath}, n.startLine = ${startLine}, n.endLine = ${endLine}, n.content = ${content}${descPart}`;
    };
    // Open a single connection for all inserts
    const tempDb = new cgdb.Database(dbPath);
    let tempConn = null;
    let inserted = 0;
    let failed = 0;
    try {
        // Created inside the try so tempDb is still closed when the
        // Connection constructor itself throws.
        tempConn = trackConnection(new cgdb.Connection(tempDb));
        for (const { label, properties } of nodes) {
            let qr;
            try {
                qr = await tempConn.query(buildMergeQuery(label, properties));
                inserted++;
            }
            catch (_e) {
                // Don't console.error here - it corrupts MCP JSON-RPC on stderr
                failed++;
                continue;
            }
            // Release the result before the connection/database close below,
            // matching the single-insert path; a close failure must not turn
            // a successful insert into a counted failure.
            try {
                await closeQueryResult(qr);
            }
            catch { }
        }
    }
    finally {
        try {
            if (tempConn)
                await tempConn.close();
        }
        catch { }
        try {
            await tempDb.close();
        }
        catch { }
    }
    return { inserted, failed };
};
697
+ /**
698
+ * Track every live QueryResult / PreparedStatement returned by the
699
+ * connection, and force-close them in `closeCgdb()` BEFORE `conn.close()`
700
+ * runs.
701
+ *
702
+ * Why this exists: per upstream Kuzu issue #5316 the close-order is
703
+ * QueryResult & PreparedStatement → Connection → Database
704
+ * Leaking any of these to GC means V8 finalizes the native handles AFTER
705
+ * `db.close()` has run, which corrupts memory and segfaults the process
706
+ * at exit (Windows: 0xC0000005, Linux: SIGSEGV). Kuzu was archived
707
+ * 2025-10-10 and LadybugDB inherits the same C++ core, so the
708
+ * close-discipline is on us — neither upstream is going to ship a fix.
709
+ *
710
+ * Per-call-site close (the `try/finally { closeQueryResult }` blocks
711
+ * below) handles the happy path. The tracking Set is a backstop for any
712
+ * call site that forgets, AND for the fire-and-forget DDL / COPY /
713
+ * extension-load queries that drop the QueryResult on the floor.
714
+ */
715
+ const liveCgdbHandles = new Set();
716
/**
 * Close a QueryResult, or each QueryResult of an array, and drop it from
 * the live-handle tracker.
 *
 * Falsy input is a no-op. Every handle is removed from `liveCgdbHandles`
 * before its `close` method (if it has one) is invoked, and any error
 * raised while closing is swallowed: this is best-effort cleanup.
 *
 * @param qr - A single result, an array of results, or a falsy value.
 */
const closeQueryResult = async (qr) => {
    if (qr) {
        const handles = Array.isArray(qr) ? qr : [qr];
        for (const handle of handles) {
            liveCgdbHandles.delete(handle);
            try {
                const closeFn = handle?.close;
                if (typeof closeFn === 'function') {
                    await closeFn.call(handle);
                }
            }
            catch {
                /* best-effort: a failing close must not mask the caller's result */
            }
        }
    }
};
732
/**
 * Close a single PreparedStatement and drop it from the live-handle tracker.
 *
 * Falsy input is a no-op. The statement is removed from `liveCgdbHandles`
 * first; its `close` method is then called if present, and any error it
 * raises is swallowed (best-effort cleanup).
 *
 * @param stmt - The statement to close, or a falsy value.
 */
const closeStmt = async (stmt) => {
    if (stmt) {
        liveCgdbHandles.delete(stmt);
        try {
            const closeFn = stmt?.close;
            if (typeof closeFn === 'function') {
                await closeFn.call(stmt);
            }
        }
        catch {
            /* best-effort */
        }
    }
};
745
/**
 * Patch a Connection so that whatever its `query`, `execute` and `prepare`
 * methods resolve to is registered in `liveCgdbHandles`.
 *
 * Array results are registered element by element (falsy elements are
 * skipped); non-object results are not registered at all. The connection is
 * marked with `__cgdbTracked`, so a second call on the same object returns
 * it untouched instead of wrapping the methods twice.
 *
 * @param rawConn - The connection to instrument (mutated in place).
 * @returns The same connection object.
 */
const trackConnection = (rawConn) => {
    if (rawConn.__cgdbTracked) {
        return rawConn;
    }
    rawConn.__cgdbTracked = true;
    // Register one resolved value (or each element of an array of them).
    const register = (value) => {
        if (!value || typeof value !== 'object') {
            return;
        }
        if (Array.isArray(value)) {
            for (const item of value) {
                if (item) {
                    liveCgdbHandles.add(item);
                }
            }
        }
        else {
            liveCgdbHandles.add(value);
        }
    };
    for (const methodName of ['query', 'execute', 'prepare']) {
        const original = rawConn[methodName];
        if (typeof original !== 'function') {
            continue;
        }
        // A regular function (not an arrow) so `this` stays the connection.
        rawConn[methodName] = async function (...args) {
            const outcome = await original.apply(this, args);
            register(outcome);
            return outcome;
        };
    }
    return rawConn;
};
774
/**
 * Run a Cypher query on the module-level connection and return all rows.
 *
 * `conn.query` may resolve to a single QueryResult or to an array of them
 * (multi-statement input); rows are read from the first one via `getAll()`,
 * and every returned result is closed before this function settles.
 *
 * @param cypher - The Cypher text to execute.
 * @returns The rows produced by `getAll()`.
 * @throws Error when no connection has been initialised.
 */
export const executeQuery = async (cypher) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    const raw = await conn.query(cypher);
    try {
        const first = Array.isArray(raw) ? raw[0] : raw;
        return await first.getAll();
    }
    finally {
        await closeQueryResult(raw);
    }
};
789
/**
 * Run a Cypher query and feed its rows to a callback one at a time.
 *
 * Rows are pulled with `hasNext()` / `getNext()` from the first QueryResult
 * returned by `conn.query`, and `onRow` is awaited for each row before the
 * next one is fetched.
 *
 * Cleanup goes through `closeQueryResult` rather than a direct
 * `result.close()`: that removes the handle(s) from the live-handle tracker
 * and closes every result of a multi-statement query, not just the first.
 * Close failures stay best-effort (swallowed inside `closeQueryResult`).
 *
 * @param cypher - The Cypher text to execute.
 * @param onRow - Callback invoked (and awaited) with each row.
 * @returns The number of rows delivered to `onRow`.
 * @throws Error when no connection has been initialised; errors thrown by
 *   the query or by `onRow` propagate after cleanup.
 */
export const streamQuery = async (cypher, onRow) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    const queryResult = await conn.query(cypher);
    const result = Array.isArray(queryResult) ? queryResult[0] : queryResult;
    let rowCount = 0;
    try {
        while (await result.hasNext()) {
            const row = await result.getNext();
            await onRow(row);
            rowCount++;
        }
        return rowCount;
    }
    finally {
        await closeQueryResult(queryResult);
    }
};
813
/**
 * Execute a single parameterized query (prepare/execute pattern).
 * Binding values as parameters keeps them out of the Cypher text, which
 * prevents Cypher injection.
 *
 * The prepared statement is closed on every exit path. `conn.execute` runs
 * inside the `try` so that a rejected execute no longer leaves the
 * statement open; the QueryResult (when one was produced) is closed before
 * the statement.
 *
 * @param cypher - Cypher text containing parameter placeholders.
 * @param params - Parameter values passed to `conn.execute`.
 * @returns The rows produced by `getAll()` on the first result.
 * @throws Error when no connection has been initialised or when the
 *   statement fails to prepare; execution errors propagate after cleanup.
 */
export const executePrepared = async (cypher, params) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    const stmt = await conn.prepare(cypher);
    if (!stmt.isSuccess()) {
        const errMsg = await stmt.getErrorMessage();
        await closeStmt(stmt);
        throw new Error(`Prepare failed: ${errMsg}`);
    }
    let queryResult;
    try {
        queryResult = await conn.execute(stmt, params);
        const result = Array.isArray(queryResult) ? queryResult[0] : queryResult;
        return await result.getAll();
    }
    finally {
        // closeQueryResult tolerates `undefined` (execute rejected).
        await closeQueryResult(queryResult);
        await closeStmt(stmt);
    }
};
837
/**
 * Execute the same Cypher text once per parameter set.
 *
 * The parameter sets are processed in sub-batches of four. Each sub-batch
 * prepares its own statement, executes every parameter set against it and
 * closes each QueryResult as soon as it is returned, then closes the
 * statement. An execution error aborts the rest of that sub-batch only: it
 * is reported with `console.warn` and processing continues with the next
 * sub-batch. A prepare failure, by contrast, is thrown to the caller.
 *
 * @param cypher - Cypher text containing parameter placeholders.
 * @param paramsList - Array of parameter objects, one per execution.
 * @throws Error when no connection has been initialised or a prepare fails.
 */
export const executeWithReusedStatement = async (cypher, paramsList) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    const SUB_BATCH_SIZE = 4;
    let offset = 0;
    // An empty list never enters the loop, so nothing is prepared.
    while (offset < paramsList.length) {
        const chunk = paramsList.slice(offset, offset + SUB_BATCH_SIZE);
        offset += SUB_BATCH_SIZE;
        const stmt = await conn.prepare(cypher);
        if (!stmt.isSuccess()) {
            const errMsg = await stmt.getErrorMessage();
            await closeStmt(stmt);
            throw new Error(`Prepare failed: ${errMsg}`);
        }
        try {
            for (const params of chunk) {
                // Close each QueryResult immediately instead of leaving it
                // to GC: results must be closed before the Connection and
                // the Database for the native binding to shut down safely.
                const executed = await conn.execute(stmt, params);
                await closeQueryResult(executed);
            }
        }
        catch (e) {
            // Log the error and continue with next batch
            console.warn('Batch execution error:', e);
        }
        finally {
            await closeStmt(stmt);
        }
    }
};
872
/**
 * Count the nodes and relationships currently stored.
 *
 * One `count(n)` query is issued per entry of `NODE_TABLES` and one
 * `count(r)` query for `REL_TABLE_NAME`, strictly one after another. A
 * query that fails contributes zero; every QueryResult is closed.
 *
 * @returns `{ nodes, edges }`; both are 0 when there is no connection.
 */
export const getCgdbStats = async () => {
    if (!conn) {
        return { nodes: 0, edges: 0 };
    }
    // Runs one count query. The Cypher text is built lazily inside the
    // `try` so that a failure while building it is ignored like any other.
    const readCount = async (buildCypher) => {
        let raw;
        try {
            raw = await conn.query(buildCypher());
            const first = Array.isArray(raw) ? raw[0] : raw;
            const rows = await first.getAll();
            if (rows.length === 0) {
                return 0;
            }
            return Number(rows[0]?.cnt ?? rows[0]?.[0] ?? 0);
        }
        catch {
            // ignore
            return 0;
        }
        finally {
            await closeQueryResult(raw);
        }
    };
    let nodes = 0;
    for (const tableName of NODE_TABLES) {
        nodes += await readCount(() => `MATCH (n:${escapeTableName(tableName)}) RETURN count(n) AS cnt`);
    }
    const edges = await readCount(() => `MATCH ()-[r:${REL_TABLE_NAME}]->() RETURN count(r) AS cnt`);
    return { nodes, edges };
};
911
/**
 * Load cached embeddings from LadybugDB before a rebuild.
 * Returns all embedding vectors so they can be re-inserted after the graph
 * is reloaded, avoiding expensive re-embedding of unchanged nodes.
 *
 * Schema handling:
 *  - A probe query selecting `chunkIndex` runs first. If it fails (old
 *    schema without that column, or no table), an empty cache is returned
 *    to force a full rebuild.
 *  - The main query also selects `contentHash`; when that fails with a
 *    missing-column/table error it is retried without `contentHash`, and
 *    every entry then carries `contentHash: undefined`.
 *
 * Both the probe result and the main result are closed explicitly in
 * `finally` blocks instead of being left to the live-handle backstop.
 *
 * @returns `{ embeddingNodeIds, embeddings }`: the Set of node ids seen and
 *   one entry per row that has an embedding. Any other failure yields
 *   whatever was collected so far (normally empty).
 */
export const loadCachedEmbeddings = async () => {
    if (!conn) {
        return { embeddingNodeIds: new Set(), embeddings: [] };
    }
    const embeddingNodeIds = new Set();
    const embeddings = [];
    let rows;
    try {
        // Schema migration detection: query with new columns to verify schema version.
        // Old schema only had (nodeId, embedding); new schema adds (id, chunkIndex, startLine, endLine, contentHash).
        // If the query fails (column missing), we return empty cache to force a full rebuild.
        let check;
        try {
            check = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex LIMIT 1`);
            const checkResult = Array.isArray(check) ? check[0] : check;
            await checkResult.getAll();
        }
        catch {
            return { embeddingNodeIds: new Set(), embeddings: [] };
        }
        finally {
            await closeQueryResult(check);
        }
        // Try to read contentHash alongside chunk columns
        let hasContentHash = true;
        try {
            rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.embedding AS embedding, e.contentHash AS contentHash`);
        }
        catch (err) {
            // Fallback for legacy DBs without contentHash column
            const msg = err?.message ?? '';
            if (isMissingColumnOrTableError(msg)) {
                hasContentHash = false;
                rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.embedding AS embedding`);
            }
            else {
                throw err;
            }
        }
        const result = Array.isArray(rows) ? rows[0] : rows;
        for (const row of await result.getAll()) {
            // Rows may be keyed by alias or by column position.
            const nodeId = String(row.nodeId ?? row[0] ?? '');
            if (!nodeId)
                continue;
            embeddingNodeIds.add(nodeId);
            const embedding = row.embedding ?? row[4];
            if (embedding) {
                embeddings.push({
                    nodeId,
                    chunkIndex: Number(row.chunkIndex ?? row[1] ?? 0),
                    startLine: Number(row.startLine ?? row[2] ?? 0),
                    endLine: Number(row.endLine ?? row[3] ?? 0),
                    // Normalise array-likes / iterables to a plain number[].
                    embedding: Array.isArray(embedding)
                        ? embedding.map(Number)
                        : Array.from(embedding).map(Number),
                    contentHash: hasContentHash ? (row.contentHash ?? row[5] ?? undefined) : undefined,
                });
            }
        }
    }
    catch {
        /* embedding table may not exist */
    }
    finally {
        await closeQueryResult(rows);
    }
    return { embeddingNodeIds, embeddings };
};
979
/**
 * Read the stored embedding hashes so the embedding pipeline can skip
 * unchanged nodes.
 *
 * The primary query selects the chunk-aware columns and `contentHash`. A
 * row maps to its hash only when chunkIndex, startLine and endLine are all
 * non-null and the hash is non-empty; otherwise it maps to
 * `STALE_HASH_SENTINEL`. If the primary query fails with a
 * missing-column/table error, a nodeId-only query is tried and every row is
 * marked stale.
 *
 * @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
 * @returns A Map of nodeId to contentHash (or the stale sentinel), or
 *   `undefined` when the query returns no rows or the table is missing.
 * @throws Re-throws any error that is not a missing-column/table error.
 */
export const fetchExistingEmbeddingHashes = async (execQuery) => {
    const isSet = (value) => value !== undefined && value !== null;
    try {
        const rows = await execQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.contentHash AS contentHash`);
        if (!rows || rows.length === 0) {
            return undefined;
        }
        const hashes = new Map();
        for (const row of rows) {
            const nodeId = row.nodeId ?? row[0];
            if (!nodeId) {
                continue;
            }
            const hasChunkMetadata = isSet(row.chunkIndex ?? row[1]) &&
                isSet(row.startLine ?? row[2]) &&
                isSet(row.endLine ?? row[3]);
            const hash = row.contentHash ?? row[4] ?? STALE_HASH_SENTINEL;
            // Empty/null contentHash or missing chunk metadata means legacy row — treat as stale.
            hashes.set(nodeId, hasChunkMetadata && hash ? hash : STALE_HASH_SENTINEL);
        }
        return hashes;
    }
    catch (err) {
        const msg = err?.message ?? '';
        if (!isMissingColumnOrTableError(msg)) {
            throw err;
        }
        // Legacy rows missing chunk-aware columns — treat every row as stale.
        try {
            const rows = await execQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId`);
            if (!rows || rows.length === 0) {
                return undefined;
            }
            const staleHashes = new Map();
            for (const row of rows) {
                const nodeId = row.nodeId ?? row[0];
                if (nodeId) {
                    staleHashes.set(nodeId, STALE_HASH_SENTINEL);
                }
            }
            console.log(`[embed] ${staleHashes.size} nodes in legacy DB (missing chunk-aware columns) — all treated as stale`);
            return staleHashes;
        }
        catch (fallbackErr) {
            const fallbackMsg = fallbackErr?.message ?? '';
            if (!isMissingColumnOrTableError(fallbackMsg)) {
                throw fallbackErr;
            }
            console.log(`[embed] CodeEmbedding table not yet present — full embedding run (${fallbackMsg})`);
            return undefined;
        }
    }
};
1041
/**
 * Shut down the module-level database state.
 *
 * Order matters: every tracked QueryResult / PreparedStatement is closed
 * first, then the Connection, then the Database. Closing the Database
 * while a handle is still outstanding corrupts memory in the native
 * binding and crashes the process at exit. All close errors are swallowed.
 * Afterwards the cached path, the extension-loaded flags and the FTS-index
 * cache are reset so a later re-initialisation starts fresh.
 */
export const closeCgdb = async () => {
    // Work on a snapshot and empty the tracker up front, so handles
    // registered or removed while we await do not disturb this loop.
    const pending = [...liveCgdbHandles];
    liveCgdbHandles.clear();
    for (const handle of pending) {
        try {
            const closeFn = handle?.close;
            if (typeof closeFn === 'function') {
                await closeFn.call(handle);
            }
        }
        catch {
            /* best-effort */
        }
    }
    if (conn) {
        try {
            await conn.close();
        }
        catch {
            /* best-effort */
        }
        finally {
            conn = null;
        }
    }
    if (db) {
        try {
            await db.close();
        }
        catch {
            /* best-effort */
        }
        finally {
            db = null;
        }
    }
    currentDbPath = null;
    ftsLoaded = false;
    vectorExtensionLoaded = false;
    ensuredFTSIndexes.clear();
};
1079
/**
 * Report whether both the module-level connection and database are set.
 *
 * @returns `true` only when neither `conn` nor `db` is `null`.
 */
export const isCgdbReady = () => !(conn === null || db === null);
1080
/**
 * Delete all nodes (and their relationships) for a specific file from LadybugDB.
 *
 * For every table in `NODE_TABLES` except `Community` and `Process` (which
 * have no `filePath`), the matching nodes are counted and then removed with
 * `DETACH DELETE`. Embedding rows whose `nodeId` starts with the file path
 * are deleted afterwards. Failures on an individual table, or on the
 * embedding table, are skipped.
 *
 * Resource handling:
 *  - Every QueryResult is closed right after use. With a per-call
 *    connection this is required: a result left in the tracker would only
 *    be closed later, after its temporary Database has been closed.
 *  - The temporary Database is opened inside the `try`, so it is closed
 *    even when creating the Connection throws.
 *
 * The path is embedded in a Cypher string literal, so backslashes as well
 * as single quotes are escaped (same scheme as `queryFTS`).
 *
 * @param filePath - The file path to delete nodes for
 * @param dbPath - Optional path to LadybugDB for per-query connection
 * @returns Object with counts of deleted nodes
 * @throws Error when no `dbPath` is given and no connection is initialised.
 */
export const deleteNodesForFile = async (filePath, dbPath) => {
    const usePerQuery = !!dbPath;
    if (!usePerQuery && !conn) {
        throw new Error('LadybugDB not initialized. Provide dbPath or call initCgdb first.');
    }
    let tempDb = null;
    let tempConn = null;
    try {
        // Set up connection (either use existing or create per-query)
        let targetConn = conn;
        if (usePerQuery) {
            tempDb = new cgdb.Database(dbPath);
            tempConn = trackConnection(new cgdb.Connection(tempDb));
            targetConn = tempConn;
        }
        let deletedNodes = 0;
        const escapedPath = filePath.replace(/\\/g, '\\\\').replace(/'/g, "''");
        for (const tableName of NODE_TABLES) {
            // Skip tables that don't have filePath (Community, Process)
            if (tableName === 'Community' || tableName === 'Process')
                continue;
            let countResult;
            let deleteResult;
            try {
                // First count how many we'll delete
                const tn = escapeTableName(tableName);
                countResult = await targetConn.query(`MATCH (n:${tn}) WHERE n.filePath = '${escapedPath}' RETURN count(n) AS cnt`);
                const result = Array.isArray(countResult) ? countResult[0] : countResult;
                const rows = await result.getAll();
                const count = Number(rows[0]?.cnt ?? rows[0]?.[0] ?? 0);
                if (count > 0) {
                    // Delete nodes (and implicitly their relationships via DETACH)
                    deleteResult = await targetConn.query(`MATCH (n:${tn}) WHERE n.filePath = '${escapedPath}' DETACH DELETE n`);
                    deletedNodes += count;
                }
            }
            catch (_e) {
                // Some tables may not support this query, skip
            }
            finally {
                await closeQueryResult(countResult);
                await closeQueryResult(deleteResult);
            }
        }
        // Also delete any embeddings for nodes in this file
        let embeddingResult;
        try {
            embeddingResult = await targetConn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) WHERE e.nodeId STARTS WITH '${escapedPath}' DELETE e`);
        }
        catch {
            // Embedding table may not exist or nodeId format may differ
        }
        finally {
            await closeQueryResult(embeddingResult);
        }
        return { deletedNodes };
    }
    finally {
        // Close per-query connection if used (Connection before Database)
        if (tempConn) {
            try {
                await tempConn.close();
            }
            catch {
                /* best-effort */
            }
        }
        if (tempDb) {
            try {
                await tempDb.close();
            }
            catch {
                /* best-effort */
            }
        }
    }
};
1151
/**
 * Expose the embedding table name used by this module's queries.
 *
 * @returns The value of `EMBEDDING_TABLE_NAME`.
 */
export const getEmbeddingTableName = () => {
    return EMBEDDING_TABLE_NAME;
};
1152
+ // ============================================================================
1153
+ // Full-Text Search (FTS) Functions
1154
+ // ============================================================================
1155
/**
 * Make sure the FTS extension is loaded on the module-level connection.
 *
 * A plain `LOAD EXTENSION fts` is tried first (no network required). If
 * that fails, `INSTALL fts` followed by another load is attempted. An
 * error whose message says the extension is already loaded, installed or
 * existing counts as success; any other failure is reported with
 * `console.error` and leaves the flag unset. The module-level `ftsLoaded`
 * flag makes repeated calls no-ops once loading has succeeded.
 *
 * @throws Error when no connection has been initialised.
 */
export const loadFTSExtension = async () => {
    if (ftsLoaded) {
        return;
    }
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    try {
        // Try loading locally first (no network required)
        await conn.query('LOAD EXTENSION fts');
        ftsLoaded = true;
        return;
    }
    catch {
        // Local load failed: fall through to install + load below.
    }
    try {
        // Install + load (requires network)
        await conn.query('INSTALL fts');
        await conn.query('LOAD EXTENSION fts');
        ftsLoaded = true;
    }
    catch (err) {
        const msg = err?.message || '';
        const alreadyPresent = ['already loaded', 'already installed', 'already exists']
            .some((marker) => msg.includes(marker));
        if (alreadyPresent) {
            ftsLoaded = true;
        }
        else {
            console.error('CodraGraph: FTS extension load failed:', msg);
        }
    }
};
1190
/**
 * Load the VECTOR extension (required before using QUERY_VECTOR_INDEX).
 * Safe to call multiple times: the module-level `vectorExtensionLoaded`
 * flag short-circuits once loading has succeeded.
 *
 * Mirrors `loadFTSExtension`: a plain `LOAD EXTENSION VECTOR` is tried
 * first, and `INSTALL VECTOR` + load is only the fallback. Running
 * `INSTALL` unconditionally meant an install failure (for example with no
 * network) skipped the load even when the extension was already installed.
 *
 * An error whose message says the extension is already loaded, installed
 * or existing counts as success; any other failure is reported with
 * `console.error` and leaves the flag unset.
 *
 * @throws Error when no connection has been initialised.
 */
export const loadVectorExtension = async () => {
    if (vectorExtensionLoaded)
        return;
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    try {
        // Try loading locally first (no network required)
        await conn.query('LOAD EXTENSION VECTOR');
        vectorExtensionLoaded = true;
        return;
    }
    catch {
        // Not loadable as-is: fall back to install + load below.
    }
    try {
        await conn.query('INSTALL VECTOR');
        await conn.query('LOAD EXTENSION VECTOR');
        vectorExtensionLoaded = true;
    }
    catch (err) {
        const msg = err?.message || '';
        if (msg.includes('already loaded') ||
            msg.includes('already installed') ||
            msg.includes('already exists')) {
            vectorExtensionLoaded = true;
        }
        else {
            console.error('CodraGraph: VECTOR extension load failed:', msg);
        }
    }
};
1217
/**
 * Create a full-text search index on a node table.
 *
 * Loads the FTS extension first, then issues `CALL CREATE_FTS_INDEX`. An
 * error whose message contains "already exists" is treated as success;
 * anything else is re-thrown. The arguments are interpolated into the
 * Cypher text as-is, so they must come from trusted code.
 *
 * @param tableName - The node table name (e.g., 'File', 'CodeSymbol')
 * @param indexName - Name for the FTS index
 * @param properties - List of properties to index (e.g., ['name', 'code'])
 * @param stemmer - Stemming algorithm (default: 'porter')
 * @throws Error when no connection has been initialised or index creation
 *   fails for a reason other than the index already existing.
 */
export const createFTSIndex = async (tableName, indexName, properties, stemmer = 'porter') => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    await loadFTSExtension();
    const quotedProps = properties.map((prop) => `'${prop}'`);
    const statement = `CALL CREATE_FTS_INDEX('${tableName}', '${indexName}', [${quotedProps.join(', ')}], stemmer := '${stemmer}')`;
    try {
        await conn.query(statement);
    }
    catch (e) {
        const alreadyThere = e.message?.includes('already exists');
        if (!alreadyThere) {
            throw e;
        }
    }
};
1240
/**
 * Create an FTS index at most once per process.
 *
 * The `tableName:indexName` key is remembered in `ensuredFTSIndexes` after
 * `createFTSIndex` succeeds, so later calls with the same pair return
 * without touching the database. This lets callers defer the fixed cost of
 * index creation to the first search instead of paying it during analyze.
 * `closeCgdb` clears the cache, so a re-initialised database starts fresh.
 *
 * @param tableName - The node table name
 * @param indexName - Name for the FTS index
 * @param properties - List of properties to index
 * @param stemmer - Stemming algorithm (default: 'porter')
 */
export const ensureFTSIndex = async (tableName, indexName, properties, stemmer = 'porter') => {
    const cacheKey = `${tableName}:${indexName}`;
    if (!ensuredFTSIndexes.has(cacheKey)) {
        await createFTSIndex(tableName, indexName, properties, stemmer);
        // Only cached after success, so a failed creation is retried next time.
        ensuredFTSIndexes.add(cacheKey);
    }
};
1258
/**
 * Query a full-text search index.
 *
 * The search text is escaped (backslashes, then single quotes) before it
 * is embedded in the Cypher call; the other arguments are interpolated
 * as-is. Each row is flattened into one object: the derived `nodeId`,
 * `name`, `filePath` and `score` fields come first and the node's own
 * properties are spread over them, so a node property with the same name
 * takes precedence.
 *
 * @param tableName - The node table name
 * @param indexName - FTS index name
 * @param query - Search query string
 * @param limit - Maximum results
 * @param conjunctive - If true, all terms must match (AND); if false, any term matches (OR)
 * @returns Array of { node properties, score }; empty when the error
 *   message says the index does not exist.
 * @throws Error when no connection has been initialised; other query
 *   errors are re-thrown.
 */
export const queryFTS = async (tableName, indexName, query, limit = 20, conjunctive = false) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    // Escape backslashes and single quotes to prevent Cypher injection
    const escapedQuery = query.replace(/\\/g, '\\\\').replace(/'/g, "''");
    const cypher = `
    CALL QUERY_FTS_INDEX('${tableName}', '${indexName}', '${escapedQuery}', conjunctive := ${conjunctive})
    RETURN node, score
    ORDER BY score DESC
    LIMIT ${limit}
  `;
    // Flatten one result row; rows may be keyed by alias or by position.
    const toHit = (row) => {
        const node = row.node || row[0] || {};
        const rawScore = row.score ?? row[1] ?? 0;
        const score = typeof rawScore === 'number' ? rawScore : parseFloat(rawScore) || 0;
        return {
            nodeId: node.nodeId || node.id || '',
            name: node.name || '',
            filePath: node.filePath || '',
            score,
            ...node,
        };
    };
    let queryResult;
    try {
        queryResult = await conn.query(cypher);
        const first = Array.isArray(queryResult) ? queryResult[0] : queryResult;
        const rows = await first.getAll();
        return rows.map((row) => toHit(row));
    }
    catch (e) {
        // Return empty if index doesn't exist yet
        if (!e.message?.includes('does not exist')) {
            throw e;
        }
        return [];
    }
    finally {
        await closeQueryResult(queryResult);
    }
};
1307
/**
 * Drop an FTS index.
 *
 * Errors from the drop itself are ignored (the index may not exist). The
 * matching `tableName:indexName` entry is also removed from
 * `ensuredFTSIndexes`; otherwise a later `ensureFTSIndex` call would see
 * the stale entry and skip re-creating the index that was just dropped.
 * Removing the entry is safe even when the drop failed, because
 * `createFTSIndex` tolerates an index that already exists.
 *
 * @param tableName - The node table name
 * @param indexName - FTS index name
 * @throws Error when no connection has been initialised.
 */
export const dropFTSIndex = async (tableName, indexName) => {
    if (!conn) {
        throw new Error('LadybugDB not initialized. Call initCgdb first.');
    }
    try {
        await conn.query(`CALL DROP_FTS_INDEX('${tableName}', '${indexName}')`);
    }
    catch {
        // Index may not exist
    }
    finally {
        // Same key format as ensureFTSIndex.
        ensuredFTSIndexes.delete(`${tableName}:${indexName}`);
    }
};