@codragraph/cli 2.1.1 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +36 -9
  2. package/dist/cli/ai-context.js +298 -1
  3. package/dist/cli/analyze.js +19 -2
  4. package/dist/cli/index.js +33 -12
  5. package/dist/cli/serve.d.ts +1 -0
  6. package/dist/cli/serve.js +3 -1
  7. package/dist/cli/setup.js +36 -19
  8. package/dist/cli/status.d.ts +13 -0
  9. package/dist/cli/status.js +99 -0
  10. package/dist/cli/tool.js +73 -33
  11. package/dist/config/ignore-service.js +3 -0
  12. package/dist/core/cgdb/pool-adapter.js +130 -20
  13. package/dist/core/graphstore/cgdb-row-source.js +3 -2
  14. package/dist/core/group/bridge-db.js +42 -10
  15. package/dist/core/ingestion/parsing-processor.js +7 -1
  16. package/dist/core/ingestion/pipeline-phases/parse-impl.js +4 -0
  17. package/dist/core/ingestion/workers/parse-worker.js +1 -1
  18. package/dist/core/ingestion/workers/worker-pool.d.ts +14 -1
  19. package/dist/core/ingestion/workers/worker-pool.js +33 -17
  20. package/dist/core/run-analyze.d.ts +20 -0
  21. package/dist/core/run-analyze.js +225 -1
  22. package/dist/core/search/bm25-index.d.ts +0 -11
  23. package/dist/core/search/bm25-index.js +7 -84
  24. package/dist/core/search/hybrid-search.js +11 -3
  25. package/dist/mcp/local/local-backend.d.ts +2 -0
  26. package/dist/mcp/local/local-backend.js +235 -18
  27. package/dist/mcp/resources.js +2 -2
  28. package/dist/server/api.d.ts +14 -2
  29. package/dist/server/api.js +90 -7
  30. package/dist/server/mcp-http.d.ts +22 -0
  31. package/dist/server/mcp-http.js +21 -2
  32. package/dist/server/web-dashboard.d.ts +28 -0
  33. package/dist/server/web-dashboard.js +61 -0
  34. package/dist/web/assets/agent-D5lb0zXz.js +1089 -0
  35. package/dist/web/assets/architectureDiagram-EMZXCZ2Q-CZtc99v_.js +36 -0
  36. package/dist/web/assets/blockDiagram-IGV67L2C-BtoUp-6Y.js +132 -0
  37. package/dist/web/assets/c4Diagram-DFAF54RM-C4Hl3J2U.js +10 -0
  38. package/dist/web/assets/chunk-3GS5O3IE-DkUjU0WD.js +231 -0
  39. package/dist/web/assets/chunk-3YCYZ6SJ-CQkVgT_z.js +1 -0
  40. package/dist/web/assets/chunk-7RZVMHOQ-BitYcNVR.js +338 -0
  41. package/dist/web/assets/chunk-AEOMTBSW-BgTIXPsY.js +1 -0
  42. package/dist/web/assets/chunk-H3VCZNTA-Cx5XV_aC.js +13 -0
  43. package/dist/web/assets/chunk-HN6EAY2L-BBnyTNdB.js +1 -0
  44. package/dist/web/assets/chunk-KSICW3F5-BYzvDLNI.js +15 -0
  45. package/dist/web/assets/chunk-O5ABG6QK-dHwHzA6n.js +1 -0
  46. package/dist/web/assets/chunk-PK6DOVAG-CvsEnugt.js +206 -0
  47. package/dist/web/assets/chunk-RWUO3TPN-BgRTY0_k.js +1 -0
  48. package/dist/web/assets/chunk-TBF5ZNIQ-DL5stGM1.js +1 -0
  49. package/dist/web/assets/chunk-TU3PZOEN-RLyvLcv-.js +1 -0
  50. package/dist/web/assets/classDiagram-PPOCWD7C-DTr8QIOf.js +1 -0
  51. package/dist/web/assets/classDiagram-v2-23LJLIIU-DTr8QIOf.js +1 -0
  52. package/dist/web/assets/context-builder-22jU3V56.js +16 -0
  53. package/dist/web/assets/cose-bilkent-PNC4W37J-DVhePRYg.js +1 -0
  54. package/dist/web/assets/dagre-E77IOHMT-Dzx0A6ZU.js +4 -0
  55. package/dist/web/assets/diagram-H7BISOXX-CC9pRew1.js +43 -0
  56. package/dist/web/assets/diagram-JC5VWROH-Bau_i9tf.js +24 -0
  57. package/dist/web/assets/diagram-LXUTUG65-D9_FM2Gt.js +10 -0
  58. package/dist/web/assets/diagram-WEHSV5V5-BMlayouL.js +24 -0
  59. package/dist/web/assets/erDiagram-GCSMX5X6-C3dhDFA8.js +85 -0
  60. package/dist/web/assets/flowDiagram-OTCZ4VVT-CWSFWmhr.js +162 -0
  61. package/dist/web/assets/ganttDiagram-MUNLMDZQ-D3a67Yol.js +292 -0
  62. package/dist/web/assets/gitGraphDiagram-3HKGZ4G3-7jmry-vM.js +106 -0
  63. package/dist/web/assets/index-BgeqpYgd.js +1415 -0
  64. package/dist/web/assets/index-CT0GtFLZ.css +1 -0
  65. package/dist/web/assets/infoDiagram-MN7RKWGX-G7lhP0Ib.js +2 -0
  66. package/dist/web/assets/ishikawaDiagram-YMYX4NHK-DUoJvNP2.js +70 -0
  67. package/dist/web/assets/journeyDiagram-SO5T7YLQ-RMFPNNqz.js +139 -0
  68. package/dist/web/assets/kanban-definition-LJHFXRCJ-BzpDs1K9.js +89 -0
  69. package/dist/web/assets/katex-GD7MH7QM-DBQvrix-.js +261 -0
  70. package/dist/web/assets/mindmap-definition-2EUWGEK5-Bk0O4roa.js +96 -0
  71. package/dist/web/assets/pieDiagram-3IATQBI2-DKU7kpgS.js +30 -0
  72. package/dist/web/assets/quadrantDiagram-E256RVCF-BY0TGWCS.js +7 -0
  73. package/dist/web/assets/requirementDiagram-M5DCFWZL-DLHOVTSv.js +84 -0
  74. package/dist/web/assets/sankeyDiagram-L3NBLAOT-DVMj5rX2.js +10 -0
  75. package/dist/web/assets/sequenceDiagram-ZOUHS735-CJC73bV-.js +157 -0
  76. package/dist/web/assets/stateDiagram-MLPALWAM-BCFyESls.js +1 -0
  77. package/dist/web/assets/stateDiagram-v2-B5LQ5ZB2-DahzzIca.js +1 -0
  78. package/dist/web/assets/timeline-definition-5SPVSISX-TRSDRgPw.js +120 -0
  79. package/dist/web/assets/vennDiagram-IE5QUKF5-DNy7HRBM.js +34 -0
  80. package/dist/web/assets/wardley-RL74JXVD-BCRCBASE-B-eZEzf9.js +161 -0
  81. package/dist/web/assets/wardleyDiagram-XU3VSMPF-BP-r1xzR.js +20 -0
  82. package/dist/web/assets/xychartDiagram-ZHJ5623Y-Dr9r7a35.js +7 -0
  83. package/dist/web/codragraph-logo-512.png +0 -0
  84. package/dist/web/codragraph-logo.png +0 -0
  85. package/dist/web/favicon.png +0 -0
  86. package/dist/web/index.html +36 -0
  87. package/hooks/claude/codragraph-hook.cjs +18 -110
  88. package/hooks/claude/pre-tool-use.sh +6 -1
  89. package/package.json +3 -1
  90. package/scripts/build.js +62 -4
  91. package/scripts/patch-tree-sitter-swift.cjs +0 -1
  92. package/skills/codragraph-cli.md +1 -1
  93. package/vendor/leiden/index.cjs +272 -285
  94. package/vendor/leiden/utils.cjs +264 -274
  95. package/dist/_shared/lbug/schema-constants.d.ts +0 -16
  96. package/dist/_shared/lbug/schema-constants.d.ts.map +0 -1
  97. package/dist/_shared/lbug/schema-constants.js +0 -67
  98. package/dist/_shared/lbug/schema-constants.js.map +0 -1
  99. package/dist/core/graphstore/lbug-row-source.d.ts +0 -19
  100. package/dist/core/graphstore/lbug-row-source.js +0 -141
  101. package/dist/core/lbug/content-read.d.ts +0 -46
  102. package/dist/core/lbug/content-read.js +0 -64
  103. package/dist/core/lbug/csv-generator.d.ts +0 -29
  104. package/dist/core/lbug/csv-generator.js +0 -492
  105. package/dist/core/lbug/lbug-adapter.d.ts +0 -176
  106. package/dist/core/lbug/lbug-adapter.js +0 -1320
  107. package/dist/core/lbug/pool-adapter.d.ts +0 -93
  108. package/dist/core/lbug/pool-adapter.js +0 -550
  109. package/dist/core/lbug/schema.d.ts +0 -62
  110. package/dist/core/lbug/schema.js +0 -502
  111. package/dist/mcp/core/lbug-adapter.d.ts +0 -5
  112. package/dist/mcp/core/lbug-adapter.js +0 -5
@@ -74,7 +74,7 @@ export function findContractNode(index, repo, role, symbolUid, filePath, symbolN
74
74
  export async function openBridgeDb(dbPath) {
75
75
  const parentDir = path.dirname(dbPath);
76
76
  await fsp.mkdir(parentDir, { recursive: true });
77
- const db = new cgdb.Database(dbPath, 0, false, false); // writable
77
+ const db = new cgdb.Database(dbPath, 0, false, false, BRIDGE_MAX_DB_SIZE_BYTES); // writable
78
78
  const conn = new cgdb.Connection(db);
79
79
  return { _db: db, _conn: conn, groupDir: parentDir };
80
80
  }
@@ -238,6 +238,25 @@ export async function retryRename(src, dst, attempts = 3) {
238
238
  }
239
239
  }
240
240
  }
241
+ async function removeBridgeArtifacts(basePath) {
242
+ for (const candidate of [basePath, `${basePath}.wal`, `${basePath}.lock`]) {
243
+ try {
244
+ await fsp.rm(candidate, { recursive: true, force: true });
245
+ }
246
+ catch {
247
+ /* ignore */
248
+ }
249
+ }
250
+ }
251
+ async function renameBridgeArtifactIfExists(src, dst) {
252
+ try {
253
+ await fsp.access(src);
254
+ }
255
+ catch {
256
+ return;
257
+ }
258
+ await retryRename(src, dst);
259
+ }
241
260
  /* ------------------------------------------------------------------ */
242
261
  /* writeBridgeMeta / readBridgeMeta */
243
262
  /* ------------------------------------------------------------------ */
@@ -261,6 +280,10 @@ export async function readBridgeMeta(groupDir) {
261
280
  }
262
281
  }
263
282
  const MAX_SAMPLE_ERRORS = 10;
283
+ // LadybugDB defaults maxDBSize to an 8 TiB mmap window on some platforms.
284
+ // Bridge databases are small contract registries, so cap the mapping to keep
285
+ // CI and low-resource user machines from failing before the first query.
286
+ const BRIDGE_MAX_DB_SIZE_BYTES = 512 * 1024 * 1024;
264
287
  function errMessage(err) {
265
288
  if (err instanceof Error)
266
289
  return err.message;
@@ -293,13 +316,9 @@ export async function writeBridge(groupDir, input) {
293
316
  report.sampleErrors.push({ kind, id, message: errMessage(err) });
294
317
  }
295
318
  };
296
- // Clean up any leftover tmp
297
- try {
298
- await fsp.rm(tmpPath, { recursive: true, force: true });
299
- }
300
- catch {
301
- /* ignore */
302
- }
319
+ // Clean up any leftover tmp plus native sidecars. LadybugDB stores WAL/lock
320
+ // files next to the database path, not inside it.
321
+ await removeBridgeArtifacts(tmpPath);
303
322
  // 1. Create temp DB, insert all data.
304
323
  //
305
324
  // Everything after `openBridgeDb` must run inside a try/finally so that
@@ -440,16 +459,27 @@ export async function writeBridge(groupDir, input) {
440
459
  }
441
460
  }
442
461
  // 3. Atomic swap: old→.bak, tmp→final, rm .bak
462
+ // Keep LadybugDB sidecars paired with the path rename. WAL files live next
463
+ // to the DB path, so renaming only the base path can hide fresh schema/data.
464
+ await removeBridgeArtifacts(bakPath);
443
465
  try {
444
466
  await fsp.access(finalPath);
445
467
  await retryRename(finalPath, bakPath);
468
+ await renameBridgeArtifactIfExists(`${finalPath}.wal`, `${bakPath}.wal`);
446
469
  }
447
470
  catch {
448
471
  /* no existing db */
449
472
  }
450
473
  await retryRename(tmpPath, finalPath);
474
+ await renameBridgeArtifactIfExists(`${tmpPath}.wal`, `${finalPath}.wal`);
475
+ try {
476
+ await fsp.rm(`${tmpPath}.lock`, { force: true });
477
+ }
478
+ catch {
479
+ /* ignore */
480
+ }
451
481
  try {
452
- await fsp.rm(bakPath, { recursive: true, force: true });
482
+ await removeBridgeArtifacts(bakPath);
453
483
  }
454
484
  catch {
455
485
  /* ignore */
@@ -480,6 +510,8 @@ export async function openBridgeDbReadOnly(groupDir) {
480
510
  try {
481
511
  await fsp.access(bakPath);
482
512
  await retryRename(bakPath, dbPath);
513
+ await renameBridgeArtifactIfExists(`${bakPath}.wal`, `${dbPath}.wal`);
514
+ await fsp.rm(`${bakPath}.lock`, { force: true });
483
515
  }
484
516
  catch {
485
517
  return null;
@@ -506,7 +538,7 @@ export async function openBridgeDbReadOnly(groupDir) {
506
538
  let db;
507
539
  let conn;
508
540
  try {
509
- db = new cgdb.Database(dbPath, 0, false, true); // readOnly
541
+ db = new cgdb.Database(dbPath, 0, false, true, BRIDGE_MAX_DB_SIZE_BYTES); // readOnly
510
542
  conn = new cgdb.Connection(db);
511
543
  return { _db: db, _conn: conn, groupDir };
512
544
  }
@@ -567,7 +567,13 @@ scopeTreeCache, onFileProgress, workerPool) => {
567
567
  return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
568
568
  }
569
569
  catch (err) {
570
- console.warn('Worker pool parsing failed, falling back to sequential:', err instanceof Error ? err.message : err);
570
+ console.warn('Worker pool parsing failed for this chunk, falling back to sequential:', err instanceof Error ? err.message : err);
571
+ try {
572
+ await workerPool.terminate();
573
+ }
574
+ catch (terminateErr) {
575
+ console.warn('Worker pool termination after parsing failure failed:', terminateErr instanceof Error ? terminateErr.message : terminateErr);
576
+ }
571
577
  }
572
578
  }
573
579
  // Fallback: sequential parsing (no pre-extracted data)
@@ -170,6 +170,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
170
170
  const chunkFiles = chunkPaths
171
171
  .filter((p) => chunkContents.has(p))
172
172
  .map((p) => ({ path: p, content: chunkContents.get(p) }));
173
+ const usedWorkerPoolForChunk = workerPool !== undefined;
173
174
  const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, (current, _total, filePath) => {
174
175
  const globalCurrent = filesParsedSoFar + current;
175
176
  const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
@@ -185,6 +186,9 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
185
186
  },
186
187
  });
187
188
  }, workerPool);
189
+ if (usedWorkerPoolForChunk && !chunkWorkerData) {
190
+ workerPool = undefined;
191
+ }
188
192
  const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
189
193
  if (chunkWorkerData) {
190
194
  await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
@@ -452,7 +452,7 @@ const processBatch = (files, onProgress) => {
452
452
  }
453
453
  let totalProcessed = 0;
454
454
  let lastReported = 0;
455
- const PROGRESS_INTERVAL = 100; // report every 100 files
455
+ const PROGRESS_INTERVAL = 25; // report often enough to keep worker idle timers fresh
456
456
  const onFileProcessed = onProgress
457
457
  ? () => {
458
458
  totalProcessed++;
@@ -10,7 +10,20 @@ export interface WorkerPool {
10
10
  /** Number of workers in the pool */
11
11
  readonly size: number;
12
12
  }
13
+ export interface WorkerPoolOptions {
14
+ /**
15
+ * Max files to send to a worker in one postMessage. Lower values reduce
16
+ * structured-clone memory spikes and give the main thread more chances to
17
+ * observe progress on large repos.
18
+ */
19
+ subBatchSize?: number;
20
+ /**
21
+ * Idle timeout while waiting for a worker response. Reset by worker progress
22
+ * so slow-but-moving chunks do not get retried sequentially.
23
+ */
24
+ subBatchIdleTimeoutMs?: number;
25
+ }
13
26
  /**
14
27
  * Create a pool of worker threads.
15
28
  */
16
- export declare const createWorkerPool: (workerUrl: URL, poolSize?: number) => WorkerPool;
29
+ export declare const createWorkerPool: (workerUrl: URL, poolSize?: number, options?: WorkerPoolOptions) => WorkerPool;
@@ -6,14 +6,24 @@ import { fileURLToPath } from 'node:url';
6
6
  * Max files to send to a worker in a single postMessage.
7
7
  * Keeps structured-clone memory bounded per sub-batch.
8
8
  */
9
- const SUB_BATCH_SIZE = 1500;
10
- /** Per sub-batch timeout. If a single sub-batch takes longer than this,
11
- * likely a pathological file (e.g. minified 50MB JS). Fail fast. */
12
- const SUB_BATCH_TIMEOUT_MS = 30_000;
9
+ const DEFAULT_SUB_BATCH_SIZE = 250;
10
+ /**
11
+ * Idle timeout while waiting for a worker response. This is not a wall-clock
12
+ * limit: worker progress resets it. Large repos can legitimately need more
13
+ * than 30s for a chunk, but a wedged parser should still fall back.
14
+ */
15
+ const DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS = 120_000;
16
+ const positiveIntFromEnv = (name, fallback) => {
17
+ const raw = process.env[name];
18
+ if (!raw)
19
+ return fallback;
20
+ const parsed = Number.parseInt(raw, 10);
21
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
22
+ };
13
23
  /**
14
24
  * Create a pool of worker threads.
15
25
  */
16
- export const createWorkerPool = (workerUrl, poolSize) => {
26
+ export const createWorkerPool = (workerUrl, poolSize, options = {}) => {
17
27
  // Validate worker script exists before spawning to prevent uncaught
18
28
  // MODULE_NOT_FOUND crashes in worker threads (e.g. when running from src/ via vitest)
19
29
  const workerPath = fileURLToPath(workerUrl);
@@ -21,6 +31,10 @@ export const createWorkerPool = (workerUrl, poolSize) => {
21
31
  throw new Error(`Worker script not found: ${workerPath}`);
22
32
  }
23
33
  const size = poolSize ?? Math.min(8, Math.max(1, os.cpus().length - 1));
34
+ const subBatchSize = options.subBatchSize ??
35
+ positiveIntFromEnv('CODRAGRAPH_WORKER_SUB_BATCH_SIZE', DEFAULT_SUB_BATCH_SIZE);
36
+ const subBatchIdleTimeoutMs = options.subBatchIdleTimeoutMs ??
37
+ positiveIntFromEnv('CODRAGRAPH_WORKER_IDLE_TIMEOUT_MS', DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS);
24
38
  const workers = [];
25
39
  for (let i = 0; i < size; i++) {
26
40
  workers.push(new Worker(workerUrl));
@@ -38,41 +52,43 @@ export const createWorkerPool = (workerUrl, poolSize) => {
38
52
  const worker = workers[i];
39
53
  return new Promise((resolve, reject) => {
40
54
  let settled = false;
41
- let subBatchTimer = null;
55
+ let workerIdleTimer = null;
42
56
  const cleanup = () => {
43
- if (subBatchTimer)
44
- clearTimeout(subBatchTimer);
57
+ if (workerIdleTimer)
58
+ clearTimeout(workerIdleTimer);
45
59
  worker.removeListener('message', handler);
46
60
  worker.removeListener('error', errorHandler);
47
61
  worker.removeListener('exit', exitHandler);
48
62
  };
49
- const resetSubBatchTimer = () => {
50
- if (subBatchTimer)
51
- clearTimeout(subBatchTimer);
52
- subBatchTimer = setTimeout(() => {
63
+ const resetWorkerIdleTimer = () => {
64
+ if (workerIdleTimer)
65
+ clearTimeout(workerIdleTimer);
66
+ workerIdleTimer = setTimeout(() => {
53
67
  if (!settled) {
54
68
  settled = true;
55
69
  cleanup();
56
- reject(new Error(`Worker ${i} sub-batch timed out after ${SUB_BATCH_TIMEOUT_MS / 1000}s (chunk: ${chunk.length} items).`));
70
+ reject(new Error(`Worker ${i} was idle for ${subBatchIdleTimeoutMs / 1000}s while waiting for a response (chunk: ${chunk.length} items).`));
57
71
  }
58
- }, SUB_BATCH_TIMEOUT_MS);
72
+ }, subBatchIdleTimeoutMs);
59
73
  };
60
74
  let subBatchIdx = 0;
61
75
  const sendNextSubBatch = () => {
62
- const start = subBatchIdx * SUB_BATCH_SIZE;
76
+ const start = subBatchIdx * subBatchSize;
63
77
  if (start >= chunk.length) {
78
+ resetWorkerIdleTimer();
64
79
  worker.postMessage({ type: 'flush' });
65
80
  return;
66
81
  }
67
- const subBatch = chunk.slice(start, start + SUB_BATCH_SIZE);
82
+ const subBatch = chunk.slice(start, start + subBatchSize);
68
83
  subBatchIdx++;
69
- resetSubBatchTimer();
84
+ resetWorkerIdleTimer();
70
85
  worker.postMessage({ type: 'sub-batch', files: subBatch });
71
86
  };
72
87
  const handler = (msg) => {
73
88
  if (settled)
74
89
  return;
75
90
  if (msg.type === 'progress') {
91
+ resetWorkerIdleTimer();
76
92
  workerProgress[i] = msg.filesProcessed;
77
93
  if (onProgress) {
78
94
  const total = workerProgress.reduce((a, b) => a + b, 0);
@@ -8,6 +8,7 @@
8
8
  * IMPORTANT: This module must NEVER call process.exit(). The caller (CLI
9
9
  * wrapper or server worker) is responsible for process lifecycle.
10
10
  */
11
+ import { type RepoMeta } from '../storage/repo-manager.js';
11
12
  import type { ContentEncoding } from '@codragraph/graphstore';
12
13
  export interface AnalyzeCallbacks {
13
14
  onProgress: (phase: string, percent: number, message: string) => void;
@@ -68,10 +69,29 @@ export interface AnalyzeResult {
68
69
  embeddings?: number;
69
70
  };
70
71
  alreadyUpToDate?: boolean;
72
+ /** User-facing explanation for a reused index fast path. */
73
+ reuseReason?: string;
74
+ /** True when the git commit advanced but indexed inputs did not. */
75
+ reusedExistingIndex?: boolean;
71
76
  /** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
72
77
  pipelineResult?: any;
73
78
  }
79
+ export interface AnalyzeChangedPath {
80
+ /** Git name-status token, e.g. M, A, D, R100. */
81
+ status: string;
82
+ /** Current path for additions/modifications, or deleted path for deletions. */
83
+ path: string;
84
+ /** Previous path for renames/copies. */
85
+ previousPath?: string;
86
+ }
74
87
  export declare const PHASE_LABELS: Record<string, string>;
88
+ export declare const parseGitNameStatus: (raw: string) => AnalyzeChangedPath[];
89
+ export declare const listChangedPathsBetweenCommits: (repoPath: string, fromRef: string, toRef: string) => AnalyzeChangedPath[] | null;
90
+ export declare const isGeneratedAgentContextPath: (filePath: string) => boolean;
91
+ export declare const isGraphContentPath: (filePath: string) => boolean;
92
+ export declare const changedPathAffectsGraph: (change: AnalyzeChangedPath) => boolean;
93
+ export declare const getGraphRelevantChangedPaths: (changes: readonly AnalyzeChangedPath[]) => AnalyzeChangedPath[];
94
+ export declare const getAnalyzeConfigRebuildReason: (existingMeta: Pick<RepoMeta, "compress" | "stats">, options: Pick<AnalyzeOptions, "compress" | "embeddings">) => string | null;
75
95
  /**
76
96
  * Run the full CodraGraph analysis pipeline.
77
97
  *
@@ -10,18 +10,52 @@
10
10
  */
11
11
  import path from 'path';
12
12
  import fs from 'fs/promises';
13
+ import { execFileSync } from 'node:child_process';
13
14
  import * as fsSync from 'node:fs';
14
15
  import * as v8 from 'node:v8';
16
+ import { getLanguageFromFilename } from '../_shared/index.js';
15
17
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
16
18
  import { initCgdb, loadGraphToCgdb, getCgdbStats, executeQuery, executeWithReusedStatement, closeCgdb, loadCachedEmbeddings, } from './cgdb/cgdb-adapter.js';
17
19
  import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
18
20
  import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
21
+ import { shouldIgnorePath } from '../config/ignore-service.js';
19
22
  import { recordAnalysisSnapshot } from './graphstore/index.js';
20
23
  import { generateAIContextFiles } from '../cli/ai-context.js';
21
24
  import { EMBEDDING_TABLE_NAME } from './cgdb/schema.js';
22
25
  import { STALE_HASH_SENTINEL } from './cgdb/schema.js';
23
26
  /** Threshold: auto-skip embeddings for repos with more nodes than this */
24
27
  const EMBEDDING_NODE_LIMIT = 50_000;
28
+ const GENERATED_AGENT_CONTEXT_PATHS = new Set(['agents.md', 'claude.md']);
29
+ const GENERATED_AGENT_CONTEXT_PREFIXES = [
30
+ '.claude/skills/generated/',
31
+ '.cursor/rules/codragraph-generated/',
32
+ ];
33
+ const IGNORE_CONTROL_FILES = new Set(['.gitignore', '.codragraphignore']);
34
+ const GRAPH_CONFIG_BASENAMES = new Set([
35
+ 'package.json',
36
+ 'tsconfig.json',
37
+ 'jsconfig.json',
38
+ 'go.mod',
39
+ 'cargo.toml',
40
+ 'pyproject.toml',
41
+ 'requirements.txt',
42
+ 'composer.json',
43
+ 'gemfile',
44
+ 'pom.xml',
45
+ 'build.gradle',
46
+ 'build.gradle.kts',
47
+ 'settings.gradle',
48
+ 'settings.gradle.kts',
49
+ 'pubspec.yaml',
50
+ 'pubspec.yml',
51
+ 'mix.exs',
52
+ 'rebar.config',
53
+ 'cmakelists.txt',
54
+ 'makefile',
55
+ 'dockerfile',
56
+ ]);
57
+ const GRAPH_CONFIG_PATTERNS = [/^tsconfig\..+\.json$/i, /^jsconfig\..+\.json$/i];
58
+ const MARKDOWN_EXTENSIONS = new Set(['.md', '.mdx']);
25
59
  export const PHASE_LABELS = {
26
60
  extracting: 'Scanning files',
27
61
  structure: 'Building structure',
@@ -38,6 +72,129 @@ export const PHASE_LABELS = {
38
72
  embeddings: 'Generating embeddings',
39
73
  done: 'Done',
40
74
  };
75
+ const normalizeGitPath = (filePath) => filePath.replace(/\\/g, '/');
76
+ export const parseGitNameStatus = (raw) => {
77
+ const tokens = raw.split('\0').filter(Boolean);
78
+ const changes = [];
79
+ for (let i = 0; i < tokens.length;) {
80
+ const status = tokens[i++] ?? '';
81
+ const code = status[0]?.toUpperCase();
82
+ if (code === 'R' || code === 'C') {
83
+ const previousPath = tokens[i++];
84
+ const nextPath = tokens[i++];
85
+ if (previousPath && nextPath) {
86
+ changes.push({
87
+ status,
88
+ path: normalizeGitPath(nextPath),
89
+ previousPath: normalizeGitPath(previousPath),
90
+ });
91
+ }
92
+ continue;
93
+ }
94
+ const changedPath = tokens[i++];
95
+ if (status && changedPath) {
96
+ changes.push({ status, path: normalizeGitPath(changedPath) });
97
+ }
98
+ }
99
+ return changes;
100
+ };
101
+ export const listChangedPathsBetweenCommits = (repoPath, fromRef, toRef) => {
102
+ if (!fromRef || !toRef || fromRef === toRef)
103
+ return [];
104
+ try {
105
+ const stdout = execFileSync('git', ['diff', '--name-status', '-z', `${fromRef}..${toRef}`], {
106
+ cwd: repoPath,
107
+ encoding: 'utf8',
108
+ maxBuffer: 20 * 1024 * 1024,
109
+ stdio: ['ignore', 'pipe', 'pipe'],
110
+ });
111
+ return parseGitNameStatus(stdout);
112
+ }
113
+ catch {
114
+ return null;
115
+ }
116
+ };
117
+ export const isGeneratedAgentContextPath = (filePath) => {
118
+ const normalized = normalizeGitPath(filePath).toLowerCase();
119
+ const basename = path.posix.basename(normalized);
120
+ return (GENERATED_AGENT_CONTEXT_PATHS.has(basename) ||
121
+ GENERATED_AGENT_CONTEXT_PREFIXES.some((prefix) => normalized.startsWith(prefix)));
122
+ };
123
+ export const isGraphContentPath = (filePath) => {
124
+ const normalized = normalizeGitPath(filePath);
125
+ const basename = path.posix.basename(normalized);
126
+ const lowerBasename = basename.toLowerCase();
127
+ if (isGeneratedAgentContextPath(normalized))
128
+ return false;
129
+ if (IGNORE_CONTROL_FILES.has(lowerBasename))
130
+ return true;
131
+ if (shouldIgnorePath(normalized))
132
+ return false;
133
+ if (getLanguageFromFilename(normalized) !== null)
134
+ return true;
135
+ const ext = path.posix.extname(lowerBasename);
136
+ if (MARKDOWN_EXTENSIONS.has(ext))
137
+ return true;
138
+ if (GRAPH_CONFIG_BASENAMES.has(lowerBasename))
139
+ return true;
140
+ return GRAPH_CONFIG_PATTERNS.some((pattern) => pattern.test(basename));
141
+ };
142
+ export const changedPathAffectsGraph = (change) => {
143
+ const statusCode = change.status[0]?.toUpperCase();
144
+ const paths = [change.path, change.previousPath].filter((p) => Boolean(p));
145
+ if (paths.some(isGraphContentPath))
146
+ return true;
147
+ // Add/delete/rename/copy affect the graph's File/Folder topology even when
148
+ // the path is not source code. Ignore only generated agent context and
149
+ // configured ignored paths; staying conservative here prevents stale file
150
+ // and documentation surfaces after path-only commits.
151
+ if (statusCode === 'A' || statusCode === 'D' || statusCode === 'R' || statusCode === 'C') {
152
+ return paths.some((p) => !isGeneratedAgentContextPath(p) && !shouldIgnorePath(p));
153
+ }
154
+ // Modified non-code/non-doc files keep the same path and are not read by the
155
+ // graph pipeline, so the existing graph can be reused.
156
+ if (statusCode === 'M' || statusCode === 'T')
157
+ return false;
158
+ // Unknown git status: rebuild rather than risk stale graph state.
159
+ return true;
160
+ };
161
+ export const getGraphRelevantChangedPaths = (changes) => changes.filter(changedPathAffectsGraph);
162
+ export const getAnalyzeConfigRebuildReason = (existingMeta, options) => {
163
+ const existingCompress = existingMeta.compress ?? 'none';
164
+ if (options.compress && options.compress !== existingCompress) {
165
+ return `requested compression changed from ${existingCompress} to ${options.compress}`;
166
+ }
167
+ if (options.embeddings && (existingMeta.stats?.embeddings ?? 0) === 0) {
168
+ return 'embeddings were requested but the existing index has no vectors';
169
+ }
170
+ return null;
171
+ };
172
+ const formatChangeForLog = (change) => change.previousPath ? `${change.previousPath} -> ${change.path}` : change.path;
173
+ const buildReusedMeta = (existingMeta, repoPath, currentCommit) => ({
174
+ ...existingMeta,
175
+ repoPath,
176
+ lastCommit: currentCommit,
177
+ indexedAt: new Date().toISOString(),
178
+ schemaVersion: INDEX_SCHEMA_VERSION,
179
+ remoteUrl: hasGitDir(repoPath) ? getRemoteUrl(repoPath) : existingMeta.remoteUrl,
180
+ });
181
+ const metaStatsForAIContext = (stats = {}) => ({
182
+ files: stats.files,
183
+ nodes: stats.nodes,
184
+ edges: stats.edges,
185
+ communities: stats.communities,
186
+ clusters: stats.featureClusters,
187
+ processes: stats.processes,
188
+ });
189
+ const pathExists = async (targetPath) => {
190
+ try {
191
+ await fs.stat(targetPath);
192
+ return true;
193
+ }
194
+ catch {
195
+ return false;
196
+ }
197
+ };
41
198
  // ---------------------------------------------------------------------------
42
199
  // Main orchestrator
43
200
  // ---------------------------------------------------------------------------
@@ -135,20 +292,87 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
135
292
  // end-to-end yet, so the supported migration path is re-analyze via a fresh
136
293
  // CREATE NODE TABLE.
137
294
  const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
295
+ const existingCgdbPresent = existingMeta ? await pathExists(cgdbPath) : false;
296
+ const storageRebuildReason = existingMeta && schemaUpToDate && !existingCgdbPresent
297
+ ? 'graph database files are missing'
298
+ : null;
299
+ const configRebuildReason = storageRebuildReason ??
300
+ (existingMeta && schemaUpToDate && !options.force
301
+ ? getAnalyzeConfigRebuildReason(existingMeta, options)
302
+ : null);
138
303
  if (existingMeta &&
139
304
  schemaUpToDate &&
140
305
  !options.force &&
306
+ !configRebuildReason &&
141
307
  existingMeta.lastCommit === currentCommit) {
142
308
  // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
143
309
  if (currentCommit !== '') {
310
+ const repoName = options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath);
311
+ try {
312
+ await generateAIContextFiles(repoPath, storagePath, repoName, metaStatsForAIContext(existingMeta.stats), undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
313
+ }
314
+ catch {
315
+ // Best-effort only.
316
+ }
144
317
  return {
145
- repoName: options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath),
318
+ repoName,
146
319
  repoPath,
147
320
  stats: existingMeta.stats ?? {},
148
321
  alreadyUpToDate: true,
149
322
  };
150
323
  }
151
324
  }
325
+ if (existingMeta && schemaUpToDate && !options.force && configRebuildReason) {
326
+ log(`Re-analyzing: ${configRebuildReason}.`);
327
+ }
328
+ if (existingMeta &&
329
+ schemaUpToDate &&
330
+ !options.force &&
331
+ !configRebuildReason &&
332
+ currentCommit !== '' &&
333
+ existingMeta.lastCommit !== currentCommit) {
334
+ const changedPaths = listChangedPathsBetweenCommits(repoPath, existingMeta.lastCommit, currentCommit);
335
+ if (changedPaths) {
336
+ const graphRelevantChanges = getGraphRelevantChangedPaths(changedPaths);
337
+ if (graphRelevantChanges.length === 0) {
338
+ const reusedMeta = buildReusedMeta(existingMeta, repoPath, currentCommit);
339
+ await saveMeta(storagePath, reusedMeta);
340
+ const projectName = await registerRepo(repoPath, reusedMeta, {
341
+ name: options.registryName,
342
+ allowDuplicateName: options.allowDuplicateName,
343
+ });
344
+ if (hasGitDir(repoPath)) {
345
+ await addToGitignore(repoPath);
346
+ }
347
+ try {
348
+ await generateAIContextFiles(repoPath, storagePath, projectName, metaStatsForAIContext(reusedMeta.stats), undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
349
+ }
350
+ catch {
351
+ // Best-effort only.
352
+ }
353
+ const reuseReason = `Smart analyze reused the existing graph; ${changedPaths.length} changed ` +
354
+ `file(s) did not affect indexed graph inputs.`;
355
+ log(reuseReason);
356
+ progress('done', 100, 'Existing graph reused');
357
+ return {
358
+ repoName: projectName,
359
+ repoPath,
360
+ stats: reusedMeta.stats ?? {},
361
+ alreadyUpToDate: true,
362
+ reusedExistingIndex: true,
363
+ reuseReason,
364
+ };
365
+ }
366
+ const preview = graphRelevantChanges.slice(0, 5).map(formatChangeForLog).join(', ');
367
+ const suffix = graphRelevantChanges.length > 5 ? ', ...' : '';
368
+ log(`Smart analyze: ${graphRelevantChanges.length} indexed graph input change(s) require rebuild` +
369
+ (preview ? ` (${preview}${suffix})` : '') +
370
+ '.');
371
+ }
372
+ else {
373
+ log('Smart analyze: could not inspect git diff; rebuilding.');
374
+ }
375
+ }
152
376
  if (existingMeta && !schemaUpToDate) {
153
377
  log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
154
378
  `${INDEX_SCHEMA_VERSION} (FeatureCluster context-pack schema). ` +
@@ -16,17 +16,6 @@ export interface BM25SearchResult {
16
16
  rank: number;
17
17
  nodeIds?: string[];
18
18
  }
19
- /**
20
- * Drop all ensured-FTS cache entries for a given repoId.
21
- *
22
- * Called from the pool-close listener so that a pool teardown / recreation
23
- * forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
24
- * against the fresh connection rather than trust stale ensure-state from a
25
- * previous pool lifetime.
26
- *
27
- * Exported for tests; the listener wiring is internal.
28
- */
29
- export declare function invalidateEnsuredFTSForRepo(repoId: string): void;
30
19
  /**
31
20
  * Search using LadybugDB's built-in FTS (always fresh, reads from disk)
32
21
  *