@codragraph/cli 2.1.1 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/dist/cli/ai-context.js +298 -1
- package/dist/cli/analyze.js +19 -2
- package/dist/cli/index.js +33 -12
- package/dist/cli/serve.d.ts +1 -0
- package/dist/cli/serve.js +3 -1
- package/dist/cli/setup.js +36 -19
- package/dist/cli/status.d.ts +13 -0
- package/dist/cli/status.js +99 -0
- package/dist/cli/tool.js +73 -33
- package/dist/config/ignore-service.js +3 -0
- package/dist/core/cgdb/pool-adapter.js +130 -20
- package/dist/core/graphstore/cgdb-row-source.js +3 -2
- package/dist/core/group/bridge-db.js +42 -10
- package/dist/core/ingestion/parsing-processor.js +7 -1
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +4 -0
- package/dist/core/ingestion/workers/parse-worker.js +1 -1
- package/dist/core/ingestion/workers/worker-pool.d.ts +14 -1
- package/dist/core/ingestion/workers/worker-pool.js +33 -17
- package/dist/core/run-analyze.d.ts +20 -0
- package/dist/core/run-analyze.js +225 -1
- package/dist/core/search/bm25-index.d.ts +0 -11
- package/dist/core/search/bm25-index.js +7 -84
- package/dist/core/search/hybrid-search.js +11 -3
- package/dist/mcp/local/local-backend.d.ts +2 -0
- package/dist/mcp/local/local-backend.js +235 -18
- package/dist/mcp/resources.js +2 -2
- package/dist/server/api.d.ts +14 -2
- package/dist/server/api.js +90 -7
- package/dist/server/mcp-http.d.ts +22 -0
- package/dist/server/mcp-http.js +21 -2
- package/dist/server/web-dashboard.d.ts +28 -0
- package/dist/server/web-dashboard.js +61 -0
- package/dist/web/assets/agent-D5lb0zXz.js +1089 -0
- package/dist/web/assets/architectureDiagram-EMZXCZ2Q-CZtc99v_.js +36 -0
- package/dist/web/assets/blockDiagram-IGV67L2C-BtoUp-6Y.js +132 -0
- package/dist/web/assets/c4Diagram-DFAF54RM-C4Hl3J2U.js +10 -0
- package/dist/web/assets/chunk-3GS5O3IE-DkUjU0WD.js +231 -0
- package/dist/web/assets/chunk-3YCYZ6SJ-CQkVgT_z.js +1 -0
- package/dist/web/assets/chunk-7RZVMHOQ-BitYcNVR.js +338 -0
- package/dist/web/assets/chunk-AEOMTBSW-BgTIXPsY.js +1 -0
- package/dist/web/assets/chunk-H3VCZNTA-Cx5XV_aC.js +13 -0
- package/dist/web/assets/chunk-HN6EAY2L-BBnyTNdB.js +1 -0
- package/dist/web/assets/chunk-KSICW3F5-BYzvDLNI.js +15 -0
- package/dist/web/assets/chunk-O5ABG6QK-dHwHzA6n.js +1 -0
- package/dist/web/assets/chunk-PK6DOVAG-CvsEnugt.js +206 -0
- package/dist/web/assets/chunk-RWUO3TPN-BgRTY0_k.js +1 -0
- package/dist/web/assets/chunk-TBF5ZNIQ-DL5stGM1.js +1 -0
- package/dist/web/assets/chunk-TU3PZOEN-RLyvLcv-.js +1 -0
- package/dist/web/assets/classDiagram-PPOCWD7C-DTr8QIOf.js +1 -0
- package/dist/web/assets/classDiagram-v2-23LJLIIU-DTr8QIOf.js +1 -0
- package/dist/web/assets/context-builder-22jU3V56.js +16 -0
- package/dist/web/assets/cose-bilkent-PNC4W37J-DVhePRYg.js +1 -0
- package/dist/web/assets/dagre-E77IOHMT-Dzx0A6ZU.js +4 -0
- package/dist/web/assets/diagram-H7BISOXX-CC9pRew1.js +43 -0
- package/dist/web/assets/diagram-JC5VWROH-Bau_i9tf.js +24 -0
- package/dist/web/assets/diagram-LXUTUG65-D9_FM2Gt.js +10 -0
- package/dist/web/assets/diagram-WEHSV5V5-BMlayouL.js +24 -0
- package/dist/web/assets/erDiagram-GCSMX5X6-C3dhDFA8.js +85 -0
- package/dist/web/assets/flowDiagram-OTCZ4VVT-CWSFWmhr.js +162 -0
- package/dist/web/assets/ganttDiagram-MUNLMDZQ-D3a67Yol.js +292 -0
- package/dist/web/assets/gitGraphDiagram-3HKGZ4G3-7jmry-vM.js +106 -0
- package/dist/web/assets/index-BgeqpYgd.js +1415 -0
- package/dist/web/assets/index-CT0GtFLZ.css +1 -0
- package/dist/web/assets/infoDiagram-MN7RKWGX-G7lhP0Ib.js +2 -0
- package/dist/web/assets/ishikawaDiagram-YMYX4NHK-DUoJvNP2.js +70 -0
- package/dist/web/assets/journeyDiagram-SO5T7YLQ-RMFPNNqz.js +139 -0
- package/dist/web/assets/kanban-definition-LJHFXRCJ-BzpDs1K9.js +89 -0
- package/dist/web/assets/katex-GD7MH7QM-DBQvrix-.js +261 -0
- package/dist/web/assets/mindmap-definition-2EUWGEK5-Bk0O4roa.js +96 -0
- package/dist/web/assets/pieDiagram-3IATQBI2-DKU7kpgS.js +30 -0
- package/dist/web/assets/quadrantDiagram-E256RVCF-BY0TGWCS.js +7 -0
- package/dist/web/assets/requirementDiagram-M5DCFWZL-DLHOVTSv.js +84 -0
- package/dist/web/assets/sankeyDiagram-L3NBLAOT-DVMj5rX2.js +10 -0
- package/dist/web/assets/sequenceDiagram-ZOUHS735-CJC73bV-.js +157 -0
- package/dist/web/assets/stateDiagram-MLPALWAM-BCFyESls.js +1 -0
- package/dist/web/assets/stateDiagram-v2-B5LQ5ZB2-DahzzIca.js +1 -0
- package/dist/web/assets/timeline-definition-5SPVSISX-TRSDRgPw.js +120 -0
- package/dist/web/assets/vennDiagram-IE5QUKF5-DNy7HRBM.js +34 -0
- package/dist/web/assets/wardley-RL74JXVD-BCRCBASE-B-eZEzf9.js +161 -0
- package/dist/web/assets/wardleyDiagram-XU3VSMPF-BP-r1xzR.js +20 -0
- package/dist/web/assets/xychartDiagram-ZHJ5623Y-Dr9r7a35.js +7 -0
- package/dist/web/codragraph-logo-512.png +0 -0
- package/dist/web/codragraph-logo.png +0 -0
- package/dist/web/favicon.png +0 -0
- package/dist/web/index.html +36 -0
- package/hooks/claude/codragraph-hook.cjs +18 -110
- package/hooks/claude/pre-tool-use.sh +6 -1
- package/package.json +3 -1
- package/scripts/build.js +62 -4
- package/scripts/patch-tree-sitter-swift.cjs +0 -1
- package/skills/codragraph-cli.md +1 -1
- package/vendor/leiden/index.cjs +272 -285
- package/vendor/leiden/utils.cjs +264 -274
- package/dist/_shared/lbug/schema-constants.d.ts +0 -16
- package/dist/_shared/lbug/schema-constants.d.ts.map +0 -1
- package/dist/_shared/lbug/schema-constants.js +0 -67
- package/dist/_shared/lbug/schema-constants.js.map +0 -1
- package/dist/core/graphstore/lbug-row-source.d.ts +0 -19
- package/dist/core/graphstore/lbug-row-source.js +0 -141
- package/dist/core/lbug/content-read.d.ts +0 -46
- package/dist/core/lbug/content-read.js +0 -64
- package/dist/core/lbug/csv-generator.d.ts +0 -29
- package/dist/core/lbug/csv-generator.js +0 -492
- package/dist/core/lbug/lbug-adapter.d.ts +0 -176
- package/dist/core/lbug/lbug-adapter.js +0 -1320
- package/dist/core/lbug/pool-adapter.d.ts +0 -93
- package/dist/core/lbug/pool-adapter.js +0 -550
- package/dist/core/lbug/schema.d.ts +0 -62
- package/dist/core/lbug/schema.js +0 -502
- package/dist/mcp/core/lbug-adapter.d.ts +0 -5
- package/dist/mcp/core/lbug-adapter.js +0 -5
|
@@ -74,7 +74,7 @@ export function findContractNode(index, repo, role, symbolUid, filePath, symbolN
|
|
|
74
74
|
export async function openBridgeDb(dbPath) {
|
|
75
75
|
const parentDir = path.dirname(dbPath);
|
|
76
76
|
await fsp.mkdir(parentDir, { recursive: true });
|
|
77
|
-
const db = new cgdb.Database(dbPath, 0, false, false); // writable
|
|
77
|
+
const db = new cgdb.Database(dbPath, 0, false, false, BRIDGE_MAX_DB_SIZE_BYTES); // writable
|
|
78
78
|
const conn = new cgdb.Connection(db);
|
|
79
79
|
return { _db: db, _conn: conn, groupDir: parentDir };
|
|
80
80
|
}
|
|
@@ -238,6 +238,25 @@ export async function retryRename(src, dst, attempts = 3) {
|
|
|
238
238
|
}
|
|
239
239
|
}
|
|
240
240
|
}
|
|
241
|
+
async function removeBridgeArtifacts(basePath) {
|
|
242
|
+
for (const candidate of [basePath, `${basePath}.wal`, `${basePath}.lock`]) {
|
|
243
|
+
try {
|
|
244
|
+
await fsp.rm(candidate, { recursive: true, force: true });
|
|
245
|
+
}
|
|
246
|
+
catch {
|
|
247
|
+
/* ignore */
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
async function renameBridgeArtifactIfExists(src, dst) {
|
|
252
|
+
try {
|
|
253
|
+
await fsp.access(src);
|
|
254
|
+
}
|
|
255
|
+
catch {
|
|
256
|
+
return;
|
|
257
|
+
}
|
|
258
|
+
await retryRename(src, dst);
|
|
259
|
+
}
|
|
241
260
|
/* ------------------------------------------------------------------ */
|
|
242
261
|
/* writeBridgeMeta / readBridgeMeta */
|
|
243
262
|
/* ------------------------------------------------------------------ */
|
|
@@ -261,6 +280,10 @@ export async function readBridgeMeta(groupDir) {
|
|
|
261
280
|
}
|
|
262
281
|
}
|
|
263
282
|
const MAX_SAMPLE_ERRORS = 10;
|
|
283
|
+
// LadybugDB defaults maxDBSize to an 8 TiB mmap window on some platforms.
|
|
284
|
+
// Bridge databases are small contract registries, so cap the mapping to keep
|
|
285
|
+
// CI and low-resource user machines from failing before the first query.
|
|
286
|
+
const BRIDGE_MAX_DB_SIZE_BYTES = 512 * 1024 * 1024;
|
|
264
287
|
function errMessage(err) {
|
|
265
288
|
if (err instanceof Error)
|
|
266
289
|
return err.message;
|
|
@@ -293,13 +316,9 @@ export async function writeBridge(groupDir, input) {
|
|
|
293
316
|
report.sampleErrors.push({ kind, id, message: errMessage(err) });
|
|
294
317
|
}
|
|
295
318
|
};
|
|
296
|
-
// Clean up any leftover tmp
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
}
|
|
300
|
-
catch {
|
|
301
|
-
/* ignore */
|
|
302
|
-
}
|
|
319
|
+
// Clean up any leftover tmp plus native sidecars. LadybugDB stores WAL/lock
|
|
320
|
+
// files next to the database path, not inside it.
|
|
321
|
+
await removeBridgeArtifacts(tmpPath);
|
|
303
322
|
// 1. Create temp DB, insert all data.
|
|
304
323
|
//
|
|
305
324
|
// Everything after `openBridgeDb` must run inside a try/finally so that
|
|
@@ -440,16 +459,27 @@ export async function writeBridge(groupDir, input) {
|
|
|
440
459
|
}
|
|
441
460
|
}
|
|
442
461
|
// 3. Atomic swap: old→.bak, tmp→final, rm .bak
|
|
462
|
+
// Keep LadybugDB sidecars paired with the path rename. WAL files live next
|
|
463
|
+
// to the DB path, so renaming only the base path can hide fresh schema/data.
|
|
464
|
+
await removeBridgeArtifacts(bakPath);
|
|
443
465
|
try {
|
|
444
466
|
await fsp.access(finalPath);
|
|
445
467
|
await retryRename(finalPath, bakPath);
|
|
468
|
+
await renameBridgeArtifactIfExists(`${finalPath}.wal`, `${bakPath}.wal`);
|
|
446
469
|
}
|
|
447
470
|
catch {
|
|
448
471
|
/* no existing db */
|
|
449
472
|
}
|
|
450
473
|
await retryRename(tmpPath, finalPath);
|
|
474
|
+
await renameBridgeArtifactIfExists(`${tmpPath}.wal`, `${finalPath}.wal`);
|
|
475
|
+
try {
|
|
476
|
+
await fsp.rm(`${tmpPath}.lock`, { force: true });
|
|
477
|
+
}
|
|
478
|
+
catch {
|
|
479
|
+
/* ignore */
|
|
480
|
+
}
|
|
451
481
|
try {
|
|
452
|
-
await
|
|
482
|
+
await removeBridgeArtifacts(bakPath);
|
|
453
483
|
}
|
|
454
484
|
catch {
|
|
455
485
|
/* ignore */
|
|
@@ -480,6 +510,8 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
480
510
|
try {
|
|
481
511
|
await fsp.access(bakPath);
|
|
482
512
|
await retryRename(bakPath, dbPath);
|
|
513
|
+
await renameBridgeArtifactIfExists(`${bakPath}.wal`, `${dbPath}.wal`);
|
|
514
|
+
await fsp.rm(`${bakPath}.lock`, { force: true });
|
|
483
515
|
}
|
|
484
516
|
catch {
|
|
485
517
|
return null;
|
|
@@ -506,7 +538,7 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
506
538
|
let db;
|
|
507
539
|
let conn;
|
|
508
540
|
try {
|
|
509
|
-
db = new cgdb.Database(dbPath, 0, false, true); // readOnly
|
|
541
|
+
db = new cgdb.Database(dbPath, 0, false, true, BRIDGE_MAX_DB_SIZE_BYTES); // readOnly
|
|
510
542
|
conn = new cgdb.Connection(db);
|
|
511
543
|
return { _db: db, _conn: conn, groupDir };
|
|
512
544
|
}
|
|
@@ -567,7 +567,13 @@ scopeTreeCache, onFileProgress, workerPool) => {
|
|
|
567
567
|
return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
|
|
568
568
|
}
|
|
569
569
|
catch (err) {
|
|
570
|
-
console.warn('Worker pool parsing failed, falling back to sequential:', err instanceof Error ? err.message : err);
|
|
570
|
+
console.warn('Worker pool parsing failed for this chunk, falling back to sequential:', err instanceof Error ? err.message : err);
|
|
571
|
+
try {
|
|
572
|
+
await workerPool.terminate();
|
|
573
|
+
}
|
|
574
|
+
catch (terminateErr) {
|
|
575
|
+
console.warn('Worker pool termination after parsing failure failed:', terminateErr instanceof Error ? terminateErr.message : terminateErr);
|
|
576
|
+
}
|
|
571
577
|
}
|
|
572
578
|
}
|
|
573
579
|
// Fallback: sequential parsing (no pre-extracted data)
|
|
@@ -170,6 +170,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
170
170
|
const chunkFiles = chunkPaths
|
|
171
171
|
.filter((p) => chunkContents.has(p))
|
|
172
172
|
.map((p) => ({ path: p, content: chunkContents.get(p) }));
|
|
173
|
+
const usedWorkerPoolForChunk = workerPool !== undefined;
|
|
173
174
|
const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, (current, _total, filePath) => {
|
|
174
175
|
const globalCurrent = filesParsedSoFar + current;
|
|
175
176
|
const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
|
|
@@ -185,6 +186,9 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
185
186
|
},
|
|
186
187
|
});
|
|
187
188
|
}, workerPool);
|
|
189
|
+
if (usedWorkerPoolForChunk && !chunkWorkerData) {
|
|
190
|
+
workerPool = undefined;
|
|
191
|
+
}
|
|
188
192
|
const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
|
|
189
193
|
if (chunkWorkerData) {
|
|
190
194
|
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
|
|
@@ -452,7 +452,7 @@ const processBatch = (files, onProgress) => {
|
|
|
452
452
|
}
|
|
453
453
|
let totalProcessed = 0;
|
|
454
454
|
let lastReported = 0;
|
|
455
|
-
const PROGRESS_INTERVAL =
|
|
455
|
+
const PROGRESS_INTERVAL = 25; // report often enough to keep worker idle timers fresh
|
|
456
456
|
const onFileProcessed = onProgress
|
|
457
457
|
? () => {
|
|
458
458
|
totalProcessed++;
|
|
@@ -10,7 +10,20 @@ export interface WorkerPool {
|
|
|
10
10
|
/** Number of workers in the pool */
|
|
11
11
|
readonly size: number;
|
|
12
12
|
}
|
|
13
|
+
export interface WorkerPoolOptions {
|
|
14
|
+
/**
|
|
15
|
+
* Max files to send to a worker in one postMessage. Lower values reduce
|
|
16
|
+
* structured-clone memory spikes and give the main thread more chances to
|
|
17
|
+
* observe progress on large repos.
|
|
18
|
+
*/
|
|
19
|
+
subBatchSize?: number;
|
|
20
|
+
/**
|
|
21
|
+
* Idle timeout while waiting for a worker response. Reset by worker progress
|
|
22
|
+
* so slow-but-moving chunks do not get retried sequentially.
|
|
23
|
+
*/
|
|
24
|
+
subBatchIdleTimeoutMs?: number;
|
|
25
|
+
}
|
|
13
26
|
/**
|
|
14
27
|
* Create a pool of worker threads.
|
|
15
28
|
*/
|
|
16
|
-
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number) => WorkerPool;
|
|
29
|
+
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number, options?: WorkerPoolOptions) => WorkerPool;
|
|
@@ -6,14 +6,24 @@ import { fileURLToPath } from 'node:url';
|
|
|
6
6
|
* Max files to send to a worker in a single postMessage.
|
|
7
7
|
* Keeps structured-clone memory bounded per sub-batch.
|
|
8
8
|
*/
|
|
9
|
-
const
|
|
10
|
-
/**
|
|
11
|
-
*
|
|
12
|
-
|
|
9
|
+
const DEFAULT_SUB_BATCH_SIZE = 250;
|
|
10
|
+
/**
|
|
11
|
+
* Idle timeout while waiting for a worker response. This is not a wall-clock
|
|
12
|
+
* limit: worker progress resets it. Large repos can legitimately need more
|
|
13
|
+
* than 30s for a chunk, but a wedged parser should still fall back.
|
|
14
|
+
*/
|
|
15
|
+
const DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS = 120_000;
|
|
16
|
+
const positiveIntFromEnv = (name, fallback) => {
|
|
17
|
+
const raw = process.env[name];
|
|
18
|
+
if (!raw)
|
|
19
|
+
return fallback;
|
|
20
|
+
const parsed = Number.parseInt(raw, 10);
|
|
21
|
+
return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
|
|
22
|
+
};
|
|
13
23
|
/**
|
|
14
24
|
* Create a pool of worker threads.
|
|
15
25
|
*/
|
|
16
|
-
export const createWorkerPool = (workerUrl, poolSize) => {
|
|
26
|
+
export const createWorkerPool = (workerUrl, poolSize, options = {}) => {
|
|
17
27
|
// Validate worker script exists before spawning to prevent uncaught
|
|
18
28
|
// MODULE_NOT_FOUND crashes in worker threads (e.g. when running from src/ via vitest)
|
|
19
29
|
const workerPath = fileURLToPath(workerUrl);
|
|
@@ -21,6 +31,10 @@ export const createWorkerPool = (workerUrl, poolSize) => {
|
|
|
21
31
|
throw new Error(`Worker script not found: ${workerPath}`);
|
|
22
32
|
}
|
|
23
33
|
const size = poolSize ?? Math.min(8, Math.max(1, os.cpus().length - 1));
|
|
34
|
+
const subBatchSize = options.subBatchSize ??
|
|
35
|
+
positiveIntFromEnv('CODRAGRAPH_WORKER_SUB_BATCH_SIZE', DEFAULT_SUB_BATCH_SIZE);
|
|
36
|
+
const subBatchIdleTimeoutMs = options.subBatchIdleTimeoutMs ??
|
|
37
|
+
positiveIntFromEnv('CODRAGRAPH_WORKER_IDLE_TIMEOUT_MS', DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS);
|
|
24
38
|
const workers = [];
|
|
25
39
|
for (let i = 0; i < size; i++) {
|
|
26
40
|
workers.push(new Worker(workerUrl));
|
|
@@ -38,41 +52,43 @@ export const createWorkerPool = (workerUrl, poolSize) => {
|
|
|
38
52
|
const worker = workers[i];
|
|
39
53
|
return new Promise((resolve, reject) => {
|
|
40
54
|
let settled = false;
|
|
41
|
-
let
|
|
55
|
+
let workerIdleTimer = null;
|
|
42
56
|
const cleanup = () => {
|
|
43
|
-
if (
|
|
44
|
-
clearTimeout(
|
|
57
|
+
if (workerIdleTimer)
|
|
58
|
+
clearTimeout(workerIdleTimer);
|
|
45
59
|
worker.removeListener('message', handler);
|
|
46
60
|
worker.removeListener('error', errorHandler);
|
|
47
61
|
worker.removeListener('exit', exitHandler);
|
|
48
62
|
};
|
|
49
|
-
const
|
|
50
|
-
if (
|
|
51
|
-
clearTimeout(
|
|
52
|
-
|
|
63
|
+
const resetWorkerIdleTimer = () => {
|
|
64
|
+
if (workerIdleTimer)
|
|
65
|
+
clearTimeout(workerIdleTimer);
|
|
66
|
+
workerIdleTimer = setTimeout(() => {
|
|
53
67
|
if (!settled) {
|
|
54
68
|
settled = true;
|
|
55
69
|
cleanup();
|
|
56
|
-
reject(new Error(`Worker ${i}
|
|
70
|
+
reject(new Error(`Worker ${i} was idle for ${subBatchIdleTimeoutMs / 1000}s while waiting for a response (chunk: ${chunk.length} items).`));
|
|
57
71
|
}
|
|
58
|
-
},
|
|
72
|
+
}, subBatchIdleTimeoutMs);
|
|
59
73
|
};
|
|
60
74
|
let subBatchIdx = 0;
|
|
61
75
|
const sendNextSubBatch = () => {
|
|
62
|
-
const start = subBatchIdx *
|
|
76
|
+
const start = subBatchIdx * subBatchSize;
|
|
63
77
|
if (start >= chunk.length) {
|
|
78
|
+
resetWorkerIdleTimer();
|
|
64
79
|
worker.postMessage({ type: 'flush' });
|
|
65
80
|
return;
|
|
66
81
|
}
|
|
67
|
-
const subBatch = chunk.slice(start, start +
|
|
82
|
+
const subBatch = chunk.slice(start, start + subBatchSize);
|
|
68
83
|
subBatchIdx++;
|
|
69
|
-
|
|
84
|
+
resetWorkerIdleTimer();
|
|
70
85
|
worker.postMessage({ type: 'sub-batch', files: subBatch });
|
|
71
86
|
};
|
|
72
87
|
const handler = (msg) => {
|
|
73
88
|
if (settled)
|
|
74
89
|
return;
|
|
75
90
|
if (msg.type === 'progress') {
|
|
91
|
+
resetWorkerIdleTimer();
|
|
76
92
|
workerProgress[i] = msg.filesProcessed;
|
|
77
93
|
if (onProgress) {
|
|
78
94
|
const total = workerProgress.reduce((a, b) => a + b, 0);
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* IMPORTANT: This module must NEVER call process.exit(). The caller (CLI
|
|
9
9
|
* wrapper or server worker) is responsible for process lifecycle.
|
|
10
10
|
*/
|
|
11
|
+
import { type RepoMeta } from '../storage/repo-manager.js';
|
|
11
12
|
import type { ContentEncoding } from '@codragraph/graphstore';
|
|
12
13
|
export interface AnalyzeCallbacks {
|
|
13
14
|
onProgress: (phase: string, percent: number, message: string) => void;
|
|
@@ -68,10 +69,29 @@ export interface AnalyzeResult {
|
|
|
68
69
|
embeddings?: number;
|
|
69
70
|
};
|
|
70
71
|
alreadyUpToDate?: boolean;
|
|
72
|
+
/** User-facing explanation for a reused index fast path. */
|
|
73
|
+
reuseReason?: string;
|
|
74
|
+
/** True when the git commit advanced but indexed inputs did not. */
|
|
75
|
+
reusedExistingIndex?: boolean;
|
|
71
76
|
/** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
|
|
72
77
|
pipelineResult?: any;
|
|
73
78
|
}
|
|
79
|
+
export interface AnalyzeChangedPath {
|
|
80
|
+
/** Git name-status token, e.g. M, A, D, R100. */
|
|
81
|
+
status: string;
|
|
82
|
+
/** Current path for additions/modifications, or deleted path for deletions. */
|
|
83
|
+
path: string;
|
|
84
|
+
/** Previous path for renames/copies. */
|
|
85
|
+
previousPath?: string;
|
|
86
|
+
}
|
|
74
87
|
export declare const PHASE_LABELS: Record<string, string>;
|
|
88
|
+
export declare const parseGitNameStatus: (raw: string) => AnalyzeChangedPath[];
|
|
89
|
+
export declare const listChangedPathsBetweenCommits: (repoPath: string, fromRef: string, toRef: string) => AnalyzeChangedPath[] | null;
|
|
90
|
+
export declare const isGeneratedAgentContextPath: (filePath: string) => boolean;
|
|
91
|
+
export declare const isGraphContentPath: (filePath: string) => boolean;
|
|
92
|
+
export declare const changedPathAffectsGraph: (change: AnalyzeChangedPath) => boolean;
|
|
93
|
+
export declare const getGraphRelevantChangedPaths: (changes: readonly AnalyzeChangedPath[]) => AnalyzeChangedPath[];
|
|
94
|
+
export declare const getAnalyzeConfigRebuildReason: (existingMeta: Pick<RepoMeta, "compress" | "stats">, options: Pick<AnalyzeOptions, "compress" | "embeddings">) => string | null;
|
|
75
95
|
/**
|
|
76
96
|
* Run the full CodraGraph analysis pipeline.
|
|
77
97
|
*
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -10,18 +10,52 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import path from 'path';
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
|
+
import { execFileSync } from 'node:child_process';
|
|
13
14
|
import * as fsSync from 'node:fs';
|
|
14
15
|
import * as v8 from 'node:v8';
|
|
16
|
+
import { getLanguageFromFilename } from '../_shared/index.js';
|
|
15
17
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
16
18
|
import { initCgdb, loadGraphToCgdb, getCgdbStats, executeQuery, executeWithReusedStatement, closeCgdb, loadCachedEmbeddings, } from './cgdb/cgdb-adapter.js';
|
|
17
19
|
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
18
20
|
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
21
|
+
import { shouldIgnorePath } from '../config/ignore-service.js';
|
|
19
22
|
import { recordAnalysisSnapshot } from './graphstore/index.js';
|
|
20
23
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
21
24
|
import { EMBEDDING_TABLE_NAME } from './cgdb/schema.js';
|
|
22
25
|
import { STALE_HASH_SENTINEL } from './cgdb/schema.js';
|
|
23
26
|
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
24
27
|
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
28
|
+
const GENERATED_AGENT_CONTEXT_PATHS = new Set(['agents.md', 'claude.md']);
|
|
29
|
+
const GENERATED_AGENT_CONTEXT_PREFIXES = [
|
|
30
|
+
'.claude/skills/generated/',
|
|
31
|
+
'.cursor/rules/codragraph-generated/',
|
|
32
|
+
];
|
|
33
|
+
const IGNORE_CONTROL_FILES = new Set(['.gitignore', '.codragraphignore']);
|
|
34
|
+
const GRAPH_CONFIG_BASENAMES = new Set([
|
|
35
|
+
'package.json',
|
|
36
|
+
'tsconfig.json',
|
|
37
|
+
'jsconfig.json',
|
|
38
|
+
'go.mod',
|
|
39
|
+
'cargo.toml',
|
|
40
|
+
'pyproject.toml',
|
|
41
|
+
'requirements.txt',
|
|
42
|
+
'composer.json',
|
|
43
|
+
'gemfile',
|
|
44
|
+
'pom.xml',
|
|
45
|
+
'build.gradle',
|
|
46
|
+
'build.gradle.kts',
|
|
47
|
+
'settings.gradle',
|
|
48
|
+
'settings.gradle.kts',
|
|
49
|
+
'pubspec.yaml',
|
|
50
|
+
'pubspec.yml',
|
|
51
|
+
'mix.exs',
|
|
52
|
+
'rebar.config',
|
|
53
|
+
'cmakelists.txt',
|
|
54
|
+
'makefile',
|
|
55
|
+
'dockerfile',
|
|
56
|
+
]);
|
|
57
|
+
const GRAPH_CONFIG_PATTERNS = [/^tsconfig\..+\.json$/i, /^jsconfig\..+\.json$/i];
|
|
58
|
+
const MARKDOWN_EXTENSIONS = new Set(['.md', '.mdx']);
|
|
25
59
|
export const PHASE_LABELS = {
|
|
26
60
|
extracting: 'Scanning files',
|
|
27
61
|
structure: 'Building structure',
|
|
@@ -38,6 +72,129 @@ export const PHASE_LABELS = {
|
|
|
38
72
|
embeddings: 'Generating embeddings',
|
|
39
73
|
done: 'Done',
|
|
40
74
|
};
|
|
75
|
+
const normalizeGitPath = (filePath) => filePath.replace(/\\/g, '/');
|
|
76
|
+
export const parseGitNameStatus = (raw) => {
|
|
77
|
+
const tokens = raw.split('\0').filter(Boolean);
|
|
78
|
+
const changes = [];
|
|
79
|
+
for (let i = 0; i < tokens.length;) {
|
|
80
|
+
const status = tokens[i++] ?? '';
|
|
81
|
+
const code = status[0]?.toUpperCase();
|
|
82
|
+
if (code === 'R' || code === 'C') {
|
|
83
|
+
const previousPath = tokens[i++];
|
|
84
|
+
const nextPath = tokens[i++];
|
|
85
|
+
if (previousPath && nextPath) {
|
|
86
|
+
changes.push({
|
|
87
|
+
status,
|
|
88
|
+
path: normalizeGitPath(nextPath),
|
|
89
|
+
previousPath: normalizeGitPath(previousPath),
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
continue;
|
|
93
|
+
}
|
|
94
|
+
const changedPath = tokens[i++];
|
|
95
|
+
if (status && changedPath) {
|
|
96
|
+
changes.push({ status, path: normalizeGitPath(changedPath) });
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
return changes;
|
|
100
|
+
};
|
|
101
|
+
export const listChangedPathsBetweenCommits = (repoPath, fromRef, toRef) => {
|
|
102
|
+
if (!fromRef || !toRef || fromRef === toRef)
|
|
103
|
+
return [];
|
|
104
|
+
try {
|
|
105
|
+
const stdout = execFileSync('git', ['diff', '--name-status', '-z', `${fromRef}..${toRef}`], {
|
|
106
|
+
cwd: repoPath,
|
|
107
|
+
encoding: 'utf8',
|
|
108
|
+
maxBuffer: 20 * 1024 * 1024,
|
|
109
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
110
|
+
});
|
|
111
|
+
return parseGitNameStatus(stdout);
|
|
112
|
+
}
|
|
113
|
+
catch {
|
|
114
|
+
return null;
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
export const isGeneratedAgentContextPath = (filePath) => {
|
|
118
|
+
const normalized = normalizeGitPath(filePath).toLowerCase();
|
|
119
|
+
const basename = path.posix.basename(normalized);
|
|
120
|
+
return (GENERATED_AGENT_CONTEXT_PATHS.has(basename) ||
|
|
121
|
+
GENERATED_AGENT_CONTEXT_PREFIXES.some((prefix) => normalized.startsWith(prefix)));
|
|
122
|
+
};
|
|
123
|
+
export const isGraphContentPath = (filePath) => {
|
|
124
|
+
const normalized = normalizeGitPath(filePath);
|
|
125
|
+
const basename = path.posix.basename(normalized);
|
|
126
|
+
const lowerBasename = basename.toLowerCase();
|
|
127
|
+
if (isGeneratedAgentContextPath(normalized))
|
|
128
|
+
return false;
|
|
129
|
+
if (IGNORE_CONTROL_FILES.has(lowerBasename))
|
|
130
|
+
return true;
|
|
131
|
+
if (shouldIgnorePath(normalized))
|
|
132
|
+
return false;
|
|
133
|
+
if (getLanguageFromFilename(normalized) !== null)
|
|
134
|
+
return true;
|
|
135
|
+
const ext = path.posix.extname(lowerBasename);
|
|
136
|
+
if (MARKDOWN_EXTENSIONS.has(ext))
|
|
137
|
+
return true;
|
|
138
|
+
if (GRAPH_CONFIG_BASENAMES.has(lowerBasename))
|
|
139
|
+
return true;
|
|
140
|
+
return GRAPH_CONFIG_PATTERNS.some((pattern) => pattern.test(basename));
|
|
141
|
+
};
|
|
142
|
+
export const changedPathAffectsGraph = (change) => {
|
|
143
|
+
const statusCode = change.status[0]?.toUpperCase();
|
|
144
|
+
const paths = [change.path, change.previousPath].filter((p) => Boolean(p));
|
|
145
|
+
if (paths.some(isGraphContentPath))
|
|
146
|
+
return true;
|
|
147
|
+
// Add/delete/rename/copy affect the graph's File/Folder topology even when
|
|
148
|
+
// the path is not source code. Ignore only generated agent context and
|
|
149
|
+
// configured ignored paths; staying conservative here prevents stale file
|
|
150
|
+
// and documentation surfaces after path-only commits.
|
|
151
|
+
if (statusCode === 'A' || statusCode === 'D' || statusCode === 'R' || statusCode === 'C') {
|
|
152
|
+
return paths.some((p) => !isGeneratedAgentContextPath(p) && !shouldIgnorePath(p));
|
|
153
|
+
}
|
|
154
|
+
// Modified non-code/non-doc files keep the same path and are not read by the
|
|
155
|
+
// graph pipeline, so the existing graph can be reused.
|
|
156
|
+
if (statusCode === 'M' || statusCode === 'T')
|
|
157
|
+
return false;
|
|
158
|
+
// Unknown git status: rebuild rather than risk stale graph state.
|
|
159
|
+
return true;
|
|
160
|
+
};
|
|
161
|
+
export const getGraphRelevantChangedPaths = (changes) => changes.filter(changedPathAffectsGraph);
|
|
162
|
+
export const getAnalyzeConfigRebuildReason = (existingMeta, options) => {
|
|
163
|
+
const existingCompress = existingMeta.compress ?? 'none';
|
|
164
|
+
if (options.compress && options.compress !== existingCompress) {
|
|
165
|
+
return `requested compression changed from ${existingCompress} to ${options.compress}`;
|
|
166
|
+
}
|
|
167
|
+
if (options.embeddings && (existingMeta.stats?.embeddings ?? 0) === 0) {
|
|
168
|
+
return 'embeddings were requested but the existing index has no vectors';
|
|
169
|
+
}
|
|
170
|
+
return null;
|
|
171
|
+
};
|
|
172
|
+
const formatChangeForLog = (change) => change.previousPath ? `${change.previousPath} -> ${change.path}` : change.path;
|
|
173
|
+
const buildReusedMeta = (existingMeta, repoPath, currentCommit) => ({
|
|
174
|
+
...existingMeta,
|
|
175
|
+
repoPath,
|
|
176
|
+
lastCommit: currentCommit,
|
|
177
|
+
indexedAt: new Date().toISOString(),
|
|
178
|
+
schemaVersion: INDEX_SCHEMA_VERSION,
|
|
179
|
+
remoteUrl: hasGitDir(repoPath) ? getRemoteUrl(repoPath) : existingMeta.remoteUrl,
|
|
180
|
+
});
|
|
181
|
+
const metaStatsForAIContext = (stats = {}) => ({
|
|
182
|
+
files: stats.files,
|
|
183
|
+
nodes: stats.nodes,
|
|
184
|
+
edges: stats.edges,
|
|
185
|
+
communities: stats.communities,
|
|
186
|
+
clusters: stats.featureClusters,
|
|
187
|
+
processes: stats.processes,
|
|
188
|
+
});
|
|
189
|
+
const pathExists = async (targetPath) => {
|
|
190
|
+
try {
|
|
191
|
+
await fs.stat(targetPath);
|
|
192
|
+
return true;
|
|
193
|
+
}
|
|
194
|
+
catch {
|
|
195
|
+
return false;
|
|
196
|
+
}
|
|
197
|
+
};
|
|
41
198
|
// ---------------------------------------------------------------------------
|
|
42
199
|
// Main orchestrator
|
|
43
200
|
// ---------------------------------------------------------------------------
|
|
@@ -135,20 +292,87 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
135
292
|
// end-to-end yet, so the supported migration path is re-analyze via a fresh
|
|
136
293
|
// CREATE NODE TABLE.
|
|
137
294
|
const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
|
|
295
|
+
const existingCgdbPresent = existingMeta ? await pathExists(cgdbPath) : false;
|
|
296
|
+
const storageRebuildReason = existingMeta && schemaUpToDate && !existingCgdbPresent
|
|
297
|
+
? 'graph database files are missing'
|
|
298
|
+
: null;
|
|
299
|
+
const configRebuildReason = storageRebuildReason ??
|
|
300
|
+
(existingMeta && schemaUpToDate && !options.force
|
|
301
|
+
? getAnalyzeConfigRebuildReason(existingMeta, options)
|
|
302
|
+
: null);
|
|
138
303
|
if (existingMeta &&
|
|
139
304
|
schemaUpToDate &&
|
|
140
305
|
!options.force &&
|
|
306
|
+
!configRebuildReason &&
|
|
141
307
|
existingMeta.lastCommit === currentCommit) {
|
|
142
308
|
// Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
|
|
143
309
|
if (currentCommit !== '') {
|
|
310
|
+
const repoName = options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath);
|
|
311
|
+
try {
|
|
312
|
+
await generateAIContextFiles(repoPath, storagePath, repoName, metaStatsForAIContext(existingMeta.stats), undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
|
|
313
|
+
}
|
|
314
|
+
catch {
|
|
315
|
+
// Best-effort only.
|
|
316
|
+
}
|
|
144
317
|
return {
|
|
145
|
-
repoName
|
|
318
|
+
repoName,
|
|
146
319
|
repoPath,
|
|
147
320
|
stats: existingMeta.stats ?? {},
|
|
148
321
|
alreadyUpToDate: true,
|
|
149
322
|
};
|
|
150
323
|
}
|
|
151
324
|
}
|
|
325
|
+
if (existingMeta && schemaUpToDate && !options.force && configRebuildReason) {
|
|
326
|
+
log(`Re-analyzing: ${configRebuildReason}.`);
|
|
327
|
+
}
|
|
328
|
+
if (existingMeta &&
|
|
329
|
+
schemaUpToDate &&
|
|
330
|
+
!options.force &&
|
|
331
|
+
!configRebuildReason &&
|
|
332
|
+
currentCommit !== '' &&
|
|
333
|
+
existingMeta.lastCommit !== currentCommit) {
|
|
334
|
+
const changedPaths = listChangedPathsBetweenCommits(repoPath, existingMeta.lastCommit, currentCommit);
|
|
335
|
+
if (changedPaths) {
|
|
336
|
+
const graphRelevantChanges = getGraphRelevantChangedPaths(changedPaths);
|
|
337
|
+
if (graphRelevantChanges.length === 0) {
|
|
338
|
+
const reusedMeta = buildReusedMeta(existingMeta, repoPath, currentCommit);
|
|
339
|
+
await saveMeta(storagePath, reusedMeta);
|
|
340
|
+
const projectName = await registerRepo(repoPath, reusedMeta, {
|
|
341
|
+
name: options.registryName,
|
|
342
|
+
allowDuplicateName: options.allowDuplicateName,
|
|
343
|
+
});
|
|
344
|
+
if (hasGitDir(repoPath)) {
|
|
345
|
+
await addToGitignore(repoPath);
|
|
346
|
+
}
|
|
347
|
+
try {
|
|
348
|
+
await generateAIContextFiles(repoPath, storagePath, projectName, metaStatsForAIContext(reusedMeta.stats), undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
|
|
349
|
+
}
|
|
350
|
+
catch {
|
|
351
|
+
// Best-effort only.
|
|
352
|
+
}
|
|
353
|
+
const reuseReason = `Smart analyze reused the existing graph; ${changedPaths.length} changed ` +
|
|
354
|
+
`file(s) did not affect indexed graph inputs.`;
|
|
355
|
+
log(reuseReason);
|
|
356
|
+
progress('done', 100, 'Existing graph reused');
|
|
357
|
+
return {
|
|
358
|
+
repoName: projectName,
|
|
359
|
+
repoPath,
|
|
360
|
+
stats: reusedMeta.stats ?? {},
|
|
361
|
+
alreadyUpToDate: true,
|
|
362
|
+
reusedExistingIndex: true,
|
|
363
|
+
reuseReason,
|
|
364
|
+
};
|
|
365
|
+
}
|
|
366
|
+
const preview = graphRelevantChanges.slice(0, 5).map(formatChangeForLog).join(', ');
|
|
367
|
+
const suffix = graphRelevantChanges.length > 5 ? ', ...' : '';
|
|
368
|
+
log(`Smart analyze: ${graphRelevantChanges.length} indexed graph input change(s) require rebuild` +
|
|
369
|
+
(preview ? ` (${preview}${suffix})` : '') +
|
|
370
|
+
'.');
|
|
371
|
+
}
|
|
372
|
+
else {
|
|
373
|
+
log('Smart analyze: could not inspect git diff; rebuilding.');
|
|
374
|
+
}
|
|
375
|
+
}
|
|
152
376
|
if (existingMeta && !schemaUpToDate) {
|
|
153
377
|
log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
|
|
154
378
|
`${INDEX_SCHEMA_VERSION} (FeatureCluster context-pack schema). ` +
|
|
@@ -16,17 +16,6 @@ export interface BM25SearchResult {
|
|
|
16
16
|
rank: number;
|
|
17
17
|
nodeIds?: string[];
|
|
18
18
|
}
|
|
19
|
-
/**
|
|
20
|
-
* Drop all ensured-FTS cache entries for a given repoId.
|
|
21
|
-
*
|
|
22
|
-
* Called from the pool-close listener so that a pool teardown / recreation
|
|
23
|
-
* forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
|
|
24
|
-
* against the fresh connection rather than trust stale ensure-state from a
|
|
25
|
-
* previous pool lifetime.
|
|
26
|
-
*
|
|
27
|
-
* Exported for tests; the listener wiring is internal.
|
|
28
|
-
*/
|
|
29
|
-
export declare function invalidateEnsuredFTSForRepo(repoId: string): void;
|
|
30
19
|
/**
|
|
31
20
|
* Search using LadybugDB's built-in FTS (always fresh, reads from disk)
|
|
32
21
|
*
|