@codragraph/cli 2.1.4 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -7
- package/dist/cli/ai-context.js +297 -0
- package/dist/cli/analyze.d.ts +9 -4
- package/dist/cli/analyze.js +37 -13
- package/dist/cli/index.js +40 -14
- package/dist/cli/status.d.ts +1 -1
- package/dist/cli/status.js +8 -0
- package/dist/cli/tool.d.ts +10 -2
- package/dist/cli/tool.js +100 -39
- package/dist/config/ignore-service.js +1 -0
- package/dist/core/adaptive-profile.d.ts +52 -0
- package/dist/core/adaptive-profile.js +180 -0
- package/dist/core/cgdb/cgdb-adapter.d.ts +34 -5
- package/dist/core/cgdb/cgdb-adapter.js +418 -5
- package/dist/core/cgdb/pool-adapter.js +130 -20
- package/dist/core/ingestion/parsing-processor.js +7 -1
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +7 -1
- package/dist/core/ingestion/pipeline-phases/structure.js +19 -3
- package/dist/core/ingestion/pipeline.d.ts +10 -0
- package/dist/core/ingestion/workers/parse-worker.js +1 -1
- package/dist/core/ingestion/workers/worker-pool.d.ts +14 -1
- package/dist/core/ingestion/workers/worker-pool.js +33 -17
- package/dist/core/run-analyze.d.ts +27 -2
- package/dist/core/run-analyze.js +626 -32
- package/dist/core/search/bm25-index.d.ts +16 -8
- package/dist/core/search/bm25-index.js +72 -110
- package/dist/mcp/local/local-backend.d.ts +2 -0
- package/dist/mcp/local/local-backend.js +241 -21
- package/dist/storage/repo-manager.d.ts +29 -0
- package/dist/web/assets/__vite-browser-external-BIHI7g3E.js +1 -0
- package/dist/web/assets/agent-DcdaQnmu.js +1104 -0
- package/dist/web/assets/architectureDiagram-UL44E2DR-DFSpa3Hb.js +36 -0
- package/dist/web/assets/blockDiagram-7IZFK4PR-DlFaxH1b.js +132 -0
- package/dist/web/assets/{c4Diagram-DFAF54RM-C4Hl3J2U.js → c4Diagram-Y2BXMSZH-BjJ_Yrim.js} +1 -1
- package/dist/web/assets/{chunk-7RZVMHOQ-BitYcNVR.js → chunk-3SSMPTDK-KGZSzG3Y.js} +1 -1
- package/dist/web/assets/{chunk-TBF5ZNIQ-DL5stGM1.js → chunk-6764PJDD-p1sGJgVm.js} +1 -1
- package/dist/web/assets/{chunk-KSICW3F5-BYzvDLNI.js → chunk-AZZRMDJM-DIDkQA4V.js} +1 -1
- package/dist/web/assets/{chunk-AEOMTBSW-BgTIXPsY.js → chunk-JQRUD6KW-DAwg-yCU.js} +1 -1
- package/dist/web/assets/chunk-KRXBNO2N-ChVO_XdS.js +1 -0
- package/dist/web/assets/chunk-LCXTWHL2-DGYdb_Eh.js +231 -0
- package/dist/web/assets/{chunk-O5ABG6QK-dHwHzA6n.js → chunk-LII3EMHJ-Bzh9SNgD.js} +1 -1
- package/dist/web/assets/chunk-RG4AUYOV-Bcl7U_IV.js +206 -0
- package/dist/web/assets/{chunk-TU3PZOEN-RLyvLcv-.js → chunk-T5OCTHI4-CZYMg5sc.js} +1 -1
- package/dist/web/assets/chunk-W44A43WB-REOI67PN.js +13 -0
- package/dist/web/assets/{chunk-RWUO3TPN-BgRTY0_k.js → chunk-ZXARS5L4-BfFdV1tf.js} +1 -1
- package/dist/web/assets/classDiagram-KGZ6W3CR-B-qkKMYi.js +1 -0
- package/dist/web/assets/classDiagram-v2-72OJOZXJ-B-qkKMYi.js +1 -0
- package/dist/web/assets/{cose-bilkent-PNC4W37J-DVhePRYg.js → cose-bilkent-UX7MHV2Q-D6vANJGG.js} +1 -1
- package/dist/web/assets/dagre-ND4H6XIP-BiHe5Lal.js +4 -0
- package/dist/web/assets/diagram-3NCE3AQN-CEutBCOW.js +43 -0
- package/dist/web/assets/diagram-GF46GFSD-CZns6HPQ.js +24 -0
- package/dist/web/assets/diagram-HNR7UZ2L-Vz8fE5of.js +3 -0
- package/dist/web/assets/diagram-QXG6HAR7-D60HKZ_y.js +24 -0
- package/dist/web/assets/diagram-WEQXMOUZ-vGAf1p3E.js +10 -0
- package/dist/web/assets/{erDiagram-GCSMX5X6-C3dhDFA8.js → erDiagram-L5TCEMPS-DZaplJA6.js} +5 -5
- package/dist/web/assets/{flowDiagram-OTCZ4VVT-CWSFWmhr.js → flowDiagram-H6V6AXG4-BqUqeAsI.js} +9 -9
- package/dist/web/assets/ganttDiagram-JCBTUEKG-XEB6H-0G.js +292 -0
- package/dist/web/assets/gitGraphDiagram-S2ZK5IYY-7G50u1Cd.js +106 -0
- package/dist/web/assets/index-B5WxtMpv.js +1415 -0
- package/dist/web/assets/infoDiagram-3YFTVSEB-Cut_rzaf.js +2 -0
- package/dist/web/assets/{ishikawaDiagram-YMYX4NHK-DUoJvNP2.js → ishikawaDiagram-BNXS4ZKH-B4DGfGi3.js} +3 -3
- package/dist/web/assets/{journeyDiagram-SO5T7YLQ-RMFPNNqz.js → journeyDiagram-M6C3CM3L-BBFhsL3E.js} +1 -1
- package/dist/web/assets/{kanban-definition-LJHFXRCJ-BzpDs1K9.js → kanban-definition-75IXJCU3-DarGRyn3.js} +4 -4
- package/dist/web/assets/{katex-GD7MH7QM-DBQvrix-.js → katex-K3KEBU37-W5XTYMhr.js} +1 -1
- package/dist/web/assets/mindmap-definition-2TDM6QVE-BgeczIJM.js +96 -0
- package/dist/web/assets/pieDiagram-CU6KROY3-Kkoo-Noq.js +30 -0
- package/dist/web/assets/quadrantDiagram-VICAPDV7-CDQFeRWN.js +7 -0
- package/dist/web/assets/{requirementDiagram-M5DCFWZL-DLHOVTSv.js → requirementDiagram-JXO7QTGE-Cz9-XnkA.js} +2 -2
- package/dist/web/assets/sankeyDiagram-URQDO5SZ-CU26z0n7.js +40 -0
- package/dist/web/assets/sequenceDiagram-VS2MUI6T-OGK1FLOt.js +162 -0
- package/dist/web/assets/stateDiagram-7D4R322I-DJ9brq0U.js +1 -0
- package/dist/web/assets/stateDiagram-v2-36443NZ5-DhJ4Ky-7.js +1 -0
- package/dist/web/assets/{timeline-definition-5SPVSISX-TRSDRgPw.js → timeline-definition-O6YCAMPW-XZvnjqTT.js} +4 -4
- package/dist/web/assets/{vennDiagram-IE5QUKF5-DNy7HRBM.js → vennDiagram-MWXL3ELB-CJUssEjA.js} +6 -6
- package/dist/web/assets/wardley-L42UT6IY-5TKZOOLJ-DZr11zBG.js +173 -0
- package/dist/web/assets/wardleyDiagram-CUQ6CDDI-C276iqrN.js +78 -0
- package/dist/web/assets/{xychartDiagram-ZHJ5623Y-Dr9r7a35.js → xychartDiagram-N2JHSOCM-B9-uCZyP.js} +4 -4
- package/dist/web/index.html +1 -1
- package/hooks/claude/codragraph-hook.cjs +15 -122
- package/package.json +1 -1
- package/vendor/node_modules/node-addon-api/node_addon_api_except.stamp +0 -0
- package/dist/web/assets/agent-D5lb0zXz.js +0 -1089
- package/dist/web/assets/architectureDiagram-EMZXCZ2Q-CZtc99v_.js +0 -36
- package/dist/web/assets/blockDiagram-IGV67L2C-BtoUp-6Y.js +0 -132
- package/dist/web/assets/chunk-3GS5O3IE-DkUjU0WD.js +0 -231
- package/dist/web/assets/chunk-3YCYZ6SJ-CQkVgT_z.js +0 -1
- package/dist/web/assets/chunk-H3VCZNTA-Cx5XV_aC.js +0 -13
- package/dist/web/assets/chunk-HN6EAY2L-BBnyTNdB.js +0 -1
- package/dist/web/assets/chunk-PK6DOVAG-CvsEnugt.js +0 -206
- package/dist/web/assets/classDiagram-PPOCWD7C-DTr8QIOf.js +0 -1
- package/dist/web/assets/classDiagram-v2-23LJLIIU-DTr8QIOf.js +0 -1
- package/dist/web/assets/dagre-E77IOHMT-Dzx0A6ZU.js +0 -4
- package/dist/web/assets/diagram-H7BISOXX-CC9pRew1.js +0 -43
- package/dist/web/assets/diagram-JC5VWROH-Bau_i9tf.js +0 -24
- package/dist/web/assets/diagram-LXUTUG65-D9_FM2Gt.js +0 -10
- package/dist/web/assets/diagram-WEHSV5V5-BMlayouL.js +0 -24
- package/dist/web/assets/ganttDiagram-MUNLMDZQ-D3a67Yol.js +0 -292
- package/dist/web/assets/gitGraphDiagram-3HKGZ4G3-7jmry-vM.js +0 -106
- package/dist/web/assets/index-BgeqpYgd.js +0 -1415
- package/dist/web/assets/infoDiagram-MN7RKWGX-G7lhP0Ib.js +0 -2
- package/dist/web/assets/mindmap-definition-2EUWGEK5-Bk0O4roa.js +0 -96
- package/dist/web/assets/pieDiagram-3IATQBI2-DKU7kpgS.js +0 -30
- package/dist/web/assets/quadrantDiagram-E256RVCF-BY0TGWCS.js +0 -7
- package/dist/web/assets/sankeyDiagram-L3NBLAOT-DVMj5rX2.js +0 -10
- package/dist/web/assets/sequenceDiagram-ZOUHS735-CJC73bV-.js +0 -157
- package/dist/web/assets/stateDiagram-MLPALWAM-BCFyESls.js +0 -1
- package/dist/web/assets/stateDiagram-v2-B5LQ5ZB2-DahzzIca.js +0 -1
- package/dist/web/assets/wardley-RL74JXVD-BCRCBASE-B-eZEzf9.js +0 -161
- package/dist/web/assets/wardleyDiagram-XU3VSMPF-BP-r1xzR.js +0 -20
|
@@ -16,7 +16,49 @@
|
|
|
16
16
|
*/
|
|
17
17
|
import fs from 'fs/promises';
|
|
18
18
|
import cgdb from '@ladybugdb/core';
|
|
19
|
+
import { NODE_TABLES } from './schema.js';
|
|
19
20
|
const pool = new Map();
|
|
21
|
+
const SIMPLE_LABELLESS_MATCH_RE = /^MATCH\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)/i;
|
|
22
|
+
const CYPHER_LIMIT_RE = /\bLIMIT\s+(\d+)\s*;?\s*$/i;
|
|
23
|
+
const CYPHER_RELATION_RE = /--|-\[|\]-|->|<-/;
|
|
24
|
+
const NATIVE_UNSAFE_NODE_LABELS = new Set(['Union']);
|
|
25
|
+
/**
 * Hook for rewriting node labels in a Cypher query before execution.
 *
 * Despite the name, no quoting is performed: the query text is handed
 * back exactly as received.
 *
 * @param {string} query Cypher text.
 * @returns {string} The same Cypher text, unmodified.
 */
function quoteKnownNodeLabels(query) {
    const passthrough = query;
    return passthrough;
}
|
|
28
|
+
/**
 * Find the first node label in a Cypher query that is listed in
 * NATIVE_UNSAFE_NODE_LABELS.
 *
 * Node patterns of the form `(alias:Label ...)` and `(:Label ...)` are
 * inspected; the label may be wrapped in backticks. The alias is optional so
 * that anonymous patterns such as `MATCH (:Union)` cannot slip past the guard.
 *
 * @param {string} query Cypher text to inspect.
 * @returns {string | null} The first unsafe label found, or null when the
 *   query references none.
 */
function getNativeUnsafeNodeLabel(query) {
    // "(" + optional alias + ":" + optionally backtick-quoted label. The
    // lookahead only accepts a label that is followed by whitespace, ")" or "{".
    const labelRe = /\(\s*(?:[A-Za-z_][A-Za-z0-9_]*)?\s*:\s*`?([A-Za-z_][A-Za-z0-9_]*)`?(?=[\s){])/g;
    for (const [, label] of query.matchAll(labelRe)) {
        if (NATIVE_UNSAFE_NODE_LABELS.has(label)) {
            return label;
        }
    }
    return null;
}
|
|
37
|
+
/**
 * Detect a "simple" labelless node scan and return its alias.
 *
 * A query qualifies when, after trimming, it starts with `MATCH (alias)`
 * (no label), contains no relationship syntax anywhere, and has no further
 * MATCH keyword after the leading pattern.
 *
 * @param {string} query Cypher text.
 * @returns {string | null} The node alias, or null when the query does not qualify.
 */
function getSimpleLabellessNodeAlias(query) {
    const text = query.trim();
    const head = SIMPLE_LABELLESS_MATCH_RE.exec(text);
    if (head === null) {
        return null;
    }
    const [leadingPattern, alias] = head;
    const hasRelationship = CYPHER_RELATION_RE.test(text);
    const hasSecondMatch = /\bMATCH\b/i.test(text.slice(leadingPattern.length));
    return hasRelationship || hasSecondMatch ? null : alias;
}
|
|
48
|
+
/**
 * Read the trailing `LIMIT <n>` clause of a Cypher query.
 *
 * @param {string} query Cypher text.
 * @returns {number | null} The limit when it is a positive finite integer;
 *   null when there is no trailing LIMIT or its value is zero / not finite.
 */
function getCypherLimit(query) {
    const found = query.match(CYPHER_LIMIT_RE);
    if (found === null) {
        return null;
    }
    const parsed = Number.parseInt(found[1], 10);
    if (!Number.isFinite(parsed) || parsed <= 0) {
        return null;
    }
    return parsed;
}
|
|
55
|
+
/**
 * Return the query with its trailing LIMIT set to `limit`.
 *
 * The limit is truncated to an integer and clamped to at least 1. An existing
 * trailing LIMIT clause (including an optional trailing semicolon) is
 * replaced; otherwise any trailing semicolon is stripped and a LIMIT clause
 * is appended.
 *
 * @param {string} query Cypher text.
 * @param {number} limit Requested row cap.
 * @returns {string} Cypher text ending in `LIMIT <n>`.
 */
function withCypherLimit(query, limit) {
    const clause = `LIMIT ${Math.max(1, Math.trunc(limit))}`;
    return CYPHER_LIMIT_RE.test(query)
        ? query.replace(CYPHER_LIMIT_RE, clause)
        : `${query.replace(/;\s*$/, '')} ${clause}`;
}
|
|
20
62
|
const poolCloseListeners = new Set();
|
|
21
63
|
/**
|
|
22
64
|
* Subscribe to pool-close events. Returns a disposer that removes the
|
|
@@ -463,15 +505,7 @@ function withTimeout(promise, ms, label) {
|
|
|
463
505
|
});
|
|
464
506
|
return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
|
|
465
507
|
}
|
|
466
|
-
|
|
467
|
-
const entry = pool.get(repoId);
|
|
468
|
-
if (!entry) {
|
|
469
|
-
throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
|
|
470
|
-
}
|
|
471
|
-
if (isWriteQuery(cypher)) {
|
|
472
|
-
throw new Error('Write operations are not allowed. The pool adapter is read-only.');
|
|
473
|
-
}
|
|
474
|
-
entry.lastUsed = Date.now();
|
|
508
|
+
async function runQueryOnEntry(entry, cypher) {
|
|
475
509
|
const conn = await checkout(entry);
|
|
476
510
|
silenceStdout();
|
|
477
511
|
activeQueryCount++;
|
|
@@ -486,17 +520,8 @@ export const executeQuery = async (repoId, cypher) => {
|
|
|
486
520
|
restoreStdout();
|
|
487
521
|
checkin(entry, conn);
|
|
488
522
|
}
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
* Execute a parameterized query on a specific repo's connection pool.
|
|
492
|
-
* Uses prepare/execute pattern to prevent Cypher injection.
|
|
493
|
-
*/
|
|
494
|
-
export const executeParameterized = async (repoId, cypher, params) => {
|
|
495
|
-
const entry = pool.get(repoId);
|
|
496
|
-
if (!entry) {
|
|
497
|
-
throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
|
|
498
|
-
}
|
|
499
|
-
entry.lastUsed = Date.now();
|
|
523
|
+
}
|
|
524
|
+
async function runParameterizedOnEntry(entry, cypher, params) {
|
|
500
525
|
const conn = await checkout(entry);
|
|
501
526
|
silenceStdout();
|
|
502
527
|
activeQueryCount++;
|
|
@@ -516,6 +541,91 @@ export const executeParameterized = async (repoId, cypher, params) => {
|
|
|
516
541
|
restoreStdout();
|
|
517
542
|
checkin(entry, conn);
|
|
518
543
|
}
|
|
544
|
+
}
|
|
545
|
+
/**
 * Emulate a labelless `MATCH (alias)` scan by running the query once per node
 * table and concatenating the results.
 *
 * For every entry of NODE_TABLES (skipping NATIVE_UNSAFE_NODE_LABELS) the
 * leading `MATCH (alias)` is rewritten to `MATCH (alias:`Label`)` and the
 * query is executed through `runner` with a LIMIT equal to the number of rows
 * still needed. Rows are decorated with the label they came from.
 *
 * @param {string} query Labelless Cypher query.
 * @param {string} alias Node alias used in the leading MATCH.
 * @param {(labelQuery: string) => Promise<any[]>} runner Executes one
 *   per-label query and resolves to its rows.
 * @returns {Promise<any[]>} At most `limit` rows, where `limit` is the query's
 *   trailing LIMIT, or 100 when getCypherLimit reports none.
 * @throws The last non-benign per-label error, but only when no rows at all
 *   were collected.
 */
async function runLabellessNodeScan(query, alias, runner) {
    // SIMPLE_LABELLESS_MATCH_RE is anchored at the start of the string, and the
    // alias is detected on trimmed text. Trim here as well; otherwise a query
    // with leading whitespace would never get a label injected and the same
    // labelless query would be executed once per table.
    const trimmedQuery = query.trim();
    const limit = getCypherLimit(trimmedQuery) ?? 100;
    const rows = [];
    let lastError = null;
    for (const label of NODE_TABLES) {
        if (NATIVE_UNSAFE_NODE_LABELS.has(label))
            continue;
        if (rows.length >= limit)
            break;
        // A function replacer keeps "$" sequences in the label from being
        // interpreted as replacement patterns.
        const labelledQuery = trimmedQuery.replace(SIMPLE_LABELLESS_MATCH_RE, () => `MATCH (${alias}:\`${label}\`)`);
        const labelQuery = withCypherLimit(labelledQuery, limit - rows.length);
        try {
            const labelRows = await runner(labelQuery);
            rows.push(...decorateLabellessRows(labelRows, label));
        }
        catch (err) {
            const error = err instanceof Error ? err : new Error(String(err));
            // Errors classified as benign by isBenignLabelScanError are dropped;
            // anything else is remembered and surfaced only if the scan ends empty.
            if (!isBenignLabelScanError(error)) {
                lastError = error;
            }
        }
    }
    if (rows.length === 0 && lastError) {
        throw lastError;
    }
    return rows.slice(0, limit);
}
|
|
571
|
+
/**
 * Tag rows produced by a per-label scan with the label they came from.
 *
 * Plain-object rows are shallow-copied, given a `__cgLabel` property, and have
 * `type`, `kind` and `label` filled with the label wherever those properties
 * are missing, null or the empty string. Rows that are not plain objects
 * (falsy values, primitives, arrays) are passed through untouched.
 *
 * @param {any[]} rows Rows returned for one label.
 * @param {string} label Node label the rows were read from.
 * @returns {any[]} A new array of decorated rows.
 */
function decorateLabellessRows(rows, label) {
    const fallbackKeys = ['type', 'kind', 'label'];
    const isPlainRow = (candidate) => Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);
    return rows.map((row) => {
        if (!isPlainRow(row)) {
            return row;
        }
        const tagged = { ...row, __cgLabel: label };
        fallbackKeys
            .filter((key) => tagged[key] === '' || tagged[key] == null)
            .forEach((key) => {
            tagged[key] = label;
        });
        return tagged;
    });
}
|
|
585
|
+
/**
 * Decide whether an error raised by a per-label query can be ignored.
 *
 * The check is a case-insensitive substring test on the error message: it is
 * benign when the message mentions a missing property ("cannot find
 * property", "does not have property", or both "property" and "not found").
 *
 * @param {Error} error Error thrown by a per-label query.
 * @returns {boolean} True when the message matches one of the patterns above.
 */
function isBenignLabelScanError(error) {
    const text = error.message.toLowerCase();
    if (text.includes('cannot find property')) {
        return true;
    }
    if (text.includes('does not have property')) {
        return true;
    }
    return text.includes('property') && text.includes('not found');
}
|
|
591
|
+
/**
 * Execute a read-only Cypher query against a repo's connection pool.
 *
 * Queries that reference a label reported by getNativeUnsafeNodeLabel resolve
 * to an empty result without touching the database. Simple labelless node
 * scans are fanned out per node table via runLabellessNodeScan; everything
 * else is run as-is.
 *
 * @param {string} repoId Key of the pool entry to use.
 * @param {string} cypher Cypher text.
 * @returns {Promise<any[]>} Result rows.
 * @throws {Error} When no pool entry exists for `repoId`, or when
 *   isWriteQuery classifies the query as a write.
 */
export const executeQuery = async (repoId, cypher) => {
    const entry = pool.get(repoId);
    if (!entry) {
        throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
    }
    const safeCypher = quoteKnownNodeLabels(cypher);
    if (isWriteQuery(safeCypher)) {
        throw new Error('Write operations are not allowed. The pool adapter is read-only.');
    }
    if (getNativeUnsafeNodeLabel(safeCypher)) {
        return [];
    }
    entry.lastUsed = Date.now();
    const runOnEntry = (text) => runQueryOnEntry(entry, text);
    const labellessAlias = getSimpleLabellessNodeAlias(safeCypher);
    return labellessAlias
        ? runLabellessNodeScan(safeCypher, labellessAlias, runOnEntry)
        : runOnEntry(safeCypher);
};
|
|
610
|
+
/**
 * Execute a parameterized query on a specific repo's connection pool.
 * Uses prepare/execute pattern to prevent Cypher injection.
 *
 * Queries that reference a label reported by getNativeUnsafeNodeLabel resolve
 * to an empty result. Simple labelless node scans are fanned out per node
 * table via runLabellessNodeScan, reusing the same `params` for every
 * per-label query.
 *
 * NOTE(review): unlike executeQuery, no isWriteQuery check is performed
 * here — confirm this is intentional.
 *
 * @param {string} repoId Key of the pool entry to use.
 * @param {string} cypher Cypher text with parameter placeholders.
 * @param {object} params Parameter values passed through to the runner.
 * @returns {Promise<any[]>} Result rows.
 * @throws {Error} When no pool entry exists for `repoId`.
 */
export const executeParameterized = async (repoId, cypher, params) => {
    const entry = pool.get(repoId);
    if (!entry) {
        throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
    }
    entry.lastUsed = Date.now();
    const safeCypher = quoteKnownNodeLabels(cypher);
    if (getNativeUnsafeNodeLabel(safeCypher)) {
        return [];
    }
    const runOnEntry = (text) => runParameterizedOnEntry(entry, text, params);
    const labellessAlias = getSimpleLabellessNodeAlias(safeCypher);
    return labellessAlias
        ? runLabellessNodeScan(safeCypher, labellessAlias, runOnEntry)
        : runOnEntry(safeCypher);
};
|
|
520
630
|
/**
|
|
521
631
|
* Close one or all repo pools.
|
|
@@ -567,7 +567,13 @@ scopeTreeCache, onFileProgress, workerPool) => {
|
|
|
567
567
|
return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
|
|
568
568
|
}
|
|
569
569
|
catch (err) {
|
|
570
|
-
console.warn('Worker pool parsing failed, falling back to sequential:', err instanceof Error ? err.message : err);
|
|
570
|
+
console.warn('Worker pool parsing failed for this chunk, falling back to sequential:', err instanceof Error ? err.message : err);
|
|
571
|
+
try {
|
|
572
|
+
await workerPool.terminate();
|
|
573
|
+
}
|
|
574
|
+
catch (terminateErr) {
|
|
575
|
+
console.warn('Worker pool termination after parsing failure failed:', terminateErr instanceof Error ? terminateErr.message : terminateErr);
|
|
576
|
+
}
|
|
571
577
|
}
|
|
572
578
|
}
|
|
573
579
|
// Fallback: sequential parsing (no pre-extracted data)
|
|
@@ -119,7 +119,9 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
119
119
|
workerUrl = pathToFileURL(distWorker);
|
|
120
120
|
}
|
|
121
121
|
}
|
|
122
|
-
workerPool = createWorkerPool(workerUrl
|
|
122
|
+
workerPool = createWorkerPool(workerUrl, options?.workerPoolSize, {
|
|
123
|
+
subBatchSize: options?.workerSubBatchSize,
|
|
124
|
+
});
|
|
123
125
|
}
|
|
124
126
|
catch (err) {
|
|
125
127
|
console.warn('Worker pool creation failed, using sequential fallback:', err.message);
|
|
@@ -170,6 +172,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
170
172
|
const chunkFiles = chunkPaths
|
|
171
173
|
.filter((p) => chunkContents.has(p))
|
|
172
174
|
.map((p) => ({ path: p, content: chunkContents.get(p) }));
|
|
175
|
+
const usedWorkerPoolForChunk = workerPool !== undefined;
|
|
173
176
|
const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, (current, _total, filePath) => {
|
|
174
177
|
const globalCurrent = filesParsedSoFar + current;
|
|
175
178
|
const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
|
|
@@ -185,6 +188,9 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
185
188
|
},
|
|
186
189
|
});
|
|
187
190
|
}, workerPool);
|
|
191
|
+
if (usedWorkerPoolForChunk && !chunkWorkerData) {
|
|
192
|
+
workerPool = undefined;
|
|
193
|
+
}
|
|
188
194
|
const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
|
|
189
195
|
if (chunkWorkerData) {
|
|
190
196
|
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
|
|
@@ -20,16 +20,32 @@ export const structurePhase = {
|
|
|
20
20
|
message: 'Analyzing project structure...',
|
|
21
21
|
stats: { filesProcessed: 0, totalFiles, nodesCreated: ctx.graph.nodeCount },
|
|
22
22
|
});
|
|
23
|
-
|
|
23
|
+
const focusSet = ctx.options?.focusPaths
|
|
24
|
+
? new Set(ctx.options.focusPaths.map((p) => p.replace(/\\/g, '/')))
|
|
25
|
+
: null;
|
|
26
|
+
const graphPaths = focusSet ? allPaths.filter((p) => focusSet.has(p)) : allPaths;
|
|
27
|
+
processStructure(ctx.graph, graphPaths);
|
|
24
28
|
ctx.onProgress({
|
|
25
29
|
phase: 'structure',
|
|
26
30
|
percent: 20,
|
|
27
31
|
message: 'Project structure analyzed',
|
|
28
|
-
stats: {
|
|
32
|
+
stats: {
|
|
33
|
+
filesProcessed: graphPaths.length,
|
|
34
|
+
totalFiles: focusSet ? graphPaths.length : totalFiles,
|
|
35
|
+
nodesCreated: ctx.graph.nodeCount,
|
|
36
|
+
},
|
|
29
37
|
});
|
|
30
38
|
// Build the set once here so cobol, markdown, and cross-file propagation
|
|
31
39
|
// can all reuse it instead of re-materializing `new Set(allPaths)` each.
|
|
32
40
|
const allPathSet = new Set(allPaths);
|
|
33
|
-
|
|
41
|
+
const focusedScannedFiles = focusSet
|
|
42
|
+
? scannedFiles.filter((f) => focusSet.has(f.path))
|
|
43
|
+
: scannedFiles;
|
|
44
|
+
return {
|
|
45
|
+
scannedFiles: focusedScannedFiles,
|
|
46
|
+
allPaths,
|
|
47
|
+
allPathSet,
|
|
48
|
+
totalFiles: focusSet ? focusedScannedFiles.length : totalFiles,
|
|
49
|
+
};
|
|
34
50
|
},
|
|
35
51
|
};
|
|
@@ -25,6 +25,16 @@ export interface PipelineOptions {
|
|
|
25
25
|
featureClusterRepo?: string;
|
|
26
26
|
/** Indexed source commit written onto FeatureCluster metadata. */
|
|
27
27
|
lastIndexedCommit?: string;
|
|
28
|
+
/**
|
|
29
|
+
* Optional focused indexing set for incremental file analysis. The walker
|
|
30
|
+
* still scans the repo so import resolution sees every path, but graph
|
|
31
|
+
* writes and content parsing are limited to these relative paths.
|
|
32
|
+
*/
|
|
33
|
+
focusPaths?: readonly string[];
|
|
34
|
+
/** Adaptive analyze worker pool size. */
|
|
35
|
+
workerPoolSize?: number;
|
|
36
|
+
/** Adaptive analyze max files per worker message. */
|
|
37
|
+
workerSubBatchSize?: number;
|
|
28
38
|
/**
|
|
29
39
|
* @internal Test-only override for worker-pool gating thresholds.
|
|
30
40
|
* When unset, production defaults apply (15 files OR 512 KB total bytes).
|
|
@@ -452,7 +452,7 @@ const processBatch = (files, onProgress) => {
|
|
|
452
452
|
}
|
|
453
453
|
let totalProcessed = 0;
|
|
454
454
|
let lastReported = 0;
|
|
455
|
-
const PROGRESS_INTERVAL =
|
|
455
|
+
const PROGRESS_INTERVAL = 25; // report often enough to keep worker idle timers fresh
|
|
456
456
|
const onFileProcessed = onProgress
|
|
457
457
|
? () => {
|
|
458
458
|
totalProcessed++;
|
|
@@ -10,7 +10,20 @@ export interface WorkerPool {
|
|
|
10
10
|
/** Number of workers in the pool */
|
|
11
11
|
readonly size: number;
|
|
12
12
|
}
|
|
13
|
+
/** Optional tuning knobs accepted by `createWorkerPool`. */
export interface WorkerPoolOptions {
    /**
     * Upper bound on the number of files posted to a worker in a single
     * message. Smaller batches keep structured-clone memory spikes down and
     * let the main thread observe progress more often on large repos.
     */
    subBatchSize?: number;
    /**
     * How long (in milliseconds) to wait for a worker response without any
     * sign of life. Worker progress resets the timer, so chunks that are slow
     * but still moving are not retried sequentially.
     */
    subBatchIdleTimeoutMs?: number;
}
|
|
13
26
|
/**
|
|
14
27
|
* Create a pool of worker threads.
|
|
15
28
|
*/
|
|
16
|
-
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number) => WorkerPool;
|
|
29
|
+
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number, options?: WorkerPoolOptions) => WorkerPool;
|
|
@@ -6,14 +6,24 @@ import { fileURLToPath } from 'node:url';
|
|
|
6
6
|
* Max files to send to a worker in a single postMessage.
|
|
7
7
|
* Keeps structured-clone memory bounded per sub-batch.
|
|
8
8
|
*/
|
|
9
|
-
const
|
|
10
|
-
/**
|
|
11
|
-
*
|
|
12
|
-
|
|
9
|
+
const DEFAULT_SUB_BATCH_SIZE = 250;
|
|
10
|
+
/**
|
|
11
|
+
* Idle timeout while waiting for a worker response. This is not a wall-clock
|
|
12
|
+
* limit: worker progress resets it. Large repos can legitimately need more
|
|
13
|
+
* than 30s for a chunk, but a wedged parser should still fall back.
|
|
14
|
+
*/
|
|
15
|
+
const DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS = 120_000;
|
|
16
|
+
/**
 * Read a positive integer from an environment variable.
 *
 * @param {string} name Environment variable name.
 * @param {number} fallback Value returned when the variable is unset, empty,
 *   or does not parse (base 10) to a finite integer greater than zero.
 * @returns {number} The parsed value or `fallback`.
 */
const positiveIntFromEnv = (name, fallback) => {
    // An unset or empty variable parses to NaN and therefore yields the fallback.
    const parsed = Number.parseInt(process.env[name] || '', 10);
    const usable = Number.isFinite(parsed) && parsed > 0;
    return usable ? parsed : fallback;
};
|
|
13
23
|
/**
|
|
14
24
|
* Create a pool of worker threads.
|
|
15
25
|
*/
|
|
16
|
-
export const createWorkerPool = (workerUrl, poolSize) => {
|
|
26
|
+
export const createWorkerPool = (workerUrl, poolSize, options = {}) => {
|
|
17
27
|
// Validate worker script exists before spawning to prevent uncaught
|
|
18
28
|
// MODULE_NOT_FOUND crashes in worker threads (e.g. when running from src/ via vitest)
|
|
19
29
|
const workerPath = fileURLToPath(workerUrl);
|
|
@@ -21,6 +31,10 @@ export const createWorkerPool = (workerUrl, poolSize) => {
|
|
|
21
31
|
throw new Error(`Worker script not found: ${workerPath}`);
|
|
22
32
|
}
|
|
23
33
|
const size = poolSize ?? Math.min(8, Math.max(1, os.cpus().length - 1));
|
|
34
|
+
const subBatchSize = options.subBatchSize ??
|
|
35
|
+
positiveIntFromEnv('CODRAGRAPH_WORKER_SUB_BATCH_SIZE', DEFAULT_SUB_BATCH_SIZE);
|
|
36
|
+
const subBatchIdleTimeoutMs = options.subBatchIdleTimeoutMs ??
|
|
37
|
+
positiveIntFromEnv('CODRAGRAPH_WORKER_IDLE_TIMEOUT_MS', DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS);
|
|
24
38
|
const workers = [];
|
|
25
39
|
for (let i = 0; i < size; i++) {
|
|
26
40
|
workers.push(new Worker(workerUrl));
|
|
@@ -38,41 +52,43 @@ export const createWorkerPool = (workerUrl, poolSize) => {
|
|
|
38
52
|
const worker = workers[i];
|
|
39
53
|
return new Promise((resolve, reject) => {
|
|
40
54
|
let settled = false;
|
|
41
|
-
let
|
|
55
|
+
let workerIdleTimer = null;
|
|
42
56
|
const cleanup = () => {
|
|
43
|
-
if (
|
|
44
|
-
clearTimeout(
|
|
57
|
+
if (workerIdleTimer)
|
|
58
|
+
clearTimeout(workerIdleTimer);
|
|
45
59
|
worker.removeListener('message', handler);
|
|
46
60
|
worker.removeListener('error', errorHandler);
|
|
47
61
|
worker.removeListener('exit', exitHandler);
|
|
48
62
|
};
|
|
49
|
-
const
|
|
50
|
-
if (
|
|
51
|
-
clearTimeout(
|
|
52
|
-
|
|
63
|
+
const resetWorkerIdleTimer = () => {
|
|
64
|
+
if (workerIdleTimer)
|
|
65
|
+
clearTimeout(workerIdleTimer);
|
|
66
|
+
workerIdleTimer = setTimeout(() => {
|
|
53
67
|
if (!settled) {
|
|
54
68
|
settled = true;
|
|
55
69
|
cleanup();
|
|
56
|
-
reject(new Error(`Worker ${i}
|
|
70
|
+
reject(new Error(`Worker ${i} was idle for ${subBatchIdleTimeoutMs / 1000}s while waiting for a response (chunk: ${chunk.length} items).`));
|
|
57
71
|
}
|
|
58
|
-
},
|
|
72
|
+
}, subBatchIdleTimeoutMs);
|
|
59
73
|
};
|
|
60
74
|
let subBatchIdx = 0;
|
|
61
75
|
const sendNextSubBatch = () => {
|
|
62
|
-
const start = subBatchIdx *
|
|
76
|
+
const start = subBatchIdx * subBatchSize;
|
|
63
77
|
if (start >= chunk.length) {
|
|
78
|
+
resetWorkerIdleTimer();
|
|
64
79
|
worker.postMessage({ type: 'flush' });
|
|
65
80
|
return;
|
|
66
81
|
}
|
|
67
|
-
const subBatch = chunk.slice(start, start +
|
|
82
|
+
const subBatch = chunk.slice(start, start + subBatchSize);
|
|
68
83
|
subBatchIdx++;
|
|
69
|
-
|
|
84
|
+
resetWorkerIdleTimer();
|
|
70
85
|
worker.postMessage({ type: 'sub-batch', files: subBatch });
|
|
71
86
|
};
|
|
72
87
|
const handler = (msg) => {
|
|
73
88
|
if (settled)
|
|
74
89
|
return;
|
|
75
90
|
if (msg.type === 'progress') {
|
|
91
|
+
resetWorkerIdleTimer();
|
|
76
92
|
workerProgress[i] = msg.filesProcessed;
|
|
77
93
|
if (onProgress) {
|
|
78
94
|
const total = workerProgress.reduce((a, b) => a + b, 0);
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import { type RepoMeta } from '../storage/repo-manager.js';
|
|
12
12
|
import type { ContentEncoding } from '@codragraph/graphstore';
|
|
13
|
+
import { type AnalyzeProfileOption, type CompressionOption, type EmbeddingMode } from './adaptive-profile.js';
|
|
13
14
|
export interface AnalyzeCallbacks {
|
|
14
15
|
onProgress: (phase: string, percent: number, message: string) => void;
|
|
15
16
|
onLog?: (message: string) => void;
|
|
@@ -23,6 +24,8 @@ export interface AnalyzeOptions {
|
|
|
23
24
|
*/
|
|
24
25
|
force?: boolean;
|
|
25
26
|
embeddings?: boolean;
|
|
27
|
+
profile?: AnalyzeProfileOption;
|
|
28
|
+
embeddingMode?: EmbeddingMode;
|
|
26
29
|
skipGit?: boolean;
|
|
27
30
|
/** Skip AGENTS.md and CLAUDE.md codragraph block updates. */
|
|
28
31
|
skipAgentsMd?: boolean;
|
|
@@ -54,7 +57,7 @@ export interface AnalyzeOptions {
|
|
|
54
57
|
* that wrote the rows). Readers on older Node will get a clear
|
|
55
58
|
* forward-compat error rather than silently bad content.
|
|
56
59
|
*/
|
|
57
|
-
compress?:
|
|
60
|
+
compress?: CompressionOption;
|
|
58
61
|
}
|
|
59
62
|
export interface AnalyzeResult {
|
|
60
63
|
repoName: string;
|
|
@@ -91,7 +94,29 @@ export declare const isGeneratedAgentContextPath: (filePath: string) => boolean;
|
|
|
91
94
|
export declare const isGraphContentPath: (filePath: string) => boolean;
|
|
92
95
|
export declare const changedPathAffectsGraph: (change: AnalyzeChangedPath) => boolean;
|
|
93
96
|
export declare const getGraphRelevantChangedPaths: (changes: readonly AnalyzeChangedPath[]) => AnalyzeChangedPath[];
|
|
94
|
-
export
|
|
97
|
+
/**
 * Result shape of `buildIncrementalFilePatchPlan`.
 *
 * NOTE(review): the exact meaning of `eligible`, `reason`, `replacePaths`,
 * `currentPaths` and `fileCountDelta` is not visible from this declaration —
 * confirm against the implementation before relying on them.
 */
export interface IncrementalFilePatchPlan {
    eligible: boolean;
    reason: string;
    replacePaths: string[];
    currentPaths: string[];
    fileCountDelta: number;
    /**
     * Set when a change (config / ignore / package) can affect resolver or
     * global structure broadly. The incremental path still does not delete
     * the whole DB, but every file-scoped node is replaced from a fresh full
     * scan.
     */
    replaceAllFileScoped: boolean;
    /** Maps old path to new path; used to reconnect external edges across renames. */
    pathAliases: Record<string, string>;
}
|
|
112
|
+
export declare const isPatchableIncrementalPath: (filePath: string) => boolean;
|
|
113
|
+
export declare const buildIncrementalFilePatchPlan: (changes: readonly AnalyzeChangedPath[], _options?: {
|
|
114
|
+
limit?: number;
|
|
115
|
+
}) => IncrementalFilePatchPlan;
|
|
116
|
+
export declare const getAnalyzeConfigRebuildReason: (existingMeta: Pick<RepoMeta, "compress" | "searchIndexes" | "stats">, options: {
|
|
117
|
+
compress?: ContentEncoding;
|
|
118
|
+
embeddings?: boolean;
|
|
119
|
+
}) => string | null;
|
|
95
120
|
/**
|
|
96
121
|
* Run the full CodraGraph analysis pipeline.
|
|
97
122
|
*
|