@codragraph/cli 2.1.4 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -7
- package/dist/cli/ai-context.js +297 -0
- package/dist/cli/index.js +31 -11
- package/dist/cli/tool.js +73 -33
- package/dist/config/ignore-service.js +1 -0
- package/dist/core/cgdb/pool-adapter.js +130 -20
- package/dist/core/ingestion/parsing-processor.js +7 -1
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +4 -0
- package/dist/core/ingestion/workers/parse-worker.js +1 -1
- package/dist/core/ingestion/workers/worker-pool.d.ts +14 -1
- package/dist/core/ingestion/workers/worker-pool.js +33 -17
- package/dist/core/run-analyze.js +28 -5
- package/dist/core/search/bm25-index.d.ts +0 -11
- package/dist/core/search/bm25-index.js +7 -84
- package/dist/mcp/local/local-backend.d.ts +2 -0
- package/dist/mcp/local/local-backend.js +235 -18
- package/hooks/claude/codragraph-hook.cjs +15 -122
- package/package.json +1 -1
package/dist/cli/tool.js
CHANGED
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
import { writeSync } from 'node:fs';
|
|
18
18
|
import { LocalBackend } from '../mcp/local/local-backend.js';
|
|
19
19
|
import { emitTokenStats } from './compress-stats.js';
|
|
20
|
+
import { findRepo } from '../storage/repo-manager.js';
|
|
20
21
|
let _backend = null;
|
|
21
22
|
async function getBackend() {
|
|
22
23
|
if (_backend)
|
|
@@ -29,6 +30,21 @@ async function getBackend() {
|
|
|
29
30
|
}
|
|
30
31
|
return _backend;
|
|
31
32
|
}
|
|
33
|
+
/**
 * Invoke a single backend tool by name.
 *
 * Obtains the backend through `getBackend()` and forwards the call to its
 * `callTool` method.
 *
 * @param toolName Name of the tool to invoke (e.g. 'query', 'impact').
 * @param params   Parameter object passed through to the tool unchanged.
 * @returns Whatever the backend's `callTool` resolves to.
 */
async function callToolOnce(toolName, params) {
    return (await getBackend()).callTool(toolName, params);
}
|
|
37
|
+
/**
 * Decide which repository a CLI command should run against.
 *
 * An explicitly supplied `repoParam` always wins. Otherwise the current
 * working directory is looked up with `findRepo`; when that lookup finds
 * nothing, an error is printed via `output`, the process exit code is set
 * to 1 and `null` is returned so the caller can bail out.
 *
 * @param repoParam Value of the `--repo` option, if any.
 * @returns The repo identifier/path to use, or `null` when none is available.
 */
async function resolveCliRepoParam(repoParam) {
    if (repoParam) {
        return repoParam;
    }
    const indexedRepo = await findRepo(process.cwd());
    if (!indexedRepo) {
        const message = [
            `Error: Current repository is not indexed: ${process.cwd()}`,
            'Run: npx @codragraph/cli analyze',
        ].join('\n');
        output(message);
        process.exitCode = 1;
        return null;
    }
    return indexedRepo.repoPath;
}
|
|
32
48
|
/**
|
|
33
49
|
* Write tool output to stdout using low-level fd write.
|
|
34
50
|
*
|
|
@@ -59,14 +75,16 @@ export async function queryCommand(queryText, options) {
|
|
|
59
75
|
console.error('Usage: codragraph query <search_query>');
|
|
60
76
|
process.exit(1);
|
|
61
77
|
}
|
|
62
|
-
const
|
|
63
|
-
|
|
78
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
79
|
+
if (!repo)
|
|
80
|
+
return;
|
|
81
|
+
const result = await callToolOnce('query', {
|
|
64
82
|
query: queryText,
|
|
65
83
|
task_context: options?.context,
|
|
66
84
|
goal: options?.goal,
|
|
67
85
|
limit: options?.limit ? parseInt(options.limit) : undefined,
|
|
68
86
|
include_content: options?.content ?? false,
|
|
69
|
-
repo
|
|
87
|
+
repo,
|
|
70
88
|
});
|
|
71
89
|
output(result);
|
|
72
90
|
emitTokenStats(result);
|
|
@@ -76,13 +94,15 @@ export async function contextCommand(name, options) {
|
|
|
76
94
|
console.error('Usage: codragraph context <symbol_name> [--uid <uid>] [--file <path>]');
|
|
77
95
|
process.exit(1);
|
|
78
96
|
}
|
|
79
|
-
const
|
|
80
|
-
|
|
97
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
98
|
+
if (!repo)
|
|
99
|
+
return;
|
|
100
|
+
const result = await callToolOnce('context', {
|
|
81
101
|
name: name || undefined,
|
|
82
102
|
uid: options?.uid,
|
|
83
103
|
file_path: options?.file,
|
|
84
104
|
include_content: options?.content ?? false,
|
|
85
|
-
repo
|
|
105
|
+
repo,
|
|
86
106
|
});
|
|
87
107
|
output(result);
|
|
88
108
|
emitTokenStats(result);
|
|
@@ -93,13 +113,15 @@ export async function impactCommand(target, options) {
|
|
|
93
113
|
process.exit(1);
|
|
94
114
|
}
|
|
95
115
|
try {
|
|
96
|
-
const
|
|
97
|
-
|
|
116
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
117
|
+
if (!repo)
|
|
118
|
+
return;
|
|
119
|
+
const result = await callToolOnce('impact', {
|
|
98
120
|
target,
|
|
99
121
|
direction: options?.direction || 'upstream',
|
|
100
122
|
maxDepth: options?.depth ? parseInt(options.depth, 10) : undefined,
|
|
101
123
|
includeTests: options?.includeTests ?? false,
|
|
102
|
-
repo
|
|
124
|
+
repo,
|
|
103
125
|
});
|
|
104
126
|
output(result);
|
|
105
127
|
emitTokenStats(result);
|
|
@@ -121,26 +143,32 @@ export async function cypherCommand(query, options) {
|
|
|
121
143
|
console.error('Usage: codragraph cypher <cypher_query>');
|
|
122
144
|
process.exit(1);
|
|
123
145
|
}
|
|
124
|
-
const
|
|
125
|
-
|
|
146
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
147
|
+
if (!repo)
|
|
148
|
+
return;
|
|
149
|
+
const result = await callToolOnce('cypher', {
|
|
126
150
|
query,
|
|
127
|
-
repo
|
|
151
|
+
repo,
|
|
128
152
|
});
|
|
129
153
|
output(result);
|
|
130
154
|
}
|
|
131
155
|
/**
 * CLI handler for listing feature clusters.
 *
 * Resolves the target repo (explicit `--repo` or the current directory),
 * calls the `feature_clusters` tool and prints its result. Returns without
 * printing a result when no repo could be resolved.
 *
 * @param options Parsed CLI options; reads `repo` and `limit`.
 */
export async function featureClustersCommand(options) {
    const repo = await resolveCliRepoParam(options?.repo);
    if (repo) {
        // `limit` arrives as a string from the CLI parser; omitted when not set.
        const limit = options?.limit ? parseInt(options.limit, 10) : undefined;
        output(await callToolOnce('feature_clusters', { repo, limit }));
    }
}
|
|
139
165
|
export async function clusterQueryCommand(query, options) {
|
|
140
|
-
const
|
|
141
|
-
|
|
166
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
167
|
+
if (!repo)
|
|
168
|
+
return;
|
|
169
|
+
const result = await callToolOnce('cluster_query', {
|
|
142
170
|
query,
|
|
143
|
-
repo
|
|
171
|
+
repo,
|
|
144
172
|
limit: options?.limit ? parseInt(options.limit, 10) : undefined,
|
|
145
173
|
});
|
|
146
174
|
output(result);
|
|
@@ -150,10 +178,12 @@ export async function featureContextCommand(name, options) {
|
|
|
150
178
|
console.error('Usage: codragraph feature-context <name>');
|
|
151
179
|
process.exit(1);
|
|
152
180
|
}
|
|
153
|
-
const
|
|
154
|
-
|
|
181
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
182
|
+
if (!repo)
|
|
183
|
+
return;
|
|
184
|
+
const result = await callToolOnce('feature_context', {
|
|
155
185
|
name,
|
|
156
|
-
repo
|
|
186
|
+
repo,
|
|
157
187
|
limit: options?.limit ? parseInt(options.limit, 10) : undefined,
|
|
158
188
|
});
|
|
159
189
|
output(result);
|
|
@@ -164,10 +194,12 @@ export async function clusterContextCommand(name, options) {
|
|
|
164
194
|
console.error('Usage: codragraph cluster-context <name>');
|
|
165
195
|
process.exit(1);
|
|
166
196
|
}
|
|
167
|
-
const
|
|
168
|
-
|
|
197
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
198
|
+
if (!repo)
|
|
199
|
+
return;
|
|
200
|
+
const result = await callToolOnce('cluster_context', {
|
|
169
201
|
name,
|
|
170
|
-
repo
|
|
202
|
+
repo,
|
|
171
203
|
limit: options?.limit ? parseInt(options.limit, 10) : undefined,
|
|
172
204
|
});
|
|
173
205
|
output(result);
|
|
@@ -178,10 +210,12 @@ export async function contextPackCommand(name, options) {
|
|
|
178
210
|
console.error('Usage: codragraph context-pack <name>');
|
|
179
211
|
process.exit(1);
|
|
180
212
|
}
|
|
181
|
-
const
|
|
182
|
-
|
|
213
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
214
|
+
if (!repo)
|
|
215
|
+
return;
|
|
216
|
+
const result = await callToolOnce('context_pack', {
|
|
183
217
|
name,
|
|
184
|
-
repo
|
|
218
|
+
repo,
|
|
185
219
|
limit: options?.limit ? parseInt(options.limit, 10) : undefined,
|
|
186
220
|
});
|
|
187
221
|
output(result);
|
|
@@ -192,11 +226,13 @@ export async function clusterImpactCommand(name, options) {
|
|
|
192
226
|
console.error('Usage: codragraph cluster-impact <name>');
|
|
193
227
|
process.exit(1);
|
|
194
228
|
}
|
|
195
|
-
const
|
|
196
|
-
|
|
229
|
+
const repo = await resolveCliRepoParam(options?.repo);
|
|
230
|
+
if (!repo)
|
|
231
|
+
return;
|
|
232
|
+
const result = await callToolOnce('cluster_impact', {
|
|
197
233
|
name,
|
|
198
234
|
direction: options?.direction,
|
|
199
|
-
repo
|
|
235
|
+
repo,
|
|
200
236
|
limit: options?.limit ? parseInt(options.limit, 10) : undefined,
|
|
201
237
|
});
|
|
202
238
|
output(result);
|
|
@@ -236,11 +272,15 @@ function formatDetectChangesResult(result) {
|
|
|
236
272
|
return lines.join('\n').trim();
|
|
237
273
|
}
|
|
238
274
|
/**
 * CLI handler for `detect_changes`.
 *
 * Resolves the target repo, runs the `detect_changes` tool, prints the
 * formatted result and marks the process as failed (exit code 1) when the
 * tool result carries an `error` field.
 *
 * @param options Parsed CLI options; reads `repo`, `scope` and `baseRef`.
 */
export async function detectChangesCommand(options) {
    const repo = await resolveCliRepoParam(options?.repo);
    if (!repo) {
        return;
    }
    const toolArgs = {
        scope: options?.scope || 'unstaged',
        base_ref: options?.baseRef,
        repo,
    };
    const result = await callToolOnce('detect_changes', toolArgs);
    output(formatDetectChangesResult(result));
    if (result?.error) {
        process.exitCode = 1;
    }
}
|
|
@@ -16,7 +16,49 @@
|
|
|
16
16
|
*/
|
|
17
17
|
import fs from 'fs/promises';
|
|
18
18
|
import cgdb from '@ladybugdb/core';
|
|
19
|
+
import { NODE_TABLES } from './schema.js';
|
|
19
20
|
const pool = new Map();
|
|
21
|
+
const SIMPLE_LABELLESS_MATCH_RE = /^MATCH\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)/i;
|
|
22
|
+
const CYPHER_LIMIT_RE = /\bLIMIT\s+(\d+)\s*;?\s*$/i;
|
|
23
|
+
const CYPHER_RELATION_RE = /--|-\[|\]-|->|<-/;
|
|
24
|
+
const NATIVE_UNSAFE_NODE_LABELS = new Set(['Union']);
|
|
25
|
+
/**
 * Hook for rewriting node labels in a Cypher query before execution.
 *
 * Currently a pass-through: the query is returned unchanged.
 * NOTE(review): the name suggests this was meant to backtick-quote known
 * labels — confirm whether the no-op is intentional.
 *
 * @param query Cypher query text.
 * @returns The same query text.
 */
function quoteKnownNodeLabels(query) {
    return query;
}
|
|
28
|
+
/**
 * Find the first node label in `query` that is listed in
 * `NATIVE_UNSAFE_NODE_LABELS`.
 *
 * Scans node patterns of the form `(alias:Label`, `(alias:`Label``, and —
 * unlike the previous implementation — anonymous patterns such as
 * `(:Label)`, which reference the label just the same and previously
 * slipped past this guard.
 *
 * The label must be followed by whitespace, `)` or `{`, so `Unions` does not
 * match `Union`.
 *
 * @param query Cypher query text.
 * @returns The offending label, or `null` when none is referenced.
 */
function getNativeUnsafeNodeLabel(query) {
    // Alias is optional so that `(:Union)` is detected as well as `(u:Union)`.
    const labelRe = /\(\s*(?:[A-Za-z_][A-Za-z0-9_]*)?\s*:\s*`?([A-Za-z_][A-Za-z0-9_]*)`?(?=[\s){])/g;
    for (const [, label] of query.matchAll(labelRe)) {
        if (NATIVE_UNSAFE_NODE_LABELS.has(label)) {
            return label;
        }
    }
    return null;
}
|
|
37
|
+
/**
 * Detect a "simple" label-less node scan and return its alias.
 *
 * A query qualifies when, after trimming, it starts with `MATCH (alias)`,
 * contains no relationship syntax anywhere, and has no further `MATCH`
 * clause after the leading one.
 *
 * @param query Cypher query text.
 * @returns The node alias, or `null` when the query is not a simple
 *          label-less scan.
 */
function getSimpleLabellessNodeAlias(query) {
    const text = query.trim();
    const head = text.match(SIMPLE_LABELLESS_MATCH_RE);
    if (head === null) {
        return null;
    }
    const [leadingClause, alias] = head;
    const hasRelationship = CYPHER_RELATION_RE.test(text);
    const hasSecondMatch = /\bMATCH\b/i.test(text.slice(leadingClause.length));
    return hasRelationship || hasSecondMatch ? null : alias;
}
|
|
48
|
+
/**
 * Extract the numeric value of a trailing `LIMIT <n>` clause.
 *
 * Only a `LIMIT` at the very end of the query (optionally followed by `;`
 * and whitespace) is recognised, as defined by `CYPHER_LIMIT_RE`.
 *
 * @param query Cypher query text.
 * @returns The positive limit, or `null` when there is no trailing LIMIT or
 *          its value is not a positive finite number.
 */
function getCypherLimit(query) {
    const found = query.match(CYPHER_LIMIT_RE);
    if (found === null) {
        return null;
    }
    const parsed = Number.parseInt(found[1], 10);
    if (!Number.isFinite(parsed) || parsed <= 0) {
        return null;
    }
    return parsed;
}
|
|
55
|
+
/**
 * Return `query` with its trailing LIMIT set to `limit`.
 *
 * An existing trailing `LIMIT <n>` (as recognised by `CYPHER_LIMIT_RE`) is
 * replaced; otherwise a `LIMIT` clause is appended after stripping any
 * trailing semicolon.
 *
 * The limit is truncated to an integer and clamped to at least 1. A
 * non-finite limit (NaN / ±Infinity) is also clamped to 1 — previously it
 * was interpolated verbatim and produced `LIMIT NaN` / `LIMIT Infinity`.
 *
 * @param query Cypher query text.
 * @param limit Desired maximum row count.
 * @returns The rewritten query.
 */
function withCypherLimit(query, limit) {
    const safeLimit = Number.isFinite(limit) ? Math.max(1, Math.trunc(limit)) : 1;
    if (CYPHER_LIMIT_RE.test(query)) {
        return query.replace(CYPHER_LIMIT_RE, `LIMIT ${safeLimit}`);
    }
    return `${query.replace(/;\s*$/, '')} LIMIT ${safeLimit}`;
}
|
|
20
62
|
const poolCloseListeners = new Set();
|
|
21
63
|
/**
|
|
22
64
|
* Subscribe to pool-close events. Returns a disposer that removes the
|
|
@@ -463,15 +505,7 @@ function withTimeout(promise, ms, label) {
|
|
|
463
505
|
});
|
|
464
506
|
return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
|
|
465
507
|
}
|
|
466
|
-
|
|
467
|
-
const entry = pool.get(repoId);
|
|
468
|
-
if (!entry) {
|
|
469
|
-
throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
|
|
470
|
-
}
|
|
471
|
-
if (isWriteQuery(cypher)) {
|
|
472
|
-
throw new Error('Write operations are not allowed. The pool adapter is read-only.');
|
|
473
|
-
}
|
|
474
|
-
entry.lastUsed = Date.now();
|
|
508
|
+
async function runQueryOnEntry(entry, cypher) {
|
|
475
509
|
const conn = await checkout(entry);
|
|
476
510
|
silenceStdout();
|
|
477
511
|
activeQueryCount++;
|
|
@@ -486,17 +520,8 @@ export const executeQuery = async (repoId, cypher) => {
|
|
|
486
520
|
restoreStdout();
|
|
487
521
|
checkin(entry, conn);
|
|
488
522
|
}
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
* Execute a parameterized query on a specific repo's connection pool.
|
|
492
|
-
* Uses prepare/execute pattern to prevent Cypher injection.
|
|
493
|
-
*/
|
|
494
|
-
export const executeParameterized = async (repoId, cypher, params) => {
|
|
495
|
-
const entry = pool.get(repoId);
|
|
496
|
-
if (!entry) {
|
|
497
|
-
throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
|
|
498
|
-
}
|
|
499
|
-
entry.lastUsed = Date.now();
|
|
523
|
+
}
|
|
524
|
+
async function runParameterizedOnEntry(entry, cypher, params) {
|
|
500
525
|
const conn = await checkout(entry);
|
|
501
526
|
silenceStdout();
|
|
502
527
|
activeQueryCount++;
|
|
@@ -516,6 +541,91 @@ export const executeParameterized = async (repoId, cypher, params) => {
|
|
|
516
541
|
restoreStdout();
|
|
517
542
|
checkin(entry, conn);
|
|
518
543
|
}
|
|
544
|
+
}
|
|
545
|
+
async function runLabellessNodeScan(query, alias, runner) {
|
|
546
|
+
const limit = getCypherLimit(query) ?? 100;
|
|
547
|
+
const rows = [];
|
|
548
|
+
let lastError = null;
|
|
549
|
+
for (const label of NODE_TABLES) {
|
|
550
|
+
if (NATIVE_UNSAFE_NODE_LABELS.has(label))
|
|
551
|
+
continue;
|
|
552
|
+
if (rows.length >= limit)
|
|
553
|
+
break;
|
|
554
|
+
const labelQuery = withCypherLimit(query.replace(SIMPLE_LABELLESS_MATCH_RE, `MATCH (${alias}:\`${label}\`)`), limit - rows.length);
|
|
555
|
+
try {
|
|
556
|
+
const labelRows = await runner(labelQuery);
|
|
557
|
+
rows.push(...decorateLabellessRows(labelRows, label));
|
|
558
|
+
}
|
|
559
|
+
catch (err) {
|
|
560
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
561
|
+
if (!isBenignLabelScanError(error)) {
|
|
562
|
+
lastError = error;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
if (rows.length === 0 && lastError) {
|
|
567
|
+
throw lastError;
|
|
568
|
+
}
|
|
569
|
+
return rows.slice(0, limit);
|
|
570
|
+
}
|
|
571
|
+
function decorateLabellessRows(rows, label) {
|
|
572
|
+
return rows.map((row) => {
|
|
573
|
+
if (!row || typeof row !== 'object' || Array.isArray(row))
|
|
574
|
+
return row;
|
|
575
|
+
const decorated = { ...row, __cgLabel: label };
|
|
576
|
+
for (const key of ['type', 'kind', 'label']) {
|
|
577
|
+
const value = decorated[key];
|
|
578
|
+
if (value === '' || value === null || value === undefined) {
|
|
579
|
+
decorated[key] = label;
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
return decorated;
|
|
583
|
+
});
|
|
584
|
+
}
|
|
585
|
+
/**
 * Decide whether an error from a per-label query is a missing-property
 * error, which the label scan treats as benign.
 *
 * Matching is done case-insensitively on the error message.
 *
 * @param error Error thrown by a label-specific query.
 * @returns `true` when the message contains "cannot find property",
 *          "does not have property", or both "property" and "not found".
 */
function isBenignLabelScanError(error) {
    const text = error.message.toLowerCase();
    if (text.includes('cannot find property')) {
        return true;
    }
    if (text.includes('does not have property')) {
        return true;
    }
    return text.includes('property') && text.includes('not found');
}
|
|
591
|
+
/**
 * Execute a read-only Cypher query against a repo's connection pool.
 *
 * Order of checks:
 *  1. the repo must have a pool entry, otherwise an error is thrown;
 *  2. write queries are rejected with an error;
 *  3. queries referencing a label in `NATIVE_UNSAFE_NODE_LABELS` return an
 *     empty result without touching the database;
 *  4. simple label-less `MATCH (alias)` scans are fanned out per node table
 *     via `runLabellessNodeScan`;
 *  5. everything else runs as-is.
 *
 * The entry's `lastUsed` timestamp is refreshed only after checks 1-3 pass.
 *
 * @param repoId Pool key of the repository.
 * @param cypher Cypher query text.
 * @returns The result rows.
 */
export const executeQuery = async (repoId, cypher) => {
    const repoEntry = pool.get(repoId);
    if (!repoEntry) {
        throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
    }
    const statement = quoteKnownNodeLabels(cypher);
    if (isWriteQuery(statement)) {
        throw new Error('Write operations are not allowed. The pool adapter is read-only.');
    }
    if (getNativeUnsafeNodeLabel(statement)) {
        return [];
    }
    repoEntry.lastUsed = Date.now();
    const run = (text) => runQueryOnEntry(repoEntry, text);
    const alias = getSimpleLabellessNodeAlias(statement);
    return alias ? runLabellessNodeScan(statement, alias, run) : run(statement);
};
|
|
610
|
+
/**
 * Execute a parameterized query on a specific repo's connection pool.
 * Uses prepare/execute pattern to prevent Cypher injection.
 *
 * The entry's `lastUsed` timestamp is refreshed as soon as the pool entry is
 * found. Queries referencing a label in `NATIVE_UNSAFE_NODE_LABELS` return an
 * empty result without touching the database; simple label-less
 * `MATCH (alias)` scans are fanned out per node table, with the same
 * `params` passed to every per-label query.
 *
 * @param repoId Pool key of the repository.
 * @param cypher Cypher query text with parameter placeholders.
 * @param params Parameter values for the query.
 * @returns The result rows.
 */
export const executeParameterized = async (repoId, cypher, params) => {
    const repoEntry = pool.get(repoId);
    if (!repoEntry) {
        throw new Error(`LadybugDB not initialized for repo "${repoId}". Call initCgdb first.`);
    }
    repoEntry.lastUsed = Date.now();
    const statement = quoteKnownNodeLabels(cypher);
    if (getNativeUnsafeNodeLabel(statement)) {
        return [];
    }
    const run = (text) => runParameterizedOnEntry(repoEntry, text, params);
    const alias = getSimpleLabellessNodeAlias(statement);
    return alias ? runLabellessNodeScan(statement, alias, run) : run(statement);
};
|
|
520
630
|
/**
|
|
521
631
|
* Close one or all repo pools.
|
|
@@ -567,7 +567,13 @@ scopeTreeCache, onFileProgress, workerPool) => {
|
|
|
567
567
|
return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
|
|
568
568
|
}
|
|
569
569
|
catch (err) {
|
|
570
|
-
console.warn('Worker pool parsing failed, falling back to sequential:', err instanceof Error ? err.message : err);
|
|
570
|
+
console.warn('Worker pool parsing failed for this chunk, falling back to sequential:', err instanceof Error ? err.message : err);
|
|
571
|
+
try {
|
|
572
|
+
await workerPool.terminate();
|
|
573
|
+
}
|
|
574
|
+
catch (terminateErr) {
|
|
575
|
+
console.warn('Worker pool termination after parsing failure failed:', terminateErr instanceof Error ? terminateErr.message : terminateErr);
|
|
576
|
+
}
|
|
571
577
|
}
|
|
572
578
|
}
|
|
573
579
|
// Fallback: sequential parsing (no pre-extracted data)
|
|
@@ -170,6 +170,7 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
170
170
|
const chunkFiles = chunkPaths
|
|
171
171
|
.filter((p) => chunkContents.has(p))
|
|
172
172
|
.map((p) => ({ path: p, content: chunkContents.get(p) }));
|
|
173
|
+
const usedWorkerPoolForChunk = workerPool !== undefined;
|
|
173
174
|
const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, scopeTreeCache, (current, _total, filePath) => {
|
|
174
175
|
const globalCurrent = filesParsedSoFar + current;
|
|
175
176
|
const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
|
|
@@ -185,6 +186,9 @@ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, t
|
|
|
185
186
|
},
|
|
186
187
|
});
|
|
187
188
|
}, workerPool);
|
|
189
|
+
if (usedWorkerPoolForChunk && !chunkWorkerData) {
|
|
190
|
+
workerPool = undefined;
|
|
191
|
+
}
|
|
188
192
|
const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
|
|
189
193
|
if (chunkWorkerData) {
|
|
190
194
|
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
|
|
@@ -452,7 +452,7 @@ const processBatch = (files, onProgress) => {
|
|
|
452
452
|
}
|
|
453
453
|
let totalProcessed = 0;
|
|
454
454
|
let lastReported = 0;
|
|
455
|
-
const PROGRESS_INTERVAL =
|
|
455
|
+
const PROGRESS_INTERVAL = 25; // report often enough to keep worker idle timers fresh
|
|
456
456
|
const onFileProcessed = onProgress
|
|
457
457
|
? () => {
|
|
458
458
|
totalProcessed++;
|
|
@@ -10,7 +10,20 @@ export interface WorkerPool {
|
|
|
10
10
|
/** Number of workers in the pool */
|
|
11
11
|
readonly size: number;
|
|
12
12
|
}
|
|
13
|
+
/** Optional tuning knobs accepted by `createWorkerPool`. */
export interface WorkerPoolOptions {
    /**
     * Upper bound on the number of files sent to a worker in a single
     * postMessage. Smaller values reduce structured-clone memory spikes and
     * give the main thread more chances to observe progress on large repos.
     */
    subBatchSize?: number;
    /**
     * How long to wait for a worker response before treating it as idle.
     * Worker progress resets the timer, so chunks that are slow but still
     * moving are not retried sequentially.
     */
    subBatchIdleTimeoutMs?: number;
}
|
|
13
26
|
/**
|
|
14
27
|
* Create a pool of worker threads.
|
|
15
28
|
*/
|
|
16
|
-
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number) => WorkerPool;
|
|
29
|
+
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number, options?: WorkerPoolOptions) => WorkerPool;
|
|
@@ -6,14 +6,24 @@ import { fileURLToPath } from 'node:url';
|
|
|
6
6
|
* Max files to send to a worker in a single postMessage.
|
|
7
7
|
* Keeps structured-clone memory bounded per sub-batch.
|
|
8
8
|
*/
|
|
9
|
-
const
|
|
10
|
-
/**
|
|
11
|
-
*
|
|
12
|
-
|
|
9
|
+
const DEFAULT_SUB_BATCH_SIZE = 250;
|
|
10
|
+
/**
|
|
11
|
+
* Idle timeout while waiting for a worker response. This is not a wall-clock
|
|
12
|
+
* limit: worker progress resets it. Large repos can legitimately need more
|
|
13
|
+
* than 30s for a chunk, but a wedged parser should still fall back.
|
|
14
|
+
*/
|
|
15
|
+
const DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS = 120_000;
|
|
16
|
+
/**
 * Read a positive integer from an environment variable.
 *
 * The value must consist solely of decimal digits (surrounding whitespace
 * and a leading `+` are tolerated). Anything else — unset, empty, zero,
 * negative, fractional (`1.5`), exponent notation (`1e3`), trailing garbage
 * (`250abc`) or beyond the safe-integer range — yields `fallback`.
 *
 * Previously `Number.parseInt` was applied directly, which silently read
 * `1e3` as 1 and `250abc` as 250.
 *
 * @param name     Environment variable name.
 * @param fallback Value returned when the variable is absent or invalid.
 * @returns The parsed positive integer, or `fallback`.
 */
const positiveIntFromEnv = (name, fallback) => {
    const raw = process.env[name]?.trim();
    if (!raw)
        return fallback;
    // Accept only a plain decimal integer; parseInt would stop at the first
    // non-digit and accept the prefix.
    if (!/^\+?\d+$/.test(raw))
        return fallback;
    const parsed = Number(raw);
    return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
};
|
|
13
23
|
/**
|
|
14
24
|
* Create a pool of worker threads.
|
|
15
25
|
*/
|
|
16
|
-
export const createWorkerPool = (workerUrl, poolSize) => {
|
|
26
|
+
export const createWorkerPool = (workerUrl, poolSize, options = {}) => {
|
|
17
27
|
// Validate worker script exists before spawning to prevent uncaught
|
|
18
28
|
// MODULE_NOT_FOUND crashes in worker threads (e.g. when running from src/ via vitest)
|
|
19
29
|
const workerPath = fileURLToPath(workerUrl);
|
|
@@ -21,6 +31,10 @@ export const createWorkerPool = (workerUrl, poolSize) => {
|
|
|
21
31
|
throw new Error(`Worker script not found: ${workerPath}`);
|
|
22
32
|
}
|
|
23
33
|
const size = poolSize ?? Math.min(8, Math.max(1, os.cpus().length - 1));
|
|
34
|
+
const subBatchSize = options.subBatchSize ??
|
|
35
|
+
positiveIntFromEnv('CODRAGRAPH_WORKER_SUB_BATCH_SIZE', DEFAULT_SUB_BATCH_SIZE);
|
|
36
|
+
const subBatchIdleTimeoutMs = options.subBatchIdleTimeoutMs ??
|
|
37
|
+
positiveIntFromEnv('CODRAGRAPH_WORKER_IDLE_TIMEOUT_MS', DEFAULT_SUB_BATCH_IDLE_TIMEOUT_MS);
|
|
24
38
|
const workers = [];
|
|
25
39
|
for (let i = 0; i < size; i++) {
|
|
26
40
|
workers.push(new Worker(workerUrl));
|
|
@@ -38,41 +52,43 @@ export const createWorkerPool = (workerUrl, poolSize) => {
|
|
|
38
52
|
const worker = workers[i];
|
|
39
53
|
return new Promise((resolve, reject) => {
|
|
40
54
|
let settled = false;
|
|
41
|
-
let
|
|
55
|
+
let workerIdleTimer = null;
|
|
42
56
|
const cleanup = () => {
|
|
43
|
-
if (
|
|
44
|
-
clearTimeout(
|
|
57
|
+
if (workerIdleTimer)
|
|
58
|
+
clearTimeout(workerIdleTimer);
|
|
45
59
|
worker.removeListener('message', handler);
|
|
46
60
|
worker.removeListener('error', errorHandler);
|
|
47
61
|
worker.removeListener('exit', exitHandler);
|
|
48
62
|
};
|
|
49
|
-
const
|
|
50
|
-
if (
|
|
51
|
-
clearTimeout(
|
|
52
|
-
|
|
63
|
+
const resetWorkerIdleTimer = () => {
|
|
64
|
+
if (workerIdleTimer)
|
|
65
|
+
clearTimeout(workerIdleTimer);
|
|
66
|
+
workerIdleTimer = setTimeout(() => {
|
|
53
67
|
if (!settled) {
|
|
54
68
|
settled = true;
|
|
55
69
|
cleanup();
|
|
56
|
-
reject(new Error(`Worker ${i}
|
|
70
|
+
reject(new Error(`Worker ${i} was idle for ${subBatchIdleTimeoutMs / 1000}s while waiting for a response (chunk: ${chunk.length} items).`));
|
|
57
71
|
}
|
|
58
|
-
},
|
|
72
|
+
}, subBatchIdleTimeoutMs);
|
|
59
73
|
};
|
|
60
74
|
let subBatchIdx = 0;
|
|
61
75
|
const sendNextSubBatch = () => {
|
|
62
|
-
const start = subBatchIdx *
|
|
76
|
+
const start = subBatchIdx * subBatchSize;
|
|
63
77
|
if (start >= chunk.length) {
|
|
78
|
+
resetWorkerIdleTimer();
|
|
64
79
|
worker.postMessage({ type: 'flush' });
|
|
65
80
|
return;
|
|
66
81
|
}
|
|
67
|
-
const subBatch = chunk.slice(start, start +
|
|
82
|
+
const subBatch = chunk.slice(start, start + subBatchSize);
|
|
68
83
|
subBatchIdx++;
|
|
69
|
-
|
|
84
|
+
resetWorkerIdleTimer();
|
|
70
85
|
worker.postMessage({ type: 'sub-batch', files: subBatch });
|
|
71
86
|
};
|
|
72
87
|
const handler = (msg) => {
|
|
73
88
|
if (settled)
|
|
74
89
|
return;
|
|
75
90
|
if (msg.type === 'progress') {
|
|
91
|
+
resetWorkerIdleTimer();
|
|
76
92
|
workerProgress[i] = msg.filesProcessed;
|
|
77
93
|
if (onProgress) {
|
|
78
94
|
const total = workerProgress.reduce((a, b) => a + b, 0);
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -144,8 +144,10 @@ export const changedPathAffectsGraph = (change) => {
|
|
|
144
144
|
const paths = [change.path, change.previousPath].filter((p) => Boolean(p));
|
|
145
145
|
if (paths.some(isGraphContentPath))
|
|
146
146
|
return true;
|
|
147
|
-
// Add/delete/rename/copy
|
|
148
|
-
// is not
|
|
147
|
+
// Add/delete/rename/copy affect the graph's File/Folder topology even when
|
|
148
|
+
// the path is not source code. Ignore only generated agent context and
|
|
149
|
+
// configured ignored paths; staying conservative here prevents stale file
|
|
150
|
+
// and documentation surfaces after path-only commits.
|
|
149
151
|
if (statusCode === 'A' || statusCode === 'D' || statusCode === 'R' || statusCode === 'C') {
|
|
150
152
|
return paths.some((p) => !isGeneratedAgentContextPath(p) && !shouldIgnorePath(p));
|
|
151
153
|
}
|
|
@@ -176,6 +178,14 @@ const buildReusedMeta = (existingMeta, repoPath, currentCommit) => ({
|
|
|
176
178
|
schemaVersion: INDEX_SCHEMA_VERSION,
|
|
177
179
|
remoteUrl: hasGitDir(repoPath) ? getRemoteUrl(repoPath) : existingMeta.remoteUrl,
|
|
178
180
|
});
|
|
181
|
+
const metaStatsForAIContext = (stats = {}) => ({
|
|
182
|
+
files: stats.files,
|
|
183
|
+
nodes: stats.nodes,
|
|
184
|
+
edges: stats.edges,
|
|
185
|
+
communities: stats.communities,
|
|
186
|
+
clusters: stats.featureClusters,
|
|
187
|
+
processes: stats.processes,
|
|
188
|
+
});
|
|
179
189
|
const pathExists = async (targetPath) => {
|
|
180
190
|
try {
|
|
181
191
|
await fs.stat(targetPath);
|
|
@@ -297,8 +307,15 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
297
307
|
existingMeta.lastCommit === currentCommit) {
|
|
298
308
|
// Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
|
|
299
309
|
if (currentCommit !== '') {
|
|
310
|
+
const repoName = options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath);
|
|
311
|
+
try {
|
|
312
|
+
await generateAIContextFiles(repoPath, storagePath, repoName, metaStatsForAIContext(existingMeta.stats), undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
|
|
313
|
+
}
|
|
314
|
+
catch {
|
|
315
|
+
// Best-effort only.
|
|
316
|
+
}
|
|
300
317
|
return {
|
|
301
|
-
repoName
|
|
318
|
+
repoName,
|
|
302
319
|
repoPath,
|
|
303
320
|
stats: existingMeta.stats ?? {},
|
|
304
321
|
alreadyUpToDate: true,
|
|
@@ -327,8 +344,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
327
344
|
if (hasGitDir(repoPath)) {
|
|
328
345
|
await addToGitignore(repoPath);
|
|
329
346
|
}
|
|
347
|
+
try {
|
|
348
|
+
await generateAIContextFiles(repoPath, storagePath, projectName, metaStatsForAIContext(reusedMeta.stats), undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
|
|
349
|
+
}
|
|
350
|
+
catch {
|
|
351
|
+
// Best-effort only.
|
|
352
|
+
}
|
|
330
353
|
const reuseReason = `Smart analyze reused the existing graph; ${changedPaths.length} changed ` +
|
|
331
|
-
`file(s) did not affect indexed
|
|
354
|
+
`file(s) did not affect indexed graph inputs.`;
|
|
332
355
|
log(reuseReason);
|
|
333
356
|
progress('done', 100, 'Existing graph reused');
|
|
334
357
|
return {
|
|
@@ -342,7 +365,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
342
365
|
}
|
|
343
366
|
const preview = graphRelevantChanges.slice(0, 5).map(formatChangeForLog).join(', ');
|
|
344
367
|
const suffix = graphRelevantChanges.length > 5 ? ', ...' : '';
|
|
345
|
-
log(`Smart analyze: ${graphRelevantChanges.length} indexed change(s) require rebuild` +
|
|
368
|
+
log(`Smart analyze: ${graphRelevantChanges.length} indexed graph input change(s) require rebuild` +
|
|
346
369
|
(preview ? ` (${preview}${suffix})` : '') +
|
|
347
370
|
'.');
|
|
348
371
|
}
|
|
@@ -16,17 +16,6 @@ export interface BM25SearchResult {
|
|
|
16
16
|
rank: number;
|
|
17
17
|
nodeIds?: string[];
|
|
18
18
|
}
|
|
19
|
-
/**
|
|
20
|
-
* Drop all ensured-FTS cache entries for a given repoId.
|
|
21
|
-
*
|
|
22
|
-
* Called from the pool-close listener so that a pool teardown / recreation
|
|
23
|
-
* forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
|
|
24
|
-
* against the fresh connection rather than trust stale ensure-state from a
|
|
25
|
-
* previous pool lifetime.
|
|
26
|
-
*
|
|
27
|
-
* Exported for tests; the listener wiring is internal.
|
|
28
|
-
*/
|
|
29
|
-
export declare function invalidateEnsuredFTSForRepo(repoId: string): void;
|
|
30
19
|
/**
|
|
31
20
|
* Search using LadybugDB's built-in FTS (always fresh, reads from disk)
|
|
32
21
|
*
|