@codexstar/bug-hunter 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +151 -0
- package/LICENSE +21 -0
- package/README.md +665 -0
- package/SKILL.md +624 -0
- package/bin/bug-hunter +222 -0
- package/evals/evals.json +362 -0
- package/modes/_dispatch.md +121 -0
- package/modes/extended.md +94 -0
- package/modes/fix-loop.md +115 -0
- package/modes/fix-pipeline.md +384 -0
- package/modes/large-codebase.md +212 -0
- package/modes/local-sequential.md +143 -0
- package/modes/loop.md +125 -0
- package/modes/parallel.md +113 -0
- package/modes/scaled.md +76 -0
- package/modes/single-file.md +38 -0
- package/modes/small.md +86 -0
- package/package.json +56 -0
- package/prompts/doc-lookup.md +44 -0
- package/prompts/examples/hunter-examples.md +131 -0
- package/prompts/examples/skeptic-examples.md +87 -0
- package/prompts/fixer.md +103 -0
- package/prompts/hunter.md +146 -0
- package/prompts/recon.md +159 -0
- package/prompts/referee.md +122 -0
- package/prompts/skeptic.md +143 -0
- package/prompts/threat-model.md +122 -0
- package/scripts/bug-hunter-state.cjs +537 -0
- package/scripts/code-index.cjs +541 -0
- package/scripts/context7-api.cjs +133 -0
- package/scripts/delta-mode.cjs +219 -0
- package/scripts/dep-scan.cjs +343 -0
- package/scripts/doc-lookup.cjs +316 -0
- package/scripts/fix-lock.cjs +167 -0
- package/scripts/init-test-fixture.sh +19 -0
- package/scripts/payload-guard.cjs +197 -0
- package/scripts/run-bug-hunter.cjs +892 -0
- package/scripts/tests/bug-hunter-state.test.cjs +87 -0
- package/scripts/tests/code-index.test.cjs +57 -0
- package/scripts/tests/delta-mode.test.cjs +47 -0
- package/scripts/tests/fix-lock.test.cjs +36 -0
- package/scripts/tests/fixtures/flaky-worker.cjs +63 -0
- package/scripts/tests/fixtures/low-confidence-worker.cjs +73 -0
- package/scripts/tests/fixtures/success-worker.cjs +42 -0
- package/scripts/tests/payload-guard.test.cjs +41 -0
- package/scripts/tests/run-bug-hunter.test.cjs +403 -0
- package/scripts/tests/test-utils.cjs +59 -0
- package/scripts/tests/worktree-harvest.test.cjs +297 -0
- package/scripts/triage.cjs +528 -0
- package/scripts/worktree-harvest.cjs +516 -0
- package/templates/subagent-wrapper.md +109 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
#!/usr/bin/env node

/**
 * triage.cjs — Zero-token pre-Recon codebase triage
 *
 * Runs BEFORE any LLM agent is invoked. Uses pure filesystem operations
 * (no fd, rg, grep, or any optional CLI tool) to:
 *
 * 1. Count and classify all source files
 * 2. Decide the optimal execution strategy
 * 3. Build a risk-scored file list using path heuristics
 * 4. Compute FILE_BUDGET from actual file sizes
 * 5. Output a machine-readable plan the orchestrator can act on directly
 *
 * This replaces the expensive "Recon agent reads 2,000 files" step with
 * a deterministic script that costs 0 tokens and runs in <2 seconds.
 *
 * Usage:
 *   triage.cjs scan <targetPath> [--output <path>] [--max-depth <n>]
 *   triage.cjs scan <targetPath> --format human
 *
 * Output: JSON plan to stdout (or --output file) with:
 *   - strategy: "single-file" | "small" | "parallel" | "extended" | "scaled" | "large-codebase"
 *   - fileBudget: computed from actual file sizes
 *   - domains: directory-level risk classification
 *   - riskMap: file-level classification (only for ≤200 files)
 *   - plan: which mode file to read, whether --loop is needed
 */

const fs = require('fs');
const path = require('path');

// ─── Source extensions ───────────────────────────────────────────────
// Files with any other extension are ignored entirely by the walk.
const SOURCE_EXTENSIONS = new Set([
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
'.py', '.go', '.rs', '.java', '.kt', '.rb', '.php',
'.cs', '.cpp', '.c', '.h', '.hpp', '.swift', '.scala',
'.ex', '.exs', '.erl', '.hs', '.ml', '.clj', '.lua'
]);

// ─── Directories to always skip ─────────────────────────────────────
// Dependency caches, build output, and tool state — never worth scanning.
// Note: walkDir also skips ANY directory whose name starts with '.', so
// the dotted entries here are belt-and-braces.
const SKIP_DIRS = new Set([
'node_modules', 'vendor', 'dist', 'build', '.git', '__pycache__',
'.next', 'coverage', '.cache', 'tmp', '.tmp', '.idea', '.vscode',
'.svn', 'target', 'out', '.output', '.nuxt', '.turbo', '.parcel-cache',
'bower_components', 'jspm_packages', '.yarn', '.pnp',
'venv', '.venv', 'env', '.env', 'virtualenv',
'Pods', '.gradle', '.mvn', 'bin', 'obj',
'artifacts', 'logs', '.terraform'
]);

// ─── Non-source directories (low value for bug hunting) ─────────────
// Still walked (files under them are counted) but classified as 'low'.
const LOW_VALUE_DIRS = new Set([
'docs', 'doc', 'documentation', 'assets', 'public', 'static',
'images', 'img', 'icons', 'fonts', 'styles', 'css', 'scss',
'less', 'locales', 'i18n', 'l10n', 'translations',
'migrations', 'seeds', 'fixtures', 'snapshots', '__snapshots__',
'scripts', 'tools', 'devtools', 'examples', 'samples', 'demo',
'storybook', '.storybook', 'stories'
]);

// ─── Risk classification by directory name ──────────────────────────
// Matched against the full relative path (directories + filename),
// case-insensitively, on word boundaries. First match wins in the
// order critical → high → medium (see classifyFile).
const CRITICAL_PATTERNS = /\b(auth|security|session|token|jwt|oauth|saml|permission|acl|rbac|crypto|secret|credential|password|login|signup|register|verify|middleware|gateway|proxy|payment|billing|checkout|charge|subscription|stripe|paypal|webhook|callback)\b/i;
const HIGH_PATTERNS = /\b(api|route|router|controller|handler|endpoint|resolver|service|model|schema|database|db|repository|store|state|queue|worker|job|cron|consumer|producer|cache|redis|mongo|prisma|sequelize|typeorm|knex|sql|graphql|trpc|grpc|socket|websocket|sse|stream|upload|download|file|storage|s3|email|notification|sms)\b/i;
const MEDIUM_PATTERNS = /\b(util|utils|helper|helpers|lib|common|shared|core|config|env|logger|error|exception|validator|sanitize|transform|format|parse|convert|serialize)\b/i;
const TEST_PATTERNS = /\b(test|tests|spec|specs|__tests__|__test__|testing|e2e|integration|unit|cypress|playwright|jest)\b/i;
// Filename-only pattern for test files (e.g. foo.test.ts, bar_test.go, BazTest.java).
const TEST_FILE_PATTERNS = /\.(test|spec|e2e)\.(ts|tsx|js|jsx|py|go|rs|java|rb)$|_test\.(go|py|rb)$|Test\.(java|kt)$/;
|
|
68
|
+
|
|
69
|
+
// ─── Walk filesystem (no external tools needed) ─────────────────────
|
|
70
|
+
/**
 * Recursively collect absolute paths of source files under `dirPath`.
 *
 * Skips directories listed in SKIP_DIRS and any directory whose name
 * begins with '.', stops descending past `maxDepth`, and silently
 * treats unreadable directories as empty. Only files whose extension
 * is in SOURCE_EXTENSIONS are returned. Symlinks are not followed
 * (Dirent.isDirectory/isFile are false for symlinks).
 *
 * @param {string} dirPath - Directory to walk.
 * @param {number} maxDepth - Maximum recursion depth (0 = top level only).
 * @param {number} currentDepth - Depth of `dirPath` relative to the root.
 * @returns {string[]} Absolute paths of discovered source files.
 */
function walkDir(dirPath, maxDepth, currentDepth) {
  if (currentDepth > maxDepth) {
    return [];
  }

  let entries;
  try {
    entries = fs.readdirSync(dirPath, { withFileTypes: true });
  } catch {
    // Permission errors etc. — treat as an empty directory.
    return [];
  }

  const found = [];

  for (const entry of entries) {
    const entryPath = path.join(dirPath, entry.name);

    if (entry.isDirectory()) {
      const skip = SKIP_DIRS.has(entry.name) || entry.name.startsWith('.');
      if (!skip) {
        for (const childPath of walkDir(entryPath, maxDepth, currentDepth + 1)) {
          found.push(childPath);
        }
      }
    } else if (entry.isFile() && SOURCE_EXTENSIONS.has(path.extname(entry.name))) {
      found.push(entryPath);
    }
  }

  return found;
}
|
|
102
|
+
|
|
103
|
+
// ─── Classify a single file by its path ─────────────────────────────
|
|
104
|
+
/**
 * Classify a single file by its path relative to `repoRoot`.
 *
 * Precedence: test files → 'context-only'; files under a low-value
 * directory → 'low'; then the relative path is matched against the
 * CRITICAL / HIGH / MEDIUM regexes in that order. Anything that
 * matches nothing still returns 'medium' — unknown files are worth
 * scanning.
 *
 * @param {string} filePath - Absolute path of the file.
 * @param {string} repoRoot - Repository root used for relativization.
 * @returns {'critical'|'high'|'medium'|'low'|'context-only'}
 */
function classifyFile(filePath, repoRoot) {
  const relative = path.relative(repoRoot, filePath);
  const segments = relative.split(path.sep);
  const fileName = segments[segments.length - 1];
  const dirPath = segments.slice(0, -1).join('/');

  // Test files (by filename suffix or any test-ish path segment).
  const isTestFile =
    TEST_FILE_PATTERNS.test(fileName) || segments.some((segment) => TEST_PATTERNS.test(segment));
  if (isTestFile) {
    return 'context-only';
  }

  // Files living under a low-value directory.
  if (segments.some((segment) => LOW_VALUE_DIRS.has(segment.toLowerCase()))) {
    return 'low';
  }

  // Match directory path + filename against the risk patterns.
  const candidate = `${dirPath}/${fileName}`;
  if (CRITICAL_PATTERNS.test(candidate)) {
    return 'critical';
  }
  if (HIGH_PATTERNS.test(candidate)) {
    return 'high';
  }
  if (MEDIUM_PATTERNS.test(candidate)) {
    return 'medium';
  }

  // Default: medium — unknown files are worth scanning.
  return 'medium';
}
|
|
135
|
+
|
|
136
|
+
// ─── Discover domain boundaries ─────────────────────────────────────
|
|
137
|
+
/**
 * Group source files into "domains" (top-level meaningful directories)
 * and assign each domain a risk tier based on the concentration of
 * critical/high-risk files inside it.
 *
 * Bug fix: the final sort previously used `tierOrder[tier] || 99`.
 * CRITICAL has rank 0, which is falsy, so CRITICAL domains were ranked
 * 99 and sorted AFTER HIGH/MEDIUM — the opposite of the documented
 * "CRITICAL first" order. An explicit membership check preserves rank 0.
 *
 * @param {string[]} files - Absolute paths of source files.
 * @param {string} repoRoot - Repository root used for relativization.
 * @returns {Array<{path: string, tier: string, fileCount: number,
 *   riskBreakdown: Object, files: string[]}>} Domains sorted
 *   CRITICAL → HIGH → MEDIUM → LOW → CONTEXT-ONLY.
 */
function discoverDomains(files, repoRoot) {
  // Group files by their top-level meaningful directory.
  const domainFiles = new Map();

  for (const filePath of files) {
    const relative = path.relative(repoRoot, filePath);
    const parts = relative.split(path.sep);

    // Find the domain root: first 1-2 meaningful directory levels.
    // Generic container dirs (src/, packages/, …) descend one level deeper.
    let domainKey;
    if (parts.length <= 1) {
      domainKey = '.';
    } else if (['src', 'lib', 'app', 'packages', 'services', 'apps', 'modules'].includes(parts[0].toLowerCase())) {
      domainKey = parts.length >= 3 ? parts.slice(0, 2).join('/') : parts[0];
    } else {
      domainKey = parts[0];
    }

    if (!domainFiles.has(domainKey)) {
      domainFiles.set(domainKey, []);
    }
    domainFiles.get(domainKey).push(filePath);
  }

  // Classify each domain.
  const domains = [];
  for (const [domainPath, domainFileList] of domainFiles.entries()) {
    const riskCounts = { critical: 0, high: 0, medium: 0, low: 0, 'context-only': 0 };
    for (const f of domainFileList) {
      riskCounts[classifyFile(f, repoRoot)] += 1;
    }

    // Domain tier = based on concentration of risk, not just presence.
    let tier;
    const total = domainFileList.length;
    const criticalRatio = riskCounts.critical / total;

    if (riskCounts.critical >= 3 && criticalRatio >= 0.15) {
      // Genuinely critical: meaningful portion of files are critical.
      tier = 'CRITICAL';
    } else if (riskCounts.critical >= 1 && criticalRatio >= 0.3) {
      // Small domain where most files are critical.
      tier = 'CRITICAL';
    } else if (riskCounts.critical >= 1) {
      // Has some critical files but mostly other stuff — boost to HIGH.
      tier = 'HIGH';
    } else if (riskCounts.high > 0) {
      tier = 'HIGH';
    } else if (riskCounts['context-only'] > total * 0.8) {
      tier = 'CONTEXT-ONLY';
    } else if (riskCounts.low > total * 0.5) {
      tier = 'LOW';
    } else {
      tier = 'MEDIUM';
    }

    domains.push({
      path: domainPath,
      tier,
      fileCount: domainFileList.length,
      riskBreakdown: riskCounts,
      files: domainFileList.map((f) => path.relative(repoRoot, f))
    });
  }

  // Sort: CRITICAL first, then HIGH, then MEDIUM, then rest.
  const tierOrder = { CRITICAL: 0, HIGH: 1, MEDIUM: 2, LOW: 3, 'CONTEXT-ONLY': 4 };
  // NOTE: `tierOrder[tier] || 99` would turn CRITICAL's rank 0 into 99;
  // use an explicit membership check so rank 0 survives.
  const rank = (tier) => (tier in tierOrder ? tierOrder[tier] : 99);
  domains.sort((a, b) => rank(a.tier) - rank(b.tier));

  return domains;
}
|
|
210
|
+
|
|
211
|
+
// ─── Compute FILE_BUDGET from actual file sizes ─────────────────────
|
|
212
|
+
/**
 * Estimate per-chunk FILE_BUDGET from actual file sizes.
 *
 * Samples up to 30 evenly-spaced files (fast even for huge repos),
 * averages their line counts, approximates tokens as 4 per line, and
 * derives FILE_BUDGET = floor(150000 / avgTokens) clamped to [10, 60].
 * Unreadable files are skipped; if nothing could be sampled (or the
 * list is empty) a default budget of 40 is returned.
 *
 * @param {string[]} files - Absolute paths of source files.
 * @returns {{fileBudget: number, avgLines: number, totalLines: number,
 *   avgTokens: number, sampledFiles: number}}
 */
function computeFileBudget(files) {
  // Fresh default object for the empty / nothing-readable cases.
  const fallback = () => ({ fileBudget: 40, avgLines: 0, totalLines: 0, avgTokens: 0, sampledFiles: 0 });

  if (files.length === 0) {
    return fallback();
  }

  // Sample up to 30 files, evenly spaced across the list.
  const sampleSize = Math.min(30, files.length);
  const stride = Math.max(1, Math.floor(files.length / sampleSize));

  let lineSum = 0;
  let sampled = 0;

  for (let idx = 0; idx < files.length && sampled < sampleSize; idx += stride) {
    let content;
    try {
      content = fs.readFileSync(files[idx], 'utf8');
    } catch {
      // Skip unreadable files.
      continue;
    }
    lineSum += content.split('\n').length;
    sampled += 1;
  }

  if (sampled === 0) {
    return fallback();
  }

  const avgLines = Math.round(lineSum / sampled);
  const avgTokens = avgLines * 4;

  // FILE_BUDGET = floor(150000 / avgTokens), clamped to [10, 60].
  let fileBudget = avgTokens > 0 ? Math.floor(150000 / avgTokens) : 60;
  fileBudget = Math.max(10, Math.min(60, fileBudget));

  return {
    fileBudget,
    avgLines,
    totalLines: avgLines * files.length,
    avgTokens,
    sampledFiles: sampled
  };
}
|
|
252
|
+
|
|
253
|
+
// ─── Determine strategy ─────────────────────────────────────────────
|
|
254
|
+
/**
 * Pick the execution strategy from the file count and FILE_BUDGET.
 *
 * Thresholds: ≤1 → single-file, ≤10 → small, ≤budget → parallel,
 * ≤2×budget → extended, ≤3×budget → scaled, otherwise large-codebase
 * (the only strategy that requires --loop).
 *
 * @param {number} fileCount - Total source files discovered.
 * @param {number} fileBudget - Files a single Hunter chunk can cover.
 * @returns {{strategy: string, modeFile: string, needsLoop: boolean}}
 */
function determineStrategy(fileCount, fileBudget) {
  // Every strategy's mode file lives at modes/<strategy>.md.
  const pick = (strategy, needsLoop = false) => ({
    strategy,
    modeFile: `modes/${strategy}.md`,
    needsLoop
  });

  if (fileCount <= 1) {
    return pick('single-file');
  }
  if (fileCount <= 10) {
    return pick('small');
  }
  if (fileCount <= fileBudget) {
    return pick('parallel');
  }
  if (fileCount <= fileBudget * 2) {
    return pick('extended');
  }
  if (fileCount <= fileBudget * 3) {
    return pick('scaled');
  }
  return pick('large-codebase', true);
}
|
|
272
|
+
|
|
273
|
+
// ─── Build the risk map (file-level, only for small/medium codebases)
|
|
274
|
+
/**
 * Build the file-level risk map (only used for small/medium codebases).
 *
 * Buckets every file's repo-relative path under its risk tier from
 * classifyFile, then sorts each bucket alphabetically.
 *
 * @param {string[]} files - Absolute paths of source files.
 * @param {string} repoRoot - Repository root used for relativization.
 * @returns {{critical: string[], high: string[], medium: string[],
 *   low: string[], 'context-only': string[]}}
 */
function buildRiskMap(files, repoRoot) {
  const tiers = ['critical', 'high', 'medium', 'low', 'context-only'];
  const riskMap = {};
  for (const tier of tiers) {
    riskMap[tier] = [];
  }

  for (const filePath of files) {
    const tier = classifyFile(filePath, repoRoot);
    riskMap[tier].push(path.relative(repoRoot, filePath));
  }

  // Sort each tier alphabetically for stable output.
  for (const tier of tiers) {
    riskMap[tier].sort();
  }

  return riskMap;
}
|
|
290
|
+
|
|
291
|
+
// ─── Main: triage scan ──────────────────────────────────────────────
|
|
292
|
+
/**
 * Main triage entry point: scan `targetPath` and produce the plan object.
 *
 * For a single file, returns a fixed single-file plan (the file is placed
 * in the 'critical' bucket so it is always scanned). For a directory,
 * walks the tree, computes FILE_BUDGET, picks a strategy, discovers
 * domains, and assembles scan order, token estimates, and recommendations.
 *
 * @param {string} targetPath - File or directory to triage.
 * @param {{maxDepth?: number}} options - `maxDepth` caps the walk depth
 *   (defaults to 20; note `||` means an explicit 0 also falls back to 20).
 * @returns {Object} The machine-readable triage plan (see file header).
 * @throws {Error} If `targetPath` does not exist.
 */
function scan(targetPath, options) {
  const resolvedTarget = path.resolve(targetPath);
  const maxDepth = options.maxDepth || 20;

  if (!fs.existsSync(resolvedTarget)) {
    throw new Error(`Target not found: ${resolvedTarget}`);
  }

  // Single file?
  const stat = fs.statSync(resolvedTarget);
  if (stat.isFile()) {
    // Basename only — a lone file needs no repo-relative path.
    const relative = path.basename(resolvedTarget);
    return {
      generatedAt: new Date().toISOString(),
      target: resolvedTarget,
      totalFiles: 1,
      strategy: 'single-file',
      modeFile: 'modes/single-file.md',
      needsLoop: false,
      fileBudget: 40,
      avgLines: 0,
      // Unconditionally 'critical' so the one file is always in scan scope.
      riskMap: { critical: [relative], high: [], medium: [], low: [], 'context-only': [] },
      domains: [],
      scanOrder: [relative],
      tokenEstimate: { recon: 0, perHunterChunk: 0, total: 0 },
      recommendations: ['Single file — run full pipeline directly, skip Recon.']
    };
  }

  // Directory scan
  const allFiles = walkDir(resolvedTarget, maxDepth, 0);
  const totalFiles = allFiles.length;

  // Compute FILE_BUDGET
  const budget = computeFileBudget(allFiles);

  // Determine strategy
  const { strategy, modeFile, needsLoop } = determineStrategy(totalFiles, budget.fileBudget);

  // Discover domains
  const domains = discoverDomains(allFiles, resolvedTarget);

  // Build risk map only for ≤ 200 files (otherwise too expensive for the output)
  const includeFileRiskMap = totalFiles <= 200;
  const riskMap = includeFileRiskMap ? buildRiskMap(allFiles, resolvedTarget) : null;

  // Build scan order: CRITICAL → HIGH → MEDIUM (skip low + context-only)
  let scanOrder;
  if (riskMap) {
    scanOrder = [...riskMap.critical, ...riskMap.high, ...riskMap.medium];
    // If nothing but low-tier files exist, promote them rather than
    // producing an empty audit.
    if (scanOrder.length === 0 && riskMap.low.length > 0) {
      scanOrder = [...riskMap.low];
    }
  } else {
    // For large codebases, just list domains in priority order
    scanOrder = domains
      .filter((d) => d.tier !== 'CONTEXT-ONLY' && d.tier !== 'LOW')
      .map((d) => `${d.path}/ (${d.fileCount} files, ${d.tier})`);
  }

  // Token estimates
  // `scannable` counts files when the risk map exists, otherwise sums
  // file counts across non-skipped domains.
  const scannable = riskMap
    ? scanOrder.length
    : domains.filter((d) => !['CONTEXT-ONLY', 'LOW'].includes(d.tier)).reduce((s, d) => s + d.fileCount, 0);
  const tokensPerFile = budget.avgTokens || 400;
  const reconTokens = includeFileRiskMap ? Math.min(totalFiles * 20, 5000) : Math.min(domains.length * 100, 3000);
  const perHunterChunk = Math.min(budget.fileBudget, scannable) * tokensPerFile;
  const totalTokenEstimate = reconTokens + (scannable * tokensPerFile) + (scannable * 200); // 200 tokens per finding estimate

  // Recommendations
  const recommendations = [];
  if (strategy === 'single-file' || strategy === 'small') {
    recommendations.push('Small codebase — Recon is optional, proceed directly to Hunter.');
  }
  if (strategy === 'large-codebase') {
    recommendations.push('Large codebase — use domain-scoped auditing. Process one domain at a time.');
    recommendations.push(`${domains.filter((d) => d.tier === 'CRITICAL').length} CRITICAL domains to audit first.`);
    // NOTE(review): determineStrategy always sets needsLoop=true for
    // 'large-codebase', so this branch looks unreachable — confirm.
    if (!needsLoop) {
      recommendations.push('Consider --loop for full coverage.');
    }
  }
  if (budget.avgLines > 300) {
    recommendations.push(`Large files (avg ${budget.avgLines} lines) — FILE_BUDGET is low (${budget.fileBudget}). Chunking is important.`);
  }
  if (totalFiles > 500 && !includeFileRiskMap) {
    recommendations.push('File-level risk map omitted for performance. Recon should classify files within each domain.');
  }
  if (riskMap && riskMap.critical.length + riskMap.high.length + riskMap.medium.length === 0 && riskMap.low.length > 0) {
    recommendations.push('Only LOW-tier source files detected — promote them into scan order to avoid an empty audit.');
  }

  const result = {
    generatedAt: new Date().toISOString(),
    target: resolvedTarget,
    totalFiles,
    scannableFiles: scannable,
    strategy,
    modeFile,
    needsLoop,
    fileBudget: budget.fileBudget,
    avgLines: budget.avgLines,
    avgTokensPerFile: budget.avgTokens || 400,
    estimatedTotalLines: budget.totalLines,
    sampledFiles: budget.sampledFiles,
    // Domain summaries omit the per-domain file lists (see below).
    domains: domains.map((d) => ({
      path: d.path,
      tier: d.tier,
      fileCount: d.fileCount,
      riskBreakdown: d.riskBreakdown
    })),
    scanOrder,
    tokenEstimate: {
      recon: reconTokens,
      perHunterChunk,
      totalPipeline: totalTokenEstimate
    },
    recommendations
  };

  // Include file-level risk map only for small-to-medium codebases
  if (riskMap) {
    result.riskMap = riskMap;
  }

  // Include domain file lists only for large-codebase strategy
  if (strategy === 'large-codebase') {
    result.domainFileLists = {};
    for (const domain of domains) {
      if (domain.tier !== 'CONTEXT-ONLY') {
        result.domainFileLists[domain.path] = domain.files;
      }
    }
  }

  return result;
}
|
|
428
|
+
|
|
429
|
+
// ─── Human-readable output ──────────────────────────────────────────
|
|
430
|
+
/**
 * Render a triage plan as a human-readable text report.
 *
 * Bug fix: the domain display sort used `tierOrder[tier] || 99`, which
 * mapped CRITICAL (rank 0, falsy) to 99 and pushed CRITICAL domains to
 * the bottom of the listing. An explicit membership check preserves
 * rank 0 so domains print CRITICAL → HIGH → MEDIUM → LOW → CONTEXT-ONLY
 * as the comment promises.
 *
 * @param {Object} result - A plan object produced by scan().
 * @returns {string} Multi-line report.
 */
function formatHuman(result) {
  const lines = [];
  lines.push(`Bug Hunter Triage — ${result.target}`);
  lines.push('═'.repeat(60));
  lines.push('');
  lines.push(`Total source files: ${result.totalFiles}`);
  lines.push(`Scannable files: ${result.scannableFiles}`);
  lines.push(`Average lines/file: ${result.avgLines}`);
  lines.push(`FILE_BUDGET: ${result.fileBudget}`);
  lines.push(`Strategy: ${result.strategy}`);
  lines.push(`Mode file: ${result.modeFile}`);
  lines.push(`Needs --loop: ${result.needsLoop ? 'YES' : 'no'}`);
  lines.push('');
  lines.push('Domains:');
  // Sort by tier for display: CRITICAL → HIGH → MEDIUM → LOW → CONTEXT-ONLY
  const tierOrder = { CRITICAL: 0, HIGH: 1, MEDIUM: 2, LOW: 3, 'CONTEXT-ONLY': 4 };
  // `tierOrder[tier] || 99` would discard CRITICAL's rank 0 — check
  // membership explicitly instead.
  const rank = (tier) => (tier in tierOrder ? tierOrder[tier] : 99);
  const sorted = [...result.domains].sort((a, b) => rank(a.tier) - rank(b.tier));
  for (const d of sorted) {
    lines.push(`  ${d.tier.padEnd(12)} ${d.path} (${d.fileCount} files)`);
  }
  lines.push('');
  lines.push('Token estimates:');
  lines.push(`  Recon: ~${result.tokenEstimate.recon.toLocaleString()} tokens`);
  lines.push(`  Per Hunter chunk: ~${result.tokenEstimate.perHunterChunk.toLocaleString()} tokens`);
  lines.push(`  Full pipeline: ~${result.tokenEstimate.totalPipeline.toLocaleString()} tokens`);
  lines.push('');
  if (result.recommendations.length > 0) {
    lines.push('Recommendations:');
    for (const r of result.recommendations) {
      lines.push(`  • ${r}`);
    }
  }
  return lines.join('\n');
}
|
|
464
|
+
|
|
465
|
+
// ─── CLI ─────────────────────────────────────────────────────────────
|
|
466
|
+
/**
 * Parse CLI arguments: `[command] [target] [--output p] [--max-depth n] [--format f]`.
 *
 * Unknown flags are silently skipped. Bug fix: `--max-depth` used
 * `parseInt(...) || 20`, which collapsed a valid explicit `0` to 20
 * (while accepting negative garbage). Now only NaN or negative values
 * fall back to the default of 20; `--max-depth 0` means "top level only".
 *
 * @param {string[]} argv - Arguments after `node script` (process.argv.slice(2)).
 * @returns {{command: string|null, target: string|null, output: string|null,
 *   maxDepth: number, format: string}}
 */
function parseArgs(argv) {
  const args = { command: null, target: null, output: null, maxDepth: 20, format: 'json' };
  let i = 0;
  // First bare word is the command, second is the target.
  if (argv.length > 0 && !argv[0].startsWith('-')) {
    args.command = argv[0];
    i = 1;
  }
  if (i < argv.length && !argv[i].startsWith('-')) {
    args.target = argv[i];
    i += 1;
  }
  while (i < argv.length) {
    const flag = argv[i];
    if (flag === '--output' && i + 1 < argv.length) {
      args.output = argv[i + 1];
      i += 2;
    } else if (flag === '--max-depth' && i + 1 < argv.length) {
      const parsed = Number.parseInt(argv[i + 1], 10);
      // 0 is a legal depth; only non-numeric or negative input falls back.
      args.maxDepth = Number.isNaN(parsed) || parsed < 0 ? 20 : parsed;
      i += 2;
    } else if (flag === '--format' && i + 1 < argv.length) {
      args.format = argv[i + 1];
      i += 2;
    } else {
      // Unknown flag or dangling value — skip it.
      i += 1;
    }
  }
  return args;
}
|
|
494
|
+
|
|
495
|
+
/**
 * CLI entry point: validate arguments, run the scan, and emit the plan
 * as JSON (to stdout or --output) or as a human-readable report.
 * Exits with status 1 on bad usage.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));

  if (args.command !== 'scan' || !args.target) {
    console.error('Usage:');
    console.error(' triage.cjs scan <targetPath> [--output <path>] [--max-depth <n>] [--format json|human]');
    process.exit(1);
  }

  const result = scan(args.target, { maxDepth: args.maxDepth });

  if (args.format === 'human') {
    console.log(formatHuman(result));
    return;
  }

  const json = JSON.stringify(result, null, 2);

  if (!args.output) {
    console.log(json);
    return;
  }

  // Ensure the output directory exists before writing the plan file.
  const dir = path.dirname(args.output);
  if (dir && !fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(args.output, json + '\n', 'utf8');
  // Emit a small confirmation record instead of the full plan.
  console.log(JSON.stringify({ ok: true, outputPath: args.output, totalFiles: result.totalFiles, strategy: result.strategy }));
}
|
|
522
|
+
|
|
523
|
+
// Top-level runner: any error (bad target, I/O failure) prints its
// message and exits non-zero so callers can detect triage failure.
try {
  main();
} catch (error) {
  const message = error instanceof Error ? error.message : String(error);
  console.error(message);
  process.exit(1);
}
|