@codexstar/bug-hunter 3.0.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/CHANGELOG.md +149 -83
  2. package/README.md +150 -15
  3. package/SKILL.md +94 -27
  4. package/agents/openai.yaml +4 -0
  5. package/bin/bug-hunter +9 -3
  6. package/docs/images/2026-03-12-fix-plan-rollout.png +0 -0
  7. package/docs/images/2026-03-12-hero-bug-hunter-overview.png +0 -0
  8. package/docs/images/2026-03-12-machine-readable-artifacts.png +0 -0
  9. package/docs/images/2026-03-12-pr-review-flow.png +0 -0
  10. package/docs/images/2026-03-12-security-pack.png +0 -0
  11. package/docs/images/adversarial-debate.png +0 -0
  12. package/docs/images/doc-verify-fix-plan.png +0 -0
  13. package/docs/images/hero.png +0 -0
  14. package/docs/images/pipeline-overview.png +0 -0
  15. package/docs/images/security-finding-card.png +0 -0
  16. package/docs/plans/2026-03-11-structured-output-migration-plan.md +288 -0
  17. package/docs/plans/2026-03-12-audit-bug-fixes-surgical-plan.md +193 -0
  18. package/docs/plans/2026-03-12-enterprise-security-pack-e2e-plan.md +59 -0
  19. package/docs/plans/2026-03-12-local-security-skills-integration-plan.md +39 -0
  20. package/docs/plans/2026-03-12-pr-review-strategic-fix-flow.md +78 -0
  21. package/evals/evals.json +366 -102
  22. package/modes/extended.md +2 -2
  23. package/modes/fix-loop.md +30 -30
  24. package/modes/fix-pipeline.md +32 -6
  25. package/modes/large-codebase.md +14 -15
  26. package/modes/local-sequential.md +44 -20
  27. package/modes/loop.md +56 -56
  28. package/modes/parallel.md +3 -3
  29. package/modes/scaled.md +2 -2
  30. package/modes/single-file.md +3 -3
  31. package/modes/small.md +11 -11
  32. package/package.json +11 -1
  33. package/prompts/fixer.md +37 -23
  34. package/prompts/hunter.md +39 -20
  35. package/prompts/referee.md +34 -20
  36. package/prompts/skeptic.md +25 -22
  37. package/schemas/coverage.schema.json +67 -0
  38. package/schemas/examples/findings.invalid.json +13 -0
  39. package/schemas/examples/findings.valid.json +17 -0
  40. package/schemas/findings.schema.json +76 -0
  41. package/schemas/fix-plan.schema.json +94 -0
  42. package/schemas/fix-report.schema.json +105 -0
  43. package/schemas/fix-strategy.schema.json +99 -0
  44. package/schemas/recon.schema.json +31 -0
  45. package/schemas/referee.schema.json +46 -0
  46. package/schemas/shared.schema.json +51 -0
  47. package/schemas/skeptic.schema.json +21 -0
  48. package/scripts/bug-hunter-state.cjs +35 -12
  49. package/scripts/code-index.cjs +11 -4
  50. package/scripts/fix-lock.cjs +95 -25
  51. package/scripts/payload-guard.cjs +24 -10
  52. package/scripts/pr-scope.cjs +181 -0
  53. package/scripts/prepublish-guard.cjs +82 -0
  54. package/scripts/render-report.cjs +346 -0
  55. package/scripts/run-bug-hunter.cjs +669 -33
  56. package/scripts/schema-runtime.cjs +273 -0
  57. package/scripts/schema-validate.cjs +40 -0
  58. package/scripts/tests/bug-hunter-state.test.cjs +68 -3
  59. package/scripts/tests/code-index.test.cjs +15 -0
  60. package/scripts/tests/fix-lock.test.cjs +60 -2
  61. package/scripts/tests/fixtures/flaky-worker.cjs +6 -1
  62. package/scripts/tests/fixtures/low-confidence-worker.cjs +8 -2
  63. package/scripts/tests/fixtures/success-worker.cjs +6 -1
  64. package/scripts/tests/payload-guard.test.cjs +154 -2
  65. package/scripts/tests/pr-scope.test.cjs +212 -0
  66. package/scripts/tests/render-report.test.cjs +180 -0
  67. package/scripts/tests/run-bug-hunter.test.cjs +686 -2
  68. package/scripts/tests/security-skills-integration.test.cjs +29 -0
  69. package/scripts/tests/skills-packaging.test.cjs +30 -0
  70. package/scripts/tests/worktree-harvest.test.cjs +67 -1
  71. package/scripts/worktree-harvest.cjs +62 -9
  72. package/skills/README.md +19 -0
  73. package/skills/commit-security-scan/SKILL.md +63 -0
  74. package/skills/security-review/SKILL.md +57 -0
  75. package/skills/threat-model-generation/SKILL.md +47 -0
  76. package/skills/vulnerability-validation/SKILL.md +59 -0
  77. package/templates/subagent-wrapper.md +12 -3
  78. package/modes/_dispatch.md +0 -121
@@ -3,6 +3,7 @@
3
3
  const childProcess = require('child_process');
4
4
  const fs = require('fs');
5
5
  const path = require('path');
6
+ const { validateArtifactFile, validateArtifactValue } = require('./schema-runtime.cjs');
6
7
 
7
8
  const BACKEND_PRIORITY = ['spawn_agent', 'subagent', 'teams', 'local-sequential'];
8
9
  const DEFAULT_TIMEOUT_MS = 120000;
@@ -17,7 +18,8 @@ const DEFAULT_EXPANSION_CAP = 40;
17
18
  function usage() {
18
19
  console.error('Usage:');
19
20
  console.error(' run-bug-hunter.cjs preflight [--skill-dir <path>] [--available-backends <csv>] [--backend <name>]');
20
- console.error(' run-bug-hunter.cjs run --files-json <path> [--mode <name>] [--skill-dir <path>] [--state <path>] [--chunk-size <n>] [--worker-cmd <template>] [--timeout-ms <n>] [--max-retries <n>] [--backoff-ms <n>] [--available-backends <csv>] [--backend <name>] [--fail-fast <true|false>] [--use-index <true|false>] [--index-path <path>] [--delta-mode <true|false>] [--changed-files-json <path>] [--delta-hops <n>] [--expand-on-low-confidence <true|false>] [--confidence-threshold <n>] [--canary-size <n>] [--expansion-cap <n>]');
21
+ console.error(' run-bug-hunter.cjs run --files-json <path> [--mode <name>] [--skill-dir <path>] [--state <path>] [--chunk-size <n>] [--worker-cmd <template>] [--timeout-ms <n>] [--max-retries <n>] [--backoff-ms <n>] [--available-backends <csv>] [--backend <name>] [--fail-fast <true|false>] [--use-index <true|false>] [--index-path <path>] [--delta-mode <true|false>] [--changed-files-json <path>] [--delta-hops <n>] [--expand-on-low-confidence <true|false>] [--confidence-threshold <n>] [--canary-size <n>] [--expansion-cap <n>] [--strategy-path <path>] [--strategy-markdown-path <path>]');
22
+ console.error(' run-bug-hunter.cjs phase --artifact <name> --output-path <path> --worker-cmd <template> [--phase-name <name>] [--skill-dir <path>] [--journal-path <path>] [--render-cmd <template>] [--render-output-path <path>] [--timeout-ms <n>] [--render-timeout-ms <n>] [--max-retries <n>] [--backoff-ms <n>]');
21
23
  console.error(' run-bug-hunter.cjs plan --files-json <path> [--mode <name>] [--skill-dir <path>] [--chunk-size <n>] [--plan-path <path>]');
22
24
  }
23
25
 
@@ -114,10 +116,23 @@ function requiredScripts(skillDir) {
114
116
  return [
115
117
  path.join(skillDir, 'scripts', 'bug-hunter-state.cjs'),
116
118
  path.join(skillDir, 'scripts', 'payload-guard.cjs'),
119
+ path.join(skillDir, 'scripts', 'schema-validate.cjs'),
120
+ path.join(skillDir, 'scripts', 'schema-runtime.cjs'),
121
+ path.join(skillDir, 'scripts', 'render-report.cjs'),
117
122
  path.join(skillDir, 'scripts', 'fix-lock.cjs'),
118
123
  path.join(skillDir, 'scripts', 'doc-lookup.cjs'),
119
124
  path.join(skillDir, 'scripts', 'context7-api.cjs'),
120
- path.join(skillDir, 'scripts', 'delta-mode.cjs')
125
+ path.join(skillDir, 'scripts', 'delta-mode.cjs'),
126
+ path.join(skillDir, 'scripts', 'pr-scope.cjs'),
127
+ path.join(skillDir, 'schemas', 'findings.schema.json'),
128
+ path.join(skillDir, 'schemas', 'skeptic.schema.json'),
129
+ path.join(skillDir, 'schemas', 'referee.schema.json'),
130
+ path.join(skillDir, 'schemas', 'coverage.schema.json'),
131
+ path.join(skillDir, 'schemas', 'fix-report.schema.json'),
132
+ path.join(skillDir, 'schemas', 'fix-plan.schema.json'),
133
+ path.join(skillDir, 'schemas', 'fix-strategy.schema.json'),
134
+ path.join(skillDir, 'schemas', 'recon.schema.json'),
135
+ path.join(skillDir, 'schemas', 'shared.schema.json')
121
136
  ];
122
137
  }
123
138
 
@@ -149,18 +164,38 @@ function runJsonScript(scriptPath, args) {
149
164
  return JSON.parse(output);
150
165
  }
151
166
 
167
/**
 * Run a helper script with `node` synchronously and return its raw stdout.
 *
 * @param {string} scriptPath - Script path passed as the first argument to `node`.
 * @param {string[]} [args=[]] - Additional command-line arguments for the script.
 * @returns {string} Captured stdout, or an empty string when nothing was printed.
 * @throws {Error} When the process cannot be launched, or when it ends with a
 *   non-zero status. The message prefers stderr, then stdout, then a generic
 *   text naming the script.
 */
function runTextScript(scriptPath, args = []) {
  const result = childProcess.spawnSync('node', [scriptPath, ...args], {
    encoding: 'utf8'
  });
  if (result.error) {
    // spawnSync reports launch problems through `error` instead of throwing;
    // surface the real cause rather than a generic "Script failed".
    throw new Error(`Failed to run ${scriptPath}: ${result.error.message}`);
  }
  if (result.status !== 0) {
    const stderr = (result.stderr || '').trim();
    const stdout = (result.stdout || '').trim();
    // A signal-terminated child has a null status; mention the signal in the fallback text.
    const signalNote = result.signal ? ` (terminated by ${result.signal})` : '';
    throw new Error(stderr || stdout || `Script failed: ${scriptPath}${signalNote}`);
  }
  return result.stdout || '';
}
178
+
152
179
  function appendJournal(logPath, event) {
153
180
  ensureDir(path.dirname(logPath));
154
181
  const line = JSON.stringify({ at: nowIso(), ...event });
155
182
  fs.appendFileSync(logPath, `${line}\n`, 'utf8');
156
183
  }
157
184
 
185
/**
 * Wrap a value in single quotes so a POSIX shell treats it as one literal word.
 *
 * The value is stringified; every embedded single quote is rewritten as
 * `'\''` (close quote, escaped quote, reopen quote). An empty string yields `''`.
 *
 * @param {*} value - Value to quote.
 * @returns {string} The single-quoted text.
 */
function shellQuote(value) {
  const text = String(value);
  if (text.length === 0) {
    return "''";
  }
  const escaped = text.split("'").join(`'\\''`);
  return `'${escaped}'`;
}
192
+
158
193
  function fillTemplate(template, variables) {
159
194
  return template.replace(/\{([a-zA-Z0-9_]+)\}/g, (match, key) => {
160
195
  if (!(key in variables)) {
161
- return match;
196
+ throw new Error(`Unknown template placeholder: ${key}`);
162
197
  }
163
- return String(variables[key]);
198
+ return shellQuote(variables[key]);
164
199
  });
165
200
  }
166
201
 
@@ -170,7 +205,8 @@ function sleep(ms) {
170
205
 
171
206
  function runCommandOnce({ command, timeoutMs }) {
172
207
  return new Promise((resolve) => {
173
- const child = childProcess.spawn('/bin/zsh', ['-lc', command], {
208
+ const shell = process.env.SHELL || '/bin/bash';
209
+ const child = childProcess.spawn(shell, ['-lc', command], {
174
210
  stdio: ['ignore', 'pipe', 'pipe']
175
211
  });
176
212
  let stdout = '';
@@ -213,7 +249,9 @@ async function runWithRetry({
213
249
  backoffMs,
214
250
  journalPath,
215
251
  phase,
216
- chunkId
252
+ chunkId,
253
+ beforeAttempt,
254
+ postAttempt
217
255
  }) {
218
256
  const attempts = maxRetries + 1;
219
257
  let lastResult = null;
@@ -227,20 +265,45 @@ async function runWithRetry({
227
265
  attempts,
228
266
  timeoutMs
229
267
  });
268
+ if (typeof beforeAttempt === 'function') {
269
+ await beforeAttempt({ attempt });
270
+ }
230
271
  const result = await runCommandOnce({ command, timeoutMs });
231
- lastResult = result;
272
+ let finalResult = result;
273
+
274
+ if (finalResult.ok && typeof postAttempt === 'function') {
275
+ const postAttemptResult = await postAttempt({ attempt });
276
+ if (!postAttemptResult.ok) {
277
+ const validationMessage = String(postAttemptResult.errorMessage || 'post-attempt validation failed');
278
+ appendJournal(journalPath, {
279
+ event: 'attempt-post-check-failed',
280
+ phase,
281
+ chunkId,
282
+ attempt,
283
+ errorMessage: validationMessage.slice(0, 500)
284
+ });
285
+ finalResult = {
286
+ ...finalResult,
287
+ ok: false,
288
+ stderr: validationMessage
289
+ };
290
+ }
291
+ }
292
+
232
293
  appendJournal(journalPath, {
233
294
  event: 'attempt-end',
234
295
  phase,
235
296
  chunkId,
236
297
  attempt,
237
- ok: result.ok,
238
- code: result.code,
239
- timeoutHit: result.timeoutHit,
240
- stderr: result.stderr.slice(0, 500)
298
+ ok: finalResult.ok,
299
+ code: finalResult.code,
300
+ timeoutHit: finalResult.timeoutHit,
301
+ stderr: finalResult.stderr.slice(0, 500)
241
302
  });
242
- if (result.ok) {
243
- return { ok: true, result, attemptsUsed: attempt };
303
+
304
+ lastResult = finalResult;
305
+ if (finalResult.ok) {
306
+ return { ok: true, result: finalResult, attemptsUsed: attempt };
244
307
  }
245
308
  if (attempt < attempts) {
246
309
  const delayMs = backoffMs * 2 ** (attempt - 1);
@@ -378,8 +441,8 @@ function buildConsistencyReport({ bugLedger, confidenceThreshold }) {
378
441
  }
379
442
 
380
443
  const lowConfidence = bugLedger.filter((entry) => {
381
- const confidence = entry.confidence;
382
- return confidence === null || confidence === undefined || Number(confidence) < confidenceThreshold;
444
+ const confidenceScore = entry.confidenceScore;
445
+ return confidenceScore === null || confidenceScore === undefined || Number(confidenceScore) < confidenceThreshold;
383
446
  }).length;
384
447
 
385
448
  return {
@@ -391,30 +454,69 @@ function buildConsistencyReport({ bugLedger, confidenceThreshold }) {
391
454
  };
392
455
  }
393
456
 
394
- function buildFixPlan({ bugLedger, confidenceThreshold, canarySize }) {
395
- const withConfidence = bugLedger.map((entry) => {
396
- const confidenceRaw = entry.confidence;
397
- const confidence = Number.isFinite(Number(confidenceRaw)) ? Number(confidenceRaw) : null;
457
/**
 * Collect the bug ids and locations named by consistency conflicts.
 *
 * Only two conflict types contribute: `bug-id-reused` (adds `conflict.bugId`)
 * and `location-claim-conflict` (adds `conflict.location`). Values are stored
 * as strings; falsy conflicts and falsy ids/locations are ignored.
 *
 * @param {object} [consistency] - Consistency report; its `conflicts` field is read.
 * @returns {{bugIds: Set<string>, locations: Set<string>}} Lookup sets.
 */
function buildConflictSets(consistency) {
  const bugIds = new Set();
  const locations = new Set();

  toArray(consistency && consistency.conflicts).forEach((conflict) => {
    if (!conflict) {
      return;
    }
    const { type } = conflict;
    if (type === 'bug-id-reused' && conflict.bugId) {
      bugIds.add(String(conflict.bugId));
    }
    if (type === 'location-claim-conflict' && conflict.location) {
      locations.add(String(conflict.location));
    }
  });

  return { bugIds, locations };
}
473
+
474
/**
 * Override a classification with manual review when the entry is in a conflict set.
 *
 * An entry conflicts when its trimmed `bugId` is in `conflictSets.bugIds`, or
 * when `"<file>|<lines>"` is in `conflictSets.locations`.
 *
 * @param {object} entry - Ledger entry; `bugId`, `file` and `lines` are read.
 * @param {object} classification - Classification returned when there is no conflict.
 * @param {{bugIds: Set<string>, locations: Set<string>}} conflictSets - Lookup sets.
 * @returns {object} `classification` itself, or a manual-review classification.
 */
function applyConflictClassification(entry, classification, conflictSets) {
  const { bugIds, locations } = conflictSets;
  const idKey = String(entry.bugId || '').trim();
  const locationKey = `${entry.file || ''}|${entry.lines || ''}`;

  if (bugIds.has(idKey) || locations.has(locationKey)) {
    return {
      strategy: 'manual-review',
      executionStage: 'manual-review',
      autofixEligible: false,
      reason: 'Consistency conflict requires manual review before any fix is attempted.'
    };
  }
  return classification;
}
488
+
489
+ function buildFixPlan({ bugLedger, confidenceThreshold, canarySize, consistency }) {
490
+ const conflictSets = buildConflictSets(consistency);
491
+ const classifiedEntries = bugLedger.map((entry) => {
492
+ const confidenceRaw = entry.confidenceScore;
493
+ const confidenceScore = Number.isFinite(Number(confidenceRaw)) ? Number(confidenceRaw) : null;
494
+ const classification = applyConflictClassification(
495
+ entry,
496
+ classifyStrategy({ ...entry, confidenceScore }, confidenceThreshold),
497
+ conflictSets
498
+ );
398
499
  return {
399
500
  ...entry,
400
- confidence
501
+ confidenceScore,
502
+ ...classification
401
503
  };
402
504
  });
403
- const eligible = withConfidence
404
- .filter((entry) => entry.confidence !== null && entry.confidence >= confidenceThreshold)
505
+ const eligible = classifiedEntries
506
+ .filter((entry) => entry.autofixEligible === true)
405
507
  .sort((left, right) => {
406
508
  const severityDiff = severityRank(right.severity) - severityRank(left.severity);
407
509
  if (severityDiff !== 0) {
408
510
  return severityDiff;
409
511
  }
410
- const confidenceDiff = (right.confidence || 0) - (left.confidence || 0);
512
+ const confidenceDiff = (right.confidenceScore || 0) - (left.confidenceScore || 0);
411
513
  if (confidenceDiff !== 0) {
412
514
  return confidenceDiff;
413
515
  }
414
516
  return String(left.key).localeCompare(String(right.key));
415
517
  });
416
- const manualReview = withConfidence
417
- .filter((entry) => entry.confidence === null || entry.confidence < confidenceThreshold);
518
+ const manualReview = classifiedEntries
519
+ .filter((entry) => entry.autofixEligible !== true);
418
520
  const canary = eligible.slice(0, canarySize);
419
521
  const rollout = eligible.slice(canarySize);
420
522
 
@@ -423,7 +525,7 @@ function buildFixPlan({ bugLedger, confidenceThreshold, canarySize }) {
423
525
  confidenceThreshold,
424
526
  canarySize,
425
527
  totals: {
426
- findings: withConfidence.length,
528
+ findings: classifiedEntries.length,
427
529
  eligible: eligible.length,
428
530
  canary: canary.length,
429
531
  rollout: rollout.length,
@@ -435,6 +537,431 @@ function buildFixPlan({ bugLedger, confidenceThreshold, canarySize }) {
435
537
  };
436
538
  }
437
539
 
540
/**
 * Decide how a finding should be handled: guarded autofix, manual review,
 * larger refactor, or architectural remediation.
 *
 * Checks run in this order:
 *   1. Missing or below-threshold confidence -> `manual-review`.
 *   2. Architectural keyword in the claim, or 3+ cross references ->
 *      `architectural-remediation` (report only).
 *   3. Refactor keyword in the claim, or severity rank >= 2 with 2+ cross
 *      references -> `larger-refactor` (manual review).
 *   4. Otherwise `safe-autofix`, staged as `canary` when severity rank >= 2,
 *      else `rollout`.
 *
 * @param {object} entry - Finding; `confidenceScore`, `claim`, `crossReferences`
 *   and `severity` are read.
 * @param {number} confidenceThreshold - Minimum confidence for autofix.
 * @returns {{strategy: string, executionStage: string, autofixEligible: boolean, reason: string}}
 */
function classifyStrategy(entry, confidenceThreshold) {
  // Only real numbers and non-blank strings count as a score. Number(null),
  // Number('') and Number(true) are all finite, so coercing them would turn a
  // missing confidence into a numeric score instead of "unknown".
  const rawConfidence = entry.confidenceScore;
  const hasConfidenceValue = typeof rawConfidence === 'number'
    || (typeof rawConfidence === 'string' && rawConfidence.trim() !== '');
  const confidenceScore = hasConfidenceValue && Number.isFinite(Number(rawConfidence))
    ? Number(rawConfidence)
    : null;

  const claim = String(entry.claim || '').toLowerCase();
  const crossReferences = toArray(entry.crossReferences);
  const architecturalSignals = ['architecture', 'migration', 'schema', 'contract', 'signature', 'protocol'];
  const refactorSignals = ['refactor', 'transaction', 'concurrency', 'race', 'lock ordering'];
  // NOTE(review): signals are matched as plain substrings, so "race" also
  // matches "trace". Over-matching only routes more findings to human review.
  const claimMentions = (signals) => signals.some((signal) => claim.includes(signal));

  if (confidenceScore === null || confidenceScore < confidenceThreshold) {
    return {
      strategy: 'manual-review',
      executionStage: 'manual-review',
      autofixEligible: false,
      reason: 'Confidence is below the autofix threshold.'
    };
  }

  if (claimMentions(architecturalSignals) || crossReferences.length >= 3) {
    return {
      strategy: 'architectural-remediation',
      executionStage: 'report-only',
      autofixEligible: false,
      reason: 'Claim spans broader contracts or architecture boundaries.'
    };
  }

  if (claimMentions(refactorSignals) || (severityRank(entry.severity) >= 2 && crossReferences.length >= 2)) {
    return {
      strategy: 'larger-refactor',
      executionStage: 'manual-review',
      autofixEligible: false,
      reason: 'Fix likely needs coordinated multi-file changes beyond a surgical patch.'
    };
  }

  return {
    strategy: 'safe-autofix',
    executionStage: severityRank(entry.severity) >= 2 ? 'canary' : 'rollout',
    autofixEligible: true,
    reason: 'Finding is localized enough for a guarded surgical fix.'
  };
}
581
+
582
/**
 * Map a strategy name to the human-readable action recommended for it.
 *
 * @param {string} strategy - One of `architectural-remediation`,
 *   `larger-refactor`, `manual-review`; any other value gets the guarded-fix text.
 * @returns {string} Recommendation sentence.
 */
function recommendedActionForStrategy(strategy) {
  switch (strategy) {
    case 'architectural-remediation':
      return 'Do not auto-edit. Capture a remediation design and schedule a broader change.';
    case 'larger-refactor':
      return 'Pause before patching. Review interfaces, callers, and rollback scope with a human.';
    case 'manual-review':
      return 'Keep this in the report and require human approval before any edits.';
    default:
      return 'Proceed through the guarded fix pipeline with canary verification and rollback safety.';
  }
}
594
+
595
/**
 * Build the fix-strategy artifact: classify every ledger entry, group the
 * entries into clusters, and summarise the counts per strategy and stage.
 *
 * Entries are clustered by the triple (strategy, execution stage, directory of
 * the entry's file). Cluster ids are numbered in first-seen order; the cluster
 * list is then sorted by stage (canary, rollout, manual-review, report-only)
 * and, within a stage, by descending maximum severity.
 *
 * @param {object} params
 * @param {object[]} params.bugLedger - Findings to classify.
 * @param {number} params.confidenceThreshold - Minimum confidence for autofix.
 * @param {object} [params.consistency] - Consistency report used to detect conflicts.
 * @returns {{version: string, generatedAt: string, confidenceThreshold: number, summary: object, clusters: object[]}}
 */
function buildFixStrategy({ bugLedger, confidenceThreshold, consistency }) {
  const conflictSets = buildConflictSets(consistency);

  const normalized = bugLedger.map((entry) => {
    const confidenceScore = Number.isFinite(Number(entry.confidenceScore))
      ? Number(entry.confidenceScore)
      : null;
    const classification = applyConflictClassification(
      entry,
      classifyStrategy({ ...entry, confidenceScore }, confidenceThreshold),
      conflictSets
    );
    const file = String(entry.file || '').trim() || 'unknown-file';
    const clusterDir = path.dirname(file);
    return {
      ...entry,
      confidenceScore,
      file,
      clusterDir,
      clusterSeed: `${classification.strategy}|${classification.executionStage}|${clusterDir}`,
      ...classification
    };
  });

  // Group entries by cluster seed, preserving first-seen order of the seeds.
  const byCluster = normalized.reduce((groups, entry) => {
    const members = groups.get(entry.clusterSeed);
    if (members) {
      members.push(entry);
    } else {
      groups.set(entry.clusterSeed, [entry]);
    }
    return groups;
  }, new Map());

  const stageRank = {
    canary: 0,
    rollout: 1,
    'manual-review': 2,
    'report-only': 3
  };
  const uniqueTruthy = (values) => [...new Set(values.filter(Boolean))];

  const clusters = Array.from(byCluster.values())
    .map((entries, index) => {
      const [first] = entries;
      const { strategy, executionStage } = first;
      const files = [...new Set(entries.map((entry) => entry.file))].sort();
      const bugIds = uniqueTruthy(entries.map((entry) => String(entry.bugId || entry.key || '').trim()));
      const severitiesByRank = entries
        .map((entry) => entry.severity)
        .sort((left, right) => severityRank(right) - severityRank(left));
      const maxSeverity = severitiesByRank[0] || 'LOW';
      const reasons = uniqueTruthy(entries.map((entry) => entry.reason));
      const firstDir = first.clusterDir || path.dirname(files[0] || 'unknown-file');
      return {
        clusterId: `cluster-${index + 1}`,
        strategy,
        executionStage,
        autofixEligible: entries.every((entry) => entry.autofixEligible),
        bugIds,
        files,
        maxSeverity,
        summary: `${bugIds.length} bug(s) in ${firstDir || '.'} classified as ${strategy}.`,
        recommendedAction: recommendedActionForStrategy(strategy),
        reasons
      };
    })
    .sort((left, right) => {
      const stageDiff = stageRank[left.executionStage] - stageRank[right.executionStage];
      if (stageDiff !== 0) {
        return stageDiff;
      }
      return severityRank(right.maxSeverity) - severityRank(left.maxSeverity);
    });

  const countWhere = (field, expected) => normalized.filter((entry) => entry[field] === expected).length;
  const summary = {
    confirmed: normalized.length,
    safeAutofix: countWhere('strategy', 'safe-autofix'),
    manualReview: countWhere('strategy', 'manual-review'),
    largerRefactor: countWhere('strategy', 'larger-refactor'),
    architecturalRemediation: countWhere('strategy', 'architectural-remediation'),
    canaryCandidates: countWhere('executionStage', 'canary'),
    rolloutCandidates: countWhere('executionStage', 'rollout')
  };

  return {
    version: '3.1.0',
    generatedAt: nowIso(),
    confidenceThreshold,
    summary,
    clusters
  };
}
679
+
680
/**
 * Translate a chunk status into a coverage status.
 *
 * `done`, `in_progress` and `failed` pass through unchanged; every other value
 * is reported as `pending`.
 *
 * @param {*} chunkStatus - Status recorded on the chunk.
 * @returns {string} One of `done`, `in_progress`, `failed`, `pending`.
 */
function toCoverageStatus(chunkStatus) {
  switch (chunkStatus) {
    case 'done':
    case 'in_progress':
    case 'failed':
      return chunkStatus;
    default:
      return 'pending';
  }
}
692
+
693
/**
 * Build the coverage artifact from pipeline state and the fix plan.
 *
 * - `files`: one entry per file of every chunk, with the chunk's coverage status.
 * - `bugs`: one entry per ledger entry (id, severity, file, claim).
 * - `fixes`: one entry per planned bug, with status CANARY, ROLLOUT or
 *   MANUAL_REVIEW. When a bug is in several plan lists, the later list wins
 *   (canary, then rollout, then manual review).
 * - `status`: IN_PROGRESS while any chunk is not `done`, otherwise COMPLETE.
 *
 * Bug ids are derived the same way in `bugs` and `fixes`: the trimmed `bugId`,
 * falling back to the trimmed `key`. Findings that only carry a `key` therefore
 * keep their fix status instead of being dropped.
 *
 * @param {object} params
 * @param {object} params.state - Pipeline state; `chunks` and `bugLedger` are read.
 * @param {object} [params.fixPlan] - Fix plan; `canary`, `rollout` and `manualReview` are read.
 * @returns {{schemaVersion: number, iteration: number, status: string, files: object[], bugs: object[], fixes: object[]}}
 */
function buildCoverageArtifact({ state, fixPlan }) {
  const resolveBugId = (entry) => String(entry.bugId || '').trim() || String(entry.key || '').trim();
  const chunks = toArray(state.chunks);

  const fileEntries = chunks.flatMap((chunk) => {
    const status = toCoverageStatus(chunk.status);
    return toArray(chunk.files).map((filePath) => ({ path: String(filePath), status }));
  });

  const bugs = toArray(state.bugLedger).map((entry) => ({
    bugId: resolveBugId(entry),
    severity: String(entry.severity || 'Low'),
    file: String(entry.file || '').trim(),
    claim: String(entry.claim || '').trim()
  }));

  // Later plan lists overwrite earlier ones for the same bug id.
  const planStages = [
    ['canary', 'CANARY'],
    ['rollout', 'ROLLOUT'],
    ['manualReview', 'MANUAL_REVIEW']
  ];
  const fixStatusByBugId = new Map();
  for (const [listName, status] of planStages) {
    for (const entry of toArray(fixPlan && fixPlan[listName])) {
      fixStatusByBugId.set(resolveBugId(entry), status);
    }
  }

  const fixes = [...fixStatusByBugId.entries()]
    .filter(([bugId]) => Boolean(bugId))
    .map(([bugId, status]) => ({ bugId, status }));

  const hasOpenChunks = chunks.some((chunk) => chunk.status !== 'done');

  return {
    schemaVersion: 1,
    iteration: 1,
    status: hasOpenChunks ? 'IN_PROGRESS' : 'COMPLETE',
    files: fileEntries,
    bugs,
    fixes
  };
}
743
+
744
/**
 * Render a coverage artifact as a Markdown document.
 *
 * The output has a summary list (status, iteration, counts) followed by
 * `Files`, `Bugs` and `Fixes` sections. An empty section shows `- None`.
 *
 * @param {object} coverage - Coverage artifact with `status`, `iteration`,
 *   `files`, `bugs` and `fixes`.
 * @returns {string} Markdown text ending with a newline.
 */
function renderCoverageMarkdown(coverage) {
  const bulletsOrNone = (items, formatItem) => {
    if (items.length === 0) {
      return ['- None'];
    }
    return items.map((item) => formatItem(item));
  };

  const header = [
    '# Bug Hunter Coverage',
    '',
    `- Status: ${coverage.status}`,
    `- Iteration: ${coverage.iteration}`,
    `- Files: ${coverage.files.length}`,
    `- Bugs: ${coverage.bugs.length}`,
    `- Fix entries: ${coverage.fixes.length}`,
    '',
    '## Files'
  ];
  const fileLines = bulletsOrNone(coverage.files, (entry) => `- ${entry.status} | ${entry.path}`);
  const bugLines = bulletsOrNone(
    coverage.bugs,
    (bug) => `- ${bug.bugId} | ${bug.severity} | ${bug.file} | ${bug.claim}`
  );
  const fixLines = bulletsOrNone(coverage.fixes, (fix) => `- ${fix.bugId} | ${fix.status}`);

  const lines = [
    ...header,
    ...fileLines,
    '',
    '## Bugs',
    ...bugLines,
    '',
    '## Fixes',
    ...fixLines
  ];
  return `${lines.join('\n')}\n`;
}
785
+
786
/**
 * Validate a findings JSON file against the `findings` artifact schema.
 *
 * Delegates to `validateNamedArtifact`, which reports a missing file as
 * `Missing findings artifact: <path>` and otherwise returns the result of
 * `validateArtifactFile`.
 *
 * @param {string} findingsJsonPath - Path of the findings file.
 * @returns {object} Validation result; `{ ok: false, errors: [...] }` when the file is missing.
 */
function validateFindingsArtifact(findingsJsonPath) {
  return validateNamedArtifact({
    artifactName: 'findings',
    filePath: findingsJsonPath
  });
}
798
+
799
/**
 * Validate an artifact file against the schema selected by `artifactName`.
 *
 * @param {object} params
 * @param {string} params.artifactName - Artifact name passed to `validateArtifactFile`.
 * @param {string} params.filePath - Path of the artifact file.
 * @returns {object} `{ ok: false, errors: [...] }` when the file does not exist;
 *   otherwise whatever `validateArtifactFile` returns.
 */
function validateNamedArtifact({ artifactName, filePath }) {
  const artifactPresent = fs.existsSync(filePath);
  return artifactPresent
    ? validateArtifactFile({ artifactName, filePath })
    : { ok: false, errors: [`Missing ${artifactName} artifact: ${filePath}`] };
}
811
+
812
/**
 * Delete a file, treating "already absent" as success.
 *
 * The unlink is attempted directly rather than after an existence check, so a
 * file that disappears between the check and the delete cannot raise an error,
 * and a dangling symlink is removed as well.
 *
 * @param {string|null|undefined} filePath - File to delete; falsy values are ignored.
 * @throws {Error} Any unlink failure other than the path not existing
 *   (ENOENT / ENOTDIR).
 */
function removeFileIfExists(filePath) {
  if (!filePath) {
    return;
  }
  try {
    fs.unlinkSync(filePath);
  } catch (error) {
    // ENOENT: nothing at that path. ENOTDIR: a parent component is not a
    // directory, so the path cannot exist either.
    if (error.code !== 'ENOENT' && error.code !== 'ENOTDIR') {
      throw error;
    }
  }
}
820
+
821
/**
 * Run one pipeline phase: execute the worker command with retries until it
 * produces a schema-valid artifact, then optionally run a render command.
 *
 * Sequence:
 *   1. Check the required options, then run `preflight`.
 *   2. Remove stale output files and journal `phase-start`.
 *   3. Run the worker through `runWithRetry`; outputs are cleared before each
 *      attempt and the artifact is validated after each successful attempt.
 *   4. If `--render-cmd` is given, run it once (no retries).
 *   5. Journal `phase-end` and return a summary.
 *
 * @param {object} options - Parsed CLI options. Required: `artifact`,
 *   `output-path`, `worker-cmd`. Optional: `phase-name`, `journal-path`,
 *   `render-cmd`, `render-output-path`, `timeout-ms`, `render-timeout-ms`,
 *   `max-retries`, `backoff-ms` (plus whatever `resolveSkillDir`/`preflight` read).
 * @returns {Promise<object>} `{ ok, artifact, phase, outputPath, renderOutputPath, journalPath, attemptsUsed }`.
 * @throws {Error} On a missing required option, failed preflight, worker
 *   failure after all attempts, or render failure.
 */
async function runPhase(options) {
  const artifact = String(options.artifact || '').trim();
  if (!artifact) {
    throw new Error('--artifact is required for phase command');
  }
  if (!options['output-path']) {
    throw new Error('--output-path is required for phase command');
  }
  if (!options['worker-cmd']) {
    throw new Error('--worker-cmd is required for phase command');
  }

  const skillDir = resolveSkillDir(options);
  const helperCheck = preflight(options);
  if (!helperCheck.ok) {
    throw new Error(`Missing helper scripts: ${helperCheck.missing.join(', ')}`);
  }

  const phaseName = options['phase-name'] || artifact;
  const outputPath = path.resolve(options['output-path']);
  const renderOutputPath = options['render-output-path']
    ? path.resolve(options['render-output-path'])
    : null;
  const workerCmdTemplate = options['worker-cmd'];
  const renderCmdTemplate = options['render-cmd'] || null;
  const timeoutMs = toPositiveInt(options['timeout-ms'], DEFAULT_TIMEOUT_MS);
  // The render step inherits the worker timeout unless overridden.
  const renderTimeoutMs = toPositiveInt(options['render-timeout-ms'], timeoutMs);
  const maxRetries = toPositiveInt(options['max-retries'], DEFAULT_MAX_RETRIES);
  const backoffMs = toPositiveInt(options['backoff-ms'], DEFAULT_BACKOFF_MS);
  const journalPath = path.resolve(
    options['journal-path'] || path.join(path.dirname(outputPath), `${phaseName}.log`)
  );
  const templateVariables = {
    artifact,
    outputPath,
    outputFilePath: outputPath,
    renderOutputPath: renderOutputPath || '',
    journalPath,
    phaseName,
    skillDir
  };

  // Every journal record of this phase starts with the same three fields.
  const journal = (event, details = {}) => {
    appendJournal(journalPath, { event, artifact, phase: phaseName, ...details });
  };
  // Stale outputs must never be mistaken for the result of the current attempt.
  const clearOutputs = () => {
    removeFileIfExists(outputPath);
    removeFileIfExists(renderOutputPath);
  };

  ensureDir(path.dirname(outputPath));
  if (renderOutputPath) {
    ensureDir(path.dirname(renderOutputPath));
  }
  clearOutputs();

  journal('phase-start', { outputPath, renderOutputPath });

  const workerCommand = fillTemplate(workerCmdTemplate, templateVariables);
  const runResult = await runWithRetry({
    command: workerCommand,
    timeoutMs,
    maxRetries,
    backoffMs,
    journalPath,
    phase: phaseName,
    chunkId: artifact,
    beforeAttempt: async () => {
      clearOutputs();
    },
    postAttempt: async () => {
      const validation = validateNamedArtifact({
        artifactName: artifact,
        filePath: outputPath
      });
      return validation.ok
        ? { ok: true }
        : { ok: false, errorMessage: validation.errors.join('; ') };
    }
  });

  if (!runResult.ok) {
    const errorMessage = (runResult.result && runResult.result.stderr) || `${phaseName} failed`;
    journal('phase-failed', { errorMessage: errorMessage.slice(0, 500) });
    throw new Error(errorMessage);
  }

  if (renderCmdTemplate) {
    const renderCommand = fillTemplate(renderCmdTemplate, templateVariables);
    journal('phase-render-start', { renderOutputPath });
    const renderResult = await runCommandOnce({
      command: renderCommand,
      timeoutMs: renderTimeoutMs
    });
    if (!renderResult.ok) {
      const renderError = renderResult.stderr || renderResult.stdout || `${phaseName} render failed`;
      journal('phase-render-failed', { errorMessage: renderError.slice(0, 500) });
      throw new Error(renderError);
    }
    journal('phase-render-end', { renderOutputPath });
  }

  const { attemptsUsed } = runResult;
  journal('phase-end', { attemptsUsed });

  return {
    ok: true,
    artifact,
    phase: phaseName,
    outputPath,
    renderOutputPath,
    journalPath,
    attemptsUsed
  };
}
964
+
438
965
  function loadIndex(indexPath) {
439
966
  if (!indexPath || !fs.existsSync(indexPath)) {
440
967
  return null;
@@ -513,7 +1040,21 @@ async function processPendingChunks({
513
1040
  backoffMs,
514
1041
  journalPath,
515
1042
  phase: 'chunk-worker',
516
- chunkId: chunk.id
1043
+ chunkId: chunk.id,
1044
+ beforeAttempt: async () => {
1045
+ removeFileIfExists(findingsJsonPath);
1046
+ removeFileIfExists(factsJsonPath);
1047
+ },
1048
+ postAttempt: async () => {
1049
+ const findingsValidation = validateFindingsArtifact(findingsJsonPath);
1050
+ if (findingsValidation.ok) {
1051
+ return { ok: true };
1052
+ }
1053
+ return {
1054
+ ok: false,
1055
+ errorMessage: findingsValidation.errors.join('; ')
1056
+ };
1057
+ }
517
1058
  });
518
1059
 
519
1060
  if (!runResult.ok) {
@@ -531,10 +1072,8 @@ async function processPendingChunks({
531
1072
  }
532
1073
 
533
1074
  let findings = [];
534
- if (fs.existsSync(findingsJsonPath)) {
535
- runJsonScript(stateScript, ['record-findings', statePath, findingsJsonPath, 'orchestrator']);
536
- findings = readJson(findingsJsonPath);
537
- }
1075
+ runJsonScript(stateScript, ['record-findings', statePath, findingsJsonPath, 'orchestrator']);
1076
+ findings = readJson(findingsJsonPath);
538
1077
 
539
1078
  if (fs.existsSync(factsJsonPath)) {
540
1079
  runJsonScript(stateScript, ['record-fact-card', statePath, chunk.id, factsJsonPath]);
@@ -662,6 +1201,10 @@ async function runPipeline(options) {
662
1201
  const chunksDir = path.resolve(path.dirname(statePath), 'chunks');
663
1202
  const consistencyReportPath = path.resolve(options['consistency-report'] || path.join(path.dirname(statePath), 'consistency.json'));
664
1203
  const fixPlanPath = path.resolve(options['fix-plan-path'] || path.join(path.dirname(statePath), 'fix-plan.json'));
1204
+ const strategyPath = path.resolve(options['strategy-path'] || path.join(path.dirname(statePath), 'fix-strategy.json'));
1205
+ const strategyMarkdownPath = path.resolve(options['strategy-markdown-path'] || path.join(path.dirname(statePath), 'fix-strategy.md'));
1206
+ const coveragePath = path.resolve(options['coverage-path'] || path.join(path.dirname(statePath), 'coverage.json'));
1207
+ const coverageMarkdownPath = path.resolve(options['coverage-markdown-path'] || path.join(path.dirname(statePath), 'coverage.md'));
665
1208
  const factsPath = path.resolve(options['facts-path'] || path.join(path.dirname(statePath), 'bug-hunter-facts.json'));
666
1209
  ensureDir(chunksDir);
667
1210
 
@@ -709,7 +1252,7 @@ async function runPipeline(options) {
709
1252
  const state = readJson(statePath);
710
1253
  const lowConfidenceFiles = normalizeFiles(state.bugLedger
711
1254
  .filter((entry) => {
712
- return entry.confidence === null || entry.confidence === undefined || Number(entry.confidence) < confidenceThreshold;
1255
+ return entry.confidenceScore === null || entry.confidenceScore === undefined || Number(entry.confidenceScore) < confidenceThreshold;
713
1256
  })
714
1257
  .map((entry) => entry.file));
715
1258
  if (lowConfidenceFiles.length > 0 && scope.indexPath) {
@@ -773,14 +1316,96 @@ async function runPipeline(options) {
773
1316
  writeJson(consistencyReportPath, consistency);
774
1317
  runJsonScript(stateScript, ['set-consistency', statePath, consistencyReportPath]);
775
1318
 
1319
+ const hasOpenOrFailedChunks = (status.summary.chunkStatus.pending || 0) > 0
1320
+ || (status.summary.chunkStatus.inProgress || 0) > 0
1321
+ || (status.summary.chunkStatus.failed || 0) > 0;
1322
+
1323
+ if (hasOpenOrFailedChunks) {
1324
+ appendJournal(journalPath, {
1325
+ event: 'fix-planning-skipped',
1326
+ reason: 'incomplete-or-failed-chunks',
1327
+ chunkStatus: status.summary.chunkStatus
1328
+ });
1329
+
1330
+ return {
1331
+ ok: true,
1332
+ backend,
1333
+ journalPath,
1334
+ statePath,
1335
+ indexPath: scope.indexPath,
1336
+ deltaMode: scope.deltaMode,
1337
+ deltaSummary: scope.deltaResult ? {
1338
+ selectedCount: (scope.deltaResult.selected || []).length,
1339
+ expansionCandidatesCount: (scope.deltaResult.expansionCandidates || []).length
1340
+ } : null,
1341
+ consistencyReportPath,
1342
+ strategyPath: null,
1343
+ strategyMarkdownPath: null,
1344
+ fixPlanPath: null,
1345
+ coveragePath: null,
1346
+ coverageMarkdownPath: null,
1347
+ factsPath,
1348
+ status: status.summary,
1349
+ consistency: {
1350
+ conflicts: consistency.conflicts.length,
1351
+ lowConfidenceFindings: consistency.lowConfidenceFindings
1352
+ },
1353
+ fixStrategy: null,
1354
+ fixPlan: null
1355
+ };
1356
+ }
1357
+
1358
+ const fixStrategy = buildFixStrategy({
1359
+ bugLedger: toArray(finalState.bugLedger),
1360
+ confidenceThreshold,
1361
+ consistency
1362
+ });
1363
+ const fixStrategyValidation = validateArtifactValue({
1364
+ artifactName: 'fix-strategy',
1365
+ value: fixStrategy
1366
+ });
1367
+ if (!fixStrategyValidation.ok) {
1368
+ throw new Error(`Generated invalid fix strategy artifact: ${fixStrategyValidation.errors.join('; ')}`);
1369
+ }
1370
+ writeJson(strategyPath, fixStrategy);
1371
+ ensureDir(path.dirname(strategyMarkdownPath));
1372
+ fs.writeFileSync(
1373
+ strategyMarkdownPath,
1374
+ runTextScript(path.join(skillDir, 'scripts', 'render-report.cjs'), ['fix-strategy', strategyPath]),
1375
+ 'utf8'
1376
+ );
1377
+
776
1378
  const fixPlan = buildFixPlan({
777
1379
  bugLedger: toArray(finalState.bugLedger),
778
1380
  confidenceThreshold,
779
- canarySize
1381
+ canarySize,
1382
+ consistency
780
1383
  });
1384
+ const fixPlanValidation = validateArtifactValue({
1385
+ artifactName: 'fix-plan',
1386
+ value: fixPlan
1387
+ });
1388
+ if (!fixPlanValidation.ok) {
1389
+ throw new Error(`Generated invalid fix plan artifact: ${fixPlanValidation.errors.join('; ')}`);
1390
+ }
781
1391
  writeJson(fixPlanPath, fixPlan);
782
1392
  runJsonScript(stateScript, ['set-fix-plan', statePath, fixPlanPath]);
783
1393
 
1394
+ const coverage = buildCoverageArtifact({
1395
+ state: finalState,
1396
+ fixPlan
1397
+ });
1398
+ const coverageValidation = validateArtifactValue({
1399
+ artifactName: 'coverage',
1400
+ value: coverage
1401
+ });
1402
+ if (!coverageValidation.ok) {
1403
+ throw new Error(`Generated invalid coverage artifact: ${coverageValidation.errors.join('; ')}`);
1404
+ }
1405
+ writeJson(coveragePath, coverage);
1406
+ ensureDir(path.dirname(coverageMarkdownPath));
1407
+ fs.writeFileSync(coverageMarkdownPath, renderCoverageMarkdown(coverage), 'utf8');
1408
+
784
1409
  writeJson(factsPath, finalState.factCards || {});
785
1410
 
786
1411
  appendJournal(journalPath, {
@@ -802,13 +1427,18 @@ async function runPipeline(options) {
802
1427
  expansionCandidatesCount: (scope.deltaResult.expansionCandidates || []).length
803
1428
  } : null,
804
1429
  consistencyReportPath,
1430
+ strategyPath,
1431
+ strategyMarkdownPath,
805
1432
  fixPlanPath,
1433
+ coveragePath,
1434
+ coverageMarkdownPath,
806
1435
  factsPath,
807
1436
  status: status.summary,
808
1437
  consistency: {
809
1438
  conflicts: consistency.conflicts.length,
810
1439
  lowConfidenceFindings: consistency.lowConfidenceFindings
811
1440
  },
1441
+ fixStrategy: fixStrategy.summary,
812
1442
  fixPlan: fixPlan.totals
813
1443
  };
814
1444
  }
@@ -835,6 +1465,12 @@ async function main() {
835
1465
  return;
836
1466
  }
837
1467
 
1468
+ if (command === 'phase') {
1469
+ const result = await runPhase(options);
1470
+ console.log(JSON.stringify(result, null, 2));
1471
+ return;
1472
+ }
1473
+
838
1474
  if (command === 'plan') {
839
1475
  if (!options['files-json']) {
840
1476
  throw new Error('--files-json is required for plan command');