auditor-lambda 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +2 -9
  2. package/audit-code-wrapper-lib.mjs +19 -915
  3. package/dispatch/merge-results.mjs +1 -1
  4. package/dist/cli/auditStep.d.ts +1 -33
  5. package/dist/cli/dispatch.d.ts +47 -0
  6. package/dist/cli/dispatch.js +116 -1
  7. package/dist/cli/mergeAndIngestCommand.js +55 -8
  8. package/dist/cli/nextStepCommand.js +43 -15
  9. package/dist/cli/prompts.d.ts +2 -0
  10. package/dist/cli/prompts.js +9 -0
  11. package/dist/cli/reviewRun.js +1 -1
  12. package/dist/cli/runToCompletion.js +21 -8
  13. package/dist/cli/semanticReviewStep.js +12 -1
  14. package/dist/cli/steps.d.ts +15 -0
  15. package/dist/cli.js +1 -8
  16. package/dist/io/artifacts.d.ts +9 -1
  17. package/dist/io/artifacts.js +7 -0
  18. package/dist/io/runArtifacts.d.ts +14 -0
  19. package/dist/io/runArtifacts.js +23 -0
  20. package/dist/orchestrator/designReviewPrompt.d.ts +4 -1
  21. package/dist/orchestrator/designReviewPrompt.js +43 -2
  22. package/dist/orchestrator/executorResult.d.ts +25 -0
  23. package/dist/orchestrator/intakeExecutors.d.ts +19 -1
  24. package/dist/orchestrator/intakeExecutors.js +89 -3
  25. package/dist/orchestrator/nextStep.d.ts +1 -0
  26. package/dist/orchestrator/nextStep.js +1 -1
  27. package/dist/orchestrator/state.js +8 -1
  28. package/dist/providers/constants.d.ts +1 -1
  29. package/dist/providers/constants.js +1 -1
  30. package/dist/reporting/synthesis.d.ts +8 -0
  31. package/dist/reporting/synthesis.js +16 -1
  32. package/dist/supervisor/operatorHandoff.js +8 -1
  33. package/dist/types/auditScope.d.ts +16 -2
  34. package/dist/validation/sessionConfig.js +35 -0
  35. package/docs/contracts.md +0 -16
  36. package/docs/operator-guide.md +6 -8
  37. package/package.json +1 -1
  38. package/schemas/audit_findings.schema.json +1 -0
  39. package/scripts/postinstall.mjs +0 -174
  40. package/skills/audit-code/SKILL.md +17 -1
  41. package/skills/audit-code/audit-code.prompt.md +25 -0
  42. package/dist/mcp/server.d.ts +0 -72
  43. package/dist/mcp/server.js +0 -765
@@ -15,7 +15,7 @@ const artifactsDir = artifactsDirIdx !== -1 && process.argv[artifactsDirIdx + 1]
15
15
  : join(process.cwd(), ".audit-artifacts");
16
16
 
17
17
  const taskResultsDir = join(artifactsDir, "runs", runId, "task-results");
18
- const auditResultsPath = join(artifactsDir, "runs", runId, "audit-results.json");
18
+ const auditResultsPath = join(artifactsDir, "runs", runId, "run-results.json");
19
19
  const failedTasksPath = join(artifactsDir, "runs", runId, "failed-tasks.json");
20
20
  const tasksPath = join(artifactsDir, "runs", runId, "pending-audit-tasks.json");
21
21
 
@@ -1,7 +1,4 @@
1
- import type { AuditResult } from "../types.js";
2
1
  import type { AnalyzerSetting } from "@audit-tools/shared";
3
- import type { RuntimeValidationReport } from "../types/runtimeValidation.js";
4
- import type { ExternalAnalyzerResults } from "../types/externalAnalyzer.js";
5
2
  export declare function runAuditStep(options: {
6
3
  root: string;
7
4
  artifactsDir: string;
@@ -23,36 +20,7 @@ export declare function ingestBatchAuditResults(options: {
23
20
  batchDir: string;
24
21
  }): Promise<{
25
22
  batchFiles: string[];
26
- bundle: Partial<{
27
- repo_manifest: import("../types.js").RepoManifest;
28
- file_disposition: import("@audit-tools/shared").FileDisposition;
29
- auto_fixes_applied: unknown;
30
- unit_manifest: import("../types.js").UnitManifest;
31
- graph_bundle: import("@audit-tools/shared").GraphBundle;
32
- surface_manifest: import("@audit-tools/shared").SurfaceManifest;
33
- critical_flows: import("@audit-tools/shared").CriticalFlowManifest;
34
- flow_coverage: import("../types/flowCoverage.js").FlowCoverageManifest;
35
- risk_register: import("@audit-tools/shared").RiskRegister;
36
- design_assessment: import("../types/designAssessment.js").DesignAssessment;
37
- analyzer_capability: import("../types/analyzerCapability.js").AnalyzerCapabilityRecord;
38
- scope: import("../types/auditScope.js").AuditScopeManifest;
39
- coverage_matrix: import("../types.js").CoverageMatrix;
40
- runtime_validation_tasks: import("../types/runtimeValidation.js").RuntimeValidationTaskManifest;
41
- runtime_validation_report: RuntimeValidationReport;
42
- external_analyzer_results: ExternalAnalyzerResults;
43
- syntax_resolution_status: unknown;
44
- audit_results: AuditResult[];
45
- audit_tasks: import("../types.js").AuditTask[];
46
- audit_plan_metrics: import("../types/reviewPlanning.js").AuditPlanMetrics;
47
- review_packets: import("../types/reviewPlanning.js").ReviewPacket[];
48
- requeue_tasks: import("../types.js").AuditTask[];
49
- audit_report: string;
50
- audit_findings: import("@audit-tools/shared").AuditFindingsReport;
51
- synthesis_narrative: import("../types/synthesisNarrative.js").SynthesisNarrativeRecord;
52
- audit_state: import("../types/auditState.js").AuditState;
53
- artifact_metadata: import("../types/artifactMetadata.js").ArtifactMetadataManifest;
54
- tooling_manifest: import("../types/toolingManifest.js").ToolingManifest;
55
- }>;
23
+ bundle: import("../io/artifacts.js").ArtifactBundle;
56
24
  audit_state: import("../types/auditState.js").AuditState;
57
25
  selected_obligation: string | null;
58
26
  selected_executor: string;
@@ -20,9 +20,21 @@ export declare const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
20
20
  export interface ActiveDispatchState {
21
21
  run_id: string;
22
22
  created_at: string;
23
+ /** Emitted packets only (after canary/budget filtering). */
23
24
  packet_count: number;
25
+ /** Tasks remaining this round (not-yet-done), not just emitted-packet tasks. */
24
26
  task_count: number;
25
27
  status: "active" | "merged";
28
+ /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
29
+ phase: "canary" | "fan_out";
30
+ /** packet_id of the emitted canary packet when phase==="canary", else null. */
31
+ canary_packet_id: string | null;
32
+ /** Total packets that would have been emitted before a budget cap (present only when capped). */
33
+ budget_packet_count?: number;
34
+ /** packet_ids NOT emitted due to the budget cap. */
35
+ deferred_packet_ids?: string[];
36
+ /** task_ids NOT emitted due to the budget cap. */
37
+ deferred_task_ids?: string[];
26
38
  }
27
39
  export interface DispatchResultMapEntry {
28
40
  packet_id: string;
@@ -34,6 +46,25 @@ export interface DispatchResultMap {
34
46
  run_id: string;
35
47
  entries: DispatchResultMapEntry[];
36
48
  }
49
+ export declare const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
50
+ export interface DispatchFanout {
51
+ agent_count: number;
52
+ wave_count: number;
53
+ confirmation_recommended: boolean;
54
+ dispatch_summary: string;
55
+ }
56
+ /**
57
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
58
+ * number of agents (packets emitted this round, after canary/budget filtering)
59
+ * and the resolved wave size, derive the wave count, a human-readable summary,
60
+ * and whether the agent count exceeds the confirmation threshold (default 10).
61
+ * No LLM call, no side effects, no prompting.
62
+ */
63
+ export declare function computeDispatchFanout(params: {
64
+ agentCount: number;
65
+ waveSize: number;
66
+ confirmThreshold?: number;
67
+ }): DispatchFanout;
37
68
  export interface PrepareDispatchResult {
38
69
  run_id: string;
39
70
  dispatch_plan_path: string;
@@ -43,6 +74,22 @@ export interface PrepareDispatchResult {
43
74
  skipped_task_count: number;
44
75
  /** Subagent parallelism resolved for this dispatch run. */
45
76
  wave_size: number;
77
+ /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
78
+ phase: "canary" | "fan_out";
79
+ /** packet_id of the emitted canary packet when phase==="canary", else null. */
80
+ canary_packet_id: string | null;
81
+ /** Total agents that will be launched this run (packet_count after canary/budget). */
82
+ agent_count: number;
83
+ /** ceil(agent_count / max(1, wave_size)). */
84
+ wave_count: number;
85
+ /** True when agent_count exceeds sessionConfig.dispatch?.confirm_threshold (default 10). */
86
+ confirmation_recommended: boolean;
87
+ /** Human-readable summary, e.g. "12 agents across 3 waves (wave_size=4)". */
88
+ dispatch_summary: string;
89
+ /** True when a max_packets budget capped the emitted packets this run. */
90
+ budget_capped: boolean;
91
+ /** Number of packets deferred (not emitted) due to the budget cap. */
92
+ deferred_packet_count: number;
46
93
  largest_packet: {
47
94
  packet_id: string;
48
95
  total_lines: number;
@@ -4,6 +4,7 @@ import { isAbsolute, join, relative, resolve } from "node:path";
4
4
  import { isFileMissingError, readJsonFile, writeJsonFile, DEFAULT_EMPIRICAL_HALF_LIFE_HOURS, } from "@audit-tools/shared";
5
5
  import { buildQuotaSource } from "@audit-tools/shared/quota/compositeQuotaSource";
6
6
  import { loadArtifactBundle } from "../io/artifacts.js";
7
+ import { writePacketSchemaFiles } from "../io/runArtifacts.js";
7
8
  import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, } from "../orchestrator/reviewPackets.js";
8
9
  import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
9
10
  import { resolveFreshSessionProviderName } from "../providers/index.js";
@@ -16,6 +17,29 @@ export const SMALL_MODEL_HINT_MAX_ESTIMATED_TOKENS = 3000;
16
17
  export const DEEP_MODEL_HINT_MIN_ESTIMATED_TOKENS = 9000;
17
18
  export const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
18
19
  export const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
20
+ export const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
21
+ /**
22
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
23
+ * number of agents (packets emitted this round, after canary/budget filtering)
24
+ * and the resolved wave size, derive the wave count, a human-readable summary,
25
+ * and whether the agent count exceeds the confirmation threshold (default 10).
26
+ * No LLM call, no side effects, no prompting.
27
+ */
28
+ export function computeDispatchFanout(params) {
29
+ const agentCount = params.agentCount;
30
+ const waveSize = params.waveSize;
31
+ const waveCount = Math.ceil(agentCount / Math.max(1, waveSize));
32
+ const confirmThreshold = params.confirmThreshold ?? DEFAULT_DISPATCH_CONFIRM_THRESHOLD;
33
+ const confirmationRecommended = agentCount > confirmThreshold;
34
+ const dispatchSummary = `${agentCount} agent${agentCount !== 1 ? "s" : ""} across ` +
35
+ `${waveCount} wave${waveCount !== 1 ? "s" : ""} (wave_size=${waveSize})`;
36
+ return {
37
+ agent_count: agentCount,
38
+ wave_count: waveCount,
39
+ confirmation_recommended: confirmationRecommended,
40
+ dispatch_summary: dispatchSummary,
41
+ };
42
+ }
19
43
  export function dispatchResultMapPath(runDir) {
20
44
  return join(runDir, DISPATCH_RESULT_MAP_FILENAME);
21
45
  }
@@ -200,6 +224,19 @@ export async function prepareDispatchArtifacts(params) {
200
224
  const lensDefsPath = join(params.packageRoot, "dispatch", "lens-definitions.json");
201
225
  const lensDefs = await readJsonFile(lensDefsPath);
202
226
  await mkdir(taskResultsDir, { recursive: true });
227
+ // FINDING-009: make the AuditResult JSON-Schema (and the two sibling schemas
228
+ // it $refs) reachable from this run's task-results directory so packet workers
229
+ // can optionally self-validate before calling submit-packet.
230
+ await writePacketSchemaFiles(taskResultsDir, params.packageRoot);
231
+ // FINDING-011: read the prior dispatch state (if any) so a fan-out round can
232
+ // detect a preceding canary that never produced an accepted result.
233
+ let priorActiveDispatch = null;
234
+ try {
235
+ priorActiveDispatch = await readJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME));
236
+ }
237
+ catch {
238
+ /* none yet */
239
+ }
203
240
  const priorResultTaskIds = new Set();
204
241
  for (const task of tasks) {
205
242
  if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
@@ -230,6 +267,34 @@ export async function prepareDispatchArtifacts(params) {
230
267
  if (resultPathSet.size !== resultPathByTaskId.size) {
231
268
  throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
232
269
  }
270
+ // Packets come back priority-ordered (high -> medium -> low), so packets[0] is
271
+ // the top-priority packet. Filtering composes in a fixed order: canary first
272
+ // (emit only the top packet on first contact), then the budget cap (top-K).
273
+ //
274
+ // FINDING-011: single-worker canary. On first contact with a multi-packet run,
275
+ // dispatch only the top packet; the held-back packets' tasks keep no result
276
+ // file, so they re-enter `dispatchTasks` on the next call (fan-out).
277
+ const firstContact = priorResultTaskIds.size === 0;
278
+ const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
279
+ const doCanary = firstContact && canaryEnabled && packets.length > 1;
280
+ const canaryPacketId = doCanary ? packets[0].packet_id : null;
281
+ const phase = doCanary ? "canary" : "fan_out";
282
+ const postCanaryPackets = doCanary ? packets.slice(0, 1) : packets;
283
+ // FINDING-013: top-K coverage budget. Cap the (already priority-ordered)
284
+ // packets at max_packets; the remainder are recorded as DEFERRED and excluded
285
+ // from the completion check so the run can finish honestly under budget.
286
+ // Budget defaults OFF (no cap) so default behavior is unchanged. Canary takes
287
+ // precedence: a canary round only emits 1 packet regardless of the budget.
288
+ const maxPackets = sessionConfig.dispatch?.max_packets;
289
+ const budgetCapped = typeof maxPackets === "number" &&
290
+ maxPackets >= 0 &&
291
+ maxPackets < postCanaryPackets.length;
292
+ const emitPackets = budgetCapped
293
+ ? postCanaryPackets.slice(0, maxPackets)
294
+ : postCanaryPackets;
295
+ const deferredPackets = budgetCapped
296
+ ? postCanaryPackets.slice(maxPackets)
297
+ : [];
233
298
  const plan = [];
234
299
  const resultMapEntries = [];
235
300
  for (const task of tasks) {
@@ -245,7 +310,7 @@ export async function prepareDispatchArtifacts(params) {
245
310
  let largestLines = 0;
246
311
  let largestEstimatedTokens = 0;
247
312
  const warnings = [];
248
- for (const packet of packets) {
313
+ for (const packet of emitPackets) {
249
314
  const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
250
315
  const packetTasks = packet.task_ids
251
316
  .map((taskId) => tasksById.get(taskId))
@@ -396,6 +461,11 @@ export async function prepareDispatchArtifacts(params) {
396
461
  "way to record results, and it writes them inside the artifacts directory for you.",
397
462
  "Produce one JSON array containing exactly one AuditResult object for each listed task.",
398
463
  "",
464
+ "Schema file (resolve relative to this prompt's directory): audit_result.schema.json",
465
+ " $refs resolved from the same directory: finding.schema.json, audit_task.schema.json",
466
+ "You MAY validate your JSON array against the schema before calling submit-packet. This is optional;",
467
+ " the submit command performs the authoritative validation and will report any errors.",
468
+ "",
399
469
  "Required AuditResult fields:",
400
470
  " task_id copy from the task metadata",
401
471
  " unit_id copy from the task metadata",
@@ -524,20 +594,57 @@ export async function prepareDispatchArtifacts(params) {
524
594
  }
525
595
  }
526
596
  }
597
+ // FINDING-011: when advancing past a canary, warn if it never produced an
598
+ // accepted result. submit-packet writes the per-task result file ONLY after
599
+ // validation passes, so presence of that file == ACCEPTED. We map the recorded
600
+ // canary packet_id back to its task ids via the result map and check whether
601
+ // those tasks now have accepted results (i.e. landed in priorResultTaskIds).
602
+ if (!doCanary && priorActiveDispatch?.phase === "canary" && priorActiveDispatch.canary_packet_id) {
603
+ const canaryAccepted = priorActiveDispatch.run_id === runId
604
+ ? (await loadDispatchResultMap(runDir))?.entries
605
+ .filter((entry) => entry.packet_id === priorActiveDispatch.canary_packet_id)
606
+ .every((entry) => priorResultTaskIds.has(entry.task_id)) ?? false
607
+ : false;
608
+ if (!canaryAccepted) {
609
+ warnings.push({
610
+ code: "canary_not_accepted",
611
+ message: `Canary packet ${priorActiveDispatch.canary_packet_id} did not produce an accepted result before fan-out; remaining packets are being dispatched anyway.`,
612
+ });
613
+ }
614
+ }
527
615
  const warningsPath = warnings.length > 0
528
616
  ? join(runDir, "dispatch-warnings.json")
529
617
  : null;
530
618
  if (warningsPath) {
531
619
  await writeJsonFile(warningsPath, warnings);
532
620
  }
621
+ // FINDING-013: record deferred packets/tasks so the completion obligation can
622
+ // exclude them under a budget cap (present only when actually capped).
623
+ const deferredPacketIds = deferredPackets.map((packet) => packet.packet_id);
624
+ const deferredTaskIds = deferredPackets.flatMap((packet) => packet.task_ids);
533
625
  const activeDispatch = {
534
626
  run_id: runId,
535
627
  created_at: new Date().toISOString(),
536
628
  packet_count: plan.length,
537
629
  task_count: orderedTasks.length,
538
630
  status: "active",
631
+ phase,
632
+ canary_packet_id: canaryPacketId,
633
+ ...(budgetCapped
634
+ ? {
635
+ budget_packet_count: postCanaryPackets.length,
636
+ deferred_packet_ids: deferredPacketIds,
637
+ deferred_task_ids: deferredTaskIds,
638
+ }
639
+ : {}),
539
640
  };
540
641
  await writeJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME), activeDispatch);
642
+ // FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
643
+ const fanout = computeDispatchFanout({
644
+ agentCount: plan.length,
645
+ waveSize: waveSchedule.wave_size,
646
+ confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
647
+ });
541
648
  return {
542
649
  run_id: runId,
543
650
  dispatch_plan_path: dispatchPlanPath,
@@ -546,6 +653,14 @@ export async function prepareDispatchArtifacts(params) {
546
653
  task_count: orderedTasks.length,
547
654
  skipped_task_count: priorResultTaskIds.size,
548
655
  wave_size: waveSchedule.wave_size,
656
+ phase,
657
+ canary_packet_id: canaryPacketId,
658
+ agent_count: fanout.agent_count,
659
+ wave_count: fanout.wave_count,
660
+ confirmation_recommended: fanout.confirmation_recommended,
661
+ dispatch_summary: fanout.dispatch_summary,
662
+ budget_capped: budgetCapped,
663
+ deferred_packet_count: deferredPackets.length,
549
664
  largest_packet: largestPacketId
550
665
  ? {
551
666
  packet_id: largestPacketId,
@@ -7,6 +7,11 @@ import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchRes
7
7
  import { addFileLineCountHints } from "./lineIndex.js";
8
8
  import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
9
9
  import { buildWorkerResult } from "./workerResult.js";
10
+ import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
11
+ // Schema pointer files prepare-dispatch copies into task-results/ for optional
12
+ // worker self-validation. They are expected, not stray — skip them when
13
+ // scanning for spurious files.
14
+ const PACKET_SCHEMA_FILENAME_SET = new Set(PACKET_SCHEMA_FILENAMES);
10
15
  export async function cmdMergeAndIngest(argv) {
11
16
  const runId = getFlag(argv, "--run-id");
12
17
  if (!runId)
@@ -14,9 +19,28 @@ export async function cmdMergeAndIngest(argv) {
14
19
  const artifactsDir = getArtifactsDir(argv);
15
20
  const runDir = join(artifactsDir, "runs", runId);
16
21
  const taskResultsDir = join(runDir, "task-results");
17
- const auditResultsPath = join(runDir, "audit-results.json");
22
+ const auditResultsPath = join(runDir, "run-results.json");
18
23
  const taskPath = join(runDir, "task.json");
19
24
  const tasksPath = join(runDir, "pending-audit-tasks.json");
25
+ const mergeCompletePath = join(runDir, "merge-complete.json");
26
+ // Idempotency: a fully-merged run is terminal. A stray re-invocation for the
27
+ // same run-id (e.g. after the run already advanced to the next deepening
28
+ // round, which rewrites this run dir's pending-audit-tasks.json to the *next*
29
+ // round's tasks) must be a clean no-op — not a spurious "all results missing"
30
+ // hard failure that also truncates the transient results file. Replay the
31
+ // recorded summary and exit 0.
32
+ let priorSummary = null;
33
+ try {
34
+ priorSummary = await readJsonFile(mergeCompletePath);
35
+ }
36
+ catch (e) {
37
+ if (!isFileMissingError(e))
38
+ throw e;
39
+ }
40
+ if (priorSummary) {
41
+ console.log(JSON.stringify({ ...priorSummary, idempotent_replay: true }, null, 2));
42
+ return;
43
+ }
20
44
  const workerTask = await readJsonFile(taskPath);
21
45
  const resultMap = await loadDispatchResultMap(runDir);
22
46
  if (!resultMap) {
@@ -42,9 +66,14 @@ export async function cmdMergeAndIngest(argv) {
42
66
  const passing = [];
43
67
  const failing = [];
44
68
  const seenTaskIds = new Set();
45
- let spuriousFileCount = 0;
69
+ const spuriousFiles = [];
46
70
  const fallbackByTaskId = new Map();
47
71
  for (const filename of files) {
72
+ // Schema pointer files (audit_result/finding/audit_task .schema.json) are
73
+ // copied into task-results/ by prepare-dispatch for optional worker
74
+ // self-validation; they are expected, not stray.
75
+ if (PACKET_SCHEMA_FILENAME_SET.has(filename))
76
+ continue;
48
77
  const filePath = resolve(join(taskResultsDir, filename));
49
78
  if (expectedPaths.has(filePath))
50
79
  continue;
@@ -68,10 +97,16 @@ export async function cmdMergeAndIngest(argv) {
68
97
  // task-results/ dir are legitimate and must not inflate the count or bury
69
98
  // the real stray-file signal (3 -> 191 over a run before this fix).
70
99
  if (!isCanonicalResultFilename(filename)) {
71
- spuriousFileCount++;
72
- process.stderr.write(`[merge-and-ingest] Warning: unexpected file in task-results/: ${filename}\n`);
100
+ spuriousFiles.push(filename);
73
101
  }
74
102
  }
103
+ // Collapse stray-file warnings into a single stderr line so the real summary
104
+ // (emitted as the sole stdout JSON payload) is never buried under a wall of
105
+ // per-file warnings.
106
+ if (spuriousFiles.length > 0) {
107
+ process.stderr.write(`[merge-and-ingest] Warning: ${spuriousFiles.length} unexpected file(s) in ` +
108
+ `task-results/ ignored: ${spuriousFiles.join(", ")}\n`);
109
+ }
75
110
  for (const task of allTasks) {
76
111
  const entry = entryByTaskId.get(task.task_id);
77
112
  if (!entry) {
@@ -134,14 +169,18 @@ export async function cmdMergeAndIngest(argv) {
134
169
  failing.push({ task_id: taskId ?? task.task_id, errors: resultErrors });
135
170
  }
136
171
  }
137
- await writeJsonFile(auditResultsPath, passing);
138
172
  const failedTasksPath = join(runDir, "failed-tasks.json");
139
173
  if (failing.length > 0) {
140
174
  await writeJsonFile(failedTasksPath, failing);
141
175
  }
142
176
  if (passing.length === 0 && failing.length > 0) {
177
+ // Nothing merged and at least one failure: a blocked no-op. Do NOT write the
178
+ // transient results file here — truncating it to [] reads as catastrophic
179
+ // data loss on a re-run when the cumulative audit_results.jsonl store is in
180
+ // fact intact and the first merge had simply already succeeded.
143
181
  throw new Error(`All ${failing.length} assigned task result(s) were missing or invalid; blocked before ingestion. See ${failedTasksPath}`);
144
182
  }
183
+ await writeJsonFile(auditResultsPath, passing);
145
184
  const findingCount = passing.reduce((sum, result) => sum + result.findings.length, 0);
146
185
  let result = null;
147
186
  if (passing.length > 0) {
@@ -197,12 +236,12 @@ export async function cmdMergeAndIngest(argv) {
197
236
  errors: [],
198
237
  });
199
238
  await writeJsonFile(workerTask.result_path, workerResult);
200
- console.log(JSON.stringify({
239
+ const summaryPayload = {
201
240
  run_id: runId,
202
241
  status,
203
242
  accepted_count: passing.length,
204
243
  rejected_count: failing.length,
205
- spurious_file_count: spuriousFileCount,
244
+ spurious_file_count: spuriousFiles.length,
206
245
  finding_count: findingCount,
207
246
  audit_results_path: auditResultsPath,
208
247
  ...(retryDispatchPath ? { retry_dispatch_path: retryDispatchPath } : {}),
@@ -212,7 +251,15 @@ export async function cmdMergeAndIngest(argv) {
212
251
  progress_summary: workerResult.summary,
213
252
  next_likely_step: workerResult.next_likely_step,
214
253
  } : {}),
215
- }, null, 2));
254
+ };
255
+ // Record a completion marker for a fully-merged run so a stray re-invocation
256
+ // replays this summary (above) instead of re-processing — and possibly
257
+ // clobbering — terminal state. Only on full success: a partial merge is meant
258
+ // to be re-run after the failed packets are retried, so it stays replayable.
259
+ if (failing.length === 0) {
260
+ await writeJsonFile(mergeCompletePath, summaryPayload);
261
+ }
262
+ console.log(JSON.stringify(summaryPayload, null, 2));
216
263
  if (failing.length > 0) {
217
264
  process.exitCode = 2;
218
265
  }
@@ -35,6 +35,42 @@ async function runDeterministicForNextStep(params) {
35
35
  const FINALIZATION_CYCLE_TOLERANCE = 16;
36
36
  const seenStateSignatures = new Set();
37
37
  const obligationTrail = [];
38
+ // Build the terminal step for a deterministic loop that has stopped advancing
39
+ // (hit the run backstop or the finalization cycle guard). A rendered report is
40
+ // the deliverable: if synthesis already produced one — or the state is formally
41
+ // complete — present it instead of reporting the stopped loop as a bare
42
+ // "blocked" failure. A completed audit must never surface as blocked just
43
+ // because finalization kept churning (e.g. a runtime_validation <-> synthesis
44
+ // ping-pong, or revision churn from filesystem retries) after the report was
45
+ // written. With no report yet, the stop is a genuine block.
46
+ async function terminalStep(bundle, state, blockedReason) {
47
+ const reportRendered = state.status === "complete" || Boolean(bundle.audit_report);
48
+ await writeHandoffOnly({
49
+ root: params.root,
50
+ artifactsDir: params.artifactsDir,
51
+ bundle,
52
+ audit_state: state,
53
+ progress_summary: reportRendered && state.status !== "complete"
54
+ ? `Audit report already rendered; ending run. ${blockedReason}`
55
+ : blockedReason,
56
+ providerName: LOCAL_SUBPROCESS_PROVIDER_NAME,
57
+ });
58
+ if (!reportRendered) {
59
+ return { kind: "blocked", state, bundle, reason: blockedReason };
60
+ }
61
+ const promoted = await promoteFinalAuditReport({
62
+ artifactsDir: params.artifactsDir,
63
+ repoRoot: params.root,
64
+ });
65
+ return {
66
+ kind: "complete",
67
+ state,
68
+ bundle,
69
+ finalReportPath: promoted.promoted
70
+ ? join(params.root, AUDIT_REPORT_FILENAME)
71
+ : join(params.artifactsDir, AUDIT_REPORT_FILENAME),
72
+ };
73
+ }
38
74
  for (let index = 0; index < params.maxRuns; index++) {
39
75
  const bundle = await loadArtifactBundle(params.artifactsDir);
40
76
  const decision = decideNextStep(bundle);
@@ -318,24 +354,14 @@ async function runDeterministicForNextStep(params) {
318
354
  `progress; stopping. Cycling obligations: ${cycle.join(" -> ")}.`,
319
355
  timestamp: new Date().toISOString(),
320
356
  });
321
- return {
322
- kind: "blocked",
323
- state: result.audit_state,
324
- bundle: result.updated_bundle,
325
- reason: "Finalization is not converging: deterministic executors kept revisiting " +
326
- `prior artifact states (${cycle.join(" -> ")}). The report has been ` +
327
- "rendered; review whether these obligations are erroneously invalidating each other.",
328
- };
357
+ return await terminalStep(result.updated_bundle, result.audit_state, "Finalization is not converging: deterministic executors kept revisiting " +
358
+ `prior artifact states (${cycle.join(" -> ")}). Review whether these ` +
359
+ "obligations are erroneously invalidating each other.");
329
360
  }
330
361
  }
331
362
  const bundle = await loadArtifactBundle(params.artifactsDir);
332
363
  const state = deriveAuditState(bundle);
333
- return {
334
- kind: "blocked",
335
- state,
336
- bundle,
337
- reason: `Reached max run limit (${params.maxRuns}) before a review, report, or blocker step was ready.`,
338
- };
364
+ return await terminalStep(bundle, state, `Reached max run limit (${params.maxRuns}) before a review, report, or blocker step was ready.`);
339
365
  }
340
366
  export async function cmdNextStep(argv) {
341
367
  const root = getRootDir(argv);
@@ -429,7 +455,9 @@ export async function cmdNextStep(argv) {
429
455
  const designReviewResultsPath = join(artifactsDir, "incoming", "design-review-findings.json");
430
456
  await mkdir(join(artifactsDir, "incoming"), { recursive: true });
431
457
  const continueCommand = nextStepCommand(root, artifactsDir);
432
- const prompt = renderDesignReviewPrompt(result.bundle);
458
+ const prompt = renderDesignReviewPrompt(result.bundle, {
459
+ max_units: sessionConfig.design_review?.max_units,
460
+ });
433
461
  const fullPrompt = [
434
462
  prompt,
435
463
  "## Results path",
@@ -10,6 +10,8 @@ export declare function renderDispatchReviewPrompt(params: {
10
10
  dispatchQuotaPath: string | null;
11
11
  hostCanRestrictSubagentTools: boolean;
12
12
  hostCanSelectSubagentModel: boolean;
13
+ phase?: "canary" | "fan_out";
14
+ canaryPacketId?: string | null;
13
15
  }): string;
14
16
  export declare function renderSingleTaskFallbackStepPrompt(params: {
15
17
  singleTaskPromptPath: string;
@@ -78,10 +78,19 @@ export function renderDispatchReviewPrompt(params) {
78
78
  "",
79
79
  "Launch one subagent for each entry in the plan.",
80
80
  ];
81
+ const canaryLines = params.phase === "canary"
82
+ ? [
83
+ "",
84
+ "This is a CANARY round: the plan contains only the single top-priority packet. " +
85
+ "Dispatch it, run merge-and-ingest, then run next-step — the remaining packets fan out " +
86
+ "on the following step once this packet's result is accepted.",
87
+ ]
88
+ : [];
81
89
  return [
82
90
  "# audit-code dispatch review",
83
91
  "",
84
92
  ...dispatchDataLines,
93
+ ...canaryLines,
85
94
  "",
86
95
  "Pass each `entry.prompt_path` literally to its subagent; do not load packet prompt files into this orchestrator context.",
87
96
  "",
@@ -90,7 +90,7 @@ export async function ensureSemanticReviewRun(params) {
90
90
  const paths = getRunPaths(params.artifactsDir, runId);
91
91
  const pendingTasks = await addFileLineCountHints(params.root, buildPendingAuditTasks(params.bundle));
92
92
  const pendingTasksPath = join(paths.runDir, "pending-audit-tasks.json");
93
- const auditResultsPath = join(paths.runDir, "audit-results.json");
93
+ const auditResultsPath = join(paths.runDir, "run-results.json");
94
94
  const taskReadPaths = new Set();
95
95
  for (const pt of pendingTasks) {
96
96
  for (const fp of pt.file_paths)
@@ -70,7 +70,7 @@ async function buildParallelWaveSlots(params) {
70
70
  runCount += 1;
71
71
  const slotRunId = buildRunId(obligationId, runCount);
72
72
  const slotPaths = getRunPaths(artifactsDir, slotRunId);
73
- const slotAuditResultsPath = join(slotPaths.runDir, "audit-results.json");
73
+ const slotAuditResultsPath = join(slotPaths.runDir, "run-results.json");
74
74
  const slotPendingTasksPath = join(slotPaths.runDir, "pending-audit-tasks.json");
75
75
  const slotReadPaths = new Set();
76
76
  for (const t of group) {
@@ -398,7 +398,7 @@ async function runSingleWorkerStep(params) {
398
398
  ? join(paths.runDir, "pending-audit-tasks.json")
399
399
  : undefined;
400
400
  const providerAuditResultsPath = preferredExecutor === "agent"
401
- ? join(paths.runDir, "audit-results.json")
401
+ ? join(paths.runDir, "run-results.json")
402
402
  : auditResultsPath;
403
403
  const providerReadPaths = new Set();
404
404
  if (pendingAuditTasks) {
@@ -694,7 +694,7 @@ export async function cmdRunToCompletion(argv) {
694
694
  const blockPaths = getRunPaths(artifactsDir, blockRunId);
695
695
  const blockPendingTasks = await addFileLineCountHints(root, buildPendingAuditTasks(bundle));
696
696
  const blockPendingTasksPath = join(blockPaths.runDir, "pending-audit-tasks.json");
697
- const blockAuditResultsPath = join(blockPaths.runDir, "audit-results.json");
697
+ const blockAuditResultsPath = join(blockPaths.runDir, "run-results.json");
698
698
  const blockReadPaths = new Set();
699
699
  for (const pt of blockPendingTasks) {
700
700
  for (const fp of pt.file_paths)
@@ -1031,23 +1031,36 @@ export async function cmdRunToCompletion(argv) {
1031
1031
  const bundle = await loadArtifactBundle(artifactsDir);
1032
1032
  const decision = decideNextStep(bundle);
1033
1033
  const state = decision.state;
1034
- if (state.status === "complete") {
1034
+ // A rendered report is the deliverable: if synthesis already produced one (or
1035
+ // the state is formally complete), finish the run on it instead of stranding
1036
+ // it in the artifacts dir behind a bare "max run limit" non-completion. This
1037
+ // mirrors next-step's terminalStep so both loops present a completed audit the
1038
+ // same way, even when finalization churned (runtime_validation <-> synthesis
1039
+ // ping-pong, or filesystem-retry revision churn) up to the backstop. With no
1040
+ // report yet, the run limit is a genuine non-terminal stop.
1041
+ const reportRendered = state.status === "complete" || Boolean(bundle.audit_report);
1042
+ if (reportRendered) {
1035
1043
  await clearDispatchFiles(artifactsDir);
1036
1044
  }
1045
+ const terminalState = reportRendered && state.status !== "complete"
1046
+ ? { ...state, status: "complete" }
1047
+ : state;
1037
1048
  await emitEnvelope({
1038
1049
  root,
1039
1050
  artifactsDir,
1040
1051
  bundle,
1041
- audit_state: state,
1052
+ audit_state: terminalState,
1042
1053
  selected_obligation: lastResult?.obligation_id ?? decision.selected_obligation,
1043
1054
  selected_executor: lastResult?.selected_executor ?? decision.selected_executor,
1044
1055
  progress_made: anyProgress,
1045
1056
  artifacts_written: Array.from(artifactsWritten),
1046
- progress_summary: `Reached max run limit (${maxRuns}) before terminal state.`,
1047
- next_likely_step: state.status === "complete" ? null : decision.selected_obligation,
1057
+ progress_summary: reportRendered && state.status !== "complete"
1058
+ ? `Audit report already rendered; completing the run after reaching the max run limit (${maxRuns}) during finalization.`
1059
+ : `Reached max run limit (${maxRuns}) before terminal state.`,
1060
+ next_likely_step: reportRendered ? null : decision.selected_obligation,
1048
1061
  providerName: provider.name,
1049
1062
  });
1050
- if (state.status === "complete") {
1063
+ if (reportRendered) {
1051
1064
  await promoteFinalAuditReport({ artifactsDir, repoRoot: root });
1052
1065
  }
1053
1066
  }