auditor-lambda 0.9.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +2 -9
  2. package/audit-code-wrapper-lib.mjs +19 -920
  3. package/dist/cli/auditStep.d.ts +1 -33
  4. package/dist/cli/dispatch.d.ts +47 -0
  5. package/dist/cli/dispatch.js +116 -1
  6. package/dist/cli/mergeAndIngestCommand.js +10 -0
  7. package/dist/cli/nextStepCommand.js +3 -1
  8. package/dist/cli/prompts.d.ts +2 -0
  9. package/dist/cli/prompts.js +9 -0
  10. package/dist/cli/semanticReviewStep.js +12 -1
  11. package/dist/cli/steps.d.ts +15 -0
  12. package/dist/cli.js +1 -8
  13. package/dist/io/artifacts.d.ts +9 -1
  14. package/dist/io/artifacts.js +7 -0
  15. package/dist/io/runArtifacts.d.ts +14 -0
  16. package/dist/io/runArtifacts.js +23 -0
  17. package/dist/orchestrator/designReviewPrompt.d.ts +4 -1
  18. package/dist/orchestrator/designReviewPrompt.js +43 -2
  19. package/dist/orchestrator/executorResult.d.ts +25 -0
  20. package/dist/orchestrator/intakeExecutors.d.ts +19 -1
  21. package/dist/orchestrator/intakeExecutors.js +89 -3
  22. package/dist/orchestrator/nextStep.d.ts +1 -0
  23. package/dist/orchestrator/nextStep.js +1 -1
  24. package/dist/orchestrator/state.js +8 -1
  25. package/dist/providers/constants.d.ts +1 -1
  26. package/dist/providers/constants.js +1 -1
  27. package/dist/reporting/synthesis.d.ts +8 -0
  28. package/dist/reporting/synthesis.js +16 -1
  29. package/dist/supervisor/operatorHandoff.js +2 -0
  30. package/dist/types/auditScope.d.ts +16 -2
  31. package/dist/validation/sessionConfig.js +35 -0
  32. package/docs/contracts.md +0 -16
  33. package/docs/operator-guide.md +6 -8
  34. package/package.json +1 -1
  35. package/schemas/audit_findings.schema.json +1 -0
  36. package/scripts/postinstall.mjs +0 -174
  37. package/skills/audit-code/SKILL.md +17 -1
  38. package/skills/audit-code/audit-code.prompt.md +25 -0
  39. package/dist/mcp/server.d.ts +0 -72
  40. package/dist/mcp/server.js +0 -765
@@ -1,7 +1,4 @@
1
- import type { AuditResult } from "../types.js";
2
1
  import type { AnalyzerSetting } from "@audit-tools/shared";
3
- import type { RuntimeValidationReport } from "../types/runtimeValidation.js";
4
- import type { ExternalAnalyzerResults } from "../types/externalAnalyzer.js";
5
2
  export declare function runAuditStep(options: {
6
3
  root: string;
7
4
  artifactsDir: string;
@@ -23,36 +20,7 @@ export declare function ingestBatchAuditResults(options: {
23
20
  batchDir: string;
24
21
  }): Promise<{
25
22
  batchFiles: string[];
26
- bundle: Partial<{
27
- repo_manifest: import("../types.js").RepoManifest;
28
- file_disposition: import("@audit-tools/shared").FileDisposition;
29
- auto_fixes_applied: unknown;
30
- unit_manifest: import("../types.js").UnitManifest;
31
- graph_bundle: import("@audit-tools/shared").GraphBundle;
32
- surface_manifest: import("@audit-tools/shared").SurfaceManifest;
33
- critical_flows: import("@audit-tools/shared").CriticalFlowManifest;
34
- flow_coverage: import("../types/flowCoverage.js").FlowCoverageManifest;
35
- risk_register: import("@audit-tools/shared").RiskRegister;
36
- design_assessment: import("../types/designAssessment.js").DesignAssessment;
37
- analyzer_capability: import("../types/analyzerCapability.js").AnalyzerCapabilityRecord;
38
- scope: import("../types/auditScope.js").AuditScopeManifest;
39
- coverage_matrix: import("../types.js").CoverageMatrix;
40
- runtime_validation_tasks: import("../types/runtimeValidation.js").RuntimeValidationTaskManifest;
41
- runtime_validation_report: RuntimeValidationReport;
42
- external_analyzer_results: ExternalAnalyzerResults;
43
- syntax_resolution_status: unknown;
44
- audit_results: AuditResult[];
45
- audit_tasks: import("../types.js").AuditTask[];
46
- audit_plan_metrics: import("../types/reviewPlanning.js").AuditPlanMetrics;
47
- review_packets: import("../types/reviewPlanning.js").ReviewPacket[];
48
- requeue_tasks: import("../types.js").AuditTask[];
49
- audit_report: string;
50
- audit_findings: import("@audit-tools/shared").AuditFindingsReport;
51
- synthesis_narrative: import("../types/synthesisNarrative.js").SynthesisNarrativeRecord;
52
- audit_state: import("../types/auditState.js").AuditState;
53
- artifact_metadata: import("../types/artifactMetadata.js").ArtifactMetadataManifest;
54
- tooling_manifest: import("../types/toolingManifest.js").ToolingManifest;
55
- }>;
23
+ bundle: import("../io/artifacts.js").ArtifactBundle;
56
24
  audit_state: import("../types/auditState.js").AuditState;
57
25
  selected_obligation: string | null;
58
26
  selected_executor: string;
@@ -20,9 +20,21 @@ export declare const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
20
20
  export interface ActiveDispatchState {
21
21
  run_id: string;
22
22
  created_at: string;
23
+ /** Emitted packets only (after canary/budget filtering). */
23
24
  packet_count: number;
25
+ /** Tasks remaining this round (not-yet-done), not just emitted-packet tasks. */
24
26
  task_count: number;
25
27
  status: "active" | "merged";
28
+ /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
29
+ phase: "canary" | "fan_out";
30
+ /** packet_id of the emitted canary packet when phase==="canary", else null. */
31
+ canary_packet_id: string | null;
32
+ /** Total packets that would have been emitted before a budget cap (present only when capped). */
33
+ budget_packet_count?: number;
34
+ /** packet_ids NOT emitted due to the budget cap. */
35
+ deferred_packet_ids?: string[];
36
+ /** task_ids NOT emitted due to the budget cap. */
37
+ deferred_task_ids?: string[];
26
38
  }
27
39
  export interface DispatchResultMapEntry {
28
40
  packet_id: string;
@@ -34,6 +46,25 @@ export interface DispatchResultMap {
34
46
  run_id: string;
35
47
  entries: DispatchResultMapEntry[];
36
48
  }
49
+ export declare const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
50
+ export interface DispatchFanout {
51
+ agent_count: number;
52
+ wave_count: number;
53
+ confirmation_recommended: boolean;
54
+ dispatch_summary: string;
55
+ }
56
+ /**
57
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
58
+ * number of agents (packets emitted this round, after canary/budget filtering)
59
+ * and the resolved wave size, derive the wave count, a human-readable summary,
60
+ * and whether the agent count exceeds the confirmation threshold (default 10).
61
+ * No LLM call, no side effects, no prompting.
62
+ */
63
+ export declare function computeDispatchFanout(params: {
64
+ agentCount: number;
65
+ waveSize: number;
66
+ confirmThreshold?: number;
67
+ }): DispatchFanout;
37
68
  export interface PrepareDispatchResult {
38
69
  run_id: string;
39
70
  dispatch_plan_path: string;
@@ -43,6 +74,22 @@ export interface PrepareDispatchResult {
43
74
  skipped_task_count: number;
44
75
  /** Subagent parallelism resolved for this dispatch run. */
45
76
  wave_size: number;
77
+ /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
78
+ phase: "canary" | "fan_out";
79
+ /** packet_id of the emitted canary packet when phase==="canary", else null. */
80
+ canary_packet_id: string | null;
81
+ /** Total agents that will be launched this run (packet_count after canary/budget). */
82
+ agent_count: number;
83
+ /** ceil(agent_count / max(1, wave_size)). */
84
+ wave_count: number;
85
+ /** True when agent_count exceeds sessionConfig.dispatch?.confirm_threshold (default 10). */
86
+ confirmation_recommended: boolean;
87
+ /** Human-readable summary, e.g. "12 agents across 3 waves (wave_size=4)". */
88
+ dispatch_summary: string;
89
+ /** True when a max_packets budget capped the emitted packets this run. */
90
+ budget_capped: boolean;
91
+ /** Number of packets deferred (not emitted) due to the budget cap. */
92
+ deferred_packet_count: number;
46
93
  largest_packet: {
47
94
  packet_id: string;
48
95
  total_lines: number;
@@ -4,6 +4,7 @@ import { isAbsolute, join, relative, resolve } from "node:path";
4
4
  import { isFileMissingError, readJsonFile, writeJsonFile, DEFAULT_EMPIRICAL_HALF_LIFE_HOURS, } from "@audit-tools/shared";
5
5
  import { buildQuotaSource } from "@audit-tools/shared/quota/compositeQuotaSource";
6
6
  import { loadArtifactBundle } from "../io/artifacts.js";
7
+ import { writePacketSchemaFiles } from "../io/runArtifacts.js";
7
8
  import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, } from "../orchestrator/reviewPackets.js";
8
9
  import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
9
10
  import { resolveFreshSessionProviderName } from "../providers/index.js";
@@ -16,6 +17,29 @@ export const SMALL_MODEL_HINT_MAX_ESTIMATED_TOKENS = 3000;
16
17
  export const DEEP_MODEL_HINT_MIN_ESTIMATED_TOKENS = 9000;
17
18
  export const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
18
19
  export const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
20
/** Agent count above which the fan-out summary recommends asking for confirmation. */
export const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
/**
 * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
 * number of agents (packets emitted this round, after canary/budget filtering)
 * and the resolved wave size, derive the wave count, a human-readable summary,
 * and whether the agent count exceeds the confirmation threshold (default 10).
 * No LLM call, no side effects, no prompting.
 *
 * Inputs are normalised so the result is always well-formed:
 * - a missing, NaN, or negative `agentCount` is treated as 0;
 * - a missing, NaN, or sub-1 `waveSize` is treated as 1;
 * - a missing or NaN `confirmThreshold` falls back to the default.
 *
 * @param params.agentCount Number of agents (packets) launched this round.
 * @param params.waveSize Maximum agents per wave.
 * @param params.confirmThreshold Optional override for the confirmation threshold.
 * @returns `{ agent_count, wave_count, confirmation_recommended, dispatch_summary }`.
 */
export function computeDispatchFanout(params) {
    const rawAgentCount = params.agentCount;
    const agentCount = typeof rawAgentCount === "number" && !Number.isNaN(rawAgentCount)
        ? Math.max(0, rawAgentCount)
        : 0;
    const rawWaveSize = params.waveSize;
    const waveSize = typeof rawWaveSize === "number" && !Number.isNaN(rawWaveSize)
        ? Math.max(1, rawWaveSize)
        : 1;
    // Any non-zero agent count needs at least one wave, even when waveSize is
    // Infinity (where agentCount / waveSize would otherwise round to 0).
    const waveCount = agentCount > 0 ? Math.max(1, Math.ceil(agentCount / waveSize)) : 0;
    // `??` alone lets NaN through, and `agentCount > NaN` is always false, which
    // would silently disable the confirmation gate.
    const rawThreshold = params.confirmThreshold;
    const confirmThreshold = typeof rawThreshold === "number" && !Number.isNaN(rawThreshold)
        ? rawThreshold
        : DEFAULT_DISPATCH_CONFIRM_THRESHOLD;
    const confirmationRecommended = agentCount > confirmThreshold;
    // Report the wave size actually used for the arithmetic so the summary is
    // self-consistent.
    const dispatchSummary = `${agentCount} agent${agentCount !== 1 ? "s" : ""} across ` +
        `${waveCount} wave${waveCount !== 1 ? "s" : ""} (wave_size=${waveSize})`;
    return {
        agent_count: agentCount,
        wave_count: waveCount,
        confirmation_recommended: confirmationRecommended,
        dispatch_summary: dispatchSummary,
    };
}
19
43
  export function dispatchResultMapPath(runDir) {
20
44
  return join(runDir, DISPATCH_RESULT_MAP_FILENAME);
21
45
  }
@@ -200,6 +224,19 @@ export async function prepareDispatchArtifacts(params) {
200
224
  const lensDefsPath = join(params.packageRoot, "dispatch", "lens-definitions.json");
201
225
  const lensDefs = await readJsonFile(lensDefsPath);
202
226
  await mkdir(taskResultsDir, { recursive: true });
227
+ // FINDING-009: make the AuditResult JSON-Schema (and the two sibling schemas
228
+ // it $refs) reachable from this run's task-results directory so packet workers
229
+ // can optionally self-validate before calling submit-packet.
230
+ await writePacketSchemaFiles(taskResultsDir, params.packageRoot);
231
+ // FINDING-011: read the prior dispatch state (if any) so a fan-out round can
232
+ // detect a preceding canary that never produced an accepted result.
233
+ let priorActiveDispatch = null;
234
+ try {
235
+ priorActiveDispatch = await readJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME));
236
+ }
237
+ catch {
238
+ /* none yet */
239
+ }
203
240
  const priorResultTaskIds = new Set();
204
241
  for (const task of tasks) {
205
242
  if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
@@ -230,6 +267,34 @@ export async function prepareDispatchArtifacts(params) {
230
267
  if (resultPathSet.size !== resultPathByTaskId.size) {
231
268
  throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
232
269
  }
270
+ // Packets come back priority-ordered (high -> medium -> low), so packets[0] is
271
+ // the top-priority packet. Filtering composes in a fixed order: canary first
272
+ // (emit only the top packet on first contact), then the budget cap (top-K).
273
+ //
274
+ // FINDING-011: single-worker canary. On first contact with a multi-packet run,
275
+ // dispatch only the top packet; the held-back packets' tasks keep no result
276
+ // file, so they re-enter `dispatchTasks` on the next call (fan-out).
277
+ const firstContact = priorResultTaskIds.size === 0;
278
+ const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
279
+ const doCanary = firstContact && canaryEnabled && packets.length > 1;
280
+ const canaryPacketId = doCanary ? packets[0].packet_id : null;
281
+ const phase = doCanary ? "canary" : "fan_out";
282
+ const postCanaryPackets = doCanary ? packets.slice(0, 1) : packets;
283
+ // FINDING-013: top-K coverage budget. Cap the (already priority-ordered)
284
+ // packets at max_packets; the remainder are recorded as DEFERRED and excluded
285
+ // from the completion check so the run can finish honestly under budget.
286
+ // Budget defaults OFF (no cap) so default behavior is unchanged. Canary takes
287
+ // precedence: a canary round only emits 1 packet regardless of the budget.
288
+ const maxPackets = sessionConfig.dispatch?.max_packets;
289
+ const budgetCapped = typeof maxPackets === "number" &&
290
+ maxPackets >= 0 &&
291
+ maxPackets < postCanaryPackets.length;
292
+ const emitPackets = budgetCapped
293
+ ? postCanaryPackets.slice(0, maxPackets)
294
+ : postCanaryPackets;
295
+ const deferredPackets = budgetCapped
296
+ ? postCanaryPackets.slice(maxPackets)
297
+ : [];
233
298
  const plan = [];
234
299
  const resultMapEntries = [];
235
300
  for (const task of tasks) {
@@ -245,7 +310,7 @@ export async function prepareDispatchArtifacts(params) {
245
310
  let largestLines = 0;
246
311
  let largestEstimatedTokens = 0;
247
312
  const warnings = [];
248
- for (const packet of packets) {
313
+ for (const packet of emitPackets) {
249
314
  const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
250
315
  const packetTasks = packet.task_ids
251
316
  .map((taskId) => tasksById.get(taskId))
@@ -396,6 +461,11 @@ export async function prepareDispatchArtifacts(params) {
396
461
  "way to record results, and it writes them inside the artifacts directory for you.",
397
462
  "Produce one JSON array containing exactly one AuditResult object for each listed task.",
398
463
  "",
464
+ "Schema file (resolve relative to this prompt's directory): audit_result.schema.json",
465
+ " $refs resolved from the same directory: finding.schema.json, audit_task.schema.json",
466
+ "You MAY validate your JSON array against the schema before calling submit-packet. This is optional;",
467
+ " the submit command performs the authoritative validation and will report any errors.",
468
+ "",
399
469
  "Required AuditResult fields:",
400
470
  " task_id copy from the task metadata",
401
471
  " unit_id copy from the task metadata",
@@ -524,20 +594,57 @@ export async function prepareDispatchArtifacts(params) {
524
594
  }
525
595
  }
526
596
  }
597
+ // FINDING-011: when advancing past a canary, warn if it never produced an
598
+ // accepted result. submit-packet writes the per-task result file ONLY after
599
+ // validation passes, so presence of that file == ACCEPTED. We map the recorded
600
+ // canary packet_id back to its task ids via the result map and check whether
601
+ // those tasks now have accepted results (i.e. landed in priorResultTaskIds).
602
+ if (!doCanary && priorActiveDispatch?.phase === "canary" && priorActiveDispatch.canary_packet_id) {
603
+ const canaryAccepted = priorActiveDispatch.run_id === runId
604
+ ? (await loadDispatchResultMap(runDir))?.entries
605
+ .filter((entry) => entry.packet_id === priorActiveDispatch.canary_packet_id)
606
+ .every((entry) => priorResultTaskIds.has(entry.task_id)) ?? false
607
+ : false;
608
+ if (!canaryAccepted) {
609
+ warnings.push({
610
+ code: "canary_not_accepted",
611
+ message: `Canary packet ${priorActiveDispatch.canary_packet_id} did not produce an accepted result before fan-out; remaining packets are being dispatched anyway.`,
612
+ });
613
+ }
614
+ }
527
615
  const warningsPath = warnings.length > 0
528
616
  ? join(runDir, "dispatch-warnings.json")
529
617
  : null;
530
618
  if (warningsPath) {
531
619
  await writeJsonFile(warningsPath, warnings);
532
620
  }
621
+ // FINDING-013: record deferred packets/tasks so the completion obligation can
622
+ // exclude them under a budget cap (present only when actually capped).
623
+ const deferredPacketIds = deferredPackets.map((packet) => packet.packet_id);
624
+ const deferredTaskIds = deferredPackets.flatMap((packet) => packet.task_ids);
533
625
  const activeDispatch = {
534
626
  run_id: runId,
535
627
  created_at: new Date().toISOString(),
536
628
  packet_count: plan.length,
537
629
  task_count: orderedTasks.length,
538
630
  status: "active",
631
+ phase,
632
+ canary_packet_id: canaryPacketId,
633
+ ...(budgetCapped
634
+ ? {
635
+ budget_packet_count: postCanaryPackets.length,
636
+ deferred_packet_ids: deferredPacketIds,
637
+ deferred_task_ids: deferredTaskIds,
638
+ }
639
+ : {}),
539
640
  };
540
641
  await writeJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME), activeDispatch);
642
+ // FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
643
+ const fanout = computeDispatchFanout({
644
+ agentCount: plan.length,
645
+ waveSize: waveSchedule.wave_size,
646
+ confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
647
+ });
541
648
  return {
542
649
  run_id: runId,
543
650
  dispatch_plan_path: dispatchPlanPath,
@@ -546,6 +653,14 @@ export async function prepareDispatchArtifacts(params) {
546
653
  task_count: orderedTasks.length,
547
654
  skipped_task_count: priorResultTaskIds.size,
548
655
  wave_size: waveSchedule.wave_size,
656
+ phase,
657
+ canary_packet_id: canaryPacketId,
658
+ agent_count: fanout.agent_count,
659
+ wave_count: fanout.wave_count,
660
+ confirmation_recommended: fanout.confirmation_recommended,
661
+ dispatch_summary: fanout.dispatch_summary,
662
+ budget_capped: budgetCapped,
663
+ deferred_packet_count: deferredPackets.length,
549
664
  largest_packet: largestPacketId
550
665
  ? {
551
666
  packet_id: largestPacketId,
@@ -7,6 +7,11 @@ import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchRes
7
7
  import { addFileLineCountHints } from "./lineIndex.js";
8
8
  import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
9
9
  import { buildWorkerResult } from "./workerResult.js";
10
+ import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
11
+ // Schema pointer files prepare-dispatch copies into task-results/ for optional
12
+ // worker self-validation. They are expected, not stray — skip them when
13
+ // scanning for spurious files.
14
+ const PACKET_SCHEMA_FILENAME_SET = new Set(PACKET_SCHEMA_FILENAMES);
10
15
  export async function cmdMergeAndIngest(argv) {
11
16
  const runId = getFlag(argv, "--run-id");
12
17
  if (!runId)
@@ -64,6 +69,11 @@ export async function cmdMergeAndIngest(argv) {
64
69
  const spuriousFiles = [];
65
70
  const fallbackByTaskId = new Map();
66
71
  for (const filename of files) {
72
+ // Schema pointer files (audit_result/finding/audit_task .schema.json) are
73
+ // copied into task-results/ by prepare-dispatch for optional worker
74
+ // self-validation; they are expected, not stray.
75
+ if (PACKET_SCHEMA_FILENAME_SET.has(filename))
76
+ continue;
67
77
  const filePath = resolve(join(taskResultsDir, filename));
68
78
  if (expectedPaths.has(filePath))
69
79
  continue;
@@ -455,7 +455,9 @@ export async function cmdNextStep(argv) {
455
455
  const designReviewResultsPath = join(artifactsDir, "incoming", "design-review-findings.json");
456
456
  await mkdir(join(artifactsDir, "incoming"), { recursive: true });
457
457
  const continueCommand = nextStepCommand(root, artifactsDir);
458
- const prompt = renderDesignReviewPrompt(result.bundle);
458
+ const prompt = renderDesignReviewPrompt(result.bundle, {
459
+ max_units: sessionConfig.design_review?.max_units,
460
+ });
459
461
  const fullPrompt = [
460
462
  prompt,
461
463
  "## Results path",
@@ -10,6 +10,8 @@ export declare function renderDispatchReviewPrompt(params: {
10
10
  dispatchQuotaPath: string | null;
11
11
  hostCanRestrictSubagentTools: boolean;
12
12
  hostCanSelectSubagentModel: boolean;
13
+ phase?: "canary" | "fan_out";
14
+ canaryPacketId?: string | null;
13
15
  }): string;
14
16
  export declare function renderSingleTaskFallbackStepPrompt(params: {
15
17
  singleTaskPromptPath: string;
@@ -78,10 +78,19 @@ export function renderDispatchReviewPrompt(params) {
78
78
  "",
79
79
  "Launch one subagent for each entry in the plan.",
80
80
  ];
81
+ const canaryLines = params.phase === "canary"
82
+ ? [
83
+ "",
84
+ "This is a CANARY round: the plan contains only the single top-priority packet. " +
85
+ "Dispatch it, run merge-and-ingest, then run next-step — the remaining packets fan out " +
86
+ "on the following step once this packet's result is accepted.",
87
+ ]
88
+ : [];
81
89
  return [
82
90
  "# audit-code dispatch review",
83
91
  "",
84
92
  ...dispatchDataLines,
93
+ ...canaryLines,
85
94
  "",
86
95
  "Pass each `entry.prompt_path` literally to its subagent; do not load packet prompt files into this orchestrator context.",
87
96
  "",
@@ -64,7 +64,10 @@ export async function renderSemanticReviewStep(params) {
64
64
  allowedCommands: [mergeCommand, continueCommand],
65
65
  allowedMcpTools: ["auditor_merge_and_ingest", "auditor_continue_audit"],
66
66
  progress: {
67
- summary: `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
67
+ summary: (dispatch.phase === "canary"
68
+ ? `Canary: dispatching only the top-priority packet (${dispatch.canary_packet_id}) before fan-out. `
69
+ : "") +
70
+ `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
68
71
  `${dispatch.task_count} task(s) in waves of ${dispatch.wave_size}` +
69
72
  (dispatch.skipped_task_count > 0
70
73
  ? `; ${dispatch.skipped_task_count} task(s) already completed.`
@@ -73,6 +76,12 @@ export async function renderSemanticReviewStep(params) {
73
76
  pending_tasks: dispatch.task_count,
74
77
  completed_tasks: dispatch.skipped_task_count,
75
78
  wave_size: dispatch.wave_size,
79
+ phase: dispatch.phase,
80
+ canary_packet_id: dispatch.canary_packet_id,
81
+ agent_count: dispatch.agent_count,
82
+ wave_count: dispatch.wave_count,
83
+ confirmation_recommended: dispatch.confirmation_recommended,
84
+ dispatch_summary: dispatch.dispatch_summary,
76
85
  },
77
86
  stopCondition: "Dispatch every packet, run merge-and-ingest once, then run next-step.",
78
87
  repoRoot: root,
@@ -91,6 +100,8 @@ export async function renderSemanticReviewStep(params) {
91
100
  dispatchQuotaPath: dispatch.dispatch_quota_path,
92
101
  hostCanRestrictSubagentTools: params.hostCanRestrictSubagentTools,
93
102
  hostCanSelectSubagentModel: params.hostCanSelectSubagentModel,
103
+ phase: dispatch.phase,
104
+ canaryPacketId: dispatch.canary_packet_id,
94
105
  }),
95
106
  access: {
96
107
  read_paths: [
@@ -17,6 +17,21 @@ export interface StepProgress {
17
17
  completed_tasks?: number;
18
18
  /** Subagent parallelism resolved for this dispatch run. */
19
19
  wave_size?: number;
20
+ /** "canary" when only the top packet was emitted this round; "fan_out" otherwise. */
21
+ phase?: "canary" | "fan_out";
22
+ /** packet_id of the emitted canary packet when `phase === "canary"`. */
23
+ canary_packet_id?: string | null;
24
+ /** Total agents (packets) that will be launched this run. */
25
+ agent_count?: number;
26
+ /** Number of dispatch waves for this run (`ceil(agent_count / wave_size)`). */
27
+ wave_count?: number;
28
+ /**
29
+ * True when `agent_count` exceeds the configured confirm threshold and the
30
+ * loader should pause for user confirmation before fan-out (FINDING-012).
31
+ */
32
+ confirmation_recommended?: boolean;
33
+ /** Human-readable fan-out summary, e.g. "12 agents across 3 waves (wave_size=4)". */
34
+ dispatch_summary?: string;
20
35
  }
21
36
  export interface StepArtifact {
22
37
  contract_version: typeof STEP_CONTRACT_VERSION;
package/dist/cli.js CHANGED
@@ -21,7 +21,6 @@ import { deriveAuditState } from "./orchestrator/state.js";
21
21
  import { createFreshSessionProvider, resolveFreshSessionProviderName, } from "./providers/index.js";
22
22
  import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
23
23
  import { clearDispatchFiles, ensureSupervisorDirs, } from "./io/runArtifacts.js";
24
- import { runAuditCodeMcpServer } from "./mcp/server.js";
25
24
  import { scheduleWave, buildProviderModelKey, readQuotaState, resolveLimits, resolveHostActiveSubagentLimit, computeMaxSafeConcurrency, getQuotaStatePath, lookupDiscoveredLimits, setQuotaStateDir, } from "./quota/index.js";
26
25
  import { DIRECT_CLI_DEFAULTS, getFlag, hasFlag, fromBase64Url, taskResultPath, getArtifactsDir, getRootDir, warnIfNotGitRepo, getBatchResultsDir, getMaxRuns, getAgentBatchSize, getParallelWorkers, getTimeoutMs, getExplicitProvider, getHostModel, getHostMaxActiveSubagents, resolveRunProviderName, chunkArray, getUiMode, looksLikeCliFlag, countLines, } from "./cli/args.js";
27
26
  import { ACTIVE_DISPATCH_FILENAME, loadDispatchResultMap, prepareDispatchArtifacts, } from "./cli/dispatch.js";
@@ -539,9 +538,6 @@ async function cmdCleanup(argv) {
539
538
  dry_run: dryRun,
540
539
  }, null, 2));
541
540
  }
542
- async function cmdMcp(argv) {
543
- await runAuditCodeMcpServer(argv.slice(3));
544
- }
545
541
  async function cmdQuota(argv) {
546
542
  const artifactsDir = getArtifactsDir(argv);
547
543
  const sessionConfig = await loadSessionConfig(artifactsDir).catch(() => ({}));
@@ -707,9 +703,6 @@ async function main(argv) {
707
703
  case "cleanup":
708
704
  await cmdCleanup(argv);
709
705
  return;
710
- case "mcp":
711
- await cmdMcp(argv);
712
- return;
713
706
  case "prepare-dispatch":
714
707
  await cmdPrepareDispatch(argv);
715
708
  return;
@@ -733,7 +726,7 @@ async function main(argv) {
733
726
  return;
734
727
  default:
735
728
  console.error(`Unknown command: ${command}`);
736
- console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
729
+ console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
737
730
  process.exitCode = 1;
738
731
  }
739
732
  }
@@ -12,6 +12,7 @@ import type { DesignAssessment } from "../types/designAssessment.js";
12
12
  import type { AnalyzerCapabilityRecord } from "../types/analyzerCapability.js";
13
13
  import type { AuditScopeManifest } from "../types/auditScope.js";
14
14
  import type { ToolingManifest } from "../types/toolingManifest.js";
15
+ import type { ActiveDispatchState } from "../cli/dispatch.js";
15
16
  type ArtifactPayloadMap = {
16
17
  repo_manifest: RepoManifest;
17
18
  file_disposition: FileDisposition;
@@ -45,8 +46,15 @@ type ArtifactPayloadMap = {
45
46
  /**
46
47
  * Audit artifacts accumulate phase-by-phase as the orchestrator advances.
47
48
  * Missing keys mean the corresponding artifact has not been produced yet.
49
+ *
50
+ * `active_dispatch` is loaded specially (like `tooling_manifest`): it lives at
51
+ * the artifacts root rather than as a standard pruned artifact, and carries the
52
+ * in-flight dispatch phase plus any budget-deferred task ids the completion
53
+ * obligation must exclude.
48
54
  */
49
- export type ArtifactBundle = Partial<ArtifactPayloadMap>;
55
+ export type ArtifactBundle = Partial<ArtifactPayloadMap> & {
56
+ active_dispatch?: ActiveDispatchState;
57
+ };
50
58
  export type ArtifactBundleKey = keyof ArtifactPayloadMap;
51
59
  type ArtifactPhase = "intake" | "analysis" | "execution" | "reporting" | "supervisor";
52
60
  interface ArtifactDefinition<K extends ArtifactBundleKey = ArtifactBundleKey> {
@@ -77,6 +77,13 @@ export async function loadArtifactBundle(root) {
77
77
  }
78
78
  }
79
79
  bundle.tooling_manifest = await buildToolingManifest();
80
+ // active-dispatch.json is written by prepare-dispatch at the artifacts root
81
+ // (not a standard ARTIFACT_DEFINITIONS entry). Load it so the completion
82
+ // obligation can exclude budget-deferred tasks. Absent on a fresh run.
83
+ const activeDispatch = await readOptionalJsonFile(join(root, "active-dispatch.json"));
84
+ if (activeDispatch !== undefined) {
85
+ bundle.active_dispatch = activeDispatch;
86
+ }
80
87
  return bundle;
81
88
  }
82
89
  export async function writeCoreArtifacts(root, bundle, options = {}) {
@@ -2,6 +2,20 @@ import type { AuditTask } from "../types.js";
2
2
  import type { WorkerTask } from "../types/workerSession.js";
3
3
  import type { RunPaths, DispatchBatchRun } from "./runArtifactTypes.js";
4
4
  export type { RunPaths, DispatchBatchRun } from "./runArtifactTypes.js";
5
+ /**
6
+ * Schema files copied into a dispatch run's `task-results/` directory so packet
7
+ * workers can optionally self-validate before submit. `audit_result.schema.json`
8
+ * `$ref`s the other two by relative filename, so all three must sit side-by-side
9
+ * for a validator to resolve them. Exported so merge-and-ingest can recognize
10
+ * them as legitimate (not stray) files in `task-results/`.
11
+ */
12
+ export declare const PACKET_SCHEMA_FILENAMES: readonly ["audit_result.schema.json", "finding.schema.json", "audit_task.schema.json"];
13
+ /**
14
+ * Copy {@link PACKET_SCHEMA_FILENAMES} into `targetDir` under their canonical
15
+ * filenames, making the AuditResult schema reachable from a dispatch run's
16
+ * `task-results/` directory.
17
+ */
18
+ export declare function writePacketSchemaFiles(targetDir: string, pkgRoot: string): Promise<void>;
5
19
  export declare function buildRunId(obligationId: string | null, index: number, now?: Date): string;
6
20
  export declare function getRunPaths(artifactsDir: string, runId: string): RunPaths;
7
21
  export declare function ensureSupervisorDirs(artifactsDir: string): Promise<void>;
@@ -7,6 +7,29 @@ const packageRoot = resolve(moduleDir, "..", "..");
7
7
  const auditResultSchemaPath = join(packageRoot, "schemas", "audit_result.schema.json");
8
8
  const auditResultsSchemaPath = join(packageRoot, "schemas", "audit_results.schema.json");
9
9
  const findingSchemaPath = join(packageRoot, "schemas", "finding.schema.json");
10
/**
 * Names of the schema files placed in a dispatch run's `task-results/`
 * directory so packet workers can optionally self-validate before submitting.
 * `audit_result.schema.json` `$ref`s the other two by relative filename, so the
 * three must sit next to each other for a validator to resolve them. Exported
 * so merge-and-ingest can treat them as expected (not stray) files.
 */
export const PACKET_SCHEMA_FILENAMES = [
    "audit_result.schema.json",
    "finding.schema.json",
    "audit_task.schema.json",
];
/**
 * Copy every file named in {@link PACKET_SCHEMA_FILENAMES} from
 * `<pkgRoot>/schemas` into `targetDir`, keeping the canonical filename.
 * `targetDir` is created (recursively) if it does not exist yet.
 *
 * @param targetDir Directory that receives the schema copies.
 * @param pkgRoot Package root containing the `schemas/` directory.
 */
export async function writePacketSchemaFiles(targetDir, pkgRoot) {
    await mkdir(targetDir, { recursive: true });
    const schemaSourceDir = join(pkgRoot, "schemas");
    for (const schemaName of PACKET_SCHEMA_FILENAMES) {
        // Copied one at a time, in list order.
        const schemaText = await readFile(join(schemaSourceDir, schemaName), "utf8");
        await writeFile(join(targetDir, schemaName), schemaText, "utf8");
    }
}
10
33
  const CURRENT_TASK_FILENAME = "current-task.json";
11
34
  const CURRENT_PROMPT_FILENAME = "current-prompt.md";
12
35
  const CURRENT_TASKS_FILENAME = "current-tasks.json";
@@ -1,2 +1,5 @@
1
1
  import type { ArtifactBundle } from "../io/artifacts.js";
2
- export declare function renderDesignReviewPrompt(bundle: ArtifactBundle): string;
2
+ export interface DesignReviewOptions {
3
+ max_units?: number;
4
+ }
5
+ export declare function renderDesignReviewPrompt(bundle: ArtifactBundle, options?: DesignReviewOptions): string;
@@ -45,6 +45,39 @@ function summarizeRisk(bundle) {
45
45
  ...lines,
46
46
  ].join("\n");
47
47
  }
48
+ function buildPrioritizedReadingList(bundle, maxUnits) {
49
+ const items = bundle.risk_register?.items ?? [];
50
+ const units = bundle.unit_manifest?.units ?? [];
51
+ if (items.length === 0 && units.length === 0) {
52
+ return "No risk or unit data available; read the repository root files to orient yourself.";
53
+ }
54
+ // Build a map from unit_id → file list for fast lookup
55
+ const unitFiles = new Map();
56
+ for (const unit of units) {
57
+ unitFiles.set(unit.unit_id, unit.files);
58
+ }
59
+ // Sort risk items by score descending, then take the top-N
60
+ const sorted = [...items].sort((a, b) => b.risk_score - a.risk_score);
61
+ const top = sorted.slice(0, maxUnits);
62
+ if (top.length === 0) {
63
+ // Fall back to listing all units if no risk data
64
+ const allUnits = units.slice(0, maxUnits);
65
+ const lines = allUnits.map((u) => `- **${u.unit_id}** — ${u.files.join(", ")}`);
66
+ return [
67
+ `Top ${allUnits.length} unit(s) (no risk scores available):`,
68
+ ...lines,
69
+ ].join("\n");
70
+ }
71
+ const lines = top.map((item) => {
72
+ const files = unitFiles.get(item.unit_id);
73
+ const fileList = files && files.length > 0 ? files.join(", ") : "(files unknown)";
74
+ return `- **${item.unit_id}** (risk score: ${item.risk_score}) — ${fileList}`;
75
+ });
76
+ return [
77
+ `Top ${top.length} highest-risk unit(s) by risk score (out of ${items.length} total):`,
78
+ ...lines,
79
+ ].join("\n");
80
+ }
48
81
  function summarizeSurfaces(bundle) {
49
82
  const surfaces = bundle.surface_manifest?.surfaces ?? [];
50
83
  if (surfaces.length === 0)
@@ -76,8 +109,12 @@ function formatDeterministicFindings(findings) {
76
109
  ...lines,
77
110
  ].join("\n");
78
111
  }
79
- export function renderDesignReviewPrompt(bundle) {
112
+ export function renderDesignReviewPrompt(bundle, options = {}) {
80
113
  const deterministicFindings = bundle.design_assessment?.findings ?? [];
114
+ const unitCount = bundle.unit_manifest?.units.length ?? 0;
115
+ const defaultMaxUnits = Math.max(5, Math.min(20, Math.ceil(unitCount / 5)));
116
+ const maxUnits = options.max_units ?? defaultMaxUnits;
117
+ const prioritizedReadingList = buildPrioritizedReadingList(bundle, maxUnits);
81
118
  return [
82
119
  "# Project design review",
83
120
  "",
@@ -117,7 +154,11 @@ export function renderDesignReviewPrompt(bundle) {
117
154
  "",
118
155
  "## What to assess",
119
156
  "",
120
- "Read the project source to understand what it does and how it works, then produce findings about:",
157
+ `Focus on the ${maxUnits} highest-risk units listed below; you need not read the entire repository, though you may follow any thread that demands more context. Produce findings about:`,
158
+ "",
159
+ "### Prioritised reading list",
160
+ "",
161
+ prioritizedReadingList,
121
162
  "",
122
163
  "- **Tool and library opportunities**: third-party tools, libraries, or frameworks that would improve the project. Concrete suggestions with rationale, not generic advice.",
123
164
  "- **Architecture pattern improvements**: structural changes that would improve extensibility, testability, or maintainability. Consider whether the current abstractions match the problem domain.",