auditor-lambda 0.9.2 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +2 -9
  2. package/audit-code-wrapper-lib.mjs +19 -920
  3. package/dist/cli/args.d.ts +11 -0
  4. package/dist/cli/args.js +14 -1
  5. package/dist/cli/auditStep.d.ts +1 -33
  6. package/dist/cli/dispatch.d.ts +47 -0
  7. package/dist/cli/dispatch.js +146 -11
  8. package/dist/cli/mergeAndIngestCommand.js +36 -9
  9. package/dist/cli/nextStepCommand.js +3 -1
  10. package/dist/cli/prompts.d.ts +2 -0
  11. package/dist/cli/prompts.js +11 -0
  12. package/dist/cli/semanticReviewStep.js +12 -1
  13. package/dist/cli/steps.d.ts +15 -0
  14. package/dist/cli.js +1 -8
  15. package/dist/io/artifacts.d.ts +9 -1
  16. package/dist/io/artifacts.js +7 -0
  17. package/dist/io/runArtifacts.d.ts +14 -0
  18. package/dist/io/runArtifacts.js +23 -0
  19. package/dist/orchestrator/designReviewPrompt.d.ts +4 -1
  20. package/dist/orchestrator/designReviewPrompt.js +43 -2
  21. package/dist/orchestrator/executorResult.d.ts +25 -0
  22. package/dist/orchestrator/intakeExecutors.d.ts +19 -1
  23. package/dist/orchestrator/intakeExecutors.js +89 -3
  24. package/dist/orchestrator/nextStep.d.ts +1 -0
  25. package/dist/orchestrator/nextStep.js +1 -1
  26. package/dist/orchestrator/state.js +8 -1
  27. package/dist/providers/constants.d.ts +1 -1
  28. package/dist/providers/constants.js +1 -1
  29. package/dist/quota/index.d.ts +2 -0
  30. package/dist/quota/index.js +4 -0
  31. package/dist/reporting/synthesis.d.ts +8 -0
  32. package/dist/reporting/synthesis.js +16 -1
  33. package/dist/supervisor/operatorHandoff.js +2 -0
  34. package/dist/types/auditScope.d.ts +16 -2
  35. package/dist/validation/sessionConfig.js +35 -0
  36. package/docs/contracts.md +0 -16
  37. package/docs/operator-guide.md +6 -8
  38. package/package.json +1 -1
  39. package/schemas/audit_findings.schema.json +1 -0
  40. package/scripts/postinstall.mjs +0 -174
  41. package/skills/audit-code/SKILL.md +17 -1
  42. package/skills/audit-code/audit-code.prompt.md +25 -0
  43. package/dist/mcp/server.d.ts +0 -72
  44. package/dist/mcp/server.js +0 -765
package/dist/cli/args.d.ts CHANGED
@@ -24,6 +24,17 @@ export declare function digestId(value: string): string;
24
24
  export declare function safeArtifactStem(value: string): string;
25
25
  export declare function artifactNameForId(value: string, extension: string): string;
26
26
  export declare function quoteCommandArg(value: string): string;
27
+ /**
28
+ * Normalize a generated command token to POSIX path separators. These command
29
+ * strings are embedded in step prompts and `allowed_commands` and run by the
30
+ * host — often through a bash-like shell, which treats `\` as an escape and
31
+ * silently corrupts Windows absolute paths (`node C:\a\b.mjs` collapses to
32
+ * `node C:ab.mjs`). Node accepts forward slashes on Windows, and `/` survives
33
+ * bash, PowerShell, and cmd alike. Only tokens that actually carry a backslash
34
+ * are touched, and no non-path argument in this CLI contains one, so this is a
35
+ * targeted normalization rather than a blanket rewrite.
36
+ */
37
+ export declare function toPosixCommandToken(value: string): string;
27
38
  export declare function renderCommand(argv: string[]): string;
28
39
  export declare function summarizeLaunchExit(result: {
29
40
  accepted?: boolean;
package/dist/cli/args.js CHANGED
@@ -80,8 +80,21 @@ export function artifactNameForId(value, extension) {
80
80
  export function quoteCommandArg(value) {
81
81
  return /[\s"]/u.test(value) ? `"${value.replace(/"/g, '\\"')}"` : value;
82
82
  }
83
+ /**
84
+ * Normalize a generated command token to POSIX path separators. These command
85
+ * strings are embedded in step prompts and `allowed_commands` and run by the
86
+ * host — often through a bash-like shell, which treats `\` as an escape and
87
+ * silently corrupts Windows absolute paths (`node C:\a\b.mjs` collapses to
88
+ * `node C:ab.mjs`). Node accepts forward slashes on Windows, and `/` survives
89
+ * bash, PowerShell, and cmd alike. Only tokens that actually carry a backslash
90
+ * are touched, and no non-path argument in this CLI contains one, so this is a
91
+ * targeted normalization rather than a blanket rewrite.
92
+ */
93
+ export function toPosixCommandToken(value) {
94
+ return value.includes("\\") ? value.replace(/\\/g, "/") : value;
95
+ }
83
96
  export function renderCommand(argv) {
84
- return argv.map((item) => quoteCommandArg(item)).join(" ");
97
+ return argv.map((item) => quoteCommandArg(toPosixCommandToken(item))).join(" ");
85
98
  }
86
99
  export function summarizeLaunchExit(result) {
87
100
  if (result.accepted !== false && !result.error) {
package/dist/cli/auditStep.d.ts CHANGED
@@ -1,7 +1,4 @@
1
- import type { AuditResult } from "../types.js";
2
1
  import type { AnalyzerSetting } from "@audit-tools/shared";
3
- import type { RuntimeValidationReport } from "../types/runtimeValidation.js";
4
- import type { ExternalAnalyzerResults } from "../types/externalAnalyzer.js";
5
2
  export declare function runAuditStep(options: {
6
3
  root: string;
7
4
  artifactsDir: string;
@@ -23,36 +20,7 @@ export declare function ingestBatchAuditResults(options: {
23
20
  batchDir: string;
24
21
  }): Promise<{
25
22
  batchFiles: string[];
26
- bundle: Partial<{
27
- repo_manifest: import("../types.js").RepoManifest;
28
- file_disposition: import("@audit-tools/shared").FileDisposition;
29
- auto_fixes_applied: unknown;
30
- unit_manifest: import("../types.js").UnitManifest;
31
- graph_bundle: import("@audit-tools/shared").GraphBundle;
32
- surface_manifest: import("@audit-tools/shared").SurfaceManifest;
33
- critical_flows: import("@audit-tools/shared").CriticalFlowManifest;
34
- flow_coverage: import("../types/flowCoverage.js").FlowCoverageManifest;
35
- risk_register: import("@audit-tools/shared").RiskRegister;
36
- design_assessment: import("../types/designAssessment.js").DesignAssessment;
37
- analyzer_capability: import("../types/analyzerCapability.js").AnalyzerCapabilityRecord;
38
- scope: import("../types/auditScope.js").AuditScopeManifest;
39
- coverage_matrix: import("../types.js").CoverageMatrix;
40
- runtime_validation_tasks: import("../types/runtimeValidation.js").RuntimeValidationTaskManifest;
41
- runtime_validation_report: RuntimeValidationReport;
42
- external_analyzer_results: ExternalAnalyzerResults;
43
- syntax_resolution_status: unknown;
44
- audit_results: AuditResult[];
45
- audit_tasks: import("../types.js").AuditTask[];
46
- audit_plan_metrics: import("../types/reviewPlanning.js").AuditPlanMetrics;
47
- review_packets: import("../types/reviewPlanning.js").ReviewPacket[];
48
- requeue_tasks: import("../types.js").AuditTask[];
49
- audit_report: string;
50
- audit_findings: import("@audit-tools/shared").AuditFindingsReport;
51
- synthesis_narrative: import("../types/synthesisNarrative.js").SynthesisNarrativeRecord;
52
- audit_state: import("../types/auditState.js").AuditState;
53
- artifact_metadata: import("../types/artifactMetadata.js").ArtifactMetadataManifest;
54
- tooling_manifest: import("../types/toolingManifest.js").ToolingManifest;
55
- }>;
23
+ bundle: import("../io/artifacts.js").ArtifactBundle;
56
24
  audit_state: import("../types/auditState.js").AuditState;
57
25
  selected_obligation: string | null;
58
26
  selected_executor: string;
package/dist/cli/dispatch.d.ts CHANGED
@@ -20,9 +20,21 @@ export declare const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
20
20
  export interface ActiveDispatchState {
21
21
  run_id: string;
22
22
  created_at: string;
23
+ /** Emitted packets only (after canary/budget filtering). */
23
24
  packet_count: number;
25
+ /** Tasks remaining this round (not-yet-done), not just emitted-packet tasks. */
24
26
  task_count: number;
25
27
  status: "active" | "merged";
28
+ /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
29
+ phase: "canary" | "fan_out";
30
+ /** packet_id of the emitted canary packet when phase==="canary", else null. */
31
+ canary_packet_id: string | null;
32
+ /** Total packets that would have been emitted before a budget cap (present only when capped). */
33
+ budget_packet_count?: number;
34
+ /** packet_ids NOT emitted due to the budget cap. */
35
+ deferred_packet_ids?: string[];
36
+ /** task_ids NOT emitted due to the budget cap. */
37
+ deferred_task_ids?: string[];
26
38
  }
27
39
  export interface DispatchResultMapEntry {
28
40
  packet_id: string;
@@ -34,6 +46,25 @@ export interface DispatchResultMap {
34
46
  run_id: string;
35
47
  entries: DispatchResultMapEntry[];
36
48
  }
49
+ export declare const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
50
+ export interface DispatchFanout {
51
+ agent_count: number;
52
+ wave_count: number;
53
+ confirmation_recommended: boolean;
54
+ dispatch_summary: string;
55
+ }
56
+ /**
57
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
58
+ * number of agents (packets emitted this round, after canary/budget filtering)
59
+ * and the resolved wave size, derive the wave count, a human-readable summary,
60
+ * and whether the agent count exceeds the confirmation threshold (default 10).
61
+ * No LLM call, no side effects, no prompting.
62
+ */
63
+ export declare function computeDispatchFanout(params: {
64
+ agentCount: number;
65
+ waveSize: number;
66
+ confirmThreshold?: number;
67
+ }): DispatchFanout;
37
68
  export interface PrepareDispatchResult {
38
69
  run_id: string;
39
70
  dispatch_plan_path: string;
@@ -43,6 +74,22 @@ export interface PrepareDispatchResult {
43
74
  skipped_task_count: number;
44
75
  /** Subagent parallelism resolved for this dispatch run. */
45
76
  wave_size: number;
77
+ /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
78
+ phase: "canary" | "fan_out";
79
+ /** packet_id of the emitted canary packet when phase==="canary", else null. */
80
+ canary_packet_id: string | null;
81
+ /** Total agents that will be launched this run (packet_count after canary/budget). */
82
+ agent_count: number;
83
+ /** ceil(agent_count / max(1, wave_size)). */
84
+ wave_count: number;
85
+ /** True when agent_count exceeds sessionConfig.dispatch?.confirm_threshold (default 10). */
86
+ confirmation_recommended: boolean;
87
+ /** Human-readable summary, e.g. "12 agents across 3 waves (wave_size=4)". */
88
+ dispatch_summary: string;
89
+ /** True when a max_packets budget capped the emitted packets this run. */
90
+ budget_capped: boolean;
91
+ /** Number of packets deferred (not emitted) due to the budget cap. */
92
+ deferred_packet_count: number;
46
93
  largest_packet: {
47
94
  packet_id: string;
48
95
  total_lines: number;
package/dist/cli/dispatch.js CHANGED
@@ -4,11 +4,12 @@ import { isAbsolute, join, relative, resolve } from "node:path";
4
4
  import { isFileMissingError, readJsonFile, writeJsonFile, DEFAULT_EMPIRICAL_HALF_LIFE_HOURS, } from "@audit-tools/shared";
5
5
  import { buildQuotaSource } from "@audit-tools/shared/quota/compositeQuotaSource";
6
6
  import { loadArtifactBundle } from "../io/artifacts.js";
7
+ import { writePacketSchemaFiles } from "../io/runArtifacts.js";
7
8
  import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, } from "../orchestrator/reviewPackets.js";
8
9
  import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
9
10
  import { resolveFreshSessionProviderName } from "../providers/index.js";
10
11
  import { loadSessionConfig } from "../supervisor/sessionConfig.js";
11
- import { scheduleWave, buildProviderModelKey, resolveHostModel, readQuotaState, resolveHostActiveSubagentLimit, lookupDiscoveredLimits, mergeDiscoveredLimits, } from "../quota/index.js";
12
+ import { computeDispatchCapacity, buildProviderModelKey, resolveHostModel, readQuotaState, resolveHostActiveSubagentLimit, lookupDiscoveredLimits, mergeDiscoveredLimits, } from "../quota/index.js";
12
13
  import { taskResultPath, packetPromptPath, artifactNameForId, toBase64Url, fromBase64Url, getFlag, } from "./args.js";
13
14
  export const LARGE_FILE_PACKET_TARGET_LINES = 2500;
14
15
  export const SMALL_MODEL_HINT_MAX_LINES = 500;
@@ -16,6 +17,29 @@ export const SMALL_MODEL_HINT_MAX_ESTIMATED_TOKENS = 3000;
16
17
  export const DEEP_MODEL_HINT_MIN_ESTIMATED_TOKENS = 9000;
17
18
  export const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
18
19
  export const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
20
+ export const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
21
+ /**
22
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
23
+ * number of agents (packets emitted this round, after canary/budget filtering)
24
+ * and the resolved wave size, derive the wave count, a human-readable summary,
25
+ * and whether the agent count exceeds the confirmation threshold (default 10).
26
+ * No LLM call, no side effects, no prompting.
27
+ */
28
+ export function computeDispatchFanout(params) {
29
+ const agentCount = params.agentCount;
30
+ const waveSize = params.waveSize;
31
+ const waveCount = Math.ceil(agentCount / Math.max(1, waveSize));
32
+ const confirmThreshold = params.confirmThreshold ?? DEFAULT_DISPATCH_CONFIRM_THRESHOLD;
33
+ const confirmationRecommended = agentCount > confirmThreshold;
34
+ const dispatchSummary = `${agentCount} agent${agentCount !== 1 ? "s" : ""} across ` +
35
+ `${waveCount} wave${waveCount !== 1 ? "s" : ""} (wave_size=${waveSize})`;
36
+ return {
37
+ agent_count: agentCount,
38
+ wave_count: waveCount,
39
+ confirmation_recommended: confirmationRecommended,
40
+ dispatch_summary: dispatchSummary,
41
+ };
42
+ }
19
43
  export function dispatchResultMapPath(runDir) {
20
44
  return join(runDir, DISPATCH_RESULT_MAP_FILENAME);
21
45
  }
@@ -200,6 +224,19 @@ export async function prepareDispatchArtifacts(params) {
200
224
  const lensDefsPath = join(params.packageRoot, "dispatch", "lens-definitions.json");
201
225
  const lensDefs = await readJsonFile(lensDefsPath);
202
226
  await mkdir(taskResultsDir, { recursive: true });
227
+ // FINDING-009: make the AuditResult JSON-Schema (and the two sibling schemas
228
+ // it $refs) reachable from this run's task-results directory so packet workers
229
+ // can optionally self-validate before calling submit-packet.
230
+ await writePacketSchemaFiles(taskResultsDir, params.packageRoot);
231
+ // FINDING-011: read the prior dispatch state (if any) so a fan-out round can
232
+ // detect a preceding canary that never produced an accepted result.
233
+ let priorActiveDispatch = null;
234
+ try {
235
+ priorActiveDispatch = await readJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME));
236
+ }
237
+ catch {
238
+ /* none yet */
239
+ }
203
240
  const priorResultTaskIds = new Set();
204
241
  for (const task of tasks) {
205
242
  if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
@@ -230,6 +267,44 @@ export async function prepareDispatchArtifacts(params) {
230
267
  if (resultPathSet.size !== resultPathByTaskId.size) {
231
268
  throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
232
269
  }
270
+ // Packets come back priority-ordered (high -> medium -> low), so packets[0] is
271
+ // the top-priority packet. Filtering composes in a fixed order: canary first
272
+ // (emit only the top packet on first contact), then the budget cap (top-K).
273
+ //
274
+ // FINDING-011: single-worker canary. On first contact with a multi-packet run,
275
+ // dispatch only the top packet; the held-back packets' tasks keep no result
276
+ // file, so they re-enter `dispatchTasks` on the next call (fan-out).
277
+ //
278
+ // Graduation signal: the canary fires only on the FIRST dispatch of a run and
279
+ // then fans out. "First dispatch" is recorded directly by active-dispatch.json
280
+ // (written at the end of every prepareDispatch), so derive it from
281
+ // priorActiveDispatch.run_id. The previous signal — "no pending task has a
282
+ // result file" (priorResultTaskIds.size === 0) — silently broke: merge-and-ingest
283
+ // prunes accepted task_ids out of pending-audit-tasks.json, so post-canary the
284
+ // still-pending tasks have no result files, priorResultTaskIds stayed empty, and
285
+ // the canary re-fired every cycle (1 packet forever, never reaching fan-out).
286
+ const priorDispatchThisRun = priorActiveDispatch?.run_id === runId;
287
+ const firstContact = !priorDispatchThisRun;
288
+ const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
289
+ const doCanary = firstContact && canaryEnabled && packets.length > 1;
290
+ const canaryPacketId = doCanary ? packets[0].packet_id : null;
291
+ const phase = doCanary ? "canary" : "fan_out";
292
+ const postCanaryPackets = doCanary ? packets.slice(0, 1) : packets;
293
+ // FINDING-013: top-K coverage budget. Cap the (already priority-ordered)
294
+ // packets at max_packets; the remainder are recorded as DEFERRED and excluded
295
+ // from the completion check so the run can finish honestly under budget.
296
+ // Budget defaults OFF (no cap) so default behavior is unchanged. Canary takes
297
+ // precedence: a canary round only emits 1 packet regardless of the budget.
298
+ const maxPackets = sessionConfig.dispatch?.max_packets;
299
+ const budgetCapped = typeof maxPackets === "number" &&
300
+ maxPackets >= 0 &&
301
+ maxPackets < postCanaryPackets.length;
302
+ const emitPackets = budgetCapped
303
+ ? postCanaryPackets.slice(0, maxPackets)
304
+ : postCanaryPackets;
305
+ const deferredPackets = budgetCapped
306
+ ? postCanaryPackets.slice(maxPackets)
307
+ : [];
233
308
  const plan = [];
234
309
  const resultMapEntries = [];
235
310
  for (const task of tasks) {
@@ -245,7 +320,7 @@ export async function prepareDispatchArtifacts(params) {
245
320
  let largestLines = 0;
246
321
  let largestEstimatedTokens = 0;
247
322
  const warnings = [];
248
- for (const packet of packets) {
323
+ for (const packet of emitPackets) {
249
324
  const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
250
325
  const packetTasks = packet.task_ids
251
326
  .map((taskId) => tasksById.get(taskId))
@@ -396,6 +471,11 @@ export async function prepareDispatchArtifacts(params) {
396
471
  "way to record results, and it writes them inside the artifacts directory for you.",
397
472
  "Produce one JSON array containing exactly one AuditResult object for each listed task.",
398
473
  "",
474
+ "Schema file (resolve relative to this prompt's directory): audit_result.schema.json",
475
+ " $refs resolved from the same directory: finding.schema.json, audit_task.schema.json",
476
+ "You MAY validate your JSON array against the schema before calling submit-packet. This is optional;",
477
+ " the submit command performs the authoritative validation and will report any errors.",
478
+ "",
399
479
  "Required AuditResult fields:",
400
480
  " task_id copy from the task metadata",
401
481
  " unit_id copy from the task metadata",
@@ -484,17 +564,27 @@ export async function prepareDispatchArtifacts(params) {
484
564
  DEFAULT_EMPIRICAL_HALF_LIFE_HOURS;
485
565
  const quotaSource = buildQuotaSource({ halfLifeHours });
486
566
  const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(quotaProviderKey).catch(() => null);
487
- const waveSchedule = scheduleWave({
567
+ // Size the dispatch just-in-time against the full pending layout (one token
568
+ // estimate per emitted packet) and the host pool's current limits, rather than
569
+ // a preset wave size. `parallel_workers` is no longer the ambition — it is
570
+ // folded into hostConcurrencyLimit as a ceiling (resolveHostActiveSubagentLimit).
571
+ // Today there is a single pool (the conversation host's subagents); a
572
+ // heterogeneous provider pool slots in here without changing the call.
573
+ const hostPool = {
574
+ id: quotaProviderKey,
488
575
  providerName: quotaProviderName,
489
- sessionConfig,
490
576
  hostModel,
491
- requestedConcurrency: sessionConfig.parallel_workers ?? plan.length,
492
- estimatedSlotTokens: perPacketTokens,
493
- quotaStateEntry,
494
577
  hostConcurrencyLimit,
578
+ quotaStateEntry,
495
579
  discoveredLimits,
496
580
  quotaSourceSnapshot,
581
+ };
582
+ const dispatchCapacity = computeDispatchCapacity({
583
+ pools: [hostPool],
584
+ sessionConfig,
585
+ pendingItemTokens: perPacketTokens,
497
586
  });
587
+ const waveSchedule = dispatchCapacity.primary.schedule;
498
588
  const dispatchQuota = {
499
589
  contract_version: "audit-code-dispatch-quota/v1alpha2",
500
590
  run_id: runId,
@@ -503,9 +593,9 @@ export async function prepareDispatchArtifacts(params) {
503
593
  confidence: waveSchedule.confidence,
504
594
  source: waveSchedule.source,
505
595
  host_concurrency_limit: waveSchedule.host_concurrency_limit,
506
- wave_size: waveSchedule.wave_size,
507
- estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
508
- cooldown_until: waveSchedule.cooldown_until,
596
+ wave_size: dispatchCapacity.total_slots,
597
+ estimated_wave_tokens: dispatchCapacity.estimated_wave_tokens,
598
+ cooldown_until: dispatchCapacity.cooldown_until,
509
599
  quota_source_snapshot: waveSchedule.quota_source_snapshot ?? null,
510
600
  backoff_state: null,
511
601
  };
@@ -524,20 +614,57 @@ export async function prepareDispatchArtifacts(params) {
524
614
  }
525
615
  }
526
616
  }
617
+ // FINDING-011: when advancing past a canary, warn if it never produced an
618
+ // accepted result. submit-packet writes the per-task result file ONLY after
619
+ // validation passes, so presence of that file == ACCEPTED. We map the recorded
620
+ // canary packet_id back to its task ids via the result map and check whether
621
+ // those tasks now have accepted results (i.e. landed in priorResultTaskIds).
622
+ if (!doCanary && priorActiveDispatch?.phase === "canary" && priorActiveDispatch.canary_packet_id) {
623
+ const canaryAccepted = priorActiveDispatch.run_id === runId
624
+ ? (await loadDispatchResultMap(runDir))?.entries
625
+ .filter((entry) => entry.packet_id === priorActiveDispatch.canary_packet_id)
626
+ .every((entry) => priorResultTaskIds.has(entry.task_id)) ?? false
627
+ : false;
628
+ if (!canaryAccepted) {
629
+ warnings.push({
630
+ code: "canary_not_accepted",
631
+ message: `Canary packet ${priorActiveDispatch.canary_packet_id} did not produce an accepted result before fan-out; remaining packets are being dispatched anyway.`,
632
+ });
633
+ }
634
+ }
527
635
  const warningsPath = warnings.length > 0
528
636
  ? join(runDir, "dispatch-warnings.json")
529
637
  : null;
530
638
  if (warningsPath) {
531
639
  await writeJsonFile(warningsPath, warnings);
532
640
  }
641
+ // FINDING-013: record deferred packets/tasks so the completion obligation can
642
+ // exclude them under a budget cap (present only when actually capped).
643
+ const deferredPacketIds = deferredPackets.map((packet) => packet.packet_id);
644
+ const deferredTaskIds = deferredPackets.flatMap((packet) => packet.task_ids);
533
645
  const activeDispatch = {
534
646
  run_id: runId,
535
647
  created_at: new Date().toISOString(),
536
648
  packet_count: plan.length,
537
649
  task_count: orderedTasks.length,
538
650
  status: "active",
651
+ phase,
652
+ canary_packet_id: canaryPacketId,
653
+ ...(budgetCapped
654
+ ? {
655
+ budget_packet_count: postCanaryPackets.length,
656
+ deferred_packet_ids: deferredPacketIds,
657
+ deferred_task_ids: deferredTaskIds,
658
+ }
659
+ : {}),
539
660
  };
540
661
  await writeJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME), activeDispatch);
662
+ // FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
663
+ const fanout = computeDispatchFanout({
664
+ agentCount: plan.length,
665
+ waveSize: dispatchCapacity.total_slots,
666
+ confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
667
+ });
541
668
  return {
542
669
  run_id: runId,
543
670
  dispatch_plan_path: dispatchPlanPath,
@@ -545,7 +672,15 @@ export async function prepareDispatchArtifacts(params) {
545
672
  packet_count: plan.length,
546
673
  task_count: orderedTasks.length,
547
674
  skipped_task_count: priorResultTaskIds.size,
548
- wave_size: waveSchedule.wave_size,
675
+ wave_size: dispatchCapacity.total_slots,
676
+ phase,
677
+ canary_packet_id: canaryPacketId,
678
+ agent_count: fanout.agent_count,
679
+ wave_count: fanout.wave_count,
680
+ confirmation_recommended: fanout.confirmation_recommended,
681
+ dispatch_summary: fanout.dispatch_summary,
682
+ budget_capped: budgetCapped,
683
+ deferred_packet_count: deferredPackets.length,
549
684
  largest_packet: largestPacketId
550
685
  ? {
551
686
  packet_id: largestPacketId,
package/dist/cli/mergeAndIngestCommand.js CHANGED
@@ -7,6 +7,11 @@ import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchRes
7
7
  import { addFileLineCountHints } from "./lineIndex.js";
8
8
  import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
9
9
  import { buildWorkerResult } from "./workerResult.js";
10
+ import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
11
+ // Schema pointer files prepare-dispatch copies into task-results/ for optional
12
+ // worker self-validation. They are expected, not stray — skip them when
13
+ // scanning for spurious files.
14
+ const PACKET_SCHEMA_FILENAME_SET = new Set(PACKET_SCHEMA_FILENAMES);
10
15
  export async function cmdMergeAndIngest(argv) {
11
16
  const runId = getFlag(argv, "--run-id");
12
17
  if (!runId)
@@ -60,10 +65,22 @@ export async function cmdMergeAndIngest(argv) {
60
65
  }
61
66
  const passing = [];
62
67
  const failing = [];
68
+ // Pending tasks that were NOT dispatched this round (canary emitted only the
69
+ // top packet, or a budget cap deferred packets). They are not failures — they
70
+ // re-enter dispatch on the next round — so they are tracked separately and must
71
+ // never inflate rejected_count, force a non-zero exit, or gate the completion
72
+ // marker. Conflating them with failures is what produced the spurious
73
+ // "180 failed" + exit-2 on every canary merge.
74
+ const notDispatched = [];
63
75
  const seenTaskIds = new Set();
64
76
  const spuriousFiles = [];
65
77
  const fallbackByTaskId = new Map();
66
78
  for (const filename of files) {
79
+ // Schema pointer files (audit_result/finding/audit_task .schema.json) are
80
+ // copied into task-results/ by prepare-dispatch for optional worker
81
+ // self-validation; they are expected, not stray.
82
+ if (PACKET_SCHEMA_FILENAME_SET.has(filename))
83
+ continue;
67
84
  const filePath = resolve(join(taskResultsDir, filename));
68
85
  if (expectedPaths.has(filePath))
69
86
  continue;
@@ -100,10 +117,9 @@ export async function cmdMergeAndIngest(argv) {
100
117
  for (const task of allTasks) {
101
118
  const entry = entryByTaskId.get(task.task_id);
102
119
  if (!entry) {
103
- failing.push({
104
- task_id: task.task_id,
105
- errors: ["Missing dispatch result-map entry for assigned task."],
106
- });
120
+ // No result-map entry => this pending task was not dispatched this round.
121
+ // Leave it pending for the next dispatch; it is not a failure.
122
+ notDispatched.push(task.task_id);
107
123
  continue;
108
124
  }
109
125
  const filePath = entry.result_path;
@@ -187,7 +203,11 @@ export async function cmdMergeAndIngest(argv) {
187
203
  try {
188
204
  const dispatch = await readJsonFile(activeDispatchPath);
189
205
  if (dispatch.run_id === runId) {
190
- dispatch.status = failing.length > 0 ? "active" : "merged";
206
+ // "merged" only when this round is fully drained: every dispatched task
207
+ // accepted AND nothing held back. A canary (notDispatched > 0) stays
208
+ // "active" because the fan-out round on the same run-id still has to merge.
209
+ dispatch.status =
210
+ failing.length > 0 || notDispatched.length > 0 ? "active" : "merged";
191
211
  await writeJsonFile(activeDispatchPath, dispatch);
192
212
  }
193
213
  }
@@ -211,7 +231,11 @@ export async function cmdMergeAndIngest(argv) {
211
231
  process.stderr.write(`[merge-and-ingest] ${passing.length} accepted, ${failing.length} failed. ` +
212
232
  `Retry packets: ${failedPacketIds.join(", ")}\n`);
213
233
  }
214
- const status = failing.length > 0
234
+ // "partial" whenever work remains for this run — either genuine dispatched
235
+ // failures (failing) or tasks held back this round (notDispatched). The exit
236
+ // code below distinguishes the two: only genuine failures exit non-zero, so a
237
+ // canary reports status "partial" but exits 0 (progressing, not an error).
238
+ const status = failing.length > 0 || notDispatched.length > 0
215
239
  ? "partial"
216
240
  : (result?.progress_made ? "completed" : "no_progress");
217
241
  const workerResult = buildWorkerResult({
@@ -231,6 +255,7 @@ export async function cmdMergeAndIngest(argv) {
231
255
  status,
232
256
  accepted_count: passing.length,
233
257
  rejected_count: failing.length,
258
+ not_dispatched_count: notDispatched.length,
234
259
  spurious_file_count: spuriousFiles.length,
235
260
  finding_count: findingCount,
236
261
  audit_results_path: auditResultsPath,
@@ -244,9 +269,11 @@ export async function cmdMergeAndIngest(argv) {
244
269
  };
245
270
  // Record a completion marker for a fully-merged run so a stray re-invocation
246
271
  // replays this summary (above) instead of re-processing — and possibly
247
- // clobbering — terminal state. Only on full success: a partial merge is meant
248
- // to be re-run after the failed packets are retried, so it stays replayable.
249
- if (failing.length === 0) {
272
+ // clobbering — terminal state. Only when this round is fully drained: genuine
273
+ // failures stay replayable for retry, and a canary (notDispatched > 0) must NOT
274
+ // be marked complete or the fan-out merge on the same run-id would short-circuit
275
+ // to an idempotent replay and silently drop the fan-out results.
276
+ if (failing.length === 0 && notDispatched.length === 0) {
250
277
  await writeJsonFile(mergeCompletePath, summaryPayload);
251
278
  }
252
279
  console.log(JSON.stringify(summaryPayload, null, 2));
package/dist/cli/nextStepCommand.js CHANGED
@@ -455,7 +455,9 @@ export async function cmdNextStep(argv) {
455
455
  const designReviewResultsPath = join(artifactsDir, "incoming", "design-review-findings.json");
456
456
  await mkdir(join(artifactsDir, "incoming"), { recursive: true });
457
457
  const continueCommand = nextStepCommand(root, artifactsDir);
458
- const prompt = renderDesignReviewPrompt(result.bundle);
458
+ const prompt = renderDesignReviewPrompt(result.bundle, {
459
+ max_units: sessionConfig.design_review?.max_units,
460
+ });
459
461
  const fullPrompt = [
460
462
  prompt,
461
463
  "## Results path",
package/dist/cli/prompts.d.ts CHANGED
@@ -10,6 +10,8 @@ export declare function renderDispatchReviewPrompt(params: {
10
10
  dispatchQuotaPath: string | null;
11
11
  hostCanRestrictSubagentTools: boolean;
12
12
  hostCanSelectSubagentModel: boolean;
13
+ phase?: "canary" | "fan_out";
14
+ canaryPacketId?: string | null;
13
15
  }): string;
14
16
  export declare function renderSingleTaskFallbackStepPrompt(params: {
15
17
  singleTaskPromptPath: string;
package/dist/cli/prompts.js CHANGED
@@ -78,10 +78,19 @@ export function renderDispatchReviewPrompt(params) {
78
78
  "",
79
79
  "Launch one subagent for each entry in the plan.",
80
80
  ];
81
+ const canaryLines = params.phase === "canary"
82
+ ? [
83
+ "",
84
+ "This is a CANARY round: the plan contains only the single top-priority packet. " +
85
+ "Dispatch it, run merge-and-ingest, then run next-step — the remaining packets fan out " +
86
+ "on the following step once this packet's result is accepted.",
87
+ ]
88
+ : [];
81
89
  return [
82
90
  "# audit-code dispatch review",
83
91
  "",
84
92
  ...dispatchDataLines,
93
+ ...canaryLines,
85
94
  "",
86
95
  "Pass each `entry.prompt_path` literally to its subagent; do not load packet prompt files into this orchestrator context.",
87
96
  "",
@@ -98,6 +107,8 @@ export function renderDispatchReviewPrompt(params) {
98
107
  "",
99
108
  "**After all waves complete:**",
100
109
  "",
110
+ "Run these backend commands directly — do not pipe them through a token-compression wrapper (e.g. `opentoken wrap`). Their JSON / prompt-contract output is parsed verbatim by the backend and this host, and wrapping corrupts it.",
111
+ "",
101
112
  "Run exactly:",
102
113
  "",
103
114
  ` ${mergeCommand}`,
@@ -64,7 +64,10 @@ export async function renderSemanticReviewStep(params) {
64
64
  allowedCommands: [mergeCommand, continueCommand],
65
65
  allowedMcpTools: ["auditor_merge_and_ingest", "auditor_continue_audit"],
66
66
  progress: {
67
- summary: `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
67
+ summary: (dispatch.phase === "canary"
68
+ ? `Canary: dispatching only the top-priority packet (${dispatch.canary_packet_id}) before fan-out. `
69
+ : "") +
70
+ `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
68
71
  `${dispatch.task_count} task(s) in waves of ${dispatch.wave_size}` +
69
72
  (dispatch.skipped_task_count > 0
70
73
  ? `; ${dispatch.skipped_task_count} task(s) already completed.`
@@ -73,6 +76,12 @@ export async function renderSemanticReviewStep(params) {
73
76
  pending_tasks: dispatch.task_count,
74
77
  completed_tasks: dispatch.skipped_task_count,
75
78
  wave_size: dispatch.wave_size,
79
+ phase: dispatch.phase,
80
+ canary_packet_id: dispatch.canary_packet_id,
81
+ agent_count: dispatch.agent_count,
82
+ wave_count: dispatch.wave_count,
83
+ confirmation_recommended: dispatch.confirmation_recommended,
84
+ dispatch_summary: dispatch.dispatch_summary,
76
85
  },
77
86
  stopCondition: "Dispatch every packet, run merge-and-ingest once, then run next-step.",
78
87
  repoRoot: root,
@@ -91,6 +100,8 @@ export async function renderSemanticReviewStep(params) {
91
100
  dispatchQuotaPath: dispatch.dispatch_quota_path,
92
101
  hostCanRestrictSubagentTools: params.hostCanRestrictSubagentTools,
93
102
  hostCanSelectSubagentModel: params.hostCanSelectSubagentModel,
103
+ phase: dispatch.phase,
104
+ canaryPacketId: dispatch.canary_packet_id,
94
105
  }),
95
106
  access: {
96
107
  read_paths: [
@@ -17,6 +17,21 @@ export interface StepProgress {
17
17
  completed_tasks?: number;
18
18
  /** Subagent parallelism resolved for this dispatch run. */
19
19
  wave_size?: number;
20
+ /** "canary" when only the top packet was emitted this round; "fan_out" otherwise. */
21
+ phase?: "canary" | "fan_out";
22
+ /** packet_id of the emitted canary packet when `phase === "canary"`. */
23
+ canary_packet_id?: string | null;
24
+ /** Total agents (packets) that will be launched this run. */
25
+ agent_count?: number;
26
+ /** Number of dispatch waves for this run (`ceil(agent_count / wave_size)`). */
27
+ wave_count?: number;
28
+ /**
29
+ * True when `agent_count` exceeds the configured confirm threshold and the
30
+ * loader should pause for user confirmation before fan-out (FINDING-012).
31
+ */
32
+ confirmation_recommended?: boolean;
33
+ /** Human-readable fan-out summary, e.g. "12 agents across 3 waves (wave_size=4)". */
34
+ dispatch_summary?: string;
20
35
  }
21
36
  export interface StepArtifact {
22
37
  contract_version: typeof STEP_CONTRACT_VERSION;
package/dist/cli.js CHANGED
@@ -21,7 +21,6 @@ import { deriveAuditState } from "./orchestrator/state.js";
21
21
  import { createFreshSessionProvider, resolveFreshSessionProviderName, } from "./providers/index.js";
22
22
  import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
23
23
  import { clearDispatchFiles, ensureSupervisorDirs, } from "./io/runArtifacts.js";
24
- import { runAuditCodeMcpServer } from "./mcp/server.js";
25
24
  import { scheduleWave, buildProviderModelKey, readQuotaState, resolveLimits, resolveHostActiveSubagentLimit, computeMaxSafeConcurrency, getQuotaStatePath, lookupDiscoveredLimits, setQuotaStateDir, } from "./quota/index.js";
26
25
  import { DIRECT_CLI_DEFAULTS, getFlag, hasFlag, fromBase64Url, taskResultPath, getArtifactsDir, getRootDir, warnIfNotGitRepo, getBatchResultsDir, getMaxRuns, getAgentBatchSize, getParallelWorkers, getTimeoutMs, getExplicitProvider, getHostModel, getHostMaxActiveSubagents, resolveRunProviderName, chunkArray, getUiMode, looksLikeCliFlag, countLines, } from "./cli/args.js";
27
26
  import { ACTIVE_DISPATCH_FILENAME, loadDispatchResultMap, prepareDispatchArtifacts, } from "./cli/dispatch.js";
@@ -539,9 +538,6 @@ async function cmdCleanup(argv) {
539
538
  dry_run: dryRun,
540
539
  }, null, 2));
541
540
  }
542
- async function cmdMcp(argv) {
543
- await runAuditCodeMcpServer(argv.slice(3));
544
- }
545
541
  async function cmdQuota(argv) {
546
542
  const artifactsDir = getArtifactsDir(argv);
547
543
  const sessionConfig = await loadSessionConfig(artifactsDir).catch(() => ({}));
@@ -707,9 +703,6 @@ async function main(argv) {
707
703
  case "cleanup":
708
704
  await cmdCleanup(argv);
709
705
  return;
710
- case "mcp":
711
- await cmdMcp(argv);
712
- return;
713
706
  case "prepare-dispatch":
714
707
  await cmdPrepareDispatch(argv);
715
708
  return;
@@ -733,7 +726,7 @@ async function main(argv) {
733
726
  return;
734
727
  default:
735
728
  console.error(`Unknown command: ${command}`);
736
- console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
729
+ console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
737
730
  process.exitCode = 1;
738
731
  }
739
732
  }