npm - auditor-lambda - Versions diffs - 0.9.2 → 0.10.1 - Mend

auditor-lambda 0.9.2 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/README.md +2 -9
package/audit-code-wrapper-lib.mjs +19 -920
package/dist/cli/args.d.ts +11 -0
package/dist/cli/args.js +14 -1
package/dist/cli/auditStep.d.ts +1 -33
package/dist/cli/dispatch.d.ts +47 -0
package/dist/cli/dispatch.js +146 -11
package/dist/cli/mergeAndIngestCommand.js +36 -9
package/dist/cli/nextStepCommand.js +3 -1
package/dist/cli/prompts.d.ts +2 -0
package/dist/cli/prompts.js +11 -0
package/dist/cli/semanticReviewStep.js +12 -1
package/dist/cli/steps.d.ts +15 -0
package/dist/cli.js +1 -8
package/dist/io/artifacts.d.ts +9 -1
package/dist/io/artifacts.js +7 -0
package/dist/io/runArtifacts.d.ts +14 -0
package/dist/io/runArtifacts.js +23 -0
package/dist/orchestrator/designReviewPrompt.d.ts +4 -1
package/dist/orchestrator/designReviewPrompt.js +43 -2
package/dist/orchestrator/executorResult.d.ts +25 -0
package/dist/orchestrator/intakeExecutors.d.ts +19 -1
package/dist/orchestrator/intakeExecutors.js +89 -3
package/dist/orchestrator/nextStep.d.ts +1 -0
package/dist/orchestrator/nextStep.js +1 -1
package/dist/orchestrator/state.js +8 -1
package/dist/providers/constants.d.ts +1 -1
package/dist/providers/constants.js +1 -1
package/dist/quota/index.d.ts +2 -0
package/dist/quota/index.js +4 -0
package/dist/reporting/synthesis.d.ts +8 -0
package/dist/reporting/synthesis.js +16 -1
package/dist/supervisor/operatorHandoff.js +2 -0
package/dist/types/auditScope.d.ts +16 -2
package/dist/validation/sessionConfig.js +35 -0
package/docs/contracts.md +0 -16
package/docs/operator-guide.md +6 -8
package/package.json +1 -1
package/schemas/audit_findings.schema.json +1 -0
package/scripts/postinstall.mjs +0 -174
package/skills/audit-code/SKILL.md +17 -1
package/skills/audit-code/audit-code.prompt.md +25 -0
package/dist/mcp/server.d.ts +0 -72
package/dist/mcp/server.js +0 -765

package/dist/cli/args.d.ts CHANGED Viewed

@@ -24,6 +24,17 @@ export declare function digestId(value: string): string;
 export declare function safeArtifactStem(value: string): string;
 export declare function artifactNameForId(value: string, extension: string): string;
 export declare function quoteCommandArg(value: string): string;
+/**
+ * Normalize a generated command token to POSIX path separators. These command
+ * strings are embedded in step prompts and `allowed_commands` and run by the
+ * host — often through a bash-like shell, which treats `\` as an escape and
+ * silently corrupts Windows absolute paths (`node C:\a\b.mjs` collapses to
+ * `node C:ab.mjs`). Node accepts forward slashes on Windows, and `/` survives
+ * bash, PowerShell, and cmd alike. Only tokens that actually carry a backslash
+ * are touched, and no non-path argument in this CLI contains one, so this is a
+ * targeted normalization rather than a blanket rewrite.
+ *
+ * @param value - One command token (a single argv element), not a whole command line.
+ * @returns `value` with every `\` replaced by `/`; a token without a backslash is returned unchanged.
+ */
+export declare function toPosixCommandToken(value: string): string;
 export declare function renderCommand(argv: string[]): string;
 export declare function summarizeLaunchExit(result: {
     accepted?: boolean;

package/dist/cli/args.js CHANGED Viewed

@@ -80,8 +80,21 @@ export function artifactNameForId(value, extension) {
 /**
  * Quote one command argument for inclusion in a rendered command string.
  *
  * An argument containing whitespace or a double quote is wrapped in double
  * quotes with each embedded `"` escaped as `\"`. An empty argument becomes `""`
  * so it still occupies a position once the tokens are joined with spaces.
  * Anything else is returned unchanged.
  *
  * @param value - The argument text.
  * @returns The argument, quoted when required.
  */
 export function quoteCommandArg(value) {
     // Left bare, an empty argument vanishes from the joined command line and
     // silently shifts every argument that follows it.
     if (value === "") {
         return '""';
     }
     return /[\s"]/u.test(value) ? `"${value.replace(/"/g, '\\"')}"` : value;
 }
+/**
+ * Normalize a generated command token to POSIX path separators. These command
+ * strings are embedded in step prompts and `allowed_commands` and run by the
+ * host — often through a bash-like shell, which treats `\` as an escape and
+ * silently corrupts Windows absolute paths (`node C:\a\b.mjs` collapses to
+ * `node C:ab.mjs`). Node accepts forward slashes on Windows, and `/` survives
+ * bash, PowerShell, and cmd alike. Only tokens that actually carry a backslash
+ * are touched, and no non-path argument in this CLI contains one, so this is a
+ * targeted normalization rather than a blanket rewrite.
+ *
+ * @param value - One command token (a single argv element), not a whole command line.
+ * @returns `value` with every `\` replaced by `/`; a token without a backslash is returned unchanged.
+ */
+export function toPosixCommandToken(value) {
+    return value.includes("\\") ? value.replace(/\\/g, "/") : value; // global regex: every backslash is replaced, not just the first
+}
 /**
  * Render an argv array as one command string: every element is quoted with
  * quoteCommandArg when needed and the results are joined with a single space.
  */
 export function renderCommand(argv) {
-    return argv.map((item) => quoteCommandArg(item)).join(" "); // 0.9.2: tokens were quoted as-is
+    return argv.map((item) => quoteCommandArg(toPosixCommandToken(item))).join(" "); // 0.10.1: backslashes become "/" before quoting
 }
 export function summarizeLaunchExit(result) {
     if (result.accepted !== false && !result.error) {

package/dist/cli/auditStep.d.ts CHANGED Viewed

@@ -1,7 +1,4 @@
-import type { AuditResult } from "../types.js";
 import type { AnalyzerSetting } from "@audit-tools/shared";
-import type { RuntimeValidationReport } from "../types/runtimeValidation.js";
-import type { ExternalAnalyzerResults } from "../types/externalAnalyzer.js";
 export declare function runAuditStep(options: {
     root: string;
     artifactsDir: string;
@@ -23,36 +20,7 @@ export declare function ingestBatchAuditResults(options: {
     batchDir: string;
 }): Promise<{
     batchFiles: string[];
-    bundle: Partial<{
-        repo_manifest: import("../types.js").RepoManifest;
-        file_disposition: import("@audit-tools/shared").FileDisposition;
-        auto_fixes_applied: unknown;
-        unit_manifest: import("../types.js").UnitManifest;
-        graph_bundle: import("@audit-tools/shared").GraphBundle;
-        surface_manifest: import("@audit-tools/shared").SurfaceManifest;
-        critical_flows: import("@audit-tools/shared").CriticalFlowManifest;
-        flow_coverage: import("../types/flowCoverage.js").FlowCoverageManifest;
-        risk_register: import("@audit-tools/shared").RiskRegister;
-        design_assessment: import("../types/designAssessment.js").DesignAssessment;
-        analyzer_capability: import("../types/analyzerCapability.js").AnalyzerCapabilityRecord;
-        scope: import("../types/auditScope.js").AuditScopeManifest;
-        coverage_matrix: import("../types.js").CoverageMatrix;
-        runtime_validation_tasks: import("../types/runtimeValidation.js").RuntimeValidationTaskManifest;
-        runtime_validation_report: RuntimeValidationReport;
-        external_analyzer_results: ExternalAnalyzerResults;
-        syntax_resolution_status: unknown;
-        audit_results: AuditResult[];
-        audit_tasks: import("../types.js").AuditTask[];
-        audit_plan_metrics: import("../types/reviewPlanning.js").AuditPlanMetrics;
-        review_packets: import("../types/reviewPlanning.js").ReviewPacket[];
-        requeue_tasks: import("../types.js").AuditTask[];
-        audit_report: string;
-        audit_findings: import("@audit-tools/shared").AuditFindingsReport;
-        synthesis_narrative: import("../types/synthesisNarrative.js").SynthesisNarrativeRecord;
-        audit_state: import("../types/auditState.js").AuditState;
-        artifact_metadata: import("../types/artifactMetadata.js").ArtifactMetadataManifest;
-        tooling_manifest: import("../types/toolingManifest.js").ToolingManifest;
-    }>;
+    bundle: import("../io/artifacts.js").ArtifactBundle;
     audit_state: import("../types/auditState.js").AuditState;
     selected_obligation: string | null;
     selected_executor: string;

package/dist/cli/dispatch.d.ts CHANGED Viewed

@@ -20,9 +20,21 @@ export declare const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
 /**
  * Dispatch state that prepare-dispatch writes to active-dispatch.json in the
  * artifacts directory and that merge-and-ingest later updates for the same run.
  */
 export interface ActiveDispatchState {
     /** Run this state belongs to; a differing id means no dispatch has been recorded yet for the current run. */
     run_id: string;
     /** ISO-8601 timestamp taken when prepare-dispatch wrote this state. */
     created_at: string;
+    /** Emitted packets only (after canary/budget filtering). */
     packet_count: number;
+    /** Tasks remaining this round (not-yet-done), not just emitted-packet tasks. */
     task_count: number;
     /** Written as "active"; merge-and-ingest sets "merged" only when no dispatched task failed and none were held back. */
     status: "active" | "merged";
+    /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
+    phase: "canary" | "fan_out";
+    /** packet_id of the emitted canary packet when phase==="canary", else null. */
+    canary_packet_id: string | null;
+    /** Total packets that would have been emitted before a budget cap (present only when capped). */
+    budget_packet_count?: number;
+    /** packet_ids NOT emitted due to the budget cap. */
+    deferred_packet_ids?: string[];
+    /** task_ids NOT emitted due to the budget cap. */
+    deferred_task_ids?: string[];
 }
 export interface DispatchResultMapEntry {
     packet_id: string;
@@ -34,6 +46,25 @@ export interface DispatchResultMap {
     run_id: string;
     entries: DispatchResultMapEntry[];
 }
+export declare const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
+export interface DispatchFanout { // Shape returned by computeDispatchFanout.
+    agent_count: number; // Agents to launch: the packets emitted this round.
+    wave_count: number; // ceil(agent_count / max(1, wave size)).
+    confirmation_recommended: boolean; // True when agent_count is strictly above the confirmation threshold.
+    dispatch_summary: string; // Human-readable line, e.g. "12 agents across 3 waves (wave_size=4)".
+}
+/**
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
+ * number of agents (packets emitted this round, after canary/budget filtering)
+ * and the resolved wave size, derive the wave count, a human-readable summary,
+ * and whether the agent count exceeds the confirmation threshold (default 10).
+ * No LLM call, no side effects, no prompting.
+ *
+ * @param params.agentCount - Number of agents (packets emitted this round).
+ * @param params.waveSize - Resolved wave size; values below 1 count as 1 when deriving wave_count.
+ * @param params.confirmThreshold - Agent count above which confirmation is recommended; defaults to DEFAULT_DISPATCH_CONFIRM_THRESHOLD (10).
+ * @returns The agent count, wave count, confirmation flag and summary string.
+ */
+export declare function computeDispatchFanout(params: {
+    agentCount: number;
+    waveSize: number;
+    confirmThreshold?: number;
+}): DispatchFanout;
 export interface PrepareDispatchResult {
     run_id: string;
     dispatch_plan_path: string;
@@ -43,6 +74,22 @@ export interface PrepareDispatchResult {
     skipped_task_count: number;
     /** Subagent parallelism resolved for this dispatch run. */
     wave_size: number;
+    /** "canary" on first contact when only the top packet was emitted; "fan_out" otherwise. */
+    phase: "canary" | "fan_out";
+    /** packet_id of the emitted canary packet when phase==="canary", else null. */
+    canary_packet_id: string | null;
+    /** Total agents that will be launched this run (packet_count after canary/budget). */
+    agent_count: number;
+    /** ceil(agent_count / max(1, wave_size)). */
+    wave_count: number;
+    /** True when agent_count exceeds sessionConfig.dispatch?.confirm_threshold (default 10). */
+    confirmation_recommended: boolean;
+    /** Human-readable summary, e.g. "12 agents across 3 waves (wave_size=4)". */
+    dispatch_summary: string;
+    /** True when a max_packets budget capped the emitted packets this run. */
+    budget_capped: boolean;
+    /** Number of packets deferred (not emitted) due to the budget cap. */
+    deferred_packet_count: number;
     largest_packet: {
         packet_id: string;
         total_lines: number;

package/dist/cli/dispatch.js CHANGED Viewed

@@ -4,11 +4,12 @@ import { isAbsolute, join, relative, resolve } from "node:path";
 import { isFileMissingError, readJsonFile, writeJsonFile, DEFAULT_EMPIRICAL_HALF_LIFE_HOURS, } from "@audit-tools/shared";
 import { buildQuotaSource } from "@audit-tools/shared/quota/compositeQuotaSource";
 import { loadArtifactBundle } from "../io/artifacts.js";
+import { writePacketSchemaFiles } from "../io/runArtifacts.js";
 import { orderTasksForPacketReview, buildReviewPackets, sizeIndexFromManifest, } from "../orchestrator/reviewPackets.js";
 import { buildFileAnchorSummary } from "../orchestrator/fileAnchors.js";
 import { resolveFreshSessionProviderName } from "../providers/index.js";
 import { loadSessionConfig } from "../supervisor/sessionConfig.js";
-import { scheduleWave, buildProviderModelKey, resolveHostModel, readQuotaState, resolveHostActiveSubagentLimit, lookupDiscoveredLimits, mergeDiscoveredLimits, } from "../quota/index.js";
+import { computeDispatchCapacity, buildProviderModelKey, resolveHostModel, readQuotaState, resolveHostActiveSubagentLimit, lookupDiscoveredLimits, mergeDiscoveredLimits, } from "../quota/index.js";
 import { taskResultPath, packetPromptPath, artifactNameForId, toBase64Url, fromBase64Url, getFlag, } from "./args.js";
 export const LARGE_FILE_PACKET_TARGET_LINES = 2500;
 export const SMALL_MODEL_HINT_MAX_LINES = 500;
@@ -16,6 +17,29 @@ export const SMALL_MODEL_HINT_MAX_ESTIMATED_TOKENS = 3000;
 export const DEEP_MODEL_HINT_MIN_ESTIMATED_TOKENS = 9000;
 export const DISPATCH_RESULT_MAP_FILENAME = "dispatch-result-map.json";
 export const ACTIVE_DISPATCH_FILENAME = "active-dispatch.json";
+export const DEFAULT_DISPATCH_CONFIRM_THRESHOLD = 10;
+/**
+ * FINDING-012: pure-arithmetic fan-out summary the loader can gate on. Given the
+ * number of agents (packets emitted this round, after canary/budget filtering)
+ * and the resolved wave size, derive the wave count, a human-readable summary,
+ * and whether the agent count exceeds the confirmation threshold (default 10).
+ * No LLM call, no side effects, no prompting.
+ *
+ * @param params.agentCount - Number of agents (packets emitted this round).
+ * @param params.waveSize - Resolved wave size; values below 1 count as 1 when deriving wave_count.
+ * @param params.confirmThreshold - Agent count above which confirmation is recommended; defaults to DEFAULT_DISPATCH_CONFIRM_THRESHOLD.
+ * @returns An object with agent_count, wave_count, confirmation_recommended and dispatch_summary.
+ */
+export function computeDispatchFanout(params) {
+    const agentCount = params.agentCount;
+    const waveSize = params.waveSize;
+    const waveCount = Math.ceil(agentCount / Math.max(1, waveSize)); // clamp the divisor so a wave size below 1 cannot divide by zero
+    const confirmThreshold = params.confirmThreshold ?? DEFAULT_DISPATCH_CONFIRM_THRESHOLD; // only null/undefined fall back; an explicit 0 is honored
+    const confirmationRecommended = agentCount > confirmThreshold; // strictly greater: exactly the threshold does not trigger
+    const dispatchSummary = `${agentCount} agent${agentCount !== 1 ? "s" : ""} across ` +
+        `${waveCount} wave${waveCount !== 1 ? "s" : ""} (wave_size=${waveSize})`; // reports the raw waveSize, not the clamped divisor
+    return {
+        agent_count: agentCount,
+        wave_count: waveCount,
+        confirmation_recommended: confirmationRecommended,
+        dispatch_summary: dispatchSummary,
+    };
+}
 /**
  * Build the path of the dispatch result map inside a run directory.
  *
  * @param runDir - Directory of the run.
  * @returns `runDir` joined with DISPATCH_RESULT_MAP_FILENAME.
  */
 export function dispatchResultMapPath(runDir) {
     const resultMapFile = join(runDir, DISPATCH_RESULT_MAP_FILENAME);
     return resultMapFile;
 }
@@ -200,6 +224,19 @@ export async function prepareDispatchArtifacts(params) {
     const lensDefsPath = join(params.packageRoot, "dispatch", "lens-definitions.json");
     const lensDefs = await readJsonFile(lensDefsPath);
     await mkdir(taskResultsDir, { recursive: true });
+    // FINDING-009: make the AuditResult JSON-Schema (and the two sibling schemas
+    // it $refs) reachable from this run's task-results directory so packet workers
+    // can optionally self-validate before calling submit-packet.
+    await writePacketSchemaFiles(taskResultsDir, params.packageRoot);
+    // FINDING-011: read the prior dispatch state (if any) so a fan-out round can
+    // detect a preceding canary that never produced an accepted result.
+    let priorActiveDispatch = null;
+    try {
+        priorActiveDispatch = await readJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME));
+    }
+    catch {
+        /* none yet */
+    }
     const priorResultTaskIds = new Set();
     for (const task of tasks) {
         if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
@@ -230,6 +267,44 @@ export async function prepareDispatchArtifacts(params) {
     if (resultPathSet.size !== resultPathByTaskId.size) {
         throw new Error("prepare-dispatch generated duplicate result paths; task ids must be uniquely addressable.");
     }
+    // Packets come back priority-ordered (high -> medium -> low), so packets[0] is
+    // the top-priority packet. Filtering composes in a fixed order: canary first
+    // (emit only the top packet on first contact), then the budget cap (top-K).
+    //
+    // FINDING-011: single-worker canary. On first contact with a multi-packet run,
+    // dispatch only the top packet; the held-back packets' tasks keep no result
+    // file, so they re-enter `dispatchTasks` on the next call (fan-out).
+    //
+    // Graduation signal: the canary fires only on the FIRST dispatch of a run and
+    // then fans out. "First dispatch" is recorded directly by active-dispatch.json
+    // (written at the end of every prepareDispatch), so derive it from
+    // priorActiveDispatch.run_id. The previous signal — "no pending task has a
+    // result file" (priorResultTaskIds.size === 0) — silently broke: merge-and-ingest
+    // prunes accepted task_ids out of pending-audit-tasks.json, so post-canary the
+    // still-pending tasks have no result files, priorResultTaskIds stayed empty, and
+    // the canary re-fired every cycle (1 packet forever, never reaching fan-out).
+    const priorDispatchThisRun = priorActiveDispatch?.run_id === runId;
+    const firstContact = !priorDispatchThisRun;
+    const canaryEnabled = sessionConfig.dispatch?.canary !== false; // default on
+    const doCanary = firstContact && canaryEnabled && packets.length > 1;
+    const canaryPacketId = doCanary ? packets[0].packet_id : null;
+    const phase = doCanary ? "canary" : "fan_out";
+    const postCanaryPackets = doCanary ? packets.slice(0, 1) : packets;
+    // FINDING-013: top-K coverage budget. Cap the (already priority-ordered)
+    // packets at max_packets; the remainder are recorded as DEFERRED and excluded
+    // from the completion check so the run can finish honestly under budget.
+    // Budget defaults OFF (no cap) so default behavior is unchanged. Canary takes
+    // precedence: a canary round only emits 1 packet regardless of the budget.
+    const maxPackets = sessionConfig.dispatch?.max_packets;
+    const budgetCapped = typeof maxPackets === "number" &&
+        maxPackets >= 0 &&
+        maxPackets < postCanaryPackets.length;
+    const emitPackets = budgetCapped
+        ? postCanaryPackets.slice(0, maxPackets)
+        : postCanaryPackets;
+    const deferredPackets = budgetCapped
+        ? postCanaryPackets.slice(maxPackets)
+        : [];
     const plan = [];
     const resultMapEntries = [];
     for (const task of tasks) {
@@ -245,7 +320,7 @@ export async function prepareDispatchArtifacts(params) {
     let largestLines = 0;
     let largestEstimatedTokens = 0;
     const warnings = [];
-    for (const packet of packets) {
+    for (const packet of emitPackets) {
         const promptPath = packetPromptPath(taskResultsDir, packet.packet_id);
         const packetTasks = packet.task_ids
             .map((taskId) => tasksById.get(taskId))
@@ -396,6 +471,11 @@ export async function prepareDispatchArtifacts(params) {
             "way to record results, and it writes them inside the artifacts directory for you.",
             "Produce one JSON array containing exactly one AuditResult object for each listed task.",
             "",
+            "Schema file (resolve relative to this prompt's directory): audit_result.schema.json",
+            "  $refs resolved from the same directory: finding.schema.json, audit_task.schema.json",
+            "You MAY validate your JSON array against the schema before calling submit-packet. This is optional;",
+            "  the submit command performs the authoritative validation and will report any errors.",
+            "",
             "Required AuditResult fields:",
             "  task_id       copy from the task metadata",
             "  unit_id       copy from the task metadata",
@@ -484,17 +564,27 @@ export async function prepareDispatchArtifacts(params) {
         DEFAULT_EMPIRICAL_HALF_LIFE_HOURS;
     const quotaSource = buildQuotaSource({ halfLifeHours });
     const quotaSourceSnapshot = await quotaSource.queryCurrentUsage(quotaProviderKey).catch(() => null);
-    const waveSchedule = scheduleWave({
+    // Size the dispatch just-in-time against the full pending layout (one token
+    // estimate per emitted packet) and the host pool's current limits, rather than
+    // a preset wave size. `parallel_workers` is no longer the ambition — it is
+    // folded into hostConcurrencyLimit as a ceiling (resolveHostActiveSubagentLimit).
+    // Today there is a single pool (the conversation host's subagents); a
+    // heterogeneous provider pool slots in here without changing the call.
+    const hostPool = {
+        id: quotaProviderKey,
         providerName: quotaProviderName,
-        sessionConfig,
         hostModel,
-        requestedConcurrency: sessionConfig.parallel_workers ?? plan.length,
-        estimatedSlotTokens: perPacketTokens,
-        quotaStateEntry,
         hostConcurrencyLimit,
+        quotaStateEntry,
         discoveredLimits,
         quotaSourceSnapshot,
+    };
+    const dispatchCapacity = computeDispatchCapacity({
+        pools: [hostPool],
+        sessionConfig,
+        pendingItemTokens: perPacketTokens,
     });
+    const waveSchedule = dispatchCapacity.primary.schedule;
     const dispatchQuota = {
         contract_version: "audit-code-dispatch-quota/v1alpha2",
         run_id: runId,
@@ -503,9 +593,9 @@ export async function prepareDispatchArtifacts(params) {
         confidence: waveSchedule.confidence,
         source: waveSchedule.source,
         host_concurrency_limit: waveSchedule.host_concurrency_limit,
-        wave_size: waveSchedule.wave_size,
-        estimated_wave_tokens: waveSchedule.estimated_wave_tokens,
-        cooldown_until: waveSchedule.cooldown_until,
+        wave_size: dispatchCapacity.total_slots,
+        estimated_wave_tokens: dispatchCapacity.estimated_wave_tokens,
+        cooldown_until: dispatchCapacity.cooldown_until,
         quota_source_snapshot: waveSchedule.quota_source_snapshot ?? null,
         backoff_state: null,
     };
@@ -524,20 +614,57 @@ export async function prepareDispatchArtifacts(params) {
             }
         }
     }
+    // FINDING-011: when advancing past a canary, warn if it never produced an
+    // accepted result. submit-packet writes the per-task result file ONLY after
+    // validation passes, so presence of that file == ACCEPTED. We map the recorded
+    // canary packet_id back to its task ids via the result map and check whether
+    // those tasks now have accepted results (i.e. landed in priorResultTaskIds).
+    if (!doCanary && priorActiveDispatch?.phase === "canary" && priorActiveDispatch.canary_packet_id) {
+        const canaryAccepted = priorActiveDispatch.run_id === runId
+            ? (await loadDispatchResultMap(runDir))?.entries
+                .filter((entry) => entry.packet_id === priorActiveDispatch.canary_packet_id)
+                .every((entry) => priorResultTaskIds.has(entry.task_id)) ?? false
+            : false;
+        if (!canaryAccepted) {
+            warnings.push({
+                code: "canary_not_accepted",
+                message: `Canary packet ${priorActiveDispatch.canary_packet_id} did not produce an accepted result before fan-out; remaining packets are being dispatched anyway.`,
+            });
+        }
+    }
     const warningsPath = warnings.length > 0
         ? join(runDir, "dispatch-warnings.json")
         : null;
     if (warningsPath) {
         await writeJsonFile(warningsPath, warnings);
     }
+    // FINDING-013: record deferred packets/tasks so the completion obligation can
+    // exclude them under a budget cap (present only when actually capped).
+    const deferredPacketIds = deferredPackets.map((packet) => packet.packet_id);
+    const deferredTaskIds = deferredPackets.flatMap((packet) => packet.task_ids);
     const activeDispatch = {
         run_id: runId,
         created_at: new Date().toISOString(),
         packet_count: plan.length,
         task_count: orderedTasks.length,
         status: "active",
+        phase,
+        canary_packet_id: canaryPacketId,
+        ...(budgetCapped
+            ? {
+                budget_packet_count: postCanaryPackets.length,
+                deferred_packet_ids: deferredPacketIds,
+                deferred_task_ids: deferredTaskIds,
+            }
+            : {}),
     };
     await writeJsonFile(join(artifactsDir, ACTIVE_DISPATCH_FILENAME), activeDispatch);
+    // FINDING-012: pure-arithmetic fan-out summary the loader can gate on.
+    const fanout = computeDispatchFanout({
+        agentCount: plan.length,
+        waveSize: dispatchCapacity.total_slots,
+        confirmThreshold: sessionConfig.dispatch?.confirm_threshold,
+    });
     return {
         run_id: runId,
         dispatch_plan_path: dispatchPlanPath,
@@ -545,7 +672,15 @@ export async function prepareDispatchArtifacts(params) {
         packet_count: plan.length,
         task_count: orderedTasks.length,
         skipped_task_count: priorResultTaskIds.size,
-        wave_size: waveSchedule.wave_size,
+        wave_size: dispatchCapacity.total_slots,
+        phase,
+        canary_packet_id: canaryPacketId,
+        agent_count: fanout.agent_count,
+        wave_count: fanout.wave_count,
+        confirmation_recommended: fanout.confirmation_recommended,
+        dispatch_summary: fanout.dispatch_summary,
+        budget_capped: budgetCapped,
+        deferred_packet_count: deferredPackets.length,
         largest_packet: largestPacketId
             ? {
                 packet_id: largestPacketId,

package/dist/cli/mergeAndIngestCommand.js CHANGED Viewed

@@ -7,6 +7,11 @@ import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchRes
 import { addFileLineCountHints } from "./lineIndex.js";
 import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
 import { buildWorkerResult } from "./workerResult.js";
+import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
+// Schema pointer files prepare-dispatch copies into task-results/ for optional
+// worker self-validation. They are expected, not stray — skip them when
+// scanning for spurious files.
+const PACKET_SCHEMA_FILENAME_SET = new Set(PACKET_SCHEMA_FILENAMES);
 export async function cmdMergeAndIngest(argv) {
     const runId = getFlag(argv, "--run-id");
     if (!runId)
@@ -60,10 +65,22 @@ export async function cmdMergeAndIngest(argv) {
     }
     const passing = [];
     const failing = [];
+    // Pending tasks that were NOT dispatched this round (canary emitted only the
+    // top packet, or a budget cap deferred packets). They are not failures — they
+    // re-enter dispatch on the next round — so they are tracked separately and must
+    // never inflate rejected_count, force a non-zero exit, or gate the completion
+    // marker. Conflating them with failures is what produced the spurious
+    // "180 failed" + exit-2 on every canary merge.
+    const notDispatched = [];
     const seenTaskIds = new Set();
     const spuriousFiles = [];
     const fallbackByTaskId = new Map();
     for (const filename of files) {
+        // Schema pointer files (audit_result/finding/audit_task .schema.json) are
+        // copied into task-results/ by prepare-dispatch for optional worker
+        // self-validation; they are expected, not stray.
+        if (PACKET_SCHEMA_FILENAME_SET.has(filename))
+            continue;
         const filePath = resolve(join(taskResultsDir, filename));
         if (expectedPaths.has(filePath))
             continue;
@@ -100,10 +117,9 @@ export async function cmdMergeAndIngest(argv) {
     for (const task of allTasks) {
         const entry = entryByTaskId.get(task.task_id);
         if (!entry) {
-            failing.push({
-                task_id: task.task_id,
-                errors: ["Missing dispatch result-map entry for assigned task."],
-            });
+            // No result-map entry => this pending task was not dispatched this round.
+            // Leave it pending for the next dispatch; it is not a failure.
+            notDispatched.push(task.task_id);
             continue;
         }
         const filePath = entry.result_path;
@@ -187,7 +203,11 @@ export async function cmdMergeAndIngest(argv) {
     try {
         const dispatch = await readJsonFile(activeDispatchPath);
         if (dispatch.run_id === runId) {
-            dispatch.status = failing.length > 0 ? "active" : "merged";
+            // "merged" only when this round is fully drained: every dispatched task
+            // accepted AND nothing held back. A canary (notDispatched > 0) stays
+            // "active" because the fan-out round on the same run-id still has to merge.
+            dispatch.status =
+                failing.length > 0 || notDispatched.length > 0 ? "active" : "merged";
             await writeJsonFile(activeDispatchPath, dispatch);
         }
     }
@@ -211,7 +231,11 @@ export async function cmdMergeAndIngest(argv) {
         process.stderr.write(`[merge-and-ingest] ${passing.length} accepted, ${failing.length} failed. ` +
             `Retry packets: ${failedPacketIds.join(", ")}\n`);
     }
-    const status = failing.length > 0
+    // "partial" whenever work remains for this run — either genuine dispatched
+    // failures (failing) or tasks held back this round (notDispatched). The exit
+    // code below distinguishes the two: only genuine failures exit non-zero, so a
+    // canary reports status "partial" but exits 0 (progressing, not an error).
+    const status = failing.length > 0 || notDispatched.length > 0
         ? "partial"
         : (result?.progress_made ? "completed" : "no_progress");
     const workerResult = buildWorkerResult({
@@ -231,6 +255,7 @@ export async function cmdMergeAndIngest(argv) {
         status,
         accepted_count: passing.length,
         rejected_count: failing.length,
+        not_dispatched_count: notDispatched.length,
         spurious_file_count: spuriousFiles.length,
         finding_count: findingCount,
         audit_results_path: auditResultsPath,
@@ -244,9 +269,11 @@ export async function cmdMergeAndIngest(argv) {
     };
     // Record a completion marker for a fully-merged run so a stray re-invocation
     // replays this summary (above) instead of re-processing — and possibly
-    // clobbering — terminal state. Only on full success: a partial merge is meant
-    // to be re-run after the failed packets are retried, so it stays replayable.
-    if (failing.length === 0) {
+    // clobbering — terminal state. Only when this round is fully drained: genuine
+    // failures stay replayable for retry, and a canary (notDispatched > 0) must NOT
+    // be marked complete or the fan-out merge on the same run-id would short-circuit
+    // to an idempotent replay and silently drop the fan-out results.
+    if (failing.length === 0 && notDispatched.length === 0) {
         await writeJsonFile(mergeCompletePath, summaryPayload);
     }
     console.log(JSON.stringify(summaryPayload, null, 2));

package/dist/cli/nextStepCommand.js CHANGED Viewed

@@ -455,7 +455,9 @@ export async function cmdNextStep(argv) {
         const designReviewResultsPath = join(artifactsDir, "incoming", "design-review-findings.json");
         await mkdir(join(artifactsDir, "incoming"), { recursive: true });
         const continueCommand = nextStepCommand(root, artifactsDir);
-        const prompt = renderDesignReviewPrompt(result.bundle);
+        const prompt = renderDesignReviewPrompt(result.bundle, {
+            max_units: sessionConfig.design_review?.max_units,
+        });
         const fullPrompt = [
             prompt,
             "## Results path",

package/dist/cli/prompts.d.ts CHANGED Viewed

@@ -10,6 +10,8 @@ export declare function renderDispatchReviewPrompt(params: {
     dispatchQuotaPath: string | null;
     hostCanRestrictSubagentTools: boolean;
     hostCanSelectSubagentModel: boolean;
+    phase?: "canary" | "fan_out";
+    canaryPacketId?: string | null;
 }): string;
 export declare function renderSingleTaskFallbackStepPrompt(params: {
     singleTaskPromptPath: string;

package/dist/cli/prompts.js CHANGED Viewed

@@ -78,10 +78,19 @@ export function renderDispatchReviewPrompt(params) {
             "",
             "Launch one subagent for each entry in the plan.",
         ];
+    const canaryLines = params.phase === "canary"
+        ? [
+            "",
+            "This is a CANARY round: the plan contains only the single top-priority packet. " +
+                "Dispatch it, run merge-and-ingest, then run next-step — the remaining packets fan out " +
+                "on the following step once this packet's result is accepted.",
+        ]
+        : [];
     return [
         "# audit-code dispatch review",
         "",
         ...dispatchDataLines,
+        ...canaryLines,
         "",
         "Pass each `entry.prompt_path` literally to its subagent; do not load packet prompt files into this orchestrator context.",
         "",
@@ -98,6 +107,8 @@ export function renderDispatchReviewPrompt(params) {
         "",
         "**After all waves complete:**",
         "",
+        "Run these backend commands directly — do not pipe them through a token-compression wrapper (e.g. `opentoken wrap`). Their JSON / prompt-contract output is parsed verbatim by the backend and this host, and wrapping corrupts it.",
+        "",
         "Run exactly:",
         "",
         `  ${mergeCommand}`,

package/dist/cli/semanticReviewStep.js CHANGED Viewed

@@ -64,7 +64,10 @@ export async function renderSemanticReviewStep(params) {
         allowedCommands: [mergeCommand, continueCommand],
         allowedMcpTools: ["auditor_merge_and_ingest", "auditor_continue_audit"],
         progress: {
-            summary: `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
+            summary: (dispatch.phase === "canary"
+                ? `Canary: dispatching only the top-priority packet (${dispatch.canary_packet_id}) before fan-out. `
+                : "") +
+                `Dispatching ${dispatch.packet_count} review packet(s) covering ` +
                 `${dispatch.task_count} task(s) in waves of ${dispatch.wave_size}` +
                 (dispatch.skipped_task_count > 0
                     ? `; ${dispatch.skipped_task_count} task(s) already completed.`
@@ -73,6 +76,12 @@ export async function renderSemanticReviewStep(params) {
             pending_tasks: dispatch.task_count,
             completed_tasks: dispatch.skipped_task_count,
             wave_size: dispatch.wave_size,
+            phase: dispatch.phase,
+            canary_packet_id: dispatch.canary_packet_id,
+            agent_count: dispatch.agent_count,
+            wave_count: dispatch.wave_count,
+            confirmation_recommended: dispatch.confirmation_recommended,
+            dispatch_summary: dispatch.dispatch_summary,
         },
         stopCondition: "Dispatch every packet, run merge-and-ingest once, then run next-step.",
         repoRoot: root,
@@ -91,6 +100,8 @@ export async function renderSemanticReviewStep(params) {
             dispatchQuotaPath: dispatch.dispatch_quota_path,
             hostCanRestrictSubagentTools: params.hostCanRestrictSubagentTools,
             hostCanSelectSubagentModel: params.hostCanSelectSubagentModel,
+            phase: dispatch.phase,
+            canaryPacketId: dispatch.canary_packet_id,
         }),
         access: {
             read_paths: [

package/dist/cli/steps.d.ts CHANGED Viewed

@@ -17,6 +17,21 @@ export interface StepProgress {
     completed_tasks?: number;
     /** Subagent parallelism resolved for this dispatch run. */
     wave_size?: number;
+    /** "canary" when only the top packet was emitted this round; "fan_out" otherwise. */
+    phase?: "canary" | "fan_out";
+    /** packet_id of the emitted canary packet when `phase === "canary"`. */
+    canary_packet_id?: string | null;
+    /** Total agents (packets) that will be launched this run. */
+    agent_count?: number;
+    /** Number of dispatch waves for this run (`ceil(agent_count / wave_size)`). */
+    wave_count?: number;
+    /**
+     * True when `agent_count` exceeds the configured confirm threshold and the
+     * loader should pause for user confirmation before fan-out (FINDING-012).
+     */
+    confirmation_recommended?: boolean;
+    /** Human-readable fan-out summary, e.g. "12 agents across 3 waves (wave_size=4)". */
+    dispatch_summary?: string;
 }
 export interface StepArtifact {
     contract_version: typeof STEP_CONTRACT_VERSION;

package/dist/cli.js CHANGED Viewed

@@ -21,7 +21,6 @@ import { deriveAuditState } from "./orchestrator/state.js";
 import { createFreshSessionProvider, resolveFreshSessionProviderName, } from "./providers/index.js";
 import { getSessionConfigPath, loadSessionConfig, readSessionConfigFile, } from "./supervisor/sessionConfig.js";
 import { clearDispatchFiles, ensureSupervisorDirs, } from "./io/runArtifacts.js";
-import { runAuditCodeMcpServer } from "./mcp/server.js";
 import { scheduleWave, buildProviderModelKey, readQuotaState, resolveLimits, resolveHostActiveSubagentLimit, computeMaxSafeConcurrency, getQuotaStatePath, lookupDiscoveredLimits, setQuotaStateDir, } from "./quota/index.js";
 import { DIRECT_CLI_DEFAULTS, getFlag, hasFlag, fromBase64Url, taskResultPath, getArtifactsDir, getRootDir, warnIfNotGitRepo, getBatchResultsDir, getMaxRuns, getAgentBatchSize, getParallelWorkers, getTimeoutMs, getExplicitProvider, getHostModel, getHostMaxActiveSubagents, resolveRunProviderName, chunkArray, getUiMode, looksLikeCliFlag, countLines, } from "./cli/args.js";
 import { ACTIVE_DISPATCH_FILENAME, loadDispatchResultMap, prepareDispatchArtifacts, } from "./cli/dispatch.js";
@@ -539,9 +538,6 @@ async function cmdCleanup(argv) {
         dry_run: dryRun,
     }, null, 2));
 }
-async function cmdMcp(argv) {
-    await runAuditCodeMcpServer(argv.slice(3));
-}
 async function cmdQuota(argv) {
     const artifactsDir = getArtifactsDir(argv);
     const sessionConfig = await loadSessionConfig(artifactsDir).catch(() => ({}));
@@ -707,9 +703,6 @@ async function main(argv) {
         case "cleanup":
             await cmdCleanup(argv);
             return;
-        case "mcp":
-            await cmdMcp(argv);
-            return;
         case "prepare-dispatch":
             await cmdPrepareDispatch(argv);
             return;
@@ -733,7 +726,7 @@ async function main(argv) {
             return;
         default:
             console.error(`Unknown command: ${command}`);
-            console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, mcp, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
+            console.error("Available commands: sample-run, advance-audit, next-step, run-to-completion, worker-run, import-external-analyzer, intake, plan, ingest-results, explain-task, update-runtime-validation, validate, validate-results, requeue, synthesize, cleanup, prepare-dispatch, merge-and-ingest, submit-packet, validate-result, quota, status, dispatch-status");
             process.exitCode = 1;
     }
 }