npm - @delegance/claude-autopilot - Versions diffs - 5.5.2 → 7.2.0 - Mend

@delegance/claude-autopilot 5.5.2 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150) hide show

package/CHANGELOG.md +1776 -6
package/README.md +65 -1
package/bin/_launcher.js +38 -23
package/dist/src/adapters/council/openai.js +12 -6
package/dist/src/adapters/deploy/_http.d.ts +43 -0
package/dist/src/adapters/deploy/_http.js +99 -0
package/dist/src/adapters/deploy/fly.d.ts +206 -0
package/dist/src/adapters/deploy/fly.js +696 -0
package/dist/src/adapters/deploy/index.d.ts +2 -0
package/dist/src/adapters/deploy/index.js +33 -0
package/dist/src/adapters/deploy/render.d.ts +181 -0
package/dist/src/adapters/deploy/render.js +550 -0
package/dist/src/adapters/deploy/types.d.ts +67 -3
package/dist/src/adapters/deploy/vercel.d.ts +17 -1
package/dist/src/adapters/deploy/vercel.js +29 -49
package/dist/src/adapters/pricing.d.ts +36 -0
package/dist/src/adapters/pricing.js +40 -0
package/dist/src/adapters/review-engine/codex.js +10 -7
package/dist/src/cli/autopilot.d.ts +75 -0
package/dist/src/cli/autopilot.js +750 -0
package/dist/src/cli/brainstorm.d.ts +23 -0
package/dist/src/cli/brainstorm.js +131 -0
package/dist/src/cli/costs.d.ts +15 -1
package/dist/src/cli/costs.js +99 -10
package/dist/src/cli/dashboard/index.d.ts +5 -0
package/dist/src/cli/dashboard/index.js +49 -0
package/dist/src/cli/dashboard/login.d.ts +22 -0
package/dist/src/cli/dashboard/login.js +260 -0
package/dist/src/cli/dashboard/logout.d.ts +12 -0
package/dist/src/cli/dashboard/logout.js +45 -0
package/dist/src/cli/dashboard/status.d.ts +30 -0
package/dist/src/cli/dashboard/status.js +65 -0
package/dist/src/cli/dashboard/upload.d.ts +16 -0
package/dist/src/cli/dashboard/upload.js +48 -0
package/dist/src/cli/deploy.d.ts +3 -3
package/dist/src/cli/deploy.js +34 -9
package/dist/src/cli/engine-flag-deprecation.d.ts +14 -0
package/dist/src/cli/engine-flag-deprecation.js +20 -0
package/dist/src/cli/fix.d.ts +18 -0
package/dist/src/cli/fix.js +105 -11
package/dist/src/cli/help-text.d.ts +52 -0
package/dist/src/cli/help-text.js +416 -0
package/dist/src/cli/implement.d.ts +91 -0
package/dist/src/cli/implement.js +196 -0
package/dist/src/cli/index.d.ts +2 -1
package/dist/src/cli/index.js +774 -245
package/dist/src/cli/json-envelope.d.ts +187 -0
package/dist/src/cli/json-envelope.js +270 -0
package/dist/src/cli/json-mode.d.ts +33 -0
package/dist/src/cli/json-mode.js +201 -0
package/dist/src/cli/migrate.d.ts +111 -0
package/dist/src/cli/migrate.js +305 -0
package/dist/src/cli/plan.d.ts +81 -0
package/dist/src/cli/plan.js +149 -0
package/dist/src/cli/pr.d.ts +106 -0
package/dist/src/cli/pr.js +191 -19
package/dist/src/cli/preflight.js +26 -0
package/dist/src/cli/review.d.ts +27 -0
package/dist/src/cli/review.js +126 -0
package/dist/src/cli/runs-watch-renderer.d.ts +45 -0
package/dist/src/cli/runs-watch-renderer.js +275 -0
package/dist/src/cli/runs-watch.d.ts +41 -0
package/dist/src/cli/runs-watch.js +395 -0
package/dist/src/cli/runs.d.ts +122 -0
package/dist/src/cli/runs.js +902 -0
package/dist/src/cli/scaffold.d.ts +39 -0
package/dist/src/cli/scaffold.js +287 -0
package/dist/src/cli/scan.d.ts +93 -0
package/dist/src/cli/scan.js +166 -40
package/dist/src/cli/setup.d.ts +30 -0
package/dist/src/cli/setup.js +137 -0
package/dist/src/cli/spec.d.ts +66 -0
package/dist/src/cli/spec.js +132 -0
package/dist/src/cli/validate.d.ts +29 -0
package/dist/src/cli/validate.js +131 -0
package/dist/src/core/config/schema.d.ts +9 -0
package/dist/src/core/config/schema.js +7 -0
package/dist/src/core/config/types.d.ts +11 -0
package/dist/src/core/council/runner.d.ts +10 -1
package/dist/src/core/council/runner.js +25 -3
package/dist/src/core/council/types.d.ts +7 -0
package/dist/src/core/errors.d.ts +1 -1
package/dist/src/core/errors.js +11 -0
package/dist/src/core/logging/redaction.d.ts +13 -0
package/dist/src/core/logging/redaction.js +20 -0
package/dist/src/core/migrate/schema-validator.js +15 -1
package/dist/src/core/phases/static-rules.d.ts +5 -1
package/dist/src/core/phases/static-rules.js +2 -5
package/dist/src/core/run-state/budget.d.ts +88 -0
package/dist/src/core/run-state/budget.js +141 -0
package/dist/src/core/run-state/cli-internal.d.ts +21 -0
package/dist/src/core/run-state/cli-internal.js +174 -0
package/dist/src/core/run-state/events.d.ts +59 -0
package/dist/src/core/run-state/events.js +512 -0
package/dist/src/core/run-state/lock.d.ts +61 -0
package/dist/src/core/run-state/lock.js +206 -0
package/dist/src/core/run-state/phase-context.d.ts +60 -0
package/dist/src/core/run-state/phase-context.js +108 -0
package/dist/src/core/run-state/phase-registry.d.ts +137 -0
package/dist/src/core/run-state/phase-registry.js +162 -0
package/dist/src/core/run-state/phase-runner.d.ts +80 -0
package/dist/src/core/run-state/phase-runner.js +447 -0
package/dist/src/core/run-state/provider-readback.d.ts +130 -0
package/dist/src/core/run-state/provider-readback.js +426 -0
package/dist/src/core/run-state/replay-decision.d.ts +69 -0
package/dist/src/core/run-state/replay-decision.js +144 -0
package/dist/src/core/run-state/resolve-engine.d.ts +45 -0
package/dist/src/core/run-state/resolve-engine.js +74 -0
package/dist/src/core/run-state/resume-preflight.d.ts +66 -0
package/dist/src/core/run-state/resume-preflight.js +116 -0
package/dist/src/core/run-state/run-phase-with-lifecycle.d.ts +69 -0
package/dist/src/core/run-state/run-phase-with-lifecycle.js +193 -0
package/dist/src/core/run-state/runs.d.ts +57 -0
package/dist/src/core/run-state/runs.js +288 -0
package/dist/src/core/run-state/snapshot.d.ts +14 -0
package/dist/src/core/run-state/snapshot.js +114 -0
package/dist/src/core/run-state/state.d.ts +40 -0
package/dist/src/core/run-state/state.js +164 -0
package/dist/src/core/run-state/types.d.ts +284 -0
package/dist/src/core/run-state/types.js +19 -0
package/dist/src/core/run-state/ulid.d.ts +11 -0
package/dist/src/core/run-state/ulid.js +95 -0
package/dist/src/core/schema-alignment/extractor/index.d.ts +1 -1
package/dist/src/core/schema-alignment/extractor/index.js +2 -2
package/dist/src/core/schema-alignment/extractor/prisma.d.ts +13 -1
package/dist/src/core/schema-alignment/extractor/prisma.js +65 -10
package/dist/src/core/schema-alignment/git-history.d.ts +19 -0
package/dist/src/core/schema-alignment/git-history.js +53 -0
package/dist/src/core/static-rules/rules/brand-tokens.js +2 -2
package/dist/src/core/static-rules/rules/schema-alignment.js +14 -4
package/dist/src/dashboard/auto-upload.d.ts +26 -0
package/dist/src/dashboard/auto-upload.js +107 -0
package/dist/src/dashboard/config.d.ts +22 -0
package/dist/src/dashboard/config.js +109 -0
package/dist/src/dashboard/upload/canonical.d.ts +3 -0
package/dist/src/dashboard/upload/canonical.js +16 -0
package/dist/src/dashboard/upload/chain.d.ts +9 -0
package/dist/src/dashboard/upload/chain.js +27 -0
package/dist/src/dashboard/upload/snapshot.d.ts +23 -0
package/dist/src/dashboard/upload/snapshot.js +66 -0
package/dist/src/dashboard/upload/uploader.d.ts +54 -0
package/dist/src/dashboard/upload/uploader.js +330 -0
package/package.json +19 -3
package/scripts/autoregress.ts +1 -1
package/scripts/test-runner.mjs +4 -0
package/skills/claude-autopilot.md +1 -1
package/skills/make-interfaces-feel-better/SKILL.md +104 -0
package/skills/simplify-ui/SKILL.md +103 -0
package/skills/ui/SKILL.md +117 -0
package/skills/ui-ux-pro-max/SKILL.md +90 -0

package/dist/src/core/run-state/phase-runner.d.ts ADDED Viewed

@@ -0,0 +1,80 @@
+import { type BudgetCheck, type BudgetConfig } from './budget.ts';
+import { type PhaseContext } from './phase-context.ts';
+import { type ReadbackResult } from './provider-readback.ts';
+import { type ExternalRef, type WriterId } from './types.ts';
+/** What `RunPhase.onResume` receives when a previous attempt of the same
+ *  phaseIdx exists. Phase 6 will fully wire this; in Phase 2 we expose the
+ *  shape so callers can author against it without a later breaking change.
+ *
+ *  `runDir`, `runId` and `phaseIdx` presumably carry the same values as the
+ *  same-named `ParentRunContext` fields for the run being resumed — confirm
+ *  against whichever caller constructs this object. */
+export interface PhaseResumeContext {
+    runDir: string;
+    runId: string;
+    phaseIdx: number;
+    /** All externalRefs recorded for this phase across prior attempts. */
+    externalRefs: ExternalRef[];
+    /** How many `phase.start` events have been observed for this phaseIdx
+     *  (i.e. the attempt count of the prior run). */
+    attempts: number;
+    /** Whether the previous attempt was a phase.success (was the phase already
+     *  done before the current resume began?). */
+    succeeded: boolean;
+}
+/** The phase contract — the only object an existing pipeline needs to
+ *  implement to be run by the engine. Existing phases are wrapped, NOT
+ *  rewritten; in Phase 2 we ship the wrapper but no actual phase consumes
+ *  it yet.
+ *
+ *  How `runPhase` (phase-runner.js) uses each member:
+ *  - `name`: labels the phase in emitted events and names its snapshot
+ *    file (`phases/<name>.json`).
+ *  - `idempotent` / `hasSideEffects`: recorded on `phase.start` and passed
+ *    to the replay decision when a prior success exists for the phaseIdx.
+ *  - `estimateCost`: optional; called only when a budget is configured on
+ *    the parent context, before `phase.start` is emitted.
+ *  - `run`: the phase body. Its resolved value is what `runPhase` returns;
+ *    anything it throws is rethrown after `phase.failed` is recorded. */
+export interface RunPhase<I = unknown, O = unknown> {
+    readonly name: string;
+    readonly idempotent: boolean;
+    readonly hasSideEffects: boolean;
+    estimateCost?(input: I): {
+        lowUSD: number;
+        highUSD: number;
+    };
+    run(input: I, ctx: PhaseContext): Promise<O>;
+    /** Called when resuming after a previous failure / completion. Decides
+     *  whether to skip, retry, abort, or bubble to a human. Default behavior
+     *  (when this method is absent) is encoded in `runPhase` itself: idempotent
+     *  phases retry, side-effecting phases require `--force-replay`.
+     *
+     *  NOTE(review): the `runPhase` body in phase-runner.js takes its replay
+     *  decision from `decideReplay` and contains no call to `onResume` —
+     *  confirm where (or whether) this hook is invoked. */
+    onResume?(prev: PhaseResumeContext): Promise<'skip' | 'retry' | 'abort' | 'needs-human'>;
+}
+/** What the caller passes in. We require runDir/runId/writerId to be already
+ *  established (the run-creator already did this).
+ *
+ *  Sub-phases started through `ctx.subPhase` get a fresh context holding
+ *  only runDir, runId, writerId and a derived phaseIdx; `forceReplay`,
+ *  `budget`, `nonInteractive`, `confirmBudgetPause` and `verifyRefs` are not
+ *  forwarded to them. */
+export interface ParentRunContext {
+    runDir: string;
+    runId: string;
+    writerId: WriterId;
+    /** Index of this phase within the run's `phases[]`. */
+    phaseIdx: number;
+    /** When true, override the side-effects gate even if a prior success
+     *  exists. When the resulting decision is to retry, the runner records
+     *  a `replay.override` event noting the override; provider readbacks
+     *  are skipped entirely. */
+    forceReplay?: boolean;
+    /** Phase 4 — optional budget enforcement config. When omitted the
+     *  runner is back-compat: no `budget.check` event, no preflight, no
+     *  rejection. When present, the runner consults `checkPhaseBudget`
+     *  BEFORE emitting `phase.start` and may throw `budget_exceeded`. */
+    budget?: BudgetConfig;
+    /** When true, a `pause` budget decision becomes `hard-fail` instead of
+     *  prompting the user. Callers in CI / `--json` mode MUST set this.
+     *  Default: false (interactive). */
+    nonInteractive?: boolean;
+    /** Override the interactive confirm prompt. Returning `true` proceeds,
+     *  `false` rejects. Mainly a test seam; the default uses readline. */
+    confirmBudgetPause?: (check: BudgetCheck) => Promise<boolean>;
+    /** Phase 6 — override the readback layer. Defaults to `verifyRefs` from
+     *  `provider-readback.ts`, which uses the registered providers. Tests
+     *  inject a stub to avoid hitting `gh` / network. If the supplied
+     *  function throws, the runner treats every prior ref as `unknown`. */
+    verifyRefs?: (refs: ReadonlyArray<ExternalRef>) => Promise<ReadbackResult[]>;
+}
+export type { PhaseContext } from './phase-context.ts';
+/** Run a single phase with full lifecycle instrumentation.
+ *
+ *  Before the phase starts the runner may emit, depending on `parentCtx`
+ *  and on prior events for the same phaseIdx:
+ *    run.warning        — a prior success is being short-circuited (skip)
+ *    phase.needs-human  — replay refused; the call then rejects
+ *    replay.override    — `forceReplay` drove a retry of a prior success
+ *    budget.check       — only when `parentCtx.budget` is set
+ *
+ *  Emits, in order:
+ *    phase.start  — always (unless idempotent short-circuit fires first)
+ *    phase.cost   — zero or more, emitted by the phase via ctx.emitCost
+ *    phase.externalRef — zero or more, via ctx.emitExternalRef
+ *    phase.success | phase.failed — exactly one
+ *
+ *  `phase.start` is also not emitted when the replay decision is
+ *  needs-human / abort or when the budget preflight rejects.
+ *
+ *  Writes phases/<name>.json after either terminal event so a crash between
+ *  the event and the snapshot is recoverable from events.ndjson.
+ *
+ *  Resolves to the phase's output (or, on a skip, the result persisted in
+ *  the prior snapshot). Rejects with whatever `phase.run` threw, or with a
+ *  `GuardrailError` raised by the replay gate or the budget preflight. */
+export declare function runPhase<I, O>(phase: RunPhase<I, O>, input: I, parentCtx: ParentRunContext): Promise<O>;
+//# sourceMappingURL=phase-runner.d.ts.map

package/dist/src/core/run-state/phase-runner.js ADDED Viewed

@@ -0,0 +1,447 @@
+// src/core/run-state/phase-runner.ts
+//
+// v6 Phase 2 — phase wrapper / lifecycle layer.
+//
+// `runPhase` is the orchestrator that wraps a single `RunPhase` invocation:
+//
+//   1. emit phase.start (with attempt counter + idempotent/hasSideEffects
+//      flags)
+//   2. call phase.run(input, ctx) — the user's phase body
+//   3. on success → emit phase.success + write phases/<name>.json snapshot
+//   4. on throw   → emit phase.failed + write a failed snapshot + rethrow
+//
+// Idempotency / side-effect gating:
+//
+//   - If a prior phase.success exists for this (runDir, phaseIdx), the
+//     runner asks `decideReplay` what to do (the policy — e.g. how
+//     `phase.idempotent === true` maps to a skip — lives in
+//     ./replay-decision.js). On `skip-already-applied` the runner
+//     short-circuits without emitting phase.start: it emits a
+//     `run.warning` whose details carry reason `skip-already-applied` so
+//     observers can attribute the short-circuit, rewrites the snapshot
+//     with `meta.skipped = true` (attempts unchanged), and returns the
+//     persisted prior result — or throws GuardrailError `superseded`
+//     when no result was persisted. See `handleSkipAlreadyApplied` below.
+//   - If a prior phase.success exists AND the decision is `needs-human`
+//     (e.g. a `phase.hasSideEffects === true` phase replayed without
+//     `--force-replay`), the runner refuses: it emits `phase.needs-human`
+//     and throws GuardrailError `superseded` carrying the prior
+//     externalRefs and readbacks in `details` so a CI / human consumer
+//     can resolve.
+//
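+// What this file delegates or deliberately does NOT do:
+//
+//   - Budget policy. `estimateCost` is part of the interface and
+//     `runPhase` runs a budget preflight when `parentCtx.budget` is set,
+//     but the policy check itself lives in `checkPhaseBudget`
+//     (./budget.js); this file only records `budget.check` and acts on
+//     the returned decision.
+//   - Provider read-back ("is PR #123 still open?"). The readback
+//     implementation lives in ./provider-readback.js; this file only
+//     calls `verifyRefs` on prior externalRefs and hands the results to
+//     `decideReplay`.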
+//   - Locking. `runPhase` does NOT acquire the per-run advisory lock — the
+//     caller (createRun / future resume verb) holds it for the lifetime of
+//     the run. We just need a writerId to stamp events; we accept it from
+//     parentCtx.
+//
+// Spec: docs/specs/v6-run-state-engine.md "Phase contract", "Run lifecycle",
+// "Idempotency rules + external operation ledger".
+import * as readline from 'node:readline';
+import { GuardrailError } from "../errors.js";
+import { checkPhaseBudget } from "./budget.js";
+import { appendEvent, readEvents } from "./events.js";
+import { buildPhaseContext, collectExternalRefs, countPhaseAttempts, countPhaseSuccesses, sumPhaseCost, } from "./phase-context.js";
+import { decideReplay, } from "./replay-decision.js";
+import { verifyRefs as defaultVerifyRefs, } from "./provider-readback.js";
+import { readPhaseSnapshot, writePhaseSnapshot } from "./snapshot.js";
+import { RUN_STATE_SCHEMA_VERSION, } from "./types.js";
+// ----------------------------------------------------------------------------
+// runPhase — the orchestrator
+// ----------------------------------------------------------------------------
+/** Run a single phase with full lifecycle instrumentation.
+ *
+ *  Steps, in order:
+ *    1. Replay gating — only when events.ndjson already holds a
+ *       phase.success for `parentCtx.phaseIdx`. `decideReplay` picks
+ *       skip-already-applied / needs-human / abort / retry; the first
+ *       three end the call before phase.start is emitted.
+ *    2. Budget preflight — only when `parentCtx.budget` is set. Emits
+ *       `budget.check`; a `hard-fail`, or a declined `pause`, throws.
+ *    3. phase.start, then `phase.run(input, ctx)`.
+ *    4. The terminal event, then the snapshot write.
+ *
+ *  Emits, in order:
+ *    phase.start  — always (unless idempotent short-circuit fires first)
+ *    phase.cost   — zero or more, emitted by the phase via ctx.emitCost
+ *    phase.externalRef — zero or more, via ctx.emitExternalRef
+ *    phase.success | phase.failed — exactly one
+ *
+ *  Writes phases/<name>.json after either terminal event so a crash between
+ *  the event and the snapshot is recoverable from events.ndjson.
+ *
+ *  @param phase     The phase to execute (name, flags, optional
+ *                   estimateCost, run).
+ *  @param input     Passed unchanged to `phase.estimateCost` and `phase.run`.
+ *  @param parentCtx Run identity (runDir, runId, writerId, phaseIdx) plus
+ *                   the optional forceReplay / budget / test-seam overrides.
+ *  @returns The value `phase.run` resolved to, or — on a
+ *           skip-already-applied decision — the `result` stored in the
+ *           prior snapshot.
+ *  @throws GuardrailError code `superseded` on a needs-human decision, or
+ *          on a skip with no persisted result; code `user_input` on an
+ *          abort decision; code `budget_exceeded` when the budget check
+ *          hard-fails or the pause prompt is declined. Any error thrown by
+ *          `phase.run` is rethrown after `phase.failed` and the failed
+ *          snapshot are written.
+ *
+ *  NOTE(review): `phase.onResume` is never called here; the replay outcome
+ *  comes solely from `decideReplay`. Confirm that is intended. */
+export async function runPhase(phase, input, parentCtx) {
+    const { runDir, runId, writerId, phaseIdx, forceReplay, budget, nonInteractive, confirmBudgetPause, verifyRefs, } = parentCtx;
+    // -- Idempotency / side-effect gating (Phase 6) ------------------------
+    // We replay events.ndjson once up-front to detect prior outcomes for this
+    // phaseIdx. Cheap — Phase 1 already reads the whole file for replayState.
+    const prior = readEvents(runDir);
+    const priorSuccessCount = countPhaseSuccesses(prior.events, phaseIdx);
+    const priorAttemptCount = countPhaseAttempts(prior.events, phaseIdx);
+    const priorRefs = collectExternalRefs(prior.events, phaseIdx);
+    if (priorSuccessCount > 0) {
+        // Run readbacks ONLY when we'd actually need them (side-effect phases
+        // with refs). Idempotent / no-side-effect / no-refs branches don't
+        // need a network call to decide.
+        let readbacks = [];
+        if (phase.hasSideEffects && !phase.idempotent && priorRefs.length > 0 && !forceReplay) {
+            const verifier = verifyRefs ?? defaultVerifyRefs;
+            try {
+                readbacks = await verifier(priorRefs);
+            }
+            catch {
+                // Defense in depth — verifyRefs is supposed to fail-closed per ref,
+                // but if the wrapper itself throws we collapse all refs to unknown.
+                readbacks = priorRefs.map(r => ({
+                    refKind: r.kind,
+                    refId: r.id,
+                    existsOnPlatform: false,
+                    currentState: 'unknown',
+                }));
+            }
+        }
+        const decision = decideReplay({
+            phaseName: phase.name,
+            hasPriorSuccess: true,
+            priorAttempts: priorAttemptCount,
+            idempotent: phase.idempotent,
+            hasSideEffects: phase.hasSideEffects,
+            externalRefs: priorRefs,
+            readbacks,
+            forceReplay: forceReplay === true,
+        });
+        if (decision.decision === 'skip-already-applied') {
+            return handleSkipAlreadyApplied({
+                decision,
+                phase,
+                phaseIdx,
+                priorEvents: prior.events,
+                priorAttemptCount,
+                priorRefs,
+                runDir,
+                runId,
+                writerId,
+            });
+        }
+        if (decision.decision === 'needs-human') {
+            appendEvent(runDir, {
+                event: 'phase.needs-human',
+                phase: phase.name,
+                phaseIdx,
+                reason: decision.reason,
+                nextActions: [
+                    `Inspect prior externalRefs for phase ${phase.name}.`,
+                    `Re-run with --force-replay if you accept the risk of duplicate side effects.`,
+                ],
+            }, { writerId, runId });
+            throw new GuardrailError(`phase ${phase.name} previously succeeded; ${decision.reason}`, {
+                code: 'superseded',
+                provider: 'run-state',
+                details: {
+                    runDir,
+                    phaseIdx,
+                    priorRefs,
+                    readbacks: decision.readbacksConsulted,
+                    reason: 'side-effecting-replay-needs-human',
+                },
+            });
+        }
+        if (decision.decision === 'abort') {
+            throw new GuardrailError(`phase ${phase.name} aborted by replay decision: ${decision.reason}`, {
+                code: 'user_input',
+                provider: 'run-state',
+                details: { runDir, phaseIdx, priorRefs, reason: 'replay-decision-abort' },
+            });
+        }
+        // decision.decision === 'retry' — continue. If forceReplay drove this,
+        // record an explicit replay.override event so the durable log shows the
+        // override happened (per spec).
+        if (forceReplay === true) {
+            appendEvent(runDir, {
+                event: 'replay.override',
+                phase: phase.name,
+                phaseIdx,
+                reason: decision.reason,
+                refsConsulted: priorRefs,
+            }, { writerId, runId });
+        }
+    }
+    // -- Budget preflight (Phase 4) ----------------------------------------
+    // Runs AFTER idempotency gating (we don't gate replays we're already
+    // going to skip) and BEFORE phase.start (a rejection means the phase
+    // never started — no phase.start, no phase.failed; the runner throws
+    // GuardrailError budget_exceeded so the caller sees a typed failure
+    // and the run can be marked aborted/paused at the orchestrator level).
+    if (budget) {
+        const actualSoFarUSD = sumRunCost(prior.events);
+        const estimate = phase.estimateCost ? phase.estimateCost(input) : null;
+        const check = checkPhaseBudget({
+            budget,
+            phaseName: phase.name,
+            phaseIdx,
+            estimatedCost: estimate,
+            actualSoFarUSD,
+            nonInteractive: nonInteractive === true,
+        });
+        appendEvent(runDir, {
+            event: 'budget.check',
+            phase: phase.name,
+            phaseIdx,
+            decision: check.decision,
+            estimatedHigh: check.estimatedHigh,
+            actualSoFar: check.actualSoFar,
+            reserveApplied: check.reserveApplied,
+            capRemaining: check.capRemaining,
+            reason: check.reason,
+            scope: check.scope,
+        }, { writerId, runId });
+        if (check.decision === 'hard-fail') {
+            throw new GuardrailError(`phase ${phase.name} blocked by budget: ${check.reason}`, {
+                code: 'budget_exceeded',
+                provider: 'run-state',
+                details: {
+                    runDir,
+                    phaseIdx,
+                    check,
+                },
+            });
+        }
+        if (check.decision === 'pause') {
+            const confirm = confirmBudgetPause ?? defaultConfirmBudgetPause;
+            const proceed = await confirm(check);
+            if (!proceed) {
+                throw new GuardrailError(`phase ${phase.name} blocked by budget (user denied resume): ${check.reason}`, {
+                    code: 'budget_exceeded',
+                    provider: 'run-state',
+                    details: {
+                        runDir,
+                        phaseIdx,
+                        check,
+                        userDenied: true,
+                    },
+                });
+            }
+        }
+    }
+    // -- Phase start --------------------------------------------------------
+    const attempt = priorAttemptCount + 1;
+    const startedAtMs = Date.now();
+    appendEvent(runDir, {
+        event: 'phase.start',
+        phase: phase.name,
+        phaseIdx,
+        idempotent: phase.idempotent,
+        hasSideEffects: phase.hasSideEffects,
+        attempt,
+    }, { writerId, runId });
+    // Build the per-phase context. `subPhase` is wired below.
+    const ctx = buildPhaseContext({
+        runDir,
+        runId,
+        phaseName: phase.name,
+        phaseIdx,
+        writerId,
+        subPhase: makeSubPhaseFactory({ runDir, runId, writerId, parentPhaseIdx: phaseIdx }),
+    });
+    // -- Execute ------------------------------------------------------------
+    let output;
+    try {
+        output = await phase.run(input, ctx);
+    }
+    catch (err) {
+        const durationMs = Date.now() - startedAtMs;
+        const message = err instanceof Error ? err.message : String(err);
+        const errorCode = err instanceof GuardrailError ? err.code : undefined;
+        appendEvent(runDir, {
+            event: 'phase.failed',
+            phase: phase.name,
+            phaseIdx,
+            durationMs,
+            error: message,
+            ...(errorCode !== undefined ? { errorCode } : {}),
+        }, { writerId, runId });
+        // Re-read events to capture costs / refs the phase emitted before throw.
+        const after = readEvents(runDir);
+        const failedSnapshot = {
+            schema_version: RUN_STATE_SCHEMA_VERSION,
+            name: phase.name,
+            index: phaseIdx,
+            status: 'failed',
+            idempotent: phase.idempotent,
+            hasSideEffects: phase.hasSideEffects,
+            startedAt: new Date(startedAtMs).toISOString(),
+            endedAt: new Date().toISOString(),
+            durationMs,
+            costUSD: sumPhaseCost(after.events, phaseIdx),
+            attempts: attempt,
+            lastError: message,
+            artifacts: [],
+            externalRefs: collectExternalRefs(after.events, phaseIdx),
+        };
+        writePhaseSnapshot(runDir, failedSnapshot);
+        throw err;
+    }
+    // -- Success ------------------------------------------------------------
+    const durationMs = Date.now() - startedAtMs;
+    appendEvent(runDir, {
+        event: 'phase.success',
+        phase: phase.name,
+        phaseIdx,
+        durationMs,
+        artifacts: [],
+    }, { writerId, runId });
+    // Re-read to capture costs / refs the phase emitted during run().
+    const after = readEvents(runDir);
+    // Phase 6 — persist the phase output so a future skip-already-applied
+    // can return it without re-execution. Only values that survive
+    // JSON.stringify → JSON.parse are persisted; if the phase returned
+    // something JSON cannot encode (a function, a bigint, an object with
+    // circular refs, …) we store nothing and rely on the phase being
+    // idempotent enough that a future caller doesn't actually need the
+    // prior value. Note the stored copy is the JSON form, so it can differ
+    // from `output` (e.g. a Buffer or Date is kept as its toJSON()
+    // rendering, and function-valued properties are dropped).
+    const persistedResult = jsonRoundTrip(output);
+    const successSnapshot = {
+        schema_version: RUN_STATE_SCHEMA_VERSION,
+        name: phase.name,
+        index: phaseIdx,
+        status: 'succeeded',
+        idempotent: phase.idempotent,
+        hasSideEffects: phase.hasSideEffects,
+        startedAt: new Date(startedAtMs).toISOString(),
+        endedAt: new Date().toISOString(),
+        durationMs,
+        costUSD: sumPhaseCost(after.events, phaseIdx),
+        attempts: attempt,
+        artifacts: [],
+        externalRefs: collectExternalRefs(after.events, phaseIdx),
+        ...(persistedResult !== undefined ? { result: persistedResult } : {}),
+    };
+    writePhaseSnapshot(runDir, successSnapshot);
+    return output;
+}
+/** Phase 6 — handle a `skip-already-applied` decision.
+ *
+ *  Always, in this order: emits a `run.warning` describing the
+ *  short-circuit, reads the prior snapshot for `phase.name`, and rewrites
+ *  that snapshot with `meta.skipped=true` (attempts left unchanged; the
+ *  rewritten snapshot carries no startedAt / endedAt / durationMs fields).
+ *  Then surfaces the prior result from the persisted snapshot if available;
+ *  otherwise throws a typed `superseded` so the caller can react (matches
+ *  the Phase 2 contract for idempotent short-circuits).
+ *
+ *  Synchronous: `runPhase` returns this call's value directly from its
+ *  async body, so a throw here surfaces as a rejection of `runPhase`. */
+function handleSkipAlreadyApplied(opts) {
+    const { decision, phase, phaseIdx, priorEvents, priorAttemptCount, priorRefs, runDir, runId, writerId, } = opts;
+    appendEvent(runDir, {
+        event: 'run.warning',
+        message: `phase ${phase.name} short-circuited: ${decision.reason}`,
+        details: {
+            phase: phase.name,
+            phaseIdx,
+            reason: 'skip-already-applied',
+            decision: decision.decision,
+            readbacks: decision.readbacksConsulted,
+        },
+    }, { writerId, runId });
+    const priorSnapshot = readPhaseSnapshot(runDir, phase.name);
+    const persistedResult = priorSnapshot?.result;
+    const refreshed = {
+        schema_version: RUN_STATE_SCHEMA_VERSION,
+        name: phase.name,
+        index: phaseIdx,
+        status: 'succeeded',
+        idempotent: phase.idempotent,
+        hasSideEffects: phase.hasSideEffects,
+        costUSD: sumPhaseCost(priorEvents, phaseIdx),
+        attempts: priorAttemptCount, // unchanged — we did NOT start
+        artifacts: priorSnapshot?.artifacts ?? [],
+        externalRefs: priorRefs.length > 0 ? priorRefs : (priorSnapshot?.externalRefs ?? []), // event-log refs win; snapshot refs are the fallback
+        meta: { skipped: true, reason: 'skip-already-applied', decisionReason: decision.reason },
+        ...(persistedResult !== undefined ? { result: persistedResult } : {}),
+    };
+    writePhaseSnapshot(runDir, refreshed);
+    // If we have a prior result, return it. Otherwise throw `superseded` so
+    // the caller knows to consult the snapshot / onResume hook (matches the
+    // Phase 2 contract for idempotent short-circuits without a stored value).
+    if (persistedResult !== undefined) {
+        return persistedResult;
+    }
+    throw new GuardrailError(`phase ${phase.name} was already completed (skip-already-applied) but ` +
+        `no prior result is persisted — the caller should consult phases/${phase.name}.json or onResume.`, {
+        code: 'superseded',
+        provider: 'run-state',
+        details: {
+            runDir,
+            phaseIdx,
+            priorRefs,
+            readbacks: decision.readbacksConsulted,
+            decision: 'skip-already-applied',
+        },
+    });
+}
+/** JSON round-trip a value to detect serializability. Returns the round-
+ *  tripped value on success, undefined on any failure (circular refs,
+ *  bigint, function, undefined, etc.). Persisting only round-trippable
+ *  values keeps the snapshot file deterministic and prevents subtle
+ *  type-drift between the in-memory value and what gets restored. */
+function jsonRoundTrip(value) {
+    if (value === undefined)
+        return undefined;
+    try {
+        const serialized = JSON.stringify(value);
+        if (serialized === undefined)
+            return undefined;
+        return JSON.parse(serialized);
+    }
+    catch {
+        return undefined;
+    }
+}
+/** Build a `subPhase` callable bound to a parent phase. Sub-phases use a
+ *  synthetic phaseIdx derived from the parent's index plus a monotonic
+ *  counter so the durable log distinguishes "outer phase 1, child 0" from
+ *  "outer phase 1, child 1".
+ *
+ *  Encoding: subPhase index = (parentPhaseIdx + 1) * 1000 + childOrdinal.
+ *  The +1 offset is critical: without it, parent index 0 (the FIRST phase
+ *  of any pipeline, since createRun is 0-based) would yield child indices
+ *  1, 2, 3… which collide with the regular top-level phases at those
+ *  exact indices — a sub-phase's idempotency / side-effect events would
+ *  then incorrectly gate the real top-level phase. Caught by Cursor
+ *  Bugbot on PR #87 (HIGH). With the +1 offset:
+ *    parent=0 → children 1001, 1002, 1003
+ *    parent=1 → children 2001, 2002, 2003
+ *    parent=N (N<999) → children (N+1)*1000+1..N
+ *  Top-level pipelines have ~10 phases in practice, so the 1000 multiplier
+ *  + the +1 offset keep collisions impossible at any realistic depth.
+ *  Phase 6 may revisit this if nested sub-phases ever need a real tree
+ *  representation. */
+function makeSubPhaseFactory(opts) {
+    let childOrdinal = 0;
+    return async function subPhase(child, input) {
+        const childIdx = (opts.parentPhaseIdx + 1) * 1000 + (childOrdinal += 1);
+        return runPhase(child, input, {
+            runDir: opts.runDir,
+            runId: opts.runId,
+            writerId: opts.writerId,
+            phaseIdx: childIdx,
+        });
+    };
+}
+// ----------------------------------------------------------------------------
+// Phase 4 — budget helpers
+// ----------------------------------------------------------------------------
+/** Sum every `phase.cost` event across the WHOLE run (not just the current
+ *  phaseIdx). The budget cap is run-wide; sub-phase costs and prior-phase
+ *  costs both count against `perRunUSD`. */
+function sumRunCost(events) {
+    let total = 0;
+    for (const ev of events) {
+        if (ev.event === 'phase.cost')
+            total += ev.costUSD;
+    }
+    return total;
+}
+/** Default interactive confirm prompt used when no `confirmBudgetPause`
+ *  override is supplied. Uses node:readline so the runner doesn't pull in
+ *  a dependency just for prompting. */
+async function defaultConfirmBudgetPause(check) {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    try {
+        const message = `Budget warning: ${check.reason}\n` +
+            `  phase: ${check.phase} (idx ${check.phaseIdx})\n` +
+            `  actualSoFar: $${check.actualSoFar.toFixed(2)}\n` +
+            `  reserveApplied: $${check.reserveApplied.toFixed(2)}\n` +
+            `  capRemaining: $${check.capRemaining.toFixed(2)}\n` +
+            `Continue and accept the overage? [y/N] `;
+        const answer = await new Promise(resolve => rl.question(message, resolve));
+        return /^y(es)?$/i.test(answer.trim());
+    }
+    finally {
+        rl.close();
+    }
+}
+//# sourceMappingURL=phase-runner.js.map

package/dist/src/core/run-state/provider-readback.d.ts ADDED Viewed

@@ -0,0 +1,130 @@
+import type { ExternalRef, ExternalRefKind } from './types.ts';
+/** Canonical platform-state vocabulary. Every readback maps its provider's
+ *  raw state into one of these so the decision matrix stays provider-agnostic.
+ *  `unknown` is the fail-closed sentinel — any time the readback can't make a
+ *  confident assertion it returns `unknown` and the caller treats that as
+ *  needs-human.
+ *
+ *  Example mapping documented in this file for migration ledger entries:
+ *  `applied` ⇒ `merged`, `pending` ⇒ `open`, `errored` ⇒ `failed`. */
+export type ReadbackState = 'open' | 'closed' | 'merged' | 'live' | 'failed' | 'rolled-back' | 'unknown';
+/** What a readback returns when asked to verify a single external ref. */
+export interface ReadbackResult {
+    /** Kind of the ref this result describes (presumably echoed from the
+     *  verified ref — confirm in the implementation). */
+    refKind: ExternalRefKind;
+    /** Id of the ref this result describes (presumably echoed from the
+     *  verified ref — confirm in the implementation). */
+    refId: string;
+    /** Whether the platform reports the ref still exists. False on 404,
+     *  hard error, missing ID, or any throw. */
+    existsOnPlatform: boolean;
+    /** Provider state expressed in the canonical `ReadbackState` vocabulary;
+     *  `unknown` whenever no confident assertion can be made. */
+    currentState: ReadbackState;
+    /** Free-form provider-specific metadata. Engine doesn't introspect.
+     *  Surfaces in the replay decision's `details` for human triage. */
+    metadata?: Record<string, unknown>;
+}
+/** A readback verifies one ref kind against its source-of-truth platform.
+ *  Implementations MUST NOT throw — any failure (network, auth, unknown
+ *  shape) collapses to `existsOnPlatform: false, currentState: 'unknown'`.
+ *  This is the fail-closed contract: an unknown-state readback always
+ *  routes to needs-human, never to a silent skip-already-applied. */
+export interface ProviderReadback {
+    /** Stable identifier — useful in logs / decision details. */
+    readonly name: string;
+    /** Which ref kinds this readback handles. The registry filters first by
+     *  kind; if multiple entries match a kind, `providers` then disambiguates
+     *  on `ref.provider`. */
+    readonly handles: ReadonlyArray<ExternalRefKind>;
+    /** Optional provider-name allowlist. When present, the registry only
+     *  routes a ref to this readback if `ref.provider` is in this list. Lets
+     *  multiple readbacks share a kind (e.g. vercel/fly/render all handle
+     *  `deploy`) without shadowing each other. Omit for kind-exclusive
+     *  readbacks (e.g. github handles `github-pr`). */
+    readonly providers?: ReadonlyArray<string>;
+    /** Verify a single ref against the platform. Under the fail-closed
+     *  contract above, failures are reported through the resolved
+     *  `ReadbackResult` rather than by rejecting the promise. */
+    verifyRef(ref: ExternalRef): Promise<ReadbackResult>;
+}
+/** Test seam — replace the gh CLI invocation in tests without monkey-patching
+ *  child_process. Returns null on any failure (matches runSafe semantics).
+ *
+ *  The call is synchronous: the result is a plain `string | null`, not a
+ *  promise. `args` is presumably the argument vector handed to `gh`, and the
+ *  string presumably its captured output — confirm in the implementation. */
+export interface GhRunner {
+    (args: string[]): string | null;
+}
+/** Create the GitHub `ProviderReadback`.
+ *
+ *  @param opts Optional settings. `opts.gh` supplies a `GhRunner` (the test
+ *              seam for the gh CLI); presumably a real gh invocation is used
+ *              when it is omitted — confirm in the implementation. */
+export declare function makeGithubReadback(opts?: {
+    gh?: GhRunner;
+}): ProviderReadback;
+/** Minimal adapter-status surface. Mirrors `DeployAdapter.status()` from
+ *  `src/adapters/deploy/types.ts` but typed locally so this module doesn't
+ *  pull the adapter package at init time. */
+export interface DeployStatusFetcher {
+    /** Fetch the status of one deploy, addressed by `deployId`.
+     *  NOTE(review): how each `status` literal is translated into a
+     *  `ReadbackState` is decided by the implementation and is not visible
+     *  in this declaration. */
+    status(input: {
+        deployId: string;
+    }): Promise<{
+        status: 'pass' | 'fail' | 'in-progress' | 'fail_rolled_back' | 'fail_rollback_failed';
+        deployId: string;
+        deployUrl?: string;
+    }>;
+}
+/** Maps a provider name to its `DeployStatusFetcher`. A `null` result
+ *  presumably means no adapter is available for that provider — confirm in
+ *  the implementation. */
+export type DeployAdapterResolver = (provider: string) => DeployStatusFetcher | null;
+/** Register a resolver that maps a provider name (e.g. "vercel") to a
+ *  status-fetcher. The CLI wires this from `src/adapters/deploy/index.ts`
+ *  during boot; tests inject mocks directly.
+ *
+ *  @param resolver Resolver to install. `null` is accepted by the signature,
+ *                  presumably to clear the registration — confirm in the
+ *                  implementation. */
+export declare function registerDeployAdapterResolver(resolver: DeployAdapterResolver | null): void;
+/** Reset the registered resolver. Test-only seam.
+ *  NOTE(review): presumably leaves no resolver registered, like passing
+ *  `null` to `registerDeployAdapterResolver` — confirm. */
+export declare function __resetDeployAdapterResolver(): void;
+/** Create a deploy `ProviderReadback`.
+ *
+ *  @param name      Presumably becomes the readback's `name` — confirm.
+ *  @param providers Presumably becomes the readback's `providers` allowlist,
+ *                   i.e. the `ref.provider` values it accepts — confirm. */
+export declare function makeDeployReadback(name: string, providers: ReadonlyArray<string>): ProviderReadback;
+/** Minimal migration-state fetcher. Implementations query the per-env
+ *  Supabase project's `migration_state` table. We type it abstractly so this
+ *  module doesn't pull the supabase client at init time. Returning null
+ *  indicates "fetch failed" — fail-closed treats it as unknown.
+ *
+ *  Note that `fetch` also returns null for a version that was not found, so
+ *  a caller cannot tell "not found" apart from "lookup error". */
+export interface MigrationStateFetcher {
+    /** Look up a migration version. Returns null on any error or not-found. */
+    fetch(version: string): Promise<{
+        applied: boolean;
+        appliedAt?: string;
+    } | null>;
+}
+/** Register the migration-state fetcher used by the supabase readback.
+ *  CLI boot wires this; tests inject directly.
+ *
+ *  @param fetcher Fetcher to install. `null` is accepted by the signature,
+ *                 presumably to clear the registration — confirm in the
+ *                 implementation. */
+export declare function registerMigrationStateFetcher(fetcher: MigrationStateFetcher | null): void;
+/** Reset the registered migration-state fetcher. Presumably a test-only
+ *  seam like `__resetDeployAdapterResolver` — confirm. */
+export declare function __resetMigrationStateFetcher(): void;
+/** State of a single planned migration as reported by the dispatcher's
+ *  ledger. The fetcher returns the per-batch plan + the live ledger view so
+ *  the readback can compute the aggregate state without re-querying. */
+export interface MigrationBatchPlannedItem {
+    /** Migration name only. The externalRef id of a post-effect
+     *  `migration-version` ref has the shape `<env>:<migration>`; entries in
+     *  the planned list carry just the `<migration>` part. */
+    version: string;
+    /** Live ledger state. `applied` ⇒ merged, `pending` ⇒ open, `errored` ⇒
+     *  failed. */
+    state: 'applied' | 'pending' | 'errored';
+}
+/** Minimal `migration-batch` fetcher. Looks up the planned set for a batch
+ *  ref id (typically `${env}:${hash}` or `${env}:pre-dispatch:${ts}` per the
+ *  v6.2.1 spec) and returns the live ledger state of each. Returning null
+ *  indicates "no plan recorded for this batch" — the readback treats that
+ *  as unknown (fail closed). */
+export interface MigrationBatchFetcher {
+    /** Fetch the plan for `batchId` (the batch ref id described above).
+     *  Resolves to the planned items with their live ledger state, or null
+     *  when no plan is recorded for the batch. */
+    fetch(batchId: string): Promise<{
+        planned: MigrationBatchPlannedItem[];
+    } | null>;
+}
+/** Register the `migration-batch` fetcher. The CLI boot wires this from the
+ *  per-skill adapter; tests inject mocks directly.
+ *
+ *  @param fetcher Fetcher to install. `null` is accepted by the signature,
+ *                 presumably to clear the registration — confirm in the
+ *                 implementation. */
+export declare function registerMigrationBatchFetcher(fetcher: MigrationBatchFetcher | null): void;
+/** Reset the registered `migration-batch` fetcher. Presumably a test-only
+ *  seam like `__resetDeployAdapterResolver` — confirm. */
+export declare function __resetMigrationBatchFetcher(): void;
+/** Create the supabase `ProviderReadback`. It takes no arguments; per the
+ *  registration docs in this file it uses the fetcher installed via
+ *  `registerMigrationStateFetcher`, and presumably the one installed via
+ *  `registerMigrationBatchFetcher` as well — confirm in the implementation. */
+export declare function makeSupabaseReadback(): ProviderReadback;
+/** Live registry — exposed as a getter so tests / callers can introspect.
+ *
+ *  @returns The registered readbacks, typed read-only. */
+export declare function getProviderReadbacks(): ReadonlyArray<ProviderReadback>;
+/** Replace the registry (test seam). Pass null to reset to defaults.
+ *
+ *  @param list Readbacks to install in place of the current registry, or
+ *              `null` to restore the defaults. */
+export declare function setProviderReadbacks(list: ProviderReadback[] | null): void;
+/** Look up the readback that handles a given ref. Two-pass match: first try
+ *  a strict (kind + provider) match so multiple readbacks sharing a kind
+ *  (vercel/fly/render all on `deploy`) don't shadow each other; then fall
+ *  back to a kind-only match for readbacks that don't declare a provider
+ *  allowlist (e.g. the github readback handles `github-pr` regardless of
+ *  ref.provider). Returns null if no registered readback claims this ref —
+ *  caller treats null as "no readback available, route to needs-human".
+ *
+ *  Bugbot MEDIUM (PR #91): without provider-aware matching, the first deploy
+ *  readback registered (vercel) won every `deploy`/`rollback-target` lookup
+ *  and the fly/render readbacks were dead code.
+ *
+ *  @param ref The external ref to route.
+ *  @returns The matching readback, or null when none claims the ref. */
+export declare function readbackForRef(ref: ExternalRef): ProviderReadback | null;
+/** Verify a list of refs in parallel. Returns one ReadbackResult per ref.
+ *  Refs without a registered readback get an unknown-state result so the
+ *  decision matrix can attribute the gap. Order is preserved.
+ *
+ *  @param refs Refs to verify.
+ *  @returns One result per input ref, in the same order as `refs`. */
+export declare function verifyRefs(refs: ReadonlyArray<ExternalRef>): Promise<ReadbackResult[]>;
+//# sourceMappingURL=provider-readback.d.ts.map