npm - @delegance/claude-autopilot - Versions diffs - 5.2.2 → 6.2.2 - Mend

@delegance/claude-autopilot 5.2.2 → 6.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (130) hide show

package/CHANGELOG.md +1027 -1
package/README.md +104 -17
package/dist/src/adapters/council/claude.js +2 -1
package/dist/src/adapters/council/openai.js +14 -7
package/dist/src/adapters/deploy/_http.d.ts +43 -0
package/dist/src/adapters/deploy/_http.js +99 -0
package/dist/src/adapters/deploy/fly.d.ts +206 -0
package/dist/src/adapters/deploy/fly.js +696 -0
package/dist/src/adapters/deploy/generic.d.ts +39 -0
package/dist/src/adapters/deploy/generic.js +98 -0
package/dist/src/adapters/deploy/index.d.ts +15 -0
package/dist/src/adapters/deploy/index.js +78 -0
package/dist/src/adapters/deploy/render.d.ts +181 -0
package/dist/src/adapters/deploy/render.js +550 -0
package/dist/src/adapters/deploy/types.d.ts +221 -0
package/dist/src/adapters/deploy/types.js +15 -0
package/dist/src/adapters/deploy/vercel.d.ts +143 -0
package/dist/src/adapters/deploy/vercel.js +426 -0
package/dist/src/adapters/pricing.d.ts +36 -0
package/dist/src/adapters/pricing.js +40 -0
package/dist/src/adapters/review-engine/claude.js +2 -1
package/dist/src/adapters/review-engine/codex.js +12 -8
package/dist/src/adapters/review-engine/gemini.js +2 -1
package/dist/src/adapters/review-engine/openai-compatible.js +2 -1
package/dist/src/adapters/sdk-loader.d.ts +15 -0
package/dist/src/adapters/sdk-loader.js +77 -0
package/dist/src/cli/autopilot.d.ts +71 -0
package/dist/src/cli/autopilot.js +735 -0
package/dist/src/cli/brainstorm.d.ts +23 -0
package/dist/src/cli/brainstorm.js +131 -0
package/dist/src/cli/costs.d.ts +15 -1
package/dist/src/cli/costs.js +99 -10
package/dist/src/cli/deploy.d.ts +71 -0
package/dist/src/cli/deploy.js +539 -0
package/dist/src/cli/fix.d.ts +18 -0
package/dist/src/cli/fix.js +105 -11
package/dist/src/cli/help-text.d.ts +52 -0
package/dist/src/cli/help-text.js +400 -0
package/dist/src/cli/implement.d.ts +91 -0
package/dist/src/cli/implement.js +196 -0
package/dist/src/cli/index.js +784 -222
package/dist/src/cli/json-envelope.d.ts +187 -0
package/dist/src/cli/json-envelope.js +270 -0
package/dist/src/cli/json-mode.d.ts +33 -0
package/dist/src/cli/json-mode.js +201 -0
package/dist/src/cli/migrate.d.ts +111 -0
package/dist/src/cli/migrate.js +305 -0
package/dist/src/cli/plan.d.ts +81 -0
package/dist/src/cli/plan.js +149 -0
package/dist/src/cli/pr.d.ts +106 -0
package/dist/src/cli/pr.js +191 -19
package/dist/src/cli/preflight.js +102 -1
package/dist/src/cli/review.d.ts +27 -0
package/dist/src/cli/review.js +126 -0
package/dist/src/cli/runs-watch-renderer.d.ts +45 -0
package/dist/src/cli/runs-watch-renderer.js +275 -0
package/dist/src/cli/runs-watch.d.ts +41 -0
package/dist/src/cli/runs-watch.js +395 -0
package/dist/src/cli/runs.d.ts +122 -0
package/dist/src/cli/runs.js +902 -0
package/dist/src/cli/scan.d.ts +93 -0
package/dist/src/cli/scan.js +166 -40
package/dist/src/cli/spec.d.ts +66 -0
package/dist/src/cli/spec.js +132 -0
package/dist/src/cli/validate.d.ts +29 -0
package/dist/src/cli/validate.js +131 -0
package/dist/src/core/config/schema.d.ts +43 -0
package/dist/src/core/config/schema.js +25 -0
package/dist/src/core/config/types.d.ts +17 -0
package/dist/src/core/council/runner.d.ts +10 -1
package/dist/src/core/council/runner.js +25 -3
package/dist/src/core/council/types.d.ts +7 -0
package/dist/src/core/errors.d.ts +1 -1
package/dist/src/core/errors.js +12 -0
package/dist/src/core/logging/redaction.d.ts +13 -0
package/dist/src/core/logging/redaction.js +20 -0
package/dist/src/core/migrate/detector-rules.js +6 -0
package/dist/src/core/migrate/schema-validator.js +22 -1
package/dist/src/core/phases/static-rules.d.ts +5 -1
package/dist/src/core/phases/static-rules.js +2 -5
package/dist/src/core/run-state/budget.d.ts +88 -0
package/dist/src/core/run-state/budget.js +141 -0
package/dist/src/core/run-state/cli-internal.d.ts +21 -0
package/dist/src/core/run-state/cli-internal.js +174 -0
package/dist/src/core/run-state/events.d.ts +59 -0
package/dist/src/core/run-state/events.js +504 -0
package/dist/src/core/run-state/lock.d.ts +61 -0
package/dist/src/core/run-state/lock.js +206 -0
package/dist/src/core/run-state/phase-context.d.ts +60 -0
package/dist/src/core/run-state/phase-context.js +108 -0
package/dist/src/core/run-state/phase-registry.d.ts +137 -0
package/dist/src/core/run-state/phase-registry.js +162 -0
package/dist/src/core/run-state/phase-runner.d.ts +80 -0
package/dist/src/core/run-state/phase-runner.js +447 -0
package/dist/src/core/run-state/provider-readback.d.ts +130 -0
package/dist/src/core/run-state/provider-readback.js +426 -0
package/dist/src/core/run-state/replay-decision.d.ts +69 -0
package/dist/src/core/run-state/replay-decision.js +144 -0
package/dist/src/core/run-state/resolve-engine.d.ts +100 -0
package/dist/src/core/run-state/resolve-engine.js +190 -0
package/dist/src/core/run-state/resume-preflight.d.ts +66 -0
package/dist/src/core/run-state/resume-preflight.js +116 -0
package/dist/src/core/run-state/run-phase-with-lifecycle.d.ts +73 -0
package/dist/src/core/run-state/run-phase-with-lifecycle.js +186 -0
package/dist/src/core/run-state/runs.d.ts +57 -0
package/dist/src/core/run-state/runs.js +288 -0
package/dist/src/core/run-state/snapshot.d.ts +14 -0
package/dist/src/core/run-state/snapshot.js +114 -0
package/dist/src/core/run-state/state.d.ts +40 -0
package/dist/src/core/run-state/state.js +164 -0
package/dist/src/core/run-state/types.d.ts +278 -0
package/dist/src/core/run-state/types.js +13 -0
package/dist/src/core/run-state/ulid.d.ts +11 -0
package/dist/src/core/run-state/ulid.js +95 -0
package/dist/src/core/schema-alignment/extractor/index.d.ts +1 -1
package/dist/src/core/schema-alignment/extractor/index.js +2 -2
package/dist/src/core/schema-alignment/extractor/prisma.d.ts +13 -1
package/dist/src/core/schema-alignment/extractor/prisma.js +65 -10
package/dist/src/core/schema-alignment/git-history.d.ts +19 -0
package/dist/src/core/schema-alignment/git-history.js +53 -0
package/dist/src/core/static-rules/rules/brand-tokens.js +2 -2
package/dist/src/core/static-rules/rules/schema-alignment.js +14 -4
package/package.json +9 -5
package/scripts/autoregress.ts +3 -2
package/skills/claude-autopilot.md +1 -1
package/skills/make-interfaces-feel-better/SKILL.md +104 -0
package/skills/migrate/SKILL.md +193 -47
package/skills/simplify-ui/SKILL.md +103 -0
package/skills/ui/SKILL.md +117 -0
package/skills/ui-ux-pro-max/SKILL.md +90 -0

package/README.md CHANGED Viewed

@@ -1,7 +1,11 @@
 # @delegance/claude-autopilot
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![GitHub](https://img.shields.io/badge/GitHub-axledbetter%2Fclaude--autopilot-181717?logo=github)](https://github.com/axledbetter/claude-autopilot) [![npm](https://img.shields.io/npm/v/@delegance/claude-autopilot.svg)](https://www.npmjs.com/package/@delegance/claude-autopilot)
 **Autonomous development pipeline for Claude Code. Brainstorm → spec → plan → implement → migrate → validate → PR → review → merge — all from your terminal, on your codebase, with your test suite.**
+**Open source, MIT-licensed, runs on your machine with your API keys.** No hosted agent, no per-seat subscription — `npm install -g @delegance/claude-autopilot` and you're done.
 ```bash
 claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
 # → writes spec (reviewed by Codex) → writes plan (reviewed by Codex) →
@@ -13,6 +17,8 @@ claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
 *No hosted agent. No per-seat subscription. Runs locally on your machine, against your real repo, using your API keys. Every phase is a Claude Code skill you can intervene in, rewire, or run by itself.*
+**See it work end-to-end:** [DEMO.md](DEMO.md) — one real autonomous run on a Python codebase. 12 minutes wall clock, $2.20 spend, 5 new tests, multi-file integration, zero manual intervention. Honest about what's bounded today.
 ---
 ## Benchmark
@@ -29,24 +35,31 @@ Every finding came with a concrete remediation (often a code patch or named libr
 ## Why this vs the alternatives
-AI coding tools fall into three buckets. Here's where claude-autopilot sits.
-| Tool | Shape | Hosted? | Model lock-in | Pipeline structure | You can intervene mid-flow? |
+| Tool | Where code lives | Pricing model | Models | Pipeline | Intervenable? |
 |---|---|---|---|---|---|
-| **Devin** (Cognition) | Autonomous agent | Yes (SaaS, $500/mo) | Cognition's stack | Opaque | No — watch a dashboard |
-| **GitHub Copilot Workspace** | Spec → plan → PR | Yes | Copilot only | Fixed, non-extensible | Edit the plan, that's it |
-| **Factory Droids** | Multi-agent workflow | Yes (per-seat) | Factory's stack | Fixed | Limited |
-| **Cursor BugBot / Copilot Review / CodeRabbit** | Async PR reviewer | Yes | Vendor's model | Single phase (review only) | N/A — post-hoc only |
-| **Aider / Cline / Cursor agent mode** | Interactive pair programming | Local | User's choice | None — single-shot prompts | Continuous |
-| **OpenHands / SWE-agent** | Open-ended agent framework | Local | User's choice | None — agent decides | Rare, research-grade |
-| **claude-autopilot** | **Opinionated local pipeline** | **Local** | **Any LLM (Claude / GPT / Gemini / Groq / Ollama)** | **Fixed but rewireable, skill-per-phase** | **Every phase. All state on disk.** |
+| **Devin** (Cognition) | Hosted sandbox | Per-ACU (cloud markup) | Cognition's stack | Opaque | No — dashboard only |
+| **Factory Droids** | Hosted | Per-task + seat | Factory's stack | Fixed | Limited |
+| **GitHub Copilot Workspace** | GitHub-hosted | Per-seat ($) | Copilot only | Fixed, non-extensible | Edit the plan |
+| **Cursor / Copilot agent mode** | Local IDE | Per-seat ($) | Vendor's model | None — single-shot | Continuous |
+| **Cursor BugBot / CodeRabbit** | Hosted | Per-PR or seat | Vendor's model | Review only | Post-hoc |
+| **Aider / Cline** | Local CLI | Free + your API key | User's choice | None | Continuous |
+| **OpenHands / SWE-agent** | Local research | Free | User's choice | Agent decides | Rare |
+| **claude-autopilot** | **Local CLI, your repo** | **Free + your existing Claude subscription** | **Multi-model per role (Claude + Codex + Gemini)** | **Skill-per-phase, rewireable** | **Every phase, all state on disk** |
+Three things only this product gives you:
-The architectural differences that matter most in practice:
+1. **Multi-model council.** Same design question goes to Claude + Codex + Gemini in parallel; a fourth model synthesizes the consensus. Different blind spots, different recommendations, one merged answer. **No other tool dispatches multi-model on a per-decision basis.**
+2. **Your code never leaves your machine.** No cloud sandbox. No SaaS markup. The `git push` that happens at the end is from your laptop. For private repos, regulated industries, or anyone who doesn't want their unfinished code on someone else's servers — this is the only autonomous-agent shape that fits.
+3. **Ships as a Claude Code skill, not a competing IDE.** `/brainstorm`, `/autopilot`, `/migrate`, `/validate` are first-class Claude Code commands. As Claude Code grows, autopilot rides that adoption. You don't switch tools to use it; it's already there.
-1. **Multi-model by design.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Different model for each role, swap any of them. The pipeline's phases are explicit contracts, not one opaque API call.
-2. **Your stack, not a sandbox.** It runs your `npm test`, your `prisma migrate`, your `gh pr create`, your `ruff check`. If it works in your terminal, it works in the pipeline.
-3. **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
-4. **Test-gated auto-revert as a first-class command.** `claude-autopilot fix --verify` patches a file, runs your full test suite, and reverts on failure. Built into the CLI, not a wrapper you write yourself.
+Plus the four practical differences:
+- **Multi-model by role.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Swap any of them.
+- **Your stack, not a sandbox.** Runs your `npm test`, your `prisma migrate`, your `gh pr create`. If it works in your terminal, it works in the pipeline.
+- **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
+- **Test-gated auto-revert.** `claude-autopilot fix --verify` patches a file, runs your tests, reverts on failure. Built into the CLI, not a wrapper.
+**Real numbers from a real run:** [DEMO.md](DEMO.md) — autonomous multi-file change on a Python codebase, **12 minutes, $2.20, zero manual intervention.**
 ## 30-second quickstart
@@ -67,6 +80,34 @@ claude-autopilot brainstorm "add rate limiting to the public API"
 claude-autopilot run --pr 123
 ```
+## Run State Engine (v6)
+Persistent state for autopilot runs. Resume after crashes, enforce hard budget caps, and surface typed JSON events for CI consumers — on by default since v6.1 (opt-out until v7), all on disk.
+```yaml
+# guardrail.config.yaml
+engine:
+  enabled: true              # default in v6.1+; explicit `false` is deprecated and removed in v7
+budgets:
+  perRunUSD: 10              # hard stop; mandatory runtime guard
+  perPhaseUSD: 5
+```
+```bash
+claude-autopilot scan --all                  # any command — engine writes a per-run dir
+claude-autopilot runs list                   # newest-first, with status / cost / lastPhase
+claude-autopilot runs show 01HZK7P3D8Q9V…    # state snapshot + optional event tail
+claude-autopilot run resume 01HZK7P3D8Q9V…   # lookup-only today; live execution in a later v6.x
+claude-autopilot runs gc --older-than-days 7 # retire completed runs
+```
+Every state transition appends a typed event to `.guardrail-cache/runs/<ulid>/events.ndjson`; every CLI verb supports `--json` with strict stdout-envelope / stderr-NDJSON channel discipline. Side-effect phase replay consults persisted `externalRefs` plus a live provider read-back so resume is safe by construction.
+**v6.1+ ships with the engine ON by default** (flipped from v6.0's off-by-default after the stabilization criteria in [`docs/specs/v6.1-default-flip.md`](docs/specs/v6.1-default-flip.md) were met). Users who want the legacy v5.x output shape can opt out for one minor version via `--no-engine`, `CLAUDE_AUTOPILOT_ENGINE=off`, or `engine.enabled: false` — each prints a deprecation warning and is removed in v7.
+→ [`docs/v6/quickstart.md`](docs/v6/quickstart.md) — five-minute setup
+→ [`docs/v6/migration-guide.md`](docs/v6/migration-guide.md) — full v5.x → v6 walkthrough with precedence matrix, per-phase idempotency rules, and troubleshooting
 ## The pipeline, phase by phase
 Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can invoke any phase directly (`/brainstorm`, `/plan`, `/migrate`, `/validate`) without running the full pipeline. You can also rewire the pipeline by editing the `autopilot` skill.
@@ -83,10 +124,52 @@ Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can in
 | **PR** | `commit-push-pr` | Opens the PR with auto-generated title, summary, and test plan | Claude |
 | **Review** | `review-2pass` / `council` | Multi-model review of the diff (critical pass + informational pass) | Multiple |
 | **Triage** | `bugbot` | Fetches automated reviewer findings, auto-fixes real bugs, dismisses false positives | Claude |
+| **Deploy** | `deploy` | Deploys via configured adapter (`vercel` \| `fly` \| `render` \| `generic`) with optional log streaming, health check, and bounded auto-rollback (see [Deploy phase](#deploy-phase)) | Deterministic |
 ### Migrate phase
-Configure your migration tool in `.autopilot/stack.md`. The pipeline reads stack.md, dispatches to the configured skill (`migrate@1` for generic; `migrate.supabase@1` for rich Supabase ledger; `none@1` to skip), and runs your tool with full safety: structured argv (no shell injection), 4-flag CI prod gate, hash-chained audit log. Run `claude-autopilot init` to auto-detect your stack. See [docs/skills/rich-migrate-contract.md](docs/skills/rich-migrate-contract.md) for the skill contract and [docs/skills/version-compatibility.md](docs/skills/version-compatibility.md) for the version model.
+Configure your migration tool in `.autopilot/stack.md`. The pipeline reads stack.md, dispatches to the configured skill (`migrate@1` for generic; `migrate.supabase@1` for rich Supabase ledger; `none@1` to skip), and runs your tool with full safety: structured argv (no shell injection), 4-flag CI prod gate, hash-chained audit log. Run `claude-autopilot init` to auto-detect your stack — the detector recognizes Rails, Alembic, Django, Prisma, Drizzle, golang-migrate, dbmate, flyway, supabase-cli, ecto, typeorm, and falls back to a "configure manually" path. See [docs/skills/rich-migrate-contract.md](docs/skills/rich-migrate-contract.md) for the skill contract and [docs/skills/version-compatibility.md](docs/skills/version-compatibility.md) for the version model.
+Generic example (Rails):
+```yaml
+migrate:
+  skill: "migrate@1"
+  envs:
+    dev:
+      command: { exec: "rails", args: ["db:migrate"] }
+      env_file: ".env.development"
+    prod:
+      command: { exec: "rails", args: ["db:migrate", "RAILS_ENV=production"] }
+```
+See `skills/migrate/SKILL.md` for examples covering Alembic, Django, Prisma, Drizzle, golang-migrate, dbmate, flyway, and custom scripts.
+### Deploy phase
+Configure your deploy target in `guardrail.config.yaml` under a `deploy:` block. Four adapters ship in 5.6:
+- **`vercel`** — Vercel v13 deployments API. SSE+NDJSON log streaming, native rollback via `/promote`. Auth: `VERCEL_TOKEN`.
+- **`fly`** — Fly.io Machines API. WebSocket log streaming, native rollback with simulated fallback. Auth: `FLY_API_TOKEN`. Requires the image to be pre-pushed (`fly deploy --build-only --push`).
+- **`render`** — Render REST API. Polling-based log stream with `(timestamp, logId)` cursor dedup, simulated rollback (re-deploys prior commit). Auth: `RENDER_API_KEY`.
+- **`generic`** — runs any shell `deployCommand` (`vercel --prod`, `kubectl apply`, `make deploy`, etc). No platform integration; `--watch` and `rollback` aren't supported.
+Each adapter speaks the same `DeployAdapter` contract: `deploy()`, optional `status()` / `rollback()` / `streamLogs()`, plus a `capabilities` block (`streamMode: 'websocket' | 'polling' | 'none'`, `nativeRollback: boolean`) so the CLI can degrade UX honestly (polling adapters print a one-line stderr notice under `--watch`). Auto-rollback is bounded: max one rollback per deploy attempt, with `runHealthCheck` capped at 5×6s. Log lines emitted into PR comments run through a redaction pass (`AKIA…`, `sk-…`, `eyJ…`, `ghp_`, `xoxb-`, plus configurable patterns) so build output can't leak secrets.
+Example (Fly):
+```yaml
+deploy:
+  adapter: fly
+  app: my-app
+  image: registry.fly.io/my-app:latest
+  region: ord
+  watchBuildLogs: true
+  healthCheckUrl: https://my-app.fly.dev/health
+  rollbackOn: [healthCheckFailure]
+```
+`claude-autopilot doctor` checks for the relevant auth env var when an adapter is configured. See `docs/specs/v5.6-fly-render-adapters.md` for the full adapter contract.
 ## What's distinctive
@@ -315,6 +398,10 @@ ANTHROPIC_API_KEY=sk-ant-... claude-autopilot scan --all
 We do not claim 13/13 reflects every real-world repo — it's a reproducible upper bound on a fixture that exercises the categories we explicitly target.
+## Contributing
+Issues and PRs welcome — https://github.com/axledbetter/claude-autopilot/issues. The pipeline literally builds itself; many features in this repo were implemented by autopilot running against autopilot ([DEMO.md](DEMO.md) walks through six self-eat PRs with cost trajectory $10 → ~$2.50). Read [CONTRIBUTING.md](CONTRIBUTING.md) if it exists, otherwise: clone, `npm install`, `npm test`, open a PR.
 ## License
-MIT
+MIT — see [LICENSE](LICENSE).

package/dist/src/adapters/council/claude.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import Anthropic from '@anthropic-ai/sdk';
 import { GuardrailError } from "../../core/errors.js";
 import { classifyError } from "../review-engine/prompt-builder.js";
+import { loadAnthropic } from "../sdk-loader.js";
 const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
 const MAX_OUTPUT_TOKENS = 2048;
 // Default Opus 4.7 rates — env override for other models.
@@ -14,6 +14,7 @@ export function makeClaudeCouncilAdapter(model, label) {
             if (!apiKey) {
                 throw new GuardrailError('ANTHROPIC_API_KEY not set', { code: 'auth', provider: 'claude' });
             }
+            const Anthropic = await loadAnthropic();
             const client = new Anthropic({ apiKey });
             let response;
             try {

package/dist/src/adapters/council/openai.js CHANGED Viewed

@@ -1,22 +1,28 @@
-import OpenAI from 'openai';
 import { GuardrailError } from "../../core/errors.js";
 import { classifyError } from "../review-engine/prompt-builder.js";
+import { loadOpenAI } from "../sdk-loader.js";
+import { getModelPricing } from "../pricing.js";
 const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
 const MAX_OUTPUT_TOKENS = 2048;
 // Models that ONLY work via the Responses API (not chat.completions).
 // Codex variants and the o-series reasoning models all 404 on chat.completions.
-// Without this branch, putting `gpt-5.3-codex` (the typical default) in
+// Without this branch, putting `gpt-5.3-codex` (the prior default) in
 // council.models throws model_not_found, AND the synthesizer (also typically
-// gpt-5.3-codex) fails the same way — so the whole council returns `partial`
+// the same model) fails the same way — so the whole council returns `partial`
 // with no synthesis. That regression made the marketed multi-model differentiator
 // unusable for any user who only had OPENAI_API_KEY.
+// gpt-5.5 (the new default, 2026-04-23) drops the `-codex` suffix and works
+// via standard chat.completions, so it is intentionally NOT matched here.
 function isResponsesOnlyModel(model) {
     // Case-insensitive match on: any name containing "codex", names
     // starting with "o1".."o9", or names starting with "gpt-5.3-".
     return /codex|^o[1-9]|^gpt-5\.3-/i.test(model);
 }
-// Per-million-token rates for gpt-5.3-codex (override via env for other models).
-// Mirrors the review-engine codex adapter's pricing.
-const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? 1.25);
-const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? 10.0);
+// Per-million-token rates. Bugbot LOW PR #93: wired to read from the
+// canonical MODEL_PRICING table (no longer dead code). Resolution order:
+// env override → pricing entry for CODEX_MODEL (default gpt-5.5) →
+// numeric fallback. Mirrors the review-engine codex adapter.
+const _pricing = getModelPricing(process.env.CODEX_MODEL ?? 'gpt-5.5');
+const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? _pricing?.inputPer1M ?? 5.0);
+const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? _pricing?.outputPer1M ?? 30.0);
 export function makeOpenAICouncilAdapter(model, label) {
     return {
         label,
@@ -25,6 +31,7 @@ export function makeOpenAICouncilAdapter(model, label) {
             if (!apiKey) {
                 throw new GuardrailError('OPENAI_API_KEY not set', { code: 'auth', provider: 'openai' });
             }
+            const OpenAI = await loadOpenAI();
             const client = new OpenAI({ apiKey });
             const userInput = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;
             try {

package/dist/src/adapters/deploy/_http.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * Options for {@link fetchWithRetry}. `provider` is mandatory — it's the
+ * value baked into the `GuardrailError` thrown when retries exhaust, so
+ * callers must always identify themselves. `attempts`, `baseMs`, and
+ * `sleepImpl` are tuning knobs with sensible defaults that match the
+ * pre-extraction behavior of all three adapters.
+ */
+export interface FetchWithRetryOptions {
+    /** Adapter name baked into the GuardrailError thrown on exhaustion. */
+    provider: string;
+    /** Total number of tries, counting the first request. Default: 3. */
+    attempts?: number;
+    /** Base backoff in ms — the wait after failed try `i` (0-based) is `baseMs * 2 ** i`. Default: 500. */
+    baseMs?: number;
+    /** Injected sleep — adapters pass `this.sleep` so tests stay instant. Falls back to a `setTimeout`-based sleep when omitted. */
+    sleepImpl?: (ms: number) => Promise<void>;
+}
+/**
+ * Free-function port of the per-adapter `fetchWithRetry` helper. Behavior
+ * is intentionally identical to the previous private copies:
+ * - 5xx responses are retried with exponential backoff (`baseMs * 2 ** i`).
+ *   A 5xx received on the final attempt is NOT thrown — that response is
+ *   returned to the caller like any other status.
+ * - 4xx responses are returned as-is so the per-adapter `assertOkOrThrow`
+ *   can classify them precisely (auth vs not_found vs invalid_config).
+ * - Network errors are retried unless `AbortError`, which is rethrown so
+ *   intentional cancellation is never silently retried.
+ * - On exhaustion by network error (the final attempt itself rejected),
+ *   throws `GuardrailError({ code: 'transient_network', provider })` with
+ *   the last error's message embedded.
+ *
+ * Adapters call this as `await fetchWithRetry(this.fetchImpl, url, init,
+ * { sleepImpl: this.sleep, provider: 'fly' })` — passing both the fetch
+ * implementation and the sleep impl explicitly keeps the adapter's
+ * `nowImpl`/`sleepImpl` injection points working for tests.
+ *
+ * @param fetchImpl - fetch implementation invoked as `fetchImpl(url, init)`.
+ * @param url - Request URL, passed through unchanged on every attempt.
+ * @param init - Request init, passed through unchanged on every attempt.
+ * @param opts - Retry tuning plus the mandatory `provider` label.
+ * @returns The first response that is not a retryable 5xx, or the final
+ *   attempt's response whatever its status.
+ */
+export declare function fetchWithRetry(fetchImpl: typeof fetch, url: string, init: RequestInit, opts: FetchWithRetryOptions): Promise<Response>;
+/**
+ * Read a `Response` body as text and return at most its first 500
+ * characters. The whole body is read before truncation, and the limit is
+ * counted in string characters (UTF-16 code units), not bytes. Used by the
+ * per-adapter `assertOkOrThrow` helpers to embed the API's error body in
+ * the thrown `GuardrailError` for debugging without dumping multi-MB HTML
+ * pages. Returns `<no body>` if the body is unreadable (e.g. already
+ * consumed, network error mid-read).
+ */
+export declare function safeReadBody(res: Response): Promise<string>;
+//# sourceMappingURL=_http.d.ts.map

package/dist/src/adapters/deploy/_http.js ADDED Viewed

@@ -0,0 +1,99 @@
+// src/adapters/deploy/_http.ts
+//
+// Shared HTTP plumbing extracted in v5.6 Phase 5 — see
+// docs/specs/v5.6-fly-render-adapters.md § "Implementation phases".
+//
+// Three identical copies of `fetchWithRetry` and `safeReadBody` lived in
+// `vercel.ts`, `fly.ts`, and `render.ts` before this module existed. Phase 5
+// consolidates them as free functions so each adapter imports them as
+// HTTP plumbing without `this` context. The deliberate decision (per the
+// spec and PR #72 review) was to wait until a third copy materialized
+// before reaching for shared abstractions, so each adapter's seam was
+// settled.
+//
+// Out of scope on purpose:
+// - `assertOkOrThrow` style HTTP-status mappers stay per-adapter. Each one
+//   composes a different error message (auth-token doc URL, 422 hint copy)
+//   and reads a different request-id header (`Fly-Request-Id` vs
+//   `x-request-id`). Sharing those would force a configuration object that's
+//   bigger than the function it replaces.
+// - The Vercel `fetchEventsWithRetry` SSE helper is still adapter-private —
+//   it has different retry rules (404 race retried, 5xx retried with
+//   different shape) and returns the last response rather than throwing on
+//   exhaustion.
+import { GuardrailError } from "../../core/errors.js";
+const DEFAULT_SLEEP = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); // fallback sleep used when opts.sleepImpl is not supplied
+/**
+ * Free-function port of the per-adapter `fetchWithRetry` helper. Behavior
+ * is intentionally identical to the previous private copies:
+ * - 5xx responses are retried with exponential backoff (`baseMs * 2 ** i`).
+ *   A 5xx received on the final attempt is NOT thrown — that response is
+ *   returned to the caller like any other status.
+ * - 4xx responses are returned as-is so the per-adapter `assertOkOrThrow`
+ *   can classify them precisely (auth vs not_found vs invalid_config).
+ * - Network errors are retried unless `AbortError`, which is rethrown so
+ *   intentional cancellation is never silently retried.
+ * - On exhaustion by network error (the final attempt itself rejected),
+ *   throws `GuardrailError({ code: 'transient_network', provider })` with
+ *   the last error's message embedded. The same throw is reached without
+ *   any request being made if `attempts` is 0 or less, because the loop
+ *   body never runs.
+ *
+ * Adapters call this as `await fetchWithRetry(this.fetchImpl, url, init,
+ * { sleepImpl: this.sleep, provider: 'fly' })` — passing both the fetch
+ * implementation and the sleep impl explicitly keeps the adapter's
+ * `nowImpl`/`sleepImpl` injection points working for tests.
+ *
+ * @param fetchImpl - fetch implementation invoked as `fetchImpl(url, init)`.
+ * @param url - Request URL, passed through unchanged on every attempt.
+ * @param init - Request init, passed through unchanged on every attempt.
+ * @param opts - `provider` (required) plus optional `attempts` (default 3),
+ *   `baseMs` (default 500) and `sleepImpl` (default: `setTimeout`-based).
+ * @returns The first response that is not a retryable 5xx, or the final
+ *   attempt's response whatever its status.
+ */
+export async function fetchWithRetry(fetchImpl, url, init, opts) {
+    const attempts = opts.attempts ?? 3;
+    const baseMs = opts.baseMs ?? 500;
+    const sleep = opts.sleepImpl ?? DEFAULT_SLEEP;
+    let lastErr;
+    for (let i = 0; i < attempts; i++) {
+        try {
+            const res = await fetchImpl(url, init);
+            // 5xx is transient — retry while attempts remain. 4xx is the caller's
+            // problem — fail fast so the per-adapter error mapper can classify it
+            // precisely. A 5xx on the last attempt falls through to `return res`.
+            if (res.status >= 500 && res.status < 600 && i < attempts - 1) {
+                lastErr = new Error(`HTTP ${res.status}`);
+                await sleep(baseMs * 2 ** i);
+                continue;
+            }
+            return res;
+        }
+        catch (err) {
+            lastErr = err;
+            // AbortError is intentional cancellation — surface it directly without
+            // retry. Wrapping or retrying would silently defeat caller-side
+            // cancellation.
+            if (err instanceof Error && err.name === 'AbortError')
+                throw err;
+            if (i < attempts - 1) {
+                await sleep(baseMs * 2 ** i);
+                continue;
+            }
+        }
+    }
+    throw new GuardrailError(`${capitalize(opts.provider)} API unreachable after ${attempts} attempts: ${lastErr?.message ?? String(lastErr)}`, { code: 'transient_network', provider: opts.provider });
+}
+/**
+ * Read a `Response` body as text and return at most its first 500
+ * characters. The whole body is read via `res.text()` before truncation,
+ * and the limit is counted in string characters (UTF-16 code units), not
+ * bytes. Used by the per-adapter `assertOkOrThrow` helpers to embed the
+ * API's error body in the thrown `GuardrailError` for debugging without
+ * dumping multi-MB HTML pages. Returns `<no body>` if the body is
+ * unreadable (e.g. already consumed, network error mid-read).
+ *
+ * @param res - Response whose body should be read; the body is consumed.
+ * @returns The truncated body text, or the literal `<no body>` when
+ *   `res.text()` rejects. Never throws.
+ */
+export async function safeReadBody(res) {
+    try {
+        return (await res.text()).slice(0, 500);
+    }
+    catch {
+        return '<no body>';
+    }
+}
+/**
+ * Upper-case the first character of an adapter name so the exhaustion
+ * error reads "Vercel API unreachable…" instead of "vercel API
+ * unreachable…", matching the wording of the pre-extraction copies. The
+ * rest of the string is left untouched; an empty (falsy) input is
+ * returned unchanged.
+ */
+function capitalize(s) {
+    if (!s)
+        return s;
+    return s.charAt(0).toUpperCase() + s.slice(1);
+}
+//# sourceMappingURL=_http.js.map

package/dist/src/adapters/deploy/fly.d.ts ADDED Viewed

@@ -0,0 +1,206 @@
+import type { DeployAdapter, DeployAdapterCapabilities, DeployInput, DeployLogLine, DeployResult, DeployRollbackInput, DeployStatusInput, DeployStatusResult, DeployStreamLogsInput } from './types.ts';
+/**
+ * Fly release lifecycle states.
+ *
+ * The first three (`succeeded`, `failed`, `cancelled`) are terminal; the
+ * rest (`pending`, `running`, `starting`) are interim. Fly's actual status
+ * vocabulary has evolved across the Nomad → Machines transition; this set
+ * is the conservative intersection that maps cleanly onto our
+ * `pass | fail | in-progress` tri-state. New states observed in the wild
+ * are treated as `in-progress` until the polling budget runs out.
+ */
+type FlyReleaseState = 'succeeded' | 'failed' | 'cancelled' | 'pending' | 'running' | 'starting';
+interface FlyReleaseResponse {
+    id: string;
+    /** Public hostname (e.g. `my-app.fly.dev`) — Fly returns this on the release. */
+    hostname?: string;
+    /**
+     * Terminal/interim state as reported under the older `status` key.
+     * Optional because newer responses may carry the value under `state`
+     * instead.
+     */
+    status?: FlyReleaseState;
+    /**
+     * Newer Fly responses use `state`; older use `status`. We accept either,
+     * which is why both fields are optional and share the same type.
+     */
+    state?: FlyReleaseState;
+    /**
+     * Image reference the release was built from (e.g.
+     * `registry.fly.io/my-app:deployment-01`). Surfaced on list-releases
+     * responses and used by the simulated-rollback path to re-deploy a
+     * known-good image when native `/rollback` is unavailable. Optional —
+     * not every response is guaranteed to include it.
+     */
+    image?: string;
+}
+export interface FlyDeployAdapterOptions {
+    /** Personal access token. Falls back to `process.env.FLY_API_TOKEN` when omitted. */
+    token?: string;
+    /** Fly app slug. Required. */
+    app: string;
+    /**
+     * Image reference (e.g. `registry.fly.io/my-app:deployment-01`).
+     * Required — the adapter never builds an image itself; the user pushes
+     * one beforehand via `fly deploy --build-only --push` or equivalent.
+     */
+    image: string;
+    /** Optional region pin. Falls back to the app's default region when omitted. */
+    region?: string;
+    /** Polling interval (ms) when waiting for the release to reach a terminal state. Default: 2000. */
+    pollIntervalMs?: number;
+    /** Maximum total time (ms) to poll before returning `in-progress`. Default: 15 minutes (900000 ms). */
+    maxPollMs?: number;
+    /** Injected fetch implementation — defaults to `globalThis.fetch`. Tests pass a mock. */
+    fetchImpl?: typeof fetch;
+    /** Injected sleep implementation (resolves after `ms`) — tests pass a no-op so they don't actually wait. */
+    sleepImpl?: (ms: number) => Promise<void>;
+    /**
+     * Wall-clock source — tests pass a controllable counter.
+     * NOTE(review): presumably millisecond timestamps comparable with
+     * `maxPollMs` (i.e. `Date.now`-like) — confirm against fly.js.
+     */
+    nowImpl?: () => number;
+    /**
+     * Optional caller-supplied redaction patterns (in addition to the
+     * built-in default set in `core/logging/redaction.ts`). Typically wired
+     * from `config.persistence.redactionPatterns` by the CLI; tests omit it.
+     */
+    redactionPatterns?: readonly string[];
+    /**
+     * Injected WebSocket constructor for `streamLogs` — defaults to Node 22's
+     * built-in `globalThis.WebSocket`. Tests pass a stub that emulates the
+     * standard `addEventListener('message' | 'error' | 'close')` surface.
+     *
+     * Phase 3 of v5.6 — Fly streams build logs over WS with NDJSON-encoded
+     * messages. The adapter never imports a WS library; we rely on Node's
+     * built-in (Node 22+) for production and the injected stub for unit tests.
+     */
+    wsImpl?: typeof WebSocket;
+    /**
+     * Optional override for the Fly log-streaming WebSocket URL builder.
+     * Receives the app slug and the release ID and returns the full URL to
+     * connect to. Defaults to the spec's stated path (see comment on
+     * `streamLogs` for the divergence-from-spec note that Phase 7 will
+     * reconcile against captured fixtures). Tests use this to point at a
+     * local stub.
+     */
+    buildLogsWsUrl?: (app: string, releaseId: string) => string;
+}
+/**
+ * Fly.io deploy adapter.
+ *
+ * Construct once per pipeline run. The adapter is stateless across calls —
+ * all configuration (token, app, image, region) is captured at construction
+ * time. Per the v5.6 spec, `deploy()` and `status()` are the Phase 1
+ * surface; `streamLogs` (WebSocket) and `rollback` (native + simulated)
+ * are the Phase 3 and Phase 4 additions respectively, and both are
+ * declared on this class.
+ *
+ * Members that carry no doc comment of their own:
+ * - `deploy(input)` resolves to a `DeployResult`; `status(input)` resolves
+ *   to a `DeployStatusResult`.
+ * - The private readonly fields presumably hold the resolved
+ *   `FlyDeployAdapterOptions` values (`sleep` ← `sleepImpl`, `now` ←
+ *   `nowImpl`, `buildLogsWsUrlFn` ← `buildLogsWsUrl`) — confirm against
+ *   fly.js, which this declaration does not show.
+ */
+export declare class FlyDeployAdapter implements DeployAdapter {
+    readonly name = "fly";
+    readonly capabilities: DeployAdapterCapabilities;
+    private readonly token;
+    private readonly app;
+    private readonly image;
+    private readonly region;
+    private readonly pollIntervalMs;
+    private readonly maxPollMs;
+    private readonly fetchImpl;
+    private readonly sleep;
+    private readonly now;
+    private readonly redactionPatterns;
+    private readonly wsImpl;
+    private readonly buildLogsWsUrlFn;
+    constructor(opts: FlyDeployAdapterOptions);
+    deploy(input: DeployInput): Promise<DeployResult>;
+    status(input: DeployStatusInput): Promise<DeployStatusResult>;
+    /**
+     * Phase 3 of v5.6 — subscribe to real-time build logs for a release via
+     * Fly's WebSocket log endpoint. Returns an async generator that yields
+     * one `DeployLogLine` per log entry.
+     *
+     * Wire shape:
+     * - Connect to `wss://api.machines.dev/v1/apps/{app}/machines/{releaseId}/logs`
+     *   (intent-level URL per the v5.6 spec's "Logs" bullet — exact path will
+     *   be reconciled against captured fixtures in Phase 7; the `wsImpl` and
+     *   `buildLogsWsUrl` injection points keep this overridable until then).
+     * - Each WS message is a single NDJSON line containing one log entry.
+     *   Multiple lines per message are also tolerated (split on `\n`). Malformed
+     *   JSON lines are skipped silently rather than crashing the iterator.
+     * - Auth via `Authorization: Bearer <FLY_API_TOKEN>` is passed through the
+     *   `protocols` argument (Node's built-in WebSocket doesn't accept custom
+     *   `headers` directly the way `ws` does); Fly accepts the token as the
+     *   first protocol value. This is the documented pattern for browsers and
+     *   matches Node 22's WS surface.
+     * - One reconnect with exponential backoff (1s, 2s) on disconnect, then
+     *   yield a final `level: 'warn'` line referencing `buildLogsUrl` and
+     *   finish the iterator.
+     *   NOTE(review): "one reconnect" vs. the two listed delays (1s, 2s) is
+     *   ambiguous — confirm the actual attempt count against fly.js.
+     * - `signal.aborted` is honored at every await boundary; the underlying
+     *   socket is closed eagerly.
+     * - Every yielded line's `text` is run through `redactLogLines()` before
+     *   leaving the adapter.
+     */
+    streamLogs(input: DeployStreamLogsInput): AsyncGenerator<DeployLogLine>;
+    /**
+     * Phase 4 of v5.6 — roll back to a previous Fly release.
+     *
+     * Two modes per spec § "Fly.io adapter → Rollback":
+     *
+     * 1. Native: try `POST /v1/apps/{app}/releases/{releaseId}/rollback`.
+     *    This is the historical Fly API; the Machines-era replacement may
+     *    differ — Phase 7 fixture-capture reconciles. If the endpoint returns
+     *    404 / 405 / 410 (removed across the Nomad → Machines transition),
+     *    fall through to the simulated path. Any other non-OK status
+     *    (auth, invalid_config, etc.) propagates via `assertOkOrThrow`.
+     *
+     * 2. Simulated: list prior releases via
+     *    `GET /v1/apps/{app}/releases?limit=10`, find the most recent one
+     *    with `status === 'succeeded'` whose `id` differs from the one we'd
+     *    be rolling back from, and trigger a new deploy with that release's
+     *    `image`. Re-uses the same POST + poll machinery as `deploy()` via
+     *    `deployImage()`.
+     *    NOTE(review): `FlyReleaseResponse` also allows the value under
+     *    `state` — confirm whether the lookup considers `state` as well as
+     *    `status`.
+     *
+     * When `input.to` is set we treat that as a specific release ID:
+     * - Native path uses it as the URL fragment.
+     * - Simulated path looks it up in the list to grab its `image`. If the
+     *   release is not present in the recent-10 window, throw
+     *   `not_found` — caller almost certainly typo'd the ID.
+     *
+     * Throws `GuardrailError({ code: 'no_previous_deploy', provider: 'fly' })`
+     * when the simulated path runs out of candidates (i.e. no prior release
+     * with `status === 'succeeded'` exists).
+     */
+    rollback(input: DeployRollbackInput): Promise<DeployResult>;
+    /**
+     * Private helper — re-uses the deploy() POST + poll machinery to deploy a
+     * specific image without going through the constructor-stamped image. Used
+     * by `rollback()`'s simulated path to redeploy a previous successful image.
+     */
+    private deployImage;
+    /**
+     * List the most recent releases for the configured app. Newest-first.
+     * `limit` caps the result set — defaults to 10 (the spec's recommended
+     * window for the rollback lookup). 4xx/5xx errors propagate via
+     * `assertOkOrThrow`.
+     *
+     * Unlike the other helpers below, this method is not marked `private`.
+     *
+     * @param limit - Maximum number of releases to return (default 10).
+     * @param signal - Optional abort signal; presumably forwarded to the
+     *   underlying request — confirm against fly.js.
+     * @returns The releases as `FlyReleaseResponse` objects, newest first.
+     */
+    listReleases(limit?: number, signal?: AbortSignal): Promise<FlyReleaseResponse[]>;
+    /**
+     * Find the most recent prior release with `status === 'succeeded'`. When
+     * `excludeId` is supplied, that release is skipped (used to ensure
+     * `rollback()` never returns "rolled back to the deploy I'm rolling back
+     * from" when the caller didn't supply `input.to`).
+     *
+     * Returns `null` when no candidate exists.
+     */
+    private findPreviousSucceededRelease;
+    /**
+     * Apply the adapter's redaction patterns to a log line's `text` field.
+     * Pure helper — keeps the streamLogs loop readable.
+     */
+    private redactLine;
+    private pollUntilTerminal;
+    private shapeResult;
+    private headers;
+    private buildLogsUrl;
+    /**
+     * HTTP-status-keyed error mapper: turns a non-OK response into a thrown
+     * error whose code depends on the status. Per v5.6 spec:
+     *
+     * | Status | ErrorCode |
+     * |---|---|
+     * | 401 / 403 | `auth` |
+     * | 404 | `not_found` |
+     * | 422 / 400 | `invalid_config` |
+     * | 5xx | `transient_network` (retryable) |
+     * | other 4xx | `adapter_bug` |
+     *
+     * The `Fly-Request-Id` response header is captured into `details` whenever
+     * present so support tickets can quote it back to Fly.
+     */
+    private assertOkOrThrow;
+}
+export {};
+//# sourceMappingURL=fly.d.ts.map