@delegance/claude-autopilot 5.2.2 → 6.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/CHANGELOG.md +1027 -1
  2. package/README.md +104 -17
  3. package/dist/src/adapters/council/claude.js +2 -1
  4. package/dist/src/adapters/council/openai.js +14 -7
  5. package/dist/src/adapters/deploy/_http.d.ts +43 -0
  6. package/dist/src/adapters/deploy/_http.js +99 -0
  7. package/dist/src/adapters/deploy/fly.d.ts +206 -0
  8. package/dist/src/adapters/deploy/fly.js +696 -0
  9. package/dist/src/adapters/deploy/generic.d.ts +39 -0
  10. package/dist/src/adapters/deploy/generic.js +98 -0
  11. package/dist/src/adapters/deploy/index.d.ts +15 -0
  12. package/dist/src/adapters/deploy/index.js +78 -0
  13. package/dist/src/adapters/deploy/render.d.ts +181 -0
  14. package/dist/src/adapters/deploy/render.js +550 -0
  15. package/dist/src/adapters/deploy/types.d.ts +221 -0
  16. package/dist/src/adapters/deploy/types.js +15 -0
  17. package/dist/src/adapters/deploy/vercel.d.ts +143 -0
  18. package/dist/src/adapters/deploy/vercel.js +426 -0
  19. package/dist/src/adapters/pricing.d.ts +36 -0
  20. package/dist/src/adapters/pricing.js +40 -0
  21. package/dist/src/adapters/review-engine/claude.js +2 -1
  22. package/dist/src/adapters/review-engine/codex.js +12 -8
  23. package/dist/src/adapters/review-engine/gemini.js +2 -1
  24. package/dist/src/adapters/review-engine/openai-compatible.js +2 -1
  25. package/dist/src/adapters/sdk-loader.d.ts +15 -0
  26. package/dist/src/adapters/sdk-loader.js +77 -0
  27. package/dist/src/cli/autopilot.d.ts +71 -0
  28. package/dist/src/cli/autopilot.js +735 -0
  29. package/dist/src/cli/brainstorm.d.ts +23 -0
  30. package/dist/src/cli/brainstorm.js +131 -0
  31. package/dist/src/cli/costs.d.ts +15 -1
  32. package/dist/src/cli/costs.js +99 -10
  33. package/dist/src/cli/deploy.d.ts +71 -0
  34. package/dist/src/cli/deploy.js +539 -0
  35. package/dist/src/cli/fix.d.ts +18 -0
  36. package/dist/src/cli/fix.js +105 -11
  37. package/dist/src/cli/help-text.d.ts +52 -0
  38. package/dist/src/cli/help-text.js +400 -0
  39. package/dist/src/cli/implement.d.ts +91 -0
  40. package/dist/src/cli/implement.js +196 -0
  41. package/dist/src/cli/index.js +784 -222
  42. package/dist/src/cli/json-envelope.d.ts +187 -0
  43. package/dist/src/cli/json-envelope.js +270 -0
  44. package/dist/src/cli/json-mode.d.ts +33 -0
  45. package/dist/src/cli/json-mode.js +201 -0
  46. package/dist/src/cli/migrate.d.ts +111 -0
  47. package/dist/src/cli/migrate.js +305 -0
  48. package/dist/src/cli/plan.d.ts +81 -0
  49. package/dist/src/cli/plan.js +149 -0
  50. package/dist/src/cli/pr.d.ts +106 -0
  51. package/dist/src/cli/pr.js +191 -19
  52. package/dist/src/cli/preflight.js +102 -1
  53. package/dist/src/cli/review.d.ts +27 -0
  54. package/dist/src/cli/review.js +126 -0
  55. package/dist/src/cli/runs-watch-renderer.d.ts +45 -0
  56. package/dist/src/cli/runs-watch-renderer.js +275 -0
  57. package/dist/src/cli/runs-watch.d.ts +41 -0
  58. package/dist/src/cli/runs-watch.js +395 -0
  59. package/dist/src/cli/runs.d.ts +122 -0
  60. package/dist/src/cli/runs.js +902 -0
  61. package/dist/src/cli/scan.d.ts +93 -0
  62. package/dist/src/cli/scan.js +166 -40
  63. package/dist/src/cli/spec.d.ts +66 -0
  64. package/dist/src/cli/spec.js +132 -0
  65. package/dist/src/cli/validate.d.ts +29 -0
  66. package/dist/src/cli/validate.js +131 -0
  67. package/dist/src/core/config/schema.d.ts +43 -0
  68. package/dist/src/core/config/schema.js +25 -0
  69. package/dist/src/core/config/types.d.ts +17 -0
  70. package/dist/src/core/council/runner.d.ts +10 -1
  71. package/dist/src/core/council/runner.js +25 -3
  72. package/dist/src/core/council/types.d.ts +7 -0
  73. package/dist/src/core/errors.d.ts +1 -1
  74. package/dist/src/core/errors.js +12 -0
  75. package/dist/src/core/logging/redaction.d.ts +13 -0
  76. package/dist/src/core/logging/redaction.js +20 -0
  77. package/dist/src/core/migrate/detector-rules.js +6 -0
  78. package/dist/src/core/migrate/schema-validator.js +22 -1
  79. package/dist/src/core/phases/static-rules.d.ts +5 -1
  80. package/dist/src/core/phases/static-rules.js +2 -5
  81. package/dist/src/core/run-state/budget.d.ts +88 -0
  82. package/dist/src/core/run-state/budget.js +141 -0
  83. package/dist/src/core/run-state/cli-internal.d.ts +21 -0
  84. package/dist/src/core/run-state/cli-internal.js +174 -0
  85. package/dist/src/core/run-state/events.d.ts +59 -0
  86. package/dist/src/core/run-state/events.js +504 -0
  87. package/dist/src/core/run-state/lock.d.ts +61 -0
  88. package/dist/src/core/run-state/lock.js +206 -0
  89. package/dist/src/core/run-state/phase-context.d.ts +60 -0
  90. package/dist/src/core/run-state/phase-context.js +108 -0
  91. package/dist/src/core/run-state/phase-registry.d.ts +137 -0
  92. package/dist/src/core/run-state/phase-registry.js +162 -0
  93. package/dist/src/core/run-state/phase-runner.d.ts +80 -0
  94. package/dist/src/core/run-state/phase-runner.js +447 -0
  95. package/dist/src/core/run-state/provider-readback.d.ts +130 -0
  96. package/dist/src/core/run-state/provider-readback.js +426 -0
  97. package/dist/src/core/run-state/replay-decision.d.ts +69 -0
  98. package/dist/src/core/run-state/replay-decision.js +144 -0
  99. package/dist/src/core/run-state/resolve-engine.d.ts +100 -0
  100. package/dist/src/core/run-state/resolve-engine.js +190 -0
  101. package/dist/src/core/run-state/resume-preflight.d.ts +66 -0
  102. package/dist/src/core/run-state/resume-preflight.js +116 -0
  103. package/dist/src/core/run-state/run-phase-with-lifecycle.d.ts +73 -0
  104. package/dist/src/core/run-state/run-phase-with-lifecycle.js +186 -0
  105. package/dist/src/core/run-state/runs.d.ts +57 -0
  106. package/dist/src/core/run-state/runs.js +288 -0
  107. package/dist/src/core/run-state/snapshot.d.ts +14 -0
  108. package/dist/src/core/run-state/snapshot.js +114 -0
  109. package/dist/src/core/run-state/state.d.ts +40 -0
  110. package/dist/src/core/run-state/state.js +164 -0
  111. package/dist/src/core/run-state/types.d.ts +278 -0
  112. package/dist/src/core/run-state/types.js +13 -0
  113. package/dist/src/core/run-state/ulid.d.ts +11 -0
  114. package/dist/src/core/run-state/ulid.js +95 -0
  115. package/dist/src/core/schema-alignment/extractor/index.d.ts +1 -1
  116. package/dist/src/core/schema-alignment/extractor/index.js +2 -2
  117. package/dist/src/core/schema-alignment/extractor/prisma.d.ts +13 -1
  118. package/dist/src/core/schema-alignment/extractor/prisma.js +65 -10
  119. package/dist/src/core/schema-alignment/git-history.d.ts +19 -0
  120. package/dist/src/core/schema-alignment/git-history.js +53 -0
  121. package/dist/src/core/static-rules/rules/brand-tokens.js +2 -2
  122. package/dist/src/core/static-rules/rules/schema-alignment.js +14 -4
  123. package/package.json +9 -5
  124. package/scripts/autoregress.ts +3 -2
  125. package/skills/claude-autopilot.md +1 -1
  126. package/skills/make-interfaces-feel-better/SKILL.md +104 -0
  127. package/skills/migrate/SKILL.md +193 -47
  128. package/skills/simplify-ui/SKILL.md +103 -0
  129. package/skills/ui/SKILL.md +117 -0
  130. package/skills/ui-ux-pro-max/SKILL.md +90 -0
package/README.md CHANGED
@@ -1,7 +1,11 @@
1
1
  # @delegance/claude-autopilot
2
2
 
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![GitHub](https://img.shields.io/badge/GitHub-axledbetter%2Fclaude--autopilot-181717?logo=github)](https://github.com/axledbetter/claude-autopilot) [![npm](https://img.shields.io/npm/v/@delegance/claude-autopilot.svg)](https://www.npmjs.com/package/@delegance/claude-autopilot)
4
+
3
5
  **Autonomous development pipeline for Claude Code. Brainstorm → spec → plan → implement → migrate → validate → PR → review → merge — all from your terminal, on your codebase, with your test suite.**
4
6
 
7
+ **Open source, MIT-licensed, runs on your machine with your API keys.** No hosted agent, no per-seat subscription — `npm install -g @delegance/claude-autopilot` and you're done.
8
+
5
9
  ```bash
6
10
  claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
7
11
  # → writes spec (reviewed by Codex) → writes plan (reviewed by Codex) →
@@ -13,6 +17,8 @@ claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
13
17
 
14
18
  *No hosted agent. No per-seat subscription. Runs locally on your machine, against your real repo, using your API keys. Every phase is a Claude Code skill you can intervene in, rewire, or run by itself.*
15
19
 
20
+ **See it work end-to-end:** [DEMO.md](DEMO.md) — one real autonomous run on a Python codebase. 12 minutes wall clock, $2.20 spend, 5 new tests, multi-file integration, zero manual intervention. Honest about what's bounded today.
21
+
16
22
  ---
17
23
 
18
24
  ## Benchmark
@@ -29,24 +35,31 @@ Every finding came with a concrete remediation (often a code patch or named libr
29
35
 
30
36
  ## Why this vs the alternatives
31
37
 
32
- AI coding tools fall into three buckets. Here's where claude-autopilot sits.
33
-
34
- | Tool | Shape | Hosted? | Model lock-in | Pipeline structure | You can intervene mid-flow? |
38
+ | Tool | Where code lives | Pricing model | Models | Pipeline | Intervenable? |
35
39
  |---|---|---|---|---|---|
36
- | **Devin** (Cognition) | Autonomous agent | Yes (SaaS, $500/mo) | Cognition's stack | Opaque | No — watch a dashboard |
37
- | **GitHub Copilot Workspace** | Spec → plan → PR | Yes | Copilot only | Fixed, non-extensible | Edit the plan, that's it |
38
- | **Factory Droids** | Multi-agent workflow | Yes (per-seat) | Factory's stack | Fixed | Limited |
39
- | **Cursor BugBot / Copilot Review / CodeRabbit** | Async PR reviewer | Yes | Vendor's model | Single phase (review only) | N/A — post-hoc only |
40
- | **Aider / Cline / Cursor agent mode** | Interactive pair programming | Local | User's choice | None — single-shot prompts | Continuous |
41
- | **OpenHands / SWE-agent** | Open-ended agent framework | Local | User's choice | None — agent decides | Rare, research-grade |
42
- | **claude-autopilot** | **Opinionated local pipeline** | **Local** | **Any LLM (Claude / GPT / Gemini / Groq / Ollama)** | **Fixed but rewireable, skill-per-phase** | **Every phase. All state on disk.** |
40
+ | **Devin** (Cognition) | Hosted sandbox | Per-ACU (cloud markup) | Cognition's stack | Opaque | No — dashboard only |
41
+ | **Factory Droids** | Hosted | Per-task + seat | Factory's stack | Fixed | Limited |
42
+ | **GitHub Copilot Workspace** | GitHub-hosted | Per-seat ($) | Copilot only | Fixed, non-extensible | Edit the plan |
43
+ | **Cursor / Copilot agent mode** | Local IDE | Per-seat ($) | Vendor's model | None — single-shot | Continuous |
44
+ | **Cursor BugBot / CodeRabbit** | Hosted | Per-PR or seat | Vendor's model | Review only | Post-hoc |
45
+ | **Aider / Cline** | Local CLI | Free + your API key | User's choice | None | Continuous |
46
+ | **OpenHands / SWE-agent** | Local research | Free | User's choice | Agent decides | Rare |
47
+ | **claude-autopilot** | **Local CLI, your repo** | **Free + your existing Claude subscription** | **Multi-model per role (Claude + Codex + Gemini)** | **Skill-per-phase, rewireable** | **Every phase, all state on disk** |
48
+
49
+ Three things only this product gives you:
43
50
 
44
- The architectural differences that matter most in practice:
51
+ 1. **Multi-model council.** Same design question goes to Claude + Codex + Gemini in parallel; a fourth model synthesizes the consensus. Different blind spots, different recommendations, one merged answer. **No other tool dispatches multi-model on a per-decision basis.**
52
+ 2. **Your code never leaves your machine.** No cloud sandbox. No SaaS markup. The `git push` that happens at the end is from your laptop. For private repos, regulated industries, or anyone who doesn't want their unfinished code on someone else's servers — this is the only autonomous-agent shape that fits.
53
+ 3. **Ships as a Claude Code skill, not a competing IDE.** `/brainstorm`, `/autopilot`, `/migrate`, `/validate` are first-class Claude Code commands. As Claude Code grows, autopilot rides that adoption. You don't switch tools to use it; it's already there.
45
54
 
46
- 1. **Multi-model by design.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Different model for each role, swap any of them. The pipeline's phases are explicit contracts, not one opaque API call.
47
- 2. **Your stack, not a sandbox.** It runs your `npm test`, your `prisma migrate`, your `gh pr create`, your `ruff check`. If it works in your terminal, it works in the pipeline.
48
- 3. **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
49
- 4. **Test-gated auto-revert as a first-class command.** `claude-autopilot fix --verify` patches a file, runs your full test suite, and reverts on failure. Built into the CLI, not a wrapper you write yourself.
55
+ Plus the four practical differences:
56
+
57
+ - **Multi-model by role.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Swap any of them.
58
+ - **Your stack, not a sandbox.** Runs your `npm test`, your `prisma migrate`, your `gh pr create`. If it works in your terminal, it works in the pipeline.
59
+ - **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
60
+ - **Test-gated auto-revert.** `claude-autopilot fix --verify` patches a file, runs your tests, reverts on failure. Built into the CLI, not a wrapper.
61
+
62
+ **Real numbers from a real run:** [DEMO.md](DEMO.md) — autonomous multi-file change on a Python codebase, **12 minutes, $2.20, zero manual intervention.**
50
63
 
51
64
  ## 30-second quickstart
52
65
 
@@ -67,6 +80,34 @@ claude-autopilot brainstorm "add rate limiting to the public API"
67
80
  claude-autopilot run --pr 123
68
81
  ```
69
82
 
83
+ ## Run State Engine (v6)
84
+
85
+ Persistent state for autopilot runs. Resume after crashes, enforce hard budget caps, and surface typed JSON events for CI consumers — all opt-in, all on disk.
86
+
87
+ ```yaml
88
+ # guardrail.config.yaml
89
+ engine:
90
+ enabled: true # default in v6.1+; explicit `false` is deprecated and removed in v7
91
+ budgets:
92
+ perRunUSD: 10 # hard stop; mandatory runtime guard
93
+ perPhaseUSD: 5
94
+ ```
95
+
96
+ ```bash
97
+ claude-autopilot scan --all # any command — engine writes a per-run dir
98
+ claude-autopilot runs list # newest-first, with status / cost / lastPhase
99
+ claude-autopilot runs show 01HZK7P3D8Q9V… # state snapshot + optional event tail
100
+ claude-autopilot run resume 01HZK7P3D8Q9V… # lookup-only today; live execution in a later v6.x
101
+ claude-autopilot runs gc --older-than-days 7 # retire completed runs
102
+ ```
103
+
104
+ Every state transition appends a typed event to `.guardrail-cache/runs/<ulid>/events.ndjson`; every CLI verb supports `--json` with strict stdout-envelope / stderr-NDJSON channel discipline. Side-effect phase replay consults persisted `externalRefs` plus a live provider read-back so resume is safe by construction.
105
+
106
+ **v6.1+ ships with the engine ON by default** (flipped from v6.0's off-by-default after the stabilization criteria in [`docs/specs/v6.1-default-flip.md`](docs/specs/v6.1-default-flip.md) were met). Users who want the legacy v5.x output shape can opt out for one minor version via `--no-engine`, `CLAUDE_AUTOPILOT_ENGINE=off`, or `engine.enabled: false` — each prints a deprecation warning and is removed in v7.
107
+
108
+ → [`docs/v6/quickstart.md`](docs/v6/quickstart.md) — five-minute setup
109
+ → [`docs/v6/migration-guide.md`](docs/v6/migration-guide.md) — full v5.x → v6 walkthrough with precedence matrix, per-phase idempotency rules, and troubleshooting
110
+
70
111
  ## The pipeline, phase by phase
71
112
 
72
113
  Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can invoke any phase directly (`/brainstorm`, `/plan`, `/migrate`, `/validate`) without running the full pipeline. You can also rewire the pipeline by editing the `autopilot` skill.
@@ -83,10 +124,52 @@ Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can in
83
124
  | **PR** | `commit-push-pr` | Opens the PR with auto-generated title, summary, and test plan | Claude |
84
125
  | **Review** | `review-2pass` / `council` | Multi-model review of the diff (critical pass + informational pass) | Multiple |
85
126
  | **Triage** | `bugbot` | Fetches automated reviewer findings, auto-fixes real bugs, dismisses false positives | Claude |
127
+ | **Deploy** | `deploy` | Deploys via configured adapter (`vercel` \| `fly` \| `render` \| `generic`) with optional log streaming, health check, and bounded auto-rollback (see [Deploy phase](#deploy-phase)) | Deterministic |
86
128
 
87
129
  ### Migrate phase
88
130
 
89
- Configure your migration tool in `.autopilot/stack.md`. The pipeline reads stack.md, dispatches to the configured skill (`migrate@1` for generic; `migrate.supabase@1` for rich Supabase ledger; `none@1` to skip), and runs your tool with full safety: structured argv (no shell injection), 4-flag CI prod gate, hash-chained audit log. Run `claude-autopilot init` to auto-detect your stack. See [docs/skills/rich-migrate-contract.md](docs/skills/rich-migrate-contract.md) for the skill contract and [docs/skills/version-compatibility.md](docs/skills/version-compatibility.md) for the version model.
131
+ Configure your migration tool in `.autopilot/stack.md`. The pipeline reads stack.md, dispatches to the configured skill (`migrate@1` for generic; `migrate.supabase@1` for rich Supabase ledger; `none@1` to skip), and runs your tool with full safety: structured argv (no shell injection), 4-flag CI prod gate, hash-chained audit log. Run `claude-autopilot init` to auto-detect your stack — the detector recognizes Rails, Alembic, Django, Prisma, Drizzle, golang-migrate, dbmate, flyway, supabase-cli, ecto, typeorm, and falls back to a "configure manually" path. See [docs/skills/rich-migrate-contract.md](docs/skills/rich-migrate-contract.md) for the skill contract and [docs/skills/version-compatibility.md](docs/skills/version-compatibility.md) for the version model.
132
+
133
+ Generic example (Rails):
134
+
135
+ ```yaml
136
+ migrate:
137
+ skill: "migrate@1"
138
+ envs:
139
+ dev:
140
+ command: { exec: "rails", args: ["db:migrate"] }
141
+ env_file: ".env.development"
142
+ prod:
143
+ command: { exec: "rails", args: ["db:migrate", "RAILS_ENV=production"] }
144
+ ```
145
+
146
+ See `skills/migrate/SKILL.md` for examples covering Alembic, Django, Prisma, Drizzle, golang-migrate, dbmate, flyway, and custom scripts.
147
+
148
+ ### Deploy phase
149
+
150
+ Configure your deploy target in `guardrail.config.yaml` under a `deploy:` block. Four adapters ship in 5.6:
151
+
152
+ - **`vercel`** — Vercel v13 deployments API. SSE+NDJSON log streaming, native rollback via `/promote`. Auth: `VERCEL_TOKEN`.
153
+ - **`fly`** — Fly.io Machines API. WebSocket log streaming, native rollback with simulated fallback. Auth: `FLY_API_TOKEN`. Requires the image to be pre-pushed (`fly deploy --build-only --push`).
154
+ - **`render`** — Render REST API. Polling-based log stream with `(timestamp, logId)` cursor dedup, simulated rollback (re-deploys prior commit). Auth: `RENDER_API_KEY`.
155
+ - **`generic`** — runs any shell `deployCommand` (`vercel --prod`, `kubectl apply`, `make deploy`, etc). No platform integration; `--watch` and `rollback` aren't supported.
156
+
157
+ Each adapter speaks the same `DeployAdapter` contract: `deploy()`, optional `status()` / `rollback()` / `streamLogs()`, plus a `capabilities` block (`streamMode: 'websocket' | 'polling' | 'none'`, `nativeRollback: boolean`) so the CLI can degrade UX honestly (polling adapters print a one-line stderr notice under `--watch`). Auto-rollback is bounded: max one rollback per deploy attempt, with `runHealthCheck` capped at 5×6s. Log lines emitted into PR comments run through a redaction pass (`AKIA…`, `sk-…`, `eyJ…`, `ghp_`, `xoxb-`, plus configurable patterns) so build output can't leak secrets.
158
+
159
+ Example (Fly):
160
+
161
+ ```yaml
162
+ deploy:
163
+ adapter: fly
164
+ app: my-app
165
+ image: registry.fly.io/my-app:latest
166
+ region: ord
167
+ watchBuildLogs: true
168
+ healthCheckUrl: https://my-app.fly.dev/health
169
+ rollbackOn: [healthCheckFailure]
170
+ ```
171
+
172
+ `claude-autopilot doctor` checks for the relevant auth env var when an adapter is configured. See `docs/specs/v5.6-fly-render-adapters.md` for the full adapter contract.
90
173
 
91
174
  ## What's distinctive
92
175
 
@@ -315,6 +398,10 @@ ANTHROPIC_API_KEY=sk-ant-... claude-autopilot scan --all
315
398
 
316
399
  We do not claim 13/13 reflects every real-world repo — it's a reproducible upper bound on a fixture that exercises the categories we explicitly target.
317
400
 
401
+ ## Contributing
402
+
403
+ Issues and PRs welcome — https://github.com/axledbetter/claude-autopilot/issues. The pipeline literally builds itself; many features in this repo were implemented by autopilot running against autopilot ([DEMO.md](DEMO.md) walks through six self-eat PRs with cost trajectory $10 → ~$2.50). Read [CONTRIBUTING.md](CONTRIBUTING.md) if it exists, otherwise: clone, `npm install`, `npm test`, open a PR.
404
+
318
405
  ## License
319
406
 
320
- MIT
407
+ MIT — see [LICENSE](LICENSE).
@@ -1,6 +1,6 @@
1
- import Anthropic from '@anthropic-ai/sdk';
2
1
  import { GuardrailError } from "../../core/errors.js";
3
2
  import { classifyError } from "../review-engine/prompt-builder.js";
3
+ import { loadAnthropic } from "../sdk-loader.js";
4
4
  const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
5
5
  const MAX_OUTPUT_TOKENS = 2048;
6
6
  // Default Opus 4.7 rates — env override for other models.
@@ -14,6 +14,7 @@ export function makeClaudeCouncilAdapter(model, label) {
14
14
  if (!apiKey) {
15
15
  throw new GuardrailError('ANTHROPIC_API_KEY not set', { code: 'auth', provider: 'claude' });
16
16
  }
17
+ const Anthropic = await loadAnthropic();
17
18
  const client = new Anthropic({ apiKey });
18
19
  let response;
19
20
  try {
@@ -1,22 +1,28 @@
1
- import OpenAI from 'openai';
2
1
  import { GuardrailError } from "../../core/errors.js";
3
2
  import { classifyError } from "../review-engine/prompt-builder.js";
3
+ import { loadOpenAI } from "../sdk-loader.js";
4
+ import { getModelPricing } from "../pricing.js";
4
5
  const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
5
6
  const MAX_OUTPUT_TOKENS = 2048;
6
7
  // Models that ONLY work via the Responses API (not chat.completions).
7
8
  // Codex variants and the o-series reasoning models all 404 on chat.completions.
8
- // Without this branch, putting `gpt-5.3-codex` (the typical default) in
9
+ // Without this branch, putting `gpt-5.3-codex` (the prior default) in
9
10
  // council.models throws model_not_found, AND the synthesizer (also typically
10
- // gpt-5.3-codex) fails the same way — so the whole council returns `partial`
11
+ // the same model) fails the same way — so the whole council returns `partial`
11
12
  // with no synthesis. That regression made the marketed multi-model differentiator
12
13
  // unusable for any user who only had OPENAI_API_KEY.
14
+ // gpt-5.5 (the new default, 2026-04-23) drops the `-codex` suffix and works
15
+ // via standard chat.completions, so it is intentionally NOT matched here.
13
16
  function isResponsesOnlyModel(model) {
14
17
  return /codex|^o[1-9]|^gpt-5\.3-/i.test(model);
15
18
  }
16
- // Per-million-token rates for gpt-5.3-codex (override via env for other models).
17
- // Mirrors the review-engine codex adapter's pricing.
18
- const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? 1.25);
19
- const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? 10.0);
19
+ // Per-million-token rates. Bugbot LOW PR #93: wired to read from the
20
+ // canonical MODEL_PRICING table (no longer dead code). Resolution order:
21
+ // env override → MODEL_PRICING entry for the council's default
22
+ // (gpt-5.5) → numeric fallback. Mirrors the review-engine codex adapter.
23
+ const _pricing = getModelPricing(process.env.CODEX_MODEL ?? 'gpt-5.5');
24
+ const COST_PER_M_INPUT = Number(process.env.CODEX_COST_INPUT_PER_M ?? _pricing?.inputPer1M ?? 5.0);
25
+ const COST_PER_M_OUTPUT = Number(process.env.CODEX_COST_OUTPUT_PER_M ?? _pricing?.outputPer1M ?? 30.0);
20
26
  export function makeOpenAICouncilAdapter(model, label) {
21
27
  return {
22
28
  label,
@@ -25,6 +31,7 @@ export function makeOpenAICouncilAdapter(model, label) {
25
31
  if (!apiKey) {
26
32
  throw new GuardrailError('OPENAI_API_KEY not set', { code: 'auth', provider: 'openai' });
27
33
  }
34
+ const OpenAI = await loadOpenAI();
28
35
  const client = new OpenAI({ apiKey });
29
36
  const userInput = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;
30
37
  try {
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Options for {@link fetchWithRetry}. `provider` is mandatory — it's the
3
+ * value baked into the `GuardrailError` thrown when retries exhaust, so
4
+ * callers must always identify themselves. `attempts`, `baseMs`, and
5
+ * `sleepImpl` are tuning knobs with sensible defaults that match the
6
+ * pre-extraction behavior of all three adapters.
7
+ */
8
+ export interface FetchWithRetryOptions {
9
+ /** Adapter name baked into the GuardrailError thrown on exhaustion. */
10
+ provider: string;
11
+ /** Max attempts (inclusive). Default: 3. */
12
+ attempts?: number;
13
+ /** Base backoff in ms — exponential per attempt. Default: 500. */
14
+ baseMs?: number;
15
+ /** Injected sleep — adapters pass `this.sleep` so tests stay instant. */
16
+ sleepImpl?: (ms: number) => Promise<void>;
17
+ }
18
+ /**
19
+ * Free-function port of the per-adapter `fetchWithRetry` helper. Behavior
20
+ * is intentionally identical to the previous private copies:
21
+ * - 5xx responses are retried with exponential backoff (`baseMs * 2 ** i`); a 5xx on the final attempt is returned as-is, not thrown.
22
+ * - 4xx responses are returned as-is so the per-adapter `assertOkOrThrow`
23
+ * can classify them precisely (auth vs not_found vs invalid_config).
24
+ * - Network errors are retried unless `AbortError`, which is rethrown so
25
+ * intentional cancellation is never silently retried.
26
+ * - On exhaustion, throws `GuardrailError({ code: 'transient_network',
27
+ * provider })` with the last error's message embedded.
28
+ *
29
+ * Adapters call this as `await fetchWithRetry(this.fetchImpl, url, init,
30
+ * { sleepImpl: this.sleep, provider: 'fly' })` — passing both the fetch
31
+ * implementation and the sleep impl explicitly keeps the adapter's
32
+ * `nowImpl`/`sleepImpl` injection points working for tests.
33
+ */
34
+ export declare function fetchWithRetry(fetchImpl: typeof fetch, url: string, init: RequestInit, opts: FetchWithRetryOptions): Promise<Response>;
35
+ /**
36
+ * Read at most 500 characters of a `Response` body as text. Used by the
37
+ * per-adapter `assertOkOrThrow` helpers to embed the API's error body in
38
+ * the thrown `GuardrailError` for debugging without dumping multi-MB HTML
39
+ * pages. Returns `<no body>` if the body is unreadable (e.g. already
40
+ * consumed, network error mid-read).
41
+ */
42
+ export declare function safeReadBody(res: Response): Promise<string>;
43
+ //# sourceMappingURL=_http.d.ts.map
@@ -0,0 +1,99 @@
1
+ // src/adapters/deploy/_http.ts
2
+ //
3
+ // Shared HTTP plumbing extracted in v5.6 Phase 5 — see
4
+ // docs/specs/v5.6-fly-render-adapters.md § "Implementation phases".
5
+ //
6
+ // Three identical copies of `fetchWithRetry` and `safeReadBody` lived in
7
+ // `vercel.ts`, `fly.ts`, and `render.ts` before this module existed. Phase 5
8
+ // consolidates them as free functions so each adapter imports them as
9
+ // HTTP plumbing without `this` context. The deliberate decision (per the
10
+ // spec and PR #72 review) was to wait until a third copy materialized
11
+ // before reaching for shared abstractions, so each adapter's seam was
12
+ // settled.
13
+ //
14
+ // Out of scope on purpose:
15
+ // - `assertOkOrThrow` style HTTP-status mappers stay per-adapter. Each one
16
+ // composes a different error message (auth-token doc URL, 422 hint copy)
17
+ // and reads a different request-id header (`Fly-Request-Id` vs
18
+ // `x-request-id`). Sharing those would force a configuration object that's
19
+ // bigger than the function it replaces.
20
+ // - The Vercel `fetchEventsWithRetry` SSE helper is still adapter-private —
21
+ // it has different retry rules (404 race retried, 5xx retried with
22
+ // different shape) and returns the last response rather than throwing on
23
+ // exhaustion.
24
+ import { GuardrailError } from "../../core/errors.js";
25
+ const DEFAULT_SLEEP = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
26
+ /**
27
+ * Free-function port of the per-adapter `fetchWithRetry` helper. Behavior
28
+ * is intentionally identical to the previous private copies:
29
+ * - 5xx responses are retried with exponential backoff (`baseMs * 2 ** i`); a 5xx on the final attempt is returned as-is, not thrown.
30
+ * - 4xx responses are returned as-is so the per-adapter `assertOkOrThrow`
31
+ * can classify them precisely (auth vs not_found vs invalid_config).
32
+ * - Network errors are retried unless `AbortError`, which is rethrown so
33
+ * intentional cancellation is never silently retried.
34
+ * - On exhaustion, throws `GuardrailError({ code: 'transient_network',
35
+ * provider })` with the last error's message embedded.
36
+ *
37
+ * Adapters call this as `await fetchWithRetry(this.fetchImpl, url, init,
38
+ * { sleepImpl: this.sleep, provider: 'fly' })` — passing both the fetch
39
+ * implementation and the sleep impl explicitly keeps the adapter's
40
+ * `nowImpl`/`sleepImpl` injection points working for tests.
41
+ */
42
+ export async function fetchWithRetry(fetchImpl, url, init, opts) {
43
+ const attempts = opts.attempts ?? 3;
44
+ const baseMs = opts.baseMs ?? 500;
45
+ const sleep = opts.sleepImpl ?? DEFAULT_SLEEP;
46
+ let lastErr;
47
+ for (let i = 0; i < attempts; i++) {
48
+ try {
49
+ const res = await fetchImpl(url, init);
50
+ // 5xx is transient — retry. 4xx is the caller's problem — fail fast
51
+ // so the per-adapter error mapper can classify it precisely.
52
+ if (res.status >= 500 && res.status < 600 && i < attempts - 1) {
53
+ lastErr = new Error(`HTTP ${res.status}`);
54
+ await sleep(baseMs * 2 ** i);
55
+ continue;
56
+ }
57
+ return res;
58
+ }
59
+ catch (err) {
60
+ lastErr = err;
61
+ // AbortError is intentional cancellation — surface it directly without
62
+ // retry. Wrapping or retrying would silently defeat caller-side
63
+ // cancellation.
64
+ if (err instanceof Error && err.name === 'AbortError')
65
+ throw err;
66
+ if (i < attempts - 1) {
67
+ await sleep(baseMs * 2 ** i);
68
+ continue;
69
+ }
70
+ }
71
+ }
72
+ throw new GuardrailError(`${capitalize(opts.provider)} API unreachable after ${attempts} attempts: ${lastErr?.message ?? String(lastErr)}`, { code: 'transient_network', provider: opts.provider });
73
+ }
74
+ /**
75
+ * Read at most 500 characters of a `Response` body as text. Used by the
76
+ * per-adapter `assertOkOrThrow` helpers to embed the API's error body in
77
+ * the thrown `GuardrailError` for debugging without dumping multi-MB HTML
78
+ * pages. Returns `<no body>` if the body is unreadable (e.g. already
79
+ * consumed, network error mid-read).
80
+ */
81
+ export async function safeReadBody(res) {
82
+ try {
83
+ return (await res.text()).slice(0, 500);
84
+ }
85
+ catch {
86
+ return '<no body>';
87
+ }
88
+ }
89
+ /**
90
+ * Title-case the first letter of an adapter name so the exhaustion error
91
+ * reads "Vercel API unreachable…" instead of "vercel API unreachable…",
92
+ * matching the wording of the pre-extraction copies.
93
+ */
94
+ function capitalize(s) {
95
+ if (!s)
96
+ return s;
97
+ return s.charAt(0).toUpperCase() + s.slice(1);
98
+ }
99
+ //# sourceMappingURL=_http.js.map
@@ -0,0 +1,206 @@
1
+ import type { DeployAdapter, DeployAdapterCapabilities, DeployInput, DeployLogLine, DeployResult, DeployRollbackInput, DeployStatusInput, DeployStatusResult, DeployStreamLogsInput } from './types.ts';
2
+ /**
3
+ * Fly release lifecycle states.
4
+ *
5
+ * The first three (`succeeded`, `failed`, `cancelled`) are terminal; the
+ * rest (`pending`, `running`, `starting`) are interim. Fly's actual status
6
+ * vocabulary has evolved across the Nomad → Machines transition; this set
7
+ * is the conservative intersection that maps cleanly onto our
8
+ * `pass | fail | in-progress` tri-state. New states observed in the wild
9
+ * are treated as `in-progress` until the polling budget runs out.
+ *
+ * This is the type of the `status` / `state` fields on `FlyReleaseResponse`.
10
+ */
11
+ type FlyReleaseState = 'succeeded' | 'failed' | 'cancelled' | 'pending' | 'running' | 'starting';
12
+ interface FlyReleaseResponse {
13
+ id: string;
14
+ /** Public hostname (e.g. `my-app.fly.dev`) — Fly returns this on the release. */
15
+ hostname?: string;
16
+ /** Terminal/interim state under the older `status` key (see `state` for the newer key). */
17
+ status?: FlyReleaseState;
18
+ /**
+ * Same lifecycle value under the newer `state` key. Newer Fly responses
+ * use `state`; older use `status`. We accept either, so both are optional.
+ */
19
+ state?: FlyReleaseState;
20
+ /**
21
+ * Image reference the release was built from (e.g.
22
+ * `registry.fly.io/my-app:deployment-01`). Surfaced on list-releases
23
+ * responses and used by the simulated-rollback path to re-deploy a
24
+ * known-good image when native `/rollback` is unavailable.
25
+ */
26
+ image?: string;
27
+ }
28
+ export interface FlyDeployAdapterOptions {
29
+ /** Personal access token. Falls back to `process.env.FLY_API_TOKEN`. */
30
+ token?: string;
31
+ /** Fly app slug. Required. */
32
+ app: string;
33
+ /**
34
+ * Image reference (e.g. `registry.fly.io/my-app:deployment-01`).
35
+ * Required — the adapter never builds; the user pushes via
36
+ * `fly deploy --build-only --push` or equivalent.
37
+ */
38
+ image: string;
39
+ /** Optional region pin. Falls back to the app's default region. */
40
+ region?: string;
41
+ /** Polling interval (ms) when waiting for the release to reach a terminal state. Default: 2000. */
42
+ pollIntervalMs?: number;
43
+ /** Maximum total time (ms) to poll before returning `in-progress`. Default: 15 minutes. */
44
+ maxPollMs?: number;
45
+ /** Injected fetch implementation — defaults to `globalThis.fetch`. Tests pass a mock. */
46
+ fetchImpl?: typeof fetch;
47
+ /** Injected sleep implementation — tests pass a no-op so they don't actually wait. */
48
+ sleepImpl?: (ms: number) => Promise<void>;
49
+ /**
+ * Wall-clock source — tests pass a controllable counter.
+ * NOTE(review): presumably returns milliseconds, like `Date.now()`, since
+ * the polling budget (`maxPollMs`) is in ms — confirm against the
+ * implementation.
+ */
50
+ nowImpl?: () => number;
51
+ /**
52
+ * Optional caller-supplied redaction patterns (in addition to the
53
+ * built-in default set in `core/logging/redaction.ts`). Typically wired
54
+ * from `config.persistence.redactionPatterns` by the CLI; tests omit it.
55
+ */
56
+ redactionPatterns?: readonly string[];
57
+ /**
58
+ * Injected WebSocket constructor for `streamLogs` — defaults to Node 22's
59
+ * built-in `globalThis.WebSocket`. Tests pass a stub that emulates the
60
+ * standard `addEventListener('message' | 'error' | 'close')` surface.
61
+ *
62
+ * Phase 3 of v5.6 — Fly streams build logs over WS with NDJSON-encoded
63
+ * messages. The adapter never imports a WS library; we rely on Node's
64
+ * built-in (Node 22+) for production and the injected stub for unit tests.
65
+ */
66
+ wsImpl?: typeof WebSocket;
67
+ /**
68
+ * Optional override for the Fly log-streaming WebSocket URL builder.
+ * Receives the app slug and the release ID and returns the URL to
+ * connect to.
69
+ * Defaults to the spec's stated path,
+ * `wss://api.machines.dev/v1/apps/{app}/machines/{releaseId}/logs`
+ * (see comment on `streamLogs` for
70
+ * the divergence-from-spec note that Phase 7 will reconcile against
71
+ * captured fixtures). Tests use this to point at a local stub.
72
+ */
73
+ buildLogsWsUrl?: (app: string, releaseId: string) => string;
74
+ }
75
+ /**
76
+ * Fly.io deploy adapter.
77
+ *
78
+ * Construct once per pipeline run. The adapter is stateless across calls —
79
+ * all configuration (token, app, image, region) is captured at construction
80
+ * time. Per the v5.6 spec, `deploy()` and `status()` were wired in
81
+ * Phase 1; `streamLogs` (WebSocket) and `rollback` (native + simulated)
82
+ * were added in Phases 3 and 4 respectively and are declared below.
83
+ */
84
+ export declare class FlyDeployAdapter implements DeployAdapter {
85
+ readonly name = "fly";
86
+ readonly capabilities: DeployAdapterCapabilities;
87
+ private readonly token;
88
+ private readonly app;
89
+ private readonly image;
90
+ private readonly region;
91
+ private readonly pollIntervalMs;
92
+ private readonly maxPollMs;
93
+ private readonly fetchImpl;
94
+ private readonly sleep;
95
+ private readonly now;
96
+ private readonly redactionPatterns;
97
+ private readonly wsImpl;
98
+ private readonly buildLogsWsUrlFn;
99
+ constructor(opts: FlyDeployAdapterOptions);
100
+ deploy(input: DeployInput): Promise<DeployResult>;
101
+ status(input: DeployStatusInput): Promise<DeployStatusResult>;
102
+ /**
103
+ * Phase 3 of v5.6 — subscribe to real-time build logs for a release via
104
+ * Fly's WebSocket log endpoint.
105
+ *
106
+ * Wire shape:
107
+ * - Connect to `wss://api.machines.dev/v1/apps/{app}/machines/{releaseId}/logs`
108
+ * (intent-level URL per the v5.6 spec's "Logs" bullet — exact path will
109
+ * be reconciled against captured fixtures in Phase 7; the `wsImpl` and
110
+ * `buildLogsWsUrl` injection points keep this overridable until then).
111
+ * - Each WS message is a single NDJSON line containing one log entry.
112
+ * Multiple lines per message are also tolerated (split on `\n`). Malformed
113
+ * JSON lines are skipped silently rather than crashing the iterator.
114
+ * - Auth via `Authorization: Bearer <FLY_API_TOKEN>` is passed through the
115
+ * `protocols` argument (Node's built-in WebSocket doesn't accept custom
116
+ * `headers` directly the way `ws` does); Fly accepts the token as the
117
+ * first protocol value. This is the documented pattern for browsers and
118
+ * matches Node 22's WS surface.
119
+ * - One reconnect with exponential backoff (1s, 2s) on disconnect, then
120
+ * yield a final `level: 'warn'` line referencing `buildLogsUrl` and
121
+ * finish the iterator.
122
+ * - `signal.aborted` is honored at every await boundary; the underlying
123
+ * socket is closed eagerly.
124
+ * - Every yielded line's `text` is run through `redactLogLines()` before
125
+ * leaving the adapter.
+ *
+ * NOTE(review): "one reconnect" is listed with two backoff delays (1s, 2s)
+ * — confirm the actual retry count against the implementation.
+ *
+ * @returns An async generator of `DeployLogLine` values.
126
+ */
127
+ streamLogs(input: DeployStreamLogsInput): AsyncGenerator<DeployLogLine>;
128
+ /**
129
+ * Phase 4 of v5.6 — roll back to a previous Fly release.
130
+ *
131
+ * Two modes per spec § "Fly.io adapter → Rollback":
132
+ *
133
+ * 1. Native: try `POST /v1/apps/{app}/releases/{releaseId}/rollback`.
134
+ * This is the historical Fly API; the Machines-era replacement may
135
+ * differ — Phase 7 fixture-capture reconciles. If the endpoint returns
136
+ * 404 / 405 / 410 (removed across the Nomad → Machines transition),
137
+ * fall through to the simulated path. Any other non-OK status
138
+ * (auth, invalid_config, etc.) propagates via `assertOkOrThrow`.
139
+ *
140
+ * 2. Simulated: list prior releases via
141
+ * `GET /v1/apps/{app}/releases?limit=10`, find the most recent one
142
+ * with `status === 'succeeded'` whose `id` differs from the one we'd
143
+ * be rolling back from, and trigger a new deploy with that release's
144
+ * `image`. Re-uses the same POST + poll machinery as `deploy()` via
145
+ * `deployImage()`.
146
+ *
147
+ * When `input.to` is set we treat that as a specific release ID:
148
+ * - Native path uses it as the URL fragment.
149
+ * - Simulated path looks it up in the list to grab its `image`. If the
150
+ * release is not present in the recent-10 window, throw
151
+ * `not_found` — caller almost certainly typo'd the ID.
152
+ *
153
+ * Throws `GuardrailError({ code: 'no_previous_deploy', provider: 'fly' })`
154
+ * when the simulated path runs out of candidates (i.e. no prior release
155
+ * with `status === 'succeeded'` exists).
+ *
+ * NOTE(review): `FlyReleaseResponse` allows the lifecycle value under
+ * either `status` or `state`; the text above only mentions `status` —
+ * confirm the simulated path honours both keys.
156
+ */
157
+ rollback(input: DeployRollbackInput): Promise<DeployResult>;
158
+ /**
159
+ * Private helper — re-uses the deploy() POST + poll machinery to deploy a
160
+ * specific image without going through the constructor-stamped image. Used
161
+ * by `rollback()`'s simulated path to redeploy a previous successful image.
162
+ */
163
+ private deployImage;
164
+ /**
165
+ * List the most recent releases for the configured app. Newest-first.
166
+ * `limit` caps the result set — defaults to 10 (the spec's recommended
167
+ * window for the rollback lookup). 4xx/5xx errors propagate via
168
+ * `assertOkOrThrow`.
+ *
+ * `signal` is optional; presumably it is forwarded to the underlying
+ * request so callers can cancel it — TODO confirm.
169
+ */
170
+ listReleases(limit?: number, signal?: AbortSignal): Promise<FlyReleaseResponse[]>;
171
+ /**
172
+ * Find the most recent prior release with `status === 'succeeded'`. When
173
+ * `excludeId` is supplied, that release is skipped (used to ensure
174
+ * `rollback()` never returns "rolled back to the deploy I'm rolling back
175
+ * from" when the caller didn't supply `input.to`).
176
+ *
177
+ * Returns `null` when no candidate exists.
178
+ */
179
+ private findPreviousSucceededRelease;
180
+ /**
181
+ * Apply the adapter's redaction patterns to a log line's `text` field.
182
+ * Pure helper — keeps the streamLogs loop readable.
183
+ */
184
+ private redactLine;
185
+ private pollUntilTerminal;
186
+ private shapeResult;
187
+ private headers;
188
+ private buildLogsUrl;
189
+ /**
190
+ * HTTP-status-keyed error mapper. Per v5.6 spec:
191
+ *
192
+ * | Status | ErrorCode |
193
+ * |---|---|
194
+ * | 401 / 403 | `auth` |
195
+ * | 404 | `not_found` |
196
+ * | 422 / 400 | `invalid_config` |
197
+ * | 5xx | `transient_network` (retryable) |
198
+ * | other 4xx | `adapter_bug` |
199
+ *
200
+ * The `Fly-Request-Id` response header is captured into `details` whenever
201
+ * present so support tickets can quote it back to Fly.
202
+ */
203
+ private assertOkOrThrow;
204
+ }
205
+ export {};
206
+ //# sourceMappingURL=fly.d.ts.map