@delegance/claude-autopilot 2.5.0 → 5.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/README.md +169 -106
  3. package/bin/_launcher.js +77 -0
  4. package/bin/claude-autopilot.js +3 -0
  5. package/bin/guardrail.js +3 -0
  6. package/package.json +23 -9
  7. package/presets/generic/guardrail.config.yaml +35 -0
  8. package/presets/generic/stack.md +40 -0
  9. package/presets/nextjs-supabase/{autopilot.config.yaml → guardrail.config.yaml} +7 -0
  10. package/scripts/autoregress.ts +27 -11
  11. package/skills/autopilot/SKILL.md +170 -0
  12. package/skills/claude-autopilot.md +80 -0
  13. package/skills/guardrail.md +39 -0
  14. package/skills/migrate/SKILL.md +83 -0
  15. package/src/adapters/council/claude.ts +41 -0
  16. package/src/adapters/council/openai.ts +40 -0
  17. package/src/adapters/council/types.ts +7 -0
  18. package/src/adapters/loader.ts +7 -7
  19. package/src/adapters/review-engine/auto.ts +2 -2
  20. package/src/adapters/review-engine/claude.ts +9 -11
  21. package/src/adapters/review-engine/codex.ts +9 -11
  22. package/src/adapters/review-engine/gemini.ts +9 -11
  23. package/src/adapters/review-engine/openai-compatible.ts +10 -12
  24. package/src/adapters/review-engine/parse-output.ts +32 -6
  25. package/src/adapters/review-engine/prompt-builder.ts +19 -0
  26. package/src/adapters/review-engine/types.ts +1 -1
  27. package/src/adapters/vcs-host/commit-status.ts +39 -0
  28. package/src/adapters/vcs-host/github.ts +2 -2
  29. package/src/cli/baseline.ts +125 -0
  30. package/src/cli/ci.ts +11 -8
  31. package/src/cli/costs.ts +2 -2
  32. package/src/cli/council.ts +96 -0
  33. package/src/cli/detector.ts +21 -5
  34. package/src/cli/explain.ts +197 -0
  35. package/src/cli/fix.ts +173 -111
  36. package/src/cli/hook.ts +72 -27
  37. package/src/cli/ignore-helper.ts +116 -0
  38. package/src/cli/index.ts +355 -31
  39. package/src/cli/init.ts +12 -12
  40. package/src/cli/lsp.ts +200 -0
  41. package/src/cli/mcp.ts +206 -0
  42. package/src/cli/pr-comment.ts +5 -5
  43. package/src/cli/pr-desc.ts +168 -0
  44. package/src/cli/pr-review-comments.ts +3 -3
  45. package/src/cli/pr.ts +76 -0
  46. package/src/cli/preflight.ts +109 -32
  47. package/src/cli/report.ts +186 -0
  48. package/src/cli/run.ts +140 -36
  49. package/src/cli/scan.ts +233 -0
  50. package/src/cli/setup.ts +121 -15
  51. package/src/cli/test-gen.ts +125 -0
  52. package/src/cli/triage.ts +137 -0
  53. package/src/cli/watch.ts +52 -31
  54. package/src/cli/worker.ts +109 -0
  55. package/src/core/cache/review-cache.ts +2 -2
  56. package/src/core/chunking/index.ts +2 -2
  57. package/src/core/config/loader.ts +10 -10
  58. package/src/core/config/preset-resolver.ts +6 -6
  59. package/src/core/config/schema.ts +103 -2
  60. package/src/core/config/types.ts +57 -2
  61. package/src/core/council/config.ts +71 -0
  62. package/src/core/council/context.ts +17 -0
  63. package/src/core/council/runner.ts +83 -0
  64. package/src/core/council/types.ts +45 -0
  65. package/src/core/detect/llm-key.ts +89 -0
  66. package/src/core/detect/workspaces.ts +103 -0
  67. package/src/core/errors.ts +4 -4
  68. package/src/core/fix/generator.ts +149 -0
  69. package/src/core/ignore/index.ts +4 -4
  70. package/src/core/mcp/concurrency.ts +16 -0
  71. package/src/core/mcp/handlers/fix-finding.ts +126 -0
  72. package/src/core/mcp/handlers/get-capabilities.ts +62 -0
  73. package/src/core/mcp/handlers/get-findings.ts +36 -0
  74. package/src/core/mcp/handlers/review-diff.ts +65 -0
  75. package/src/core/mcp/handlers/scan-files.ts +65 -0
  76. package/src/core/mcp/handlers/validate-fix.ts +41 -0
  77. package/src/core/mcp/run-store.ts +85 -0
  78. package/src/core/mcp/workspace.ts +35 -0
  79. package/src/core/persist/baseline.ts +112 -0
  80. package/src/core/persist/cost-log.ts +1 -1
  81. package/src/core/persist/findings-cache.ts +1 -1
  82. package/src/core/persist/triage.ts +112 -0
  83. package/src/core/phases/static-rules.ts +18 -5
  84. package/src/core/pipeline/review-phase.ts +65 -26
  85. package/src/core/pipeline/run.ts +42 -10
  86. package/src/core/runtime/lock.ts +2 -2
  87. package/src/core/runtime/state.ts +2 -2
  88. package/src/core/schema-alignment/detector.ts +59 -0
  89. package/src/core/schema-alignment/extractor/index.ts +24 -0
  90. package/src/core/schema-alignment/extractor/prisma.ts +21 -0
  91. package/src/core/schema-alignment/extractor/sql.ts +99 -0
  92. package/src/core/schema-alignment/llm-check.ts +91 -0
  93. package/src/core/schema-alignment/scanner.ts +107 -0
  94. package/src/core/schema-alignment/types.ts +43 -0
  95. package/src/core/shell.ts +3 -3
  96. package/src/core/static-rules/registry.ts +17 -8
  97. package/src/core/static-rules/rules/brand-tokens.ts +145 -0
  98. package/src/core/static-rules/rules/hardcoded-secrets.ts +27 -1
  99. package/src/core/static-rules/rules/insecure-redirect.ts +67 -0
  100. package/src/core/static-rules/rules/missing-auth.ts +70 -0
  101. package/src/core/static-rules/rules/schema-alignment.ts +132 -0
  102. package/src/core/static-rules/rules/sql-injection.ts +71 -0
  103. package/src/core/static-rules/rules/ssrf.ts +63 -0
  104. package/src/core/static-rules/tailwind-extractor.ts +38 -0
  105. package/src/core/test-gen/coverage-analyzer.ts +93 -0
  106. package/src/core/test-gen/framework-detector.ts +21 -0
  107. package/src/core/test-gen/test-writer.ts +33 -0
  108. package/src/core/ui/design-context-loader.ts +87 -0
  109. package/src/core/worker/client.ts +46 -0
  110. package/src/core/worker/lockfile.ts +38 -0
  111. package/src/core/worker/server.ts +81 -0
  112. package/src/formatters/junit.ts +52 -0
  113. package/src/formatters/sarif.ts +2 -2
  114. package/src/index.ts +1 -2
  115. package/tests/snapshots/baselines/src-formatters-sarif.json +4 -4
  116. package/tests/snapshots/index.json +3 -3
  117. package/tests/snapshots/src-formatters-sarif.snap.ts +1 -1
  118. package/tests/snapshots/src-snapshots-impact-selector.snap.ts +3 -3
  119. package/tests/snapshots/src-snapshots-import-scanner.snap.ts +3 -3
  120. package/tests/snapshots/src-snapshots-serializer.snap.ts +2 -2
  121. package/bin/autopilot.js +0 -20
  122. package/skills/autopilot.md +0 -157
  123. /package/presets/go/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  124. /package/presets/python-fastapi/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  125. /package/presets/rails-postgres/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  126. /package/presets/t3/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  127. /package/{src → scripts}/snapshots/impact-selector.ts +0 -0
  128. /package/{src → scripts}/snapshots/import-scanner.ts +0 -0
  129. /package/{src → scripts}/snapshots/serializer.ts +0 -0
package/CHANGELOG.md CHANGED
@@ -1,5 +1,68 @@
1
1
  # Changelog
2
2
 
3
+ ## [5.0.0-alpha.2] — 2026-04-24
4
+
5
+ ### Added
6
+ - **v4 compatibility assertion matrix** at `tests/v4-compat/` — 20 pinned invocations covering version/help, subcommand routing for all v4 names, deterministic reads (doctor, costs, baseline, explain), flag parsing (`--base`, `--format`, `--fail-on`), deprecation-notice behavior, and the new grouped verbs. Uses marker/regex assertions, not full stdout snapshots — still catches routing and parsing regressions, which is the intent. Full normalized-stdout snapshots for deterministic commands are a follow-up item. Regression of any test blocks future alpha promotion.
7
+ - **Superpowers peer-dep detection** — `doctor` now reports a warn-level check for `superpowers:writing-plans`, `superpowers:using-git-worktrees`, `superpowers:subagent-driven-development`. Missing skills produce an actionable remediation hint (`claude plugin install superpowers`). Treated as warn not fail because review-only users don't need it; pipeline phases will hard-fail at their own entry point.
8
+ - **Grouped CLI verbs (phase 1: additive aliases)** — `claude-autopilot review <verb>` accepts `{run, scan, ci, fix, baseline, explain, watch, report}`. `claude-autopilot advanced <verb>` accepts `{lsp, mcp, worker, autoregress, test-gen, hook, detector, ignore}`. Both are additive aliases — flat forms (`claude-autopilot run`) continue to work unchanged. Broader restructuring (pipeline verbs `migrate`/`validate` top-level, `pr {create,comment,desc}`) is a later-alpha item.
9
+ - **`peerDependencies.superpowers`** (optional) declared in `package.json`.
10
+ - `src/cli/preflight.ts`: `findMissingSuperpowersSkills()` exported with recursive search across `~/.claude/plugins/**` and project-local `.claude/plugins/**`.
11
+
12
+ ### Fixed
13
+ - **`--help` / `-h` routed to `run`** (latent v4 bug). v4's dispatcher defaulted the subcommand to `run` when `args[0]` started with `--`, so `guardrail --help` silently executed a review instead of printing help. v5.0.0-alpha.2 intercepts `--help`/`-h` before subcommand defaulting and routes to the help handler. Surfaced by the new v4 compat matrix.
14
+ - **`--help` output missing 8 v4 subcommands** — `setup`, `preflight`, `hook`, `baseline`, `triage`, `pr-desc`, `council`, `mcp` were listed in the `SUBCOMMANDS` array but not in `printUsage()`. Help now lists all 20+.
15
+
16
+ ### Changed
17
+ - README install instructions now pin `@alpha` explicitly for the v5 alpha cycle. The npm `latest` tag still points at a pre-rename 2.5.0 release; without pinning, bare installs silently regress to old code. When 5.0.0 GA ships, `latest` advances and the `@alpha` pin becomes optional.
18
+ - Migration guide updated with the `@alpha` pinning note for `npm install`, GitHub Actions, and Dockerfile examples.
19
+
20
+ ### Still deferred to alpha.3
21
+ - Tombstone `@delegance/guardrail@5.0.0` with thin CLI wrapper and strict argv/stdio passthrough.
22
+ - CI bin-parity smoke tests (`npx guardrail`, `npx @delegance/guardrail`, global install, GitHub Actions).
23
+ - Codemod script `claude-autopilot migrate-v4 [--write]`.
24
+ - Compiled JS entrypoint (drops `tsx` runtime dep).
25
+
26
+ ## [5.0.0-alpha.1] — 2026-04-24
27
+
28
+ **Package renamed: `@delegance/guardrail` → `@delegance/claude-autopilot`.**
29
+
30
+ The v4 product sold itself as "LLM code review." The real product is an end-to-end autonomous development pipeline built on Claude Code skills — brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. This alpha corrects the identity mismatch without breaking any v4 usage.
31
+
32
+ Every v4 invocation continues to work through v5.x via the preserved `guardrail` CLI alias. Migration guide: `docs/migration/v4-to-v5.md`.
33
+
34
+ ### Added
35
+ - **`claude-autopilot` CLI binary** — primary entrypoint (`bin/claude-autopilot.js`), co-installed with `guardrail`.
36
+ - **Pipeline skills bundled in the tarball** — `skills/claude-autopilot.md` (agent-loop spec), `skills/autopilot/`, `skills/migrate/`. v4.3.1 shipped only `skills/guardrail.md`; the pipeline skills existed only in-repo and weren't distributed.
37
+ - **`generic` preset** — no DB migration runner, uses `npm test` / `npm run typecheck` / `npm run lint` where present. Picked by `detectProject()` as the fallback when no stack signals are found (replaces the v4 behavior of claiming `nextjs-supabase` with low confidence).
38
+ - **v5 migration guide at `docs/migration/v4-to-v5.md`** — find/replace patterns for `package.json`, shell scripts, GitHub Actions yaml, Dockerfiles, and Claude Code skills.
39
+
40
+ ### Changed
41
+ - **Stack detector fallback:** plain Next.js with no Supabase signals now returns `generic`, not `nextjs-supabase (low confidence)`. Fixes the cold-start eval reviewer finding.
42
+ - **`PRESET_LABELS` in `setup.ts`:** adds `generic` entry.
43
+ - **Detector tests:** updated to assert the new `generic` fallback behavior.
44
+ - **`skills/guardrail.md`:** rewritten as a back-compat alias pointing at `skills/claude-autopilot.md`.
45
+ - **`bin/guardrail.js`:** emits a one-line deprecation notice on `stderr` on first invocation per terminal session, then forwards unchanged.
46
+
47
+ ### Deferred to later alphas
48
+ - **alpha.2:** full CLI verb restructure (`claude-autopilot {review,pr,triage,advanced,…}`), v4 compatibility golden-test matrix, superpowers peer-dep hard-fail in `doctor`.
49
+ - **alpha.3:** tombstone `@delegance/guardrail@5.0.0` publish, CI smoke tests for `npx guardrail` / `npx @delegance/guardrail` / global install / GitHub Actions parity, codemod script for find/replace migration.
50
+ - **5.0.0 GA:** after alpha.3 soaks against delegance-app for 2+ real feature pipelines.
51
+
52
+ ## [4.3.1] — 2026-04-24
53
+
54
+ ### Fixed (from external cold-start review)
55
+ - **`parseReviewOutput` silent failure** — regex required literal `### [CRITICAL]` brackets and returned zero findings when the LLM emitted `### CRITICAL`, `### **CRITICAL**`, or `### **[CRITICAL]**` (all common Llama/GPT variants). `src/adapters/review-engine/parse-output.ts` now accepts all four formats and logs a warning when raw output is non-empty but no findings parse, so format drift never silently hides bugs again.
56
+ - **Pipeline short-circuit skipped LLM review** — `src/core/pipeline/run.ts` returned early on static-rules `fail`, meaning the LLM never ran on the code that most needed it (IDOR, TOCTOU, CORS, off-by-one, rate-limit gaps typically ride alongside a static-flagged issue). New default: review runs even on static-fail. Legacy behavior restored via `pipeline.runReviewOnStaticFail: false` in config.
57
+ - **`doctor` / `preflight` ignored 3 of 5 LLM keys** — only checked `ANTHROPIC_API_KEY` and `OPENAI_API_KEY`, so users with `GROQ_API_KEY`/`GEMINI_API_KEY`/`GOOGLE_API_KEY` set saw "No LLM API key" right after `setup` reported "detected." New shared helper `src/core/detect/llm-key.ts` is the single source of truth used by setup, scan, run, and preflight.
58
+ - **Stack detector mislabeled plain Next.js as "Next.js + Supabase"** — now requires actual Supabase signals (`@supabase/supabase-js`, `@supabase/ssr`, `@supabase/auth-helpers-nextjs`, `supabase/config.toml`, or `SUPABASE_*` env vars). Vanilla Next.js still uses the `nextjs-supabase` preset as a fallback but the evidence string and setup output make the fallback explicit.
59
+ - **`--profile team` missing security rules** — added `package-lock-sync`, `ssrf`, `insecure-redirect` to match the README's advertised coverage.
60
+
61
+ ### Added
62
+ - `src/core/detect/llm-key.ts` — `detectLLMKey()`, `LLM_KEY_NAMES`, `LLM_KEY_HINTS`, `loadEnvFile()`.
63
+ - `GuardrailConfig.pipeline.runReviewOnStaticFail` / `runReviewOnTestFail` config flags.
64
+ - 6 parser format-variation tests covering all documented markdown variants plus the silent-drift warning path.
65
+
3
66
  ## [2.5.0] — 2026-04-22
4
67
 
5
68
  ### Added
package/README.md CHANGED
@@ -1,148 +1,196 @@
1
1
  # @delegance/claude-autopilot
2
2
 
3
- Automated code review pipeline for Claude Code. Runs static rules, an optional LLM review engine, and impact-aware snapshot regression tests — outputs SARIF for GitHub Code Scanning, inline PR annotations, and a pre-push hook for local enforcement.
4
-
5
- ## Install
3
+ **Autonomous development pipeline for Claude Code. Brainstorm → spec → plan → implement → migrate → validate → PR → review → merge — all from your terminal, on your codebase, with your test suite.**
6
4
 
7
5
  ```bash
8
- npm install @delegance/claude-autopilot
6
+ claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
7
+ # → writes spec (reviewed by Codex) → writes plan (reviewed by Codex) →
8
+ # → creates branch → implements with subagents → runs migrations →
9
+ # → runs full test + lint + type + security gate → opens PR →
10
+ # → dispatches multi-model review → auto-fixes bugbot findings →
11
+ # → ready to merge
9
12
  ```
10
13
 
11
- **Prerequisites:** Node 22+, [`gh` CLI](https://cli.github.com/) authenticated, [`claude` CLI](https://claude.ai/claude-code) (Claude Code).
12
-
13
- ## Claude Code Skill
14
-
15
- The package ships a ready-made Claude Code skill. After installing, copy it into your project:
14
+ *No hosted agent. No per-seat subscription. Runs locally on your machine, against your real repo, using your API keys. Every phase is a Claude Code skill you can intervene in, rewire, or run by itself.*
16
15
 
17
- ```bash
18
- mkdir -p .claude/skills
19
- cp node_modules/@delegance/claude-autopilot/skills/autopilot.md .claude/skills/
20
- ```
16
+ ---
21
17
 
22
- Claude will then know when and how to invoke `autopilot run`, interpret findings, and wire it into your dev pipeline automatically.
18
+ ## Why this vs the alternatives
23
19
 
24
- ## Quick Start
25
-
26
- ```bash
27
- # One command — auto-detects project type, writes config, installs hook, runs doctor
28
- npx autopilot setup
29
-
30
- # Run your first pipeline
31
- npx autopilot run
32
- ```
20
+ AI coding tools fall into three buckets. Here's where claude-autopilot sits.
33
21
 
34
- `setup` detects your stack (Go, Rails, FastAPI, T3, Next.js+Supabase), infers your test command, writes `autopilot.config.yaml`, installs the pre-push hook, then runs `doctor` to show anything still missing.
22
+ | Tool | Shape | Hosted? | Model lock-in | Pipeline structure | You can intervene mid-flow? |
23
+ |---|---|---|---|---|---|
24
+ | **Devin** (Cognition) | Autonomous agent | Yes (SaaS, $500/mo) | Cognition's stack | Opaque | No — watch a dashboard |
25
+ | **GitHub Copilot Workspace** | Spec → plan → PR | Yes | Copilot only | Fixed, non-extensible | Edit the plan, that's it |
26
+ | **Factory Droids** | Multi-agent workflow | Yes (per-seat) | Factory's stack | Fixed | Limited |
27
+ | **Cursor BugBot / Copilot Review / CodeRabbit** | Async PR reviewer | Yes | Vendor's model | Single phase (review only) | N/A — post-hoc only |
28
+ | **Aider / Cline / Cursor agent mode** | Interactive pair programming | Local | User's choice | None — single-shot prompts | Continuous |
29
+ | **OpenHands / SWE-agent** | Open-ended agent framework | Local | User's choice | None — agent decides | Rare, research-grade |
30
+ | **claude-autopilot** | **Opinionated local pipeline** | **Local** | **Any LLM (Claude / GPT / Gemini / Groq / Ollama)** | **Fixed but rewireable, skill-per-phase** | **Every phase. All state on disk.** |
35
31
 
36
- ## Commands
32
+ The architectural differences that matter most in practice:
37
33
 
38
- ### `autopilot setup`
34
+ 1. **Multi-model by design.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Different model for each role, swap any of them. The pipeline's phases are explicit contracts, not one opaque API call.
35
+ 2. **Your stack, not a sandbox.** It runs your `npm test`, your `prisma migrate`, your `gh pr create`, your `ruff check`. If it works in your terminal, it works in the pipeline.
36
+ 3. **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
37
+ 4. **Test-gated auto-revert as a first-class command.** `claude-autopilot fix --verify` patches a file, runs your full test suite, and reverts on failure. Built into the CLI, not a wrapper you write yourself.
39
38
 
40
- Zero-prompt setup. Auto-detects project type and configures everything.
39
+ ## 30-second quickstart
41
40
 
42
41
  ```bash
43
- npx autopilot setup # detect, write config, install hook
44
- npx autopilot setup --force # overwrite existing autopilot.config.yaml
45
- ```
42
+ # Install (alpha channel — use @alpha through the v5 alpha cycle)
43
+ npm install -g @delegance/claude-autopilot@alpha
46
44
 
47
- ### `autopilot doctor`
45
+ # One-shot setup — detects stack, writes config, installs skills, sets hooks
46
+ npx claude-autopilot@alpha init
48
47
 
49
- Checks prerequisites. Runs automatically after `setup` — also useful any time `run` behaves unexpectedly.
48
+ # Ship a feature end-to-end
49
+ claude-autopilot brainstorm "add rate limiting to the public API"
50
+ # Answer ~5 questions. Spec written. Codex reviews it. You approve.
51
+ # Claude walks the plan → implementation → migration → tests → PR → review.
52
+ # ~15-40 min for a typical feature.
50
53
 
51
- ```bash
52
- npx autopilot doctor
54
+ # Or run just the review layer on an existing PR
55
+ claude-autopilot run --pr 123
53
56
  ```
54
57
 
55
- Verifies: Node 22+, tsx, `gh` CLI auth, `claude` CLI, `OPENAI_API_KEY`, git user config, superpowers plugin. Exits 1 if blockers found. `autopilot preflight` is an alias.
58
+ ## The pipeline, phase by phase
56
59
 
57
- ### `autopilot run`
60
+ Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can invoke any phase directly (`/brainstorm`, `/plan`, `/migrate`, `/validate`) without running the full pipeline. You can also rewire the pipeline by editing the `autopilot` skill.
58
61
 
59
- Runs the pipeline on git-changed files.
60
-
61
- ```bash
62
- npx autopilot run # diff against HEAD~1
63
- npx autopilot run --base main # diff against main
64
- npx autopilot run --files src/foo.ts # explicit file list
65
- npx autopilot run --format sarif --output results.sarif
66
- npx autopilot run --dry-run
67
- ```
62
+ | Phase | Skill | What it does | Model role |
63
+ |---|---|---|---|
64
+ | **Brainstorm** | `brainstorming` | Turns a rough idea into an approved spec through guided questions | Claude (implementation model) |
65
+ | **Spec review** | `codex-review` | Second model critiques the spec before you commit to it | Codex / GPT-5 |
66
+ | **Plan** | `writing-plans` | Breaks spec into phased, checklist-shaped implementation plan | Claude |
67
+ | **Plan review** | `codex-review` | Second model critiques the plan before you execute it | Codex / GPT-5 |
68
+ | **Implement** | `subagent-driven-development` | Executes plan in a git worktree, one phase at a time, with per-phase tests | Claude |
69
+ | **Migrate** | `migrate` | Runs database migrations dev → QA → prod with per-env validation | Deterministic |
70
+ | **Validate** | `validate` | Static rules + tests + type check + security scan + LLM review | Any |
71
+ | **PR** | `commit-push-pr` | Opens the PR with auto-generated title, summary, and test plan | Claude |
72
+ | **Review** | `review-2pass` / `council` | Multi-model review of the diff (critical pass + informational pass) | Multiple |
73
+ | **Triage** | `bugbot` | Fetches automated reviewer findings, auto-fixes real bugs, dismisses false positives | Claude |
68
74
 
69
- ### `autopilot watch`
75
+ ## What's distinctive
70
76
 
71
- Re-runs on every file save.
77
+ Features that are hard or impossible to find in the competitive set:
72
78
 
73
- ```bash
74
- npx autopilot watch
75
- npx autopilot watch --debounce 500
76
- ```
79
+ - **Multi-model council review** — dispatch the same diff to 3+ models in parallel, synthesize agreement. Catches blind spots no single model sees.
80
+ - **Fix with test verification** — `claude-autopilot fix` runs your full test suite after every patch and reverts on failure. Safer than any tool that proposes fixes without running your tests.
81
+ - **Bug-bot auto-triage** — watches Cursor BugBot / Copilot comments on your PR, triages each (real bug vs false positive), auto-fixes confirmed bugs, dismisses noise with explanations.
82
+ - **Schema alignment rule** — ensures DB migrations, backend types, and frontend types stay in sync. Custom static rule, not something any competitor ships.
83
+ - **SARIF output + GitHub Code Scanning integration** — findings appear as annotations in the PR and in the Security tab.
77
84
 
78
- ### `autopilot autoregress`
85
+ ## Just the review layer
79
86
 
80
- Impact-aware snapshot regression tests. Only fires snapshots whose source modules were touched by the current branch.
87
+ If you don't want the full pipeline, the review subcommands are a strict superset of what `guardrail run` used to do: LLM code review over git-changed files, SARIF output, inline PR comments, auto-fix, baselines, per-finding triage, cost budgets. The legacy `guardrail` CLI remains aliased to the review subcommands through v5.x.
81
88
 
82
89
  ```bash
83
- npx autopilot autoregress run # impact-selected (default)
84
- npx autopilot autoregress run --all
85
- npx autopilot autoregress diff # show JSON diffs vs baselines
86
- npx autopilot autoregress update # overwrite baselines
87
- npx autopilot autoregress generate # LLM-generate snapshot tests for changed files
88
- npx autopilot autoregress generate --files src/foo.ts,src/bar.ts
90
+ claude-autopilot run # review changes since main
91
+ claude-autopilot run --inline-comments # post per-line PR annotations
92
+ claude-autopilot run --format sarif --output out.sarif
93
+ claude-autopilot fix --verify # LLM patch + test gate + revert on fail
89
94
  ```
90
95
 
91
- `generate` requires `OPENAI_API_KEY`.
92
-
93
- ### `autopilot hook`
96
+ > **Alpha.1 CLI note:** subcommands are flat (`run`, `scan`, `ci`, `fix`, `baseline`, `explain`, …). The grouped `claude-autopilot review <verb>` form lands in alpha.2 as an alias — flat forms continue to work indefinitely.
94
97
 
95
- Manages the `pre-push` git hook.
98
+ ## Install & requirements
96
99
 
97
100
  ```bash
98
- npx autopilot hook install # write .git/hooks/pre-push
99
- npx autopilot hook install --force # overwrite existing
100
- npx autopilot hook uninstall
101
- npx autopilot hook status
102
- ```
103
-
104
- Works in git worktrees.
101
+ # v5 alpha — current release channel
102
+ npm install -g @delegance/claude-autopilot@alpha
105
103
 
106
- ### `autopilot init`
104
+ # When 5.0.0 GA ships, the `latest` tag will advance and you can drop the @alpha:
105
+ # npm install -g @delegance/claude-autopilot
106
+ ```
107
107
 
108
- Interactive preset picker — for when you want to choose a preset manually instead of using `setup`.
108
+ - Node 22+
109
+ - `gh` CLI (for PR phases)
110
+ - One of: `ANTHROPIC_API_KEY` (recommended), `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `GROQ_API_KEY`
111
+ - Claude Code CLI (for skill-based phases — pipeline falls back to direct CLI invocations without it, but loses interactive checkpoints)
112
+ - `superpowers` Claude Code plugin (required for pipeline phases — `claude-autopilot doctor` prints a remediation hint if it is missing)
109
113
 
110
- ```bash
111
- npx autopilot init
112
- ```
114
+ ---
113
115
 
114
- Presets: `nextjs-supabase`, `t3`, `python-fastapi`, `rails-postgres`, `go`.
116
+ ---
115
117
 
116
- ## Config (`autopilot.config.yaml`)
118
+ ## Config (`guardrail.config.yaml`)
117
119
 
118
120
  ```yaml
119
121
  configVersion: 1
120
122
  reviewEngine:
121
- adapter: auto # auto-detects best available key at runtime
122
- testCommand: npm test
123
+ adapter: auto # auto-selects best available key at runtime
124
+ testCommand: npm test # null to disable; used by `fix` verified mode
125
+
123
126
  protectedPaths:
124
- - src/core/**
125
127
  - data/deltas/**
128
+ - .github/workflows/**
129
+
126
130
  staticRules:
127
- - hardcoded-secrets
131
+ - hardcoded-secrets # Anthropic, OpenAI, Stripe, GitHub, Supabase, Twilio, SendGrid
128
132
  - npm-audit
133
+ - sql-injection # template literals / concatenation in SQL context
134
+ - missing-auth # Next.js/pages API routes with POST/PUT/DELETE, no auth pattern
135
+ - ssrf # HTTP calls with user-controlled URL
136
+ - insecure-redirect # redirect() with user-controlled target
137
+ - console-log
138
+ - todo-fixme
139
+ - large-file
140
+ - missing-tests
141
+ - package-lock-sync
142
+ - brand-tokens # opt-in: requires brand: block below
143
+
144
+ # Brand token enforcement (opt-in — omit to disable)
145
+ brand:
146
+ colorsFrom: tailwind.config.ts # auto-extract theme.colors as canonical palette
147
+ colors: # explicit palette entries (merged with colorsFrom)
148
+ - '#f97316'
149
+ - '#1a1f3a'
150
+ fonts:
151
+ - 'Inter'
152
+ - 'Geist'
153
+
154
+ policy:
155
+ failOn: critical # critical (default) | warning | note | none
156
+ newOnly: false # true = suppress findings present in .guardrail-baseline.json
157
+
158
+ cost:
159
+ maxPerRun: 0.50 # abort review phase if spend exceeds $0.50
160
+ estimateBeforeRun: false # print token estimate before LLM calls
161
+
162
+ ignore:
163
+ - src/legacy/** # suppress all findings in path
164
+ - { rule: console-log, path: scripts/** } # suppress specific rule in path
165
+
166
+ chunking:
167
+ rateLimitBackoff: exp # exp (default) | linear | none
168
+ parallelism: 3
129
169
  ```
130
170
 
131
- Full schema and preset defaults: `presets/<name>/autopilot.config.yaml`.
171
+ ### Setup Profiles
172
+
173
+ `guardrail setup --profile <name>` overlays a pre-baked rule + policy configuration on top of the detected stack preset:
174
+
175
+ | Profile | Rules | `failOn` | Best for |
176
+ |---|---|---|---|
177
+ | `security-strict` | All security rules + hygiene | `warning` | Security audits, regulated environments |
178
+ | `team` | Core security + hygiene | `critical` | Standard CI/CD on shared branches |
179
+ | `solo` | Hygiene only | `critical` | Solo projects, low-noise baseline |
132
180
 
133
181
  ### Review Engine Adapters
134
182
 
135
183
  | Adapter | Key required | Notes |
136
184
  |---|---|---|
137
- | `auto` | any below | Auto-selects best available (recommended) |
138
- | `claude` | `ANTHROPIC_API_KEY` | Opus 4.7 default |
185
+ | `auto` | any | Auto-selects best available (recommended) |
186
+ | `claude` | `ANTHROPIC_API_KEY` | Claude Opus 4.7 |
139
187
  | `gemini` | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Gemini 2.5 Pro, 1M context |
140
188
  | `codex` | `OPENAI_API_KEY` | GPT-5 Codex |
141
189
  | `openai-compatible` | configurable | Groq, Ollama, Together AI, etc. |
142
190
 
143
191
  `auto` priority: Anthropic → Gemini → OpenAI → Groq.
144
192
 
145
- **Groq example:**
193
+ **Groq (fast/free tier):**
146
194
  ```yaml
147
195
  reviewEngine:
148
196
  adapter: openai-compatible
@@ -161,44 +209,57 @@ reviewEngine:
161
209
  baseUrl: http://localhost:11434/v1
162
210
  ```
163
211
 
212
+ ---
213
+
164
214
  ## GitHub Actions
165
215
 
166
216
  ```yaml
167
217
  - uses: axledbetter/claude-autopilot/.github/actions/ci@main
168
218
  with:
169
- openai-api-key: ${{ secrets.OPENAI_API_KEY }}
219
+ anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
220
+ # Optional:
221
+ # post-comments: 'true'
222
+ # inline-comments: 'false'
223
+ # base-ref: 'main'
224
+ # sarif-output: 'guardrail.sarif'
225
+ # version: 'latest'
170
226
  ```
171
227
 
172
228
  Runs the pipeline, uploads SARIF to GitHub Code Scanning, annotates the PR diff inline.
173
229
 
174
- ## SARIF Output
230
+ ---
175
231
 
176
- ```bash
177
- npx autopilot run --format sarif --output autopilot.sarif
178
- ```
232
+ ## Typical Team Workflow
179
233
 
180
- Compatible with `github/codeql-action/upload-sarif@v3`.
234
+ ```bash
235
+ # 1. First run — establish a baseline so CI only fails on new issues
236
+ npx guardrail run --base main
237
+ npx guardrail baseline create --note "post-v2 audit"
238
+ git add .guardrail-baseline.json && git commit -m "chore: guardrail baseline"
181
239
 
182
- ## Snapshot Regression Testing
240
+ # 2. CI — only new findings block the build
241
+ npx guardrail ci --new-only --fail-on critical
183
242
 
184
- After each feature lands:
243
+ # 3. Triage false positives once, never see them again
244
+ npx guardrail triage sql-injection:src/db/raw.ts:47 false-positive --reason "internal admin only"
245
+ git add .guardrail-triage.json && git commit -m "chore: triage false positive"
185
246
 
186
- ```bash
187
- npx autopilot autoregress generate # generate baselines for changed files
247
+ # 4. Auto-fix and verify
248
+ npx guardrail fix --yes # applies patches + runs tests, reverts on failure
188
249
  ```
189
250
 
190
- Future PRs automatically fail if covered behavior diverges. The impact selector uses `git merge-base` diff + one-hop import graph expansion — only relevant snapshots run, keeping CI fast.
251
+ ---
191
252
 
192
- High-impact paths (`src/core/pipeline/**`, `src/adapters/**`, `src/core/findings/**`, `src/core/config/**`) always trigger a full run.
253
+ ## Interpreting Results
193
254
 
194
- ## Public API
255
+ **Exit 0** — pass or warnings only (at current `policy.failOn` threshold). Safe to merge.
256
+ **Exit 1** — findings at or above threshold. Fix before merging.
195
257
 
196
- ```typescript
197
- import type { Finding, RunResult, AutopilotConfig } from '@delegance/claude-autopilot';
198
- import { normalizeSnapshot } from '@delegance/claude-autopilot';
199
- ```
258
+ Findings: `critical` blocks merge · `warning` should fix · `note` informational.
200
259
 
201
- Types are available for TypeScript consumers. Runtime import requires a tsx-aware bundler (the package ships TypeScript source).
260
+ PR comments show: status badge, phase table, critical/warning findings with inline links, cost footer. Re-runs update the existing comment in place.
261
+
262
+ ---
202
263
 
203
264
  ## Architecture
204
265
 
@@ -206,11 +267,13 @@ Four pluggable adapter points:
206
267
 
207
268
  | Point | Built-in | Purpose |
208
269
  |---|---|---|
209
- | `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM code review |
210
- | `vcs-host` | `github` | PR comments + SARIF upload |
211
- | `migration-runner` | `supabase` | DB migration execution |
270
+ | `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM review |
271
+ | `vcs-host` | `github` | PR comments + SARIF |
272
+ | `migration-runner` | `supabase` | DB migrations |
212
273
  | `review-bot-parser` | `cursor` | Parse review bot comments |
213
274
 
275
+ **Monorepo:** Auto-detects npm/yarn/pnpm workspaces, Turborepo, and Nx.
276
+
214
277
  ## License
215
278
 
216
279
  MIT
@@ -0,0 +1,77 @@
1
+ // Shared launcher for both `claude-autopilot` and `guardrail` bins.
2
+ // Imported, not a bin itself. Resolves tsx, spawns src/cli/index.ts with
3
+ // the caller's argv, forwards stdio, exits with the child's status (or 1 if it has none).
4
+
5
+ import { fileURLToPath } from 'node:url';
6
+ import { spawnSync } from 'node:child_process';
7
+ import * as fs from 'node:fs';
8
+ import * as path from 'node:path';
9
+ import * as os from 'node:os';
10
+
11
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
12
+ const ENTRYPOINT = path.resolve(__dirname, '..', 'src', 'cli', 'index.ts');
13
+
14
+ function findTsx() { // Returns the first tsx path that exists on disk, or the bare command name 'tsx'.
15
+ const own = path.resolve(__dirname, '..', 'node_modules', '.bin', 'tsx'); // node_modules/.bin one level above this file's directory
16
+ if (fs.existsSync(own)) return own;
17
+ const consumer = path.resolve(__dirname, '..', '..', '..', '.bin', 'tsx'); // three levels up; presumably the consuming project's node_modules/.bin for a scoped install — TODO confirm for other layouts
18
+ if (fs.existsSync(consumer)) return consumer;
19
+ return 'tsx'; // neither candidate exists: hand back the plain name and leave resolution to the spawn call
20
+ }
21
+
22
+ // Tracks per-terminal-session whether the deprecation notice has been shown.
23
+ // Uses a temp file keyed by parent PID + whether stderr is a TTY so parallel CI jobs don't
24
+ // collide. Falls back to always-emit if the stamp can't be written.
25
+ const DEPRECATION_STAMP_DIR = path.join(os.tmpdir(), 'claude-autopilot');
26
+ function hasShownDeprecation() { // Returns true when a stamp for this key already exists; otherwise writes one and returns false.
27
+ try {
28
+ if (!fs.existsSync(DEPRECATION_STAMP_DIR)) {
29
+ fs.mkdirSync(DEPRECATION_STAMP_DIR, { recursive: true });
30
+ }
31
+ const key = `${process.ppid}-${process.stderr.isTTY ? 'tty' : 'pipe'}.stamp`; // file name is "<ppid>-tty.stamp" or "<ppid>-pipe.stamp"
32
+ const stampPath = path.join(DEPRECATION_STAMP_DIR, key);
33
+ if (fs.existsSync(stampPath)) return true; // stamp present: notice was already shown for this key
34
+ fs.writeFileSync(stampPath, String(Date.now())); // stamp content is the creation time in ms
35
+ // Best-effort cleanup of stamps older than 1h to keep tmpdir tidy. Only reached when a new stamp was just written.
36
+ const cutoff = Date.now() - 60 * 60 * 1000;
37
+ for (const f of fs.readdirSync(DEPRECATION_STAMP_DIR)) {
38
+ const p = path.join(DEPRECATION_STAMP_DIR, f);
39
+ try {
40
+ if (fs.statSync(p).mtimeMs < cutoff) fs.unlinkSync(p);
41
+ } catch { /* ignore: a failure on one entry must not stop the sweep */ }
42
+ }
43
+ return false;
44
+ } catch {
45
+ return false; // any filesystem failure above reports "not shown", so the caller emits the notice
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Decide whether the deprecation notice should be printed; returns a boolean. Checked in order:
51
+ * CLAUDE_AUTOPILOT_DEPRECATION=never → never emit (CI/automation)
52
+ * CLAUDE_AUTOPILOT_DEPRECATION=always → always emit (deterministic testing)
53
+ * otherwise → once per terminal session (stamp-based)
54
+ */
55
+ function shouldEmitDeprecation() {
56
+ const override = process.env.CLAUDE_AUTOPILOT_DEPRECATION;
57
+ if (override === 'never') return false;
58
+ if (override === 'always') return true;
59
+ return !hasShownDeprecation(); // side effect: this call may create the stamp file; any other override value also lands here
60
+ }
61
+
62
+ /**
63
+ * Run the CLI entrypoint under tsx, passing `process.argv.slice(2)` through verbatim, then exit this process.
64
+ * @param {{ name: 'claude-autopilot' | 'guardrail' }} opts - bin name; only 'guardrail' can trigger the deprecation notice on stderr
65
+ */
66
+ export function launch(opts) {
67
+ if (opts.name === 'guardrail' && shouldEmitDeprecation()) { // shouldEmitDeprecation() is not evaluated for other names
68
+ process.stderr.write(
69
+ '\x1b[33m[deprecated]\x1b[0m `guardrail` CLI is renamed to `claude-autopilot`. ' +
70
+ 'The `guardrail` alias works through v5.x and will be removed in v6. ' +
71
+ 'Migration guide: https://github.com/axledbetter/claude-autopilot/blob/master/docs/migration/v4-to-v5.md\n' +
72
+ 'Silence: set CLAUDE_AUTOPILOT_DEPRECATION=never\n',
73
+ );
74
+ }
75
+ const result = spawnSync(findTsx(), [ENTRYPOINT, ...process.argv.slice(2)], { stdio: 'inherit' }); // synchronous: returns after the child exits; child shares this process's stdio
76
+ process.exit(result.status ?? 1); // NOTE(review): status is null when the child was killed by a signal or could not be spawned (result.error is never inspected) — both cases exit 1 with no message
77
+ }
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import { launch } from './_launcher.js';
3
+ launch({ name: 'claude-autopilot' }); // primary bin name: launch() prints no deprecation notice for it
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import { launch } from './_launcher.js';
3
+ launch({ name: 'guardrail' }); // legacy alias: launch() may print the deprecation notice to stderr before running the CLI
package/package.json CHANGED
@@ -1,15 +1,18 @@
1
1
  {
2
2
  "name": "@delegance/claude-autopilot",
3
- "version": "2.5.0",
3
+ "version": "5.0.0-alpha.2",
4
4
  "type": "module",
5
- "description": "Claude Code automation pipeline: spec → plan → implement → validate → PR",
5
+ "description": "Autonomous development pipeline for Claude Code: brainstorm spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
6
6
  "keywords": [
7
- "claude",
7
+ "claude-autopilot",
8
8
  "autopilot",
9
- "ai",
10
- "pipeline",
9
+ "claude-code",
10
+ "ai-agent",
11
11
  "code-review",
12
- "cli"
12
+ "llm",
13
+ "sarif",
14
+ "cli",
15
+ "pipeline"
13
16
  ],
14
17
  "license": "MIT",
15
18
  "repository": {
@@ -20,7 +23,8 @@
20
23
  "node": ">=22.0.0"
21
24
  },
22
25
  "bin": {
23
- "autopilot": "bin/autopilot.js"
26
+ "claude-autopilot": "bin/claude-autopilot.js",
27
+ "guardrail": "bin/guardrail.js"
24
28
  },
25
29
  "types": "./src/index.ts",
26
30
  "exports": {
@@ -35,6 +39,7 @@
35
39
  "skills/",
36
40
  "scripts/test-runner.mjs",
37
41
  "scripts/autoregress.ts",
42
+ "scripts/snapshots/",
38
43
  "tests/snapshots/",
39
44
  "CHANGELOG.md"
40
45
  ],
@@ -47,6 +52,7 @@
47
52
  "dependencies": {
48
53
  "@anthropic-ai/sdk": "^0.90.0",
49
54
  "@google/generative-ai": "^0.24.1",
55
+ "@modelcontextprotocol/sdk": "^1.29.0",
50
56
  "ajv": "^8",
51
57
  "dotenv": ">=16",
52
58
  "js-yaml": "^4",
@@ -56,7 +62,15 @@
56
62
  },
57
63
  "devDependencies": {
58
64
  "@types/js-yaml": "^4",
59
- "@types/node": "^22",
60
- "typescript": "^5"
65
+ "@types/node": "^25",
66
+ "typescript": "^6"
67
+ },
68
+ "peerDependencies": {
69
+ "superpowers": "*"
70
+ },
71
+ "peerDependenciesMeta": {
72
+ "superpowers": {
73
+ "optional": true
74
+ }
61
75
  }
62
76
  }
@@ -0,0 +1,35 @@
1
+ configVersion: 1
2
+ reviewEngine: { adapter: auto }
3
+ vcsHost: { adapter: github }
4
+ reviewBot: { adapter: cursor }
5
+ # No migrationRunner — the migrate phase will no-op with a notice pointing
6
+ # to `.claude-autopilot/stack.yaml` for users who want to wire one up.
7
+ protectedPaths:
8
+ - "**/auth/**"
9
+ - "**/payment/**"
10
+ - "**/encryption/**"
11
+ - "**/secret/**"
12
+ - "**/keys/**"
13
+ staticRules:
14
+ - hardcoded-secrets
15
+ - npm-audit
16
+ - package-lock-sync
17
+ - sql-injection
18
+ - missing-auth
19
+ - ssrf
20
+ - insecure-redirect
21
+ policy:
22
+ failOn: critical
23
+ newOnly: false
24
+ thresholds:
25
+ bugbotAutoFix: 85
26
+ bugbotProposePatch: 60
27
+ maxValidateRetries: 3
28
+ reviewStrategy: auto
29
+ chunking:
30
+ smallTierMaxTokens: 8000
31
+ partialReviewTokens: 60000
32
+ perFileMaxTokens: 32000
33
+ pipeline:
34
+ runReviewOnStaticFail: true
35
+ runReviewOnTestFail: false