@delegance/claude-autopilot 2.5.0 → 5.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/CHANGELOG.md +40 -0
  2. package/README.md +164 -106
  3. package/bin/_launcher.js +77 -0
  4. package/bin/claude-autopilot.js +3 -0
  5. package/bin/guardrail.js +3 -0
  6. package/package.json +15 -9
  7. package/presets/generic/guardrail.config.yaml +35 -0
  8. package/presets/generic/stack.md +40 -0
  9. package/presets/nextjs-supabase/{autopilot.config.yaml → guardrail.config.yaml} +7 -0
  10. package/scripts/autoregress.ts +27 -11
  11. package/skills/autopilot/SKILL.md +170 -0
  12. package/skills/claude-autopilot.md +80 -0
  13. package/skills/guardrail.md +39 -0
  14. package/skills/migrate/SKILL.md +83 -0
  15. package/src/adapters/council/claude.ts +41 -0
  16. package/src/adapters/council/openai.ts +40 -0
  17. package/src/adapters/council/types.ts +7 -0
  18. package/src/adapters/loader.ts +7 -7
  19. package/src/adapters/review-engine/auto.ts +2 -2
  20. package/src/adapters/review-engine/claude.ts +9 -11
  21. package/src/adapters/review-engine/codex.ts +9 -11
  22. package/src/adapters/review-engine/gemini.ts +9 -11
  23. package/src/adapters/review-engine/openai-compatible.ts +10 -12
  24. package/src/adapters/review-engine/parse-output.ts +32 -6
  25. package/src/adapters/review-engine/prompt-builder.ts +19 -0
  26. package/src/adapters/review-engine/types.ts +1 -1
  27. package/src/adapters/vcs-host/commit-status.ts +39 -0
  28. package/src/adapters/vcs-host/github.ts +2 -2
  29. package/src/cli/baseline.ts +125 -0
  30. package/src/cli/ci.ts +11 -8
  31. package/src/cli/costs.ts +2 -2
  32. package/src/cli/council.ts +96 -0
  33. package/src/cli/detector.ts +21 -5
  34. package/src/cli/explain.ts +197 -0
  35. package/src/cli/fix.ts +173 -111
  36. package/src/cli/hook.ts +72 -27
  37. package/src/cli/ignore-helper.ts +116 -0
  38. package/src/cli/index.ts +272 -31
  39. package/src/cli/init.ts +12 -12
  40. package/src/cli/lsp.ts +200 -0
  41. package/src/cli/mcp.ts +206 -0
  42. package/src/cli/pr-comment.ts +5 -5
  43. package/src/cli/pr-desc.ts +168 -0
  44. package/src/cli/pr-review-comments.ts +3 -3
  45. package/src/cli/pr.ts +76 -0
  46. package/src/cli/preflight.ts +15 -32
  47. package/src/cli/report.ts +186 -0
  48. package/src/cli/run.ts +140 -36
  49. package/src/cli/scan.ts +233 -0
  50. package/src/cli/setup.ts +121 -15
  51. package/src/cli/test-gen.ts +125 -0
  52. package/src/cli/triage.ts +137 -0
  53. package/src/cli/watch.ts +52 -31
  54. package/src/cli/worker.ts +109 -0
  55. package/src/core/cache/review-cache.ts +2 -2
  56. package/src/core/chunking/index.ts +2 -2
  57. package/src/core/config/loader.ts +10 -10
  58. package/src/core/config/preset-resolver.ts +6 -6
  59. package/src/core/config/schema.ts +103 -2
  60. package/src/core/config/types.ts +57 -2
  61. package/src/core/council/config.ts +71 -0
  62. package/src/core/council/context.ts +17 -0
  63. package/src/core/council/runner.ts +83 -0
  64. package/src/core/council/types.ts +45 -0
  65. package/src/core/detect/llm-key.ts +89 -0
  66. package/src/core/detect/workspaces.ts +103 -0
  67. package/src/core/errors.ts +4 -4
  68. package/src/core/fix/generator.ts +149 -0
  69. package/src/core/ignore/index.ts +4 -4
  70. package/src/core/mcp/concurrency.ts +16 -0
  71. package/src/core/mcp/handlers/fix-finding.ts +126 -0
  72. package/src/core/mcp/handlers/get-capabilities.ts +62 -0
  73. package/src/core/mcp/handlers/get-findings.ts +36 -0
  74. package/src/core/mcp/handlers/review-diff.ts +65 -0
  75. package/src/core/mcp/handlers/scan-files.ts +65 -0
  76. package/src/core/mcp/handlers/validate-fix.ts +41 -0
  77. package/src/core/mcp/run-store.ts +85 -0
  78. package/src/core/mcp/workspace.ts +35 -0
  79. package/src/core/persist/baseline.ts +112 -0
  80. package/src/core/persist/cost-log.ts +1 -1
  81. package/src/core/persist/findings-cache.ts +1 -1
  82. package/src/core/persist/triage.ts +112 -0
  83. package/src/core/phases/static-rules.ts +18 -5
  84. package/src/core/pipeline/review-phase.ts +65 -26
  85. package/src/core/pipeline/run.ts +42 -10
  86. package/src/core/runtime/lock.ts +2 -2
  87. package/src/core/runtime/state.ts +2 -2
  88. package/src/core/schema-alignment/detector.ts +59 -0
  89. package/src/core/schema-alignment/extractor/index.ts +24 -0
  90. package/src/core/schema-alignment/extractor/prisma.ts +21 -0
  91. package/src/core/schema-alignment/extractor/sql.ts +99 -0
  92. package/src/core/schema-alignment/llm-check.ts +91 -0
  93. package/src/core/schema-alignment/scanner.ts +107 -0
  94. package/src/core/schema-alignment/types.ts +43 -0
  95. package/src/core/shell.ts +3 -3
  96. package/src/core/static-rules/registry.ts +17 -8
  97. package/src/core/static-rules/rules/brand-tokens.ts +145 -0
  98. package/src/core/static-rules/rules/hardcoded-secrets.ts +27 -1
  99. package/src/core/static-rules/rules/insecure-redirect.ts +67 -0
  100. package/src/core/static-rules/rules/missing-auth.ts +70 -0
  101. package/src/core/static-rules/rules/schema-alignment.ts +132 -0
  102. package/src/core/static-rules/rules/sql-injection.ts +71 -0
  103. package/src/core/static-rules/rules/ssrf.ts +63 -0
  104. package/src/core/static-rules/tailwind-extractor.ts +38 -0
  105. package/src/core/test-gen/coverage-analyzer.ts +93 -0
  106. package/src/core/test-gen/framework-detector.ts +21 -0
  107. package/src/core/test-gen/test-writer.ts +33 -0
  108. package/src/core/ui/design-context-loader.ts +87 -0
  109. package/src/core/worker/client.ts +46 -0
  110. package/src/core/worker/lockfile.ts +38 -0
  111. package/src/core/worker/server.ts +81 -0
  112. package/src/formatters/junit.ts +52 -0
  113. package/src/formatters/sarif.ts +2 -2
  114. package/src/index.ts +1 -2
  115. package/tests/snapshots/baselines/src-formatters-sarif.json +4 -4
  116. package/tests/snapshots/index.json +3 -3
  117. package/tests/snapshots/src-formatters-sarif.snap.ts +1 -1
  118. package/tests/snapshots/src-snapshots-impact-selector.snap.ts +3 -3
  119. package/tests/snapshots/src-snapshots-import-scanner.snap.ts +3 -3
  120. package/tests/snapshots/src-snapshots-serializer.snap.ts +2 -2
  121. package/bin/autopilot.js +0 -20
  122. package/skills/autopilot.md +0 -157
  123. /package/presets/go/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  124. /package/presets/python-fastapi/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  125. /package/presets/rails-postgres/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  126. /package/presets/t3/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  127. /package/{src → scripts}/snapshots/impact-selector.ts +0 -0
  128. /package/{src → scripts}/snapshots/import-scanner.ts +0 -0
  129. /package/{src → scripts}/snapshots/serializer.ts +0 -0
package/CHANGELOG.md CHANGED
@@ -1,5 +1,45 @@
1
1
  # Changelog
2
2
 
3
+ ## [5.0.0-alpha.1] — 2026-04-24
4
+
5
+ **Package renamed: `@delegance/guardrail` → `@delegance/claude-autopilot`.**
6
+
7
+ The v4 product sold itself as "LLM code review." The real product is an end-to-end autonomous development pipeline built on Claude Code skills — brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. This alpha corrects the identity mismatch without breaking any v4 usage.
8
+
9
+ Every v4 invocation continues to work through v5.x via the preserved `guardrail` CLI alias. Migration guide: `docs/migration/v4-to-v5.md`.
10
+
11
+ ### Added
12
+ - **`claude-autopilot` CLI binary** — primary entrypoint (`bin/claude-autopilot.js`), co-installed with `guardrail`.
13
+ - **Pipeline skills bundled in the tarball** — `skills/claude-autopilot.md` (agent-loop spec), `skills/autopilot/`, `skills/migrate/`. v4.3.1 shipped only `skills/guardrail.md`; the pipeline skills existed only in-repo and weren't distributed.
14
+ - **`generic` preset** — no DB migration runner, uses `npm test` / `npm run typecheck` / `npm run lint` where present. Picked by `detectProject()` as the fallback when no stack signals are found (replaces the v4 behavior of claiming `nextjs-supabase` with low confidence).
15
+ - **v5 migration guide at `docs/migration/v4-to-v5.md`** — find/replace patterns for `package.json`, shell scripts, GitHub Actions yaml, Dockerfiles, and Claude Code skills.
16
+
17
+ ### Changed
18
+ - **Stack detector fallback:** plain Next.js with no Supabase signals now returns `generic`, not `nextjs-supabase (low confidence)`. Fixes the cold-start eval reviewer finding.
19
+ - **`PRESET_LABELS` in `setup.ts`:** adds `generic` entry.
20
+ - **Detector tests:** updated to assert the new `generic` fallback behavior.
21
+ - **`skills/guardrail.md`:** rewritten as a back-compat alias pointing at `skills/claude-autopilot.md`.
22
+ - **`bin/guardrail.js`:** emits a one-line deprecation notice on `stderr` on first invocation per terminal session, then forwards unchanged.
23
+
24
+ ### Deferred to later alphas
25
+ - **alpha.2:** full CLI verb restructure (`claude-autopilot {review,pr,triage,advanced,…}`), v4 compatibility golden-test matrix, superpowers peer-dep hard-fail in `doctor`.
26
+ - **alpha.3:** tombstone `@delegance/guardrail@5.0.0` publish, CI smoke tests for `npx guardrail` / `npx @delegance/guardrail` / global install / GitHub Actions parity, codemod script for find/replace migration.
27
+ - **5.0.0 GA:** after alpha.3 soaks against delegance-app for 2+ real feature pipelines.
28
+
29
+ ## [4.3.1] — 2026-04-24
30
+
31
+ ### Fixed (from external cold-start review)
32
+ - **`parseReviewOutput` silent failure** — regex required literal `### [CRITICAL]` brackets and returned zero findings when the LLM emitted `### CRITICAL`, `### **CRITICAL**`, or `### **[CRITICAL]**` (all common Llama/GPT variants). `src/adapters/review-engine/parse-output.ts` now accepts all four formats and logs a warning when raw output is non-empty but no findings parse, so format drift never silently hides bugs again.
33
+ - **Pipeline short-circuit skipped LLM review** — `src/core/pipeline/run.ts` returned early on static-rules `fail`, meaning the LLM never ran on the code that most needed it (IDOR, TOCTOU, CORS, off-by-one, rate-limit gaps typically ride alongside a static-flagged issue). New default: review runs even on static-fail. Legacy behavior restored via `pipeline.runReviewOnStaticFail: false` in config.
34
+ - **`doctor` / `preflight` ignored 3 of 5 LLM keys** — only checked `ANTHROPIC_API_KEY` and `OPENAI_API_KEY`, so users with `GROQ_API_KEY`/`GEMINI_API_KEY`/`GOOGLE_API_KEY` set saw "No LLM API key" right after `setup` reported "detected." New shared helper `src/core/detect/llm-key.ts` is the single source of truth used by setup, scan, run, and preflight.
35
+ - **Stack detector mislabeled plain Next.js as "Next.js + Supabase"** — now requires actual Supabase signals (`@supabase/supabase-js`, `@supabase/ssr`, `@supabase/auth-helpers-nextjs`, `supabase/config.toml`, or `SUPABASE_*` env vars). Vanilla Next.js still uses the `nextjs-supabase` preset as a fallback but the evidence string and setup output make the fallback explicit.
36
+ - **`--profile team` missing security rules** — added `package-lock-sync`, `ssrf`, `insecure-redirect` to match the README's advertised coverage.
37
+
38
+ ### Added
39
+ - `src/core/detect/llm-key.ts` — `detectLLMKey()`, `LLM_KEY_NAMES`, `LLM_KEY_HINTS`, `loadEnvFile()`.
40
+ - `GuardrailConfig.pipeline.runReviewOnStaticFail` / `runReviewOnTestFail` config flags.
41
+ - 6 parser format-variation tests covering all documented markdown variants plus the silent-drift warning path.
42
+
3
43
  ## [2.5.0] — 2026-04-22
4
44
 
5
45
  ### Added
package/README.md CHANGED
@@ -1,148 +1,191 @@
1
1
  # @delegance/claude-autopilot
2
2
 
3
- Automated code review pipeline for Claude Code. Runs static rules, an optional LLM review engine, and impact-aware snapshot regression tests — outputs SARIF for GitHub Code Scanning, inline PR annotations, and a pre-push hook for local enforcement.
4
-
5
- ## Install
3
+ **Autonomous development pipeline for Claude Code. Brainstorm → spec → plan → implement → migrate → validate → PR → review → merge — all from your terminal, on your codebase, with your test suite.**
6
4
 
7
5
  ```bash
8
- npm install @delegance/claude-autopilot
6
+ claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
7
+ # → writes spec (reviewed by Codex) → writes plan (reviewed by Codex) →
8
+ # → creates branch → implements with subagents → runs migrations →
9
+ # → runs full test + lint + type + security gate → opens PR →
10
+ # → dispatches multi-model review → auto-fixes bugbot findings →
11
+ # → ready to merge
9
12
  ```
10
13
 
11
- **Prerequisites:** Node 22+, [`gh` CLI](https://cli.github.com/) authenticated, [`claude` CLI](https://claude.ai/claude-code) (Claude Code).
12
-
13
- ## Claude Code Skill
14
-
15
- The package ships a ready-made Claude Code skill. After installing, copy it into your project:
14
+ *No hosted agent. No per-seat subscription. Runs locally on your machine, against your real repo, using your API keys. Every phase is a Claude Code skill you can intervene in, rewire, or run by itself.*
16
15
 
17
- ```bash
18
- mkdir -p .claude/skills
19
- cp node_modules/@delegance/claude-autopilot/skills/autopilot.md .claude/skills/
20
- ```
16
+ ---
21
17
 
22
- Claude will then know when and how to invoke `autopilot run`, interpret findings, and wire it into your dev pipeline automatically.
18
+ ## Why this vs the alternatives
23
19
 
24
- ## Quick Start
25
-
26
- ```bash
27
- # One command — auto-detects project type, writes config, installs hook, runs doctor
28
- npx autopilot setup
29
-
30
- # Run your first pipeline
31
- npx autopilot run
32
- ```
20
+ AI coding tools fall into three buckets. Here's where claude-autopilot sits.
33
21
 
34
- `setup` detects your stack (Go, Rails, FastAPI, T3, Next.js+Supabase), infers your test command, writes `autopilot.config.yaml`, installs the pre-push hook, then runs `doctor` to show anything still missing.
22
+ | Tool | Shape | Hosted? | Model lock-in | Pipeline structure | You can intervene mid-flow? |
23
+ |---|---|---|---|---|---|
24
+ | **Devin** (Cognition) | Autonomous agent | Yes (SaaS, $500/mo) | Cognition's stack | Opaque | No — watch a dashboard |
25
+ | **GitHub Copilot Workspace** | Spec → plan → PR | Yes | Copilot only | Fixed, non-extensible | Edit the plan, that's it |
26
+ | **Factory Droids** | Multi-agent workflow | Yes (per-seat) | Factory's stack | Fixed | Limited |
27
+ | **Cursor BugBot / Copilot Review / CodeRabbit** | Async PR reviewer | Yes | Vendor's model | Single phase (review only) | N/A — post-hoc only |
28
+ | **Aider / Cline / Cursor agent mode** | Interactive pair programming | Local | User's choice | None — single-shot prompts | Continuous |
29
+ | **OpenHands / SWE-agent** | Open-ended agent framework | Local | User's choice | None — agent decides | Rare, research-grade |
30
+ | **claude-autopilot** | **Opinionated local pipeline** | **Local** | **Any LLM (Claude / GPT / Gemini / Groq / Ollama)** | **Fixed but rewireable, skill-per-phase** | **Every phase. All state on disk.** |
35
31
 
36
- ## Commands
32
+ The architectural differences that matter most in practice:
37
33
 
38
- ### `autopilot setup`
34
+ 1. **Multi-model by design.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Different model for each role, swap any of them. The pipeline's phases are explicit contracts, not one opaque API call.
35
+ 2. **Your stack, not a sandbox.** It runs your `npm test`, your `prisma migrate`, your `gh pr create`, your `ruff check`. If it works in your terminal, it works in the pipeline.
36
+ 3. **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
37
+ 4. **Test-gated auto-revert as a first-class command.** `claude-autopilot fix --verify` patches a file, runs your full test suite, and reverts on failure. Built into the CLI, not a wrapper you write yourself.
39
38
 
40
- Zero-prompt setup. Auto-detects project type and configures everything.
39
+ ## 30-second quickstart
41
40
 
42
41
  ```bash
43
- npx autopilot setup # detect, write config, install hook
44
- npx autopilot setup --force # overwrite existing autopilot.config.yaml
45
- ```
42
+ # Install
43
+ npm install -g @delegance/claude-autopilot
46
44
 
47
- ### `autopilot doctor`
45
+ # One-shot setup — detects stack, writes config, installs skills, sets hooks
46
+ npx claude-autopilot init
48
47
 
49
- Checks prerequisites. Runs automatically after `setup` — also useful any time `run` behaves unexpectedly.
48
+ # Ship a feature end-to-end
49
+ claude-autopilot brainstorm "add rate limiting to the public API"
50
+ # Answer ~5 questions. Spec written. Codex reviews it. You approve.
51
+ # Claude walks the plan → implementation → migration → tests → PR → review.
52
+ # ~15-40 min for a typical feature.
50
53
 
51
- ```bash
52
- npx autopilot doctor
54
+ # Or run just the review layer on an existing PR
55
+ claude-autopilot run --pr 123
53
56
  ```
54
57
 
55
- Verifies: Node 22+, tsx, `gh` CLI auth, `claude` CLI, `OPENAI_API_KEY`, git user config, superpowers plugin. Exits 1 if blockers found. `autopilot preflight` is an alias.
58
+ ## The pipeline, phase by phase
56
59
 
57
- ### `autopilot run`
60
+ Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can invoke any phase directly (`/brainstorm`, `/plan`, `/migrate`, `/validate`) without running the full pipeline. You can also rewire the pipeline by editing the `autopilot` skill.
58
61
 
59
- Runs the pipeline on git-changed files.
60
-
61
- ```bash
62
- npx autopilot run # diff against HEAD~1
63
- npx autopilot run --base main # diff against main
64
- npx autopilot run --files src/foo.ts # explicit file list
65
- npx autopilot run --format sarif --output results.sarif
66
- npx autopilot run --dry-run
67
- ```
62
+ | Phase | Skill | What it does | Model role |
63
+ |---|---|---|---|
64
+ | **Brainstorm** | `brainstorming` | Turns a rough idea into an approved spec through guided questions | Claude (implementation model) |
65
+ | **Spec review** | `codex-review` | Second model critiques the spec before you commit to it | Codex / GPT-5 |
66
+ | **Plan** | `writing-plans` | Breaks spec into phased, checklist-shaped implementation plan | Claude |
67
+ | **Plan review** | `codex-review` | Second model critiques the plan before you execute it | Codex / GPT-5 |
68
+ | **Implement** | `subagent-driven-development` | Executes plan in a git worktree, one phase at a time, with per-phase tests | Claude |
69
+ | **Migrate** | `migrate` | Runs database migrations dev → QA → prod with per-env validation | Deterministic |
70
+ | **Validate** | `validate` | Static rules + tests + type check + security scan + LLM review | Any |
71
+ | **PR** | `commit-push-pr` | Opens the PR with auto-generated title, summary, and test plan | Claude |
72
+ | **Review** | `review-2pass` / `council` | Multi-model review of the diff (critical pass + informational pass) | Multiple |
73
+ | **Triage** | `bugbot` | Fetches automated reviewer findings, auto-fixes real bugs, dismisses false positives | Claude |
68
74
 
69
- ### `autopilot watch`
75
+ ## What's distinctive
70
76
 
71
- Re-runs on every file save.
77
+ Features that are hard or impossible to find in the competitive set:
72
78
 
73
- ```bash
74
- npx autopilot watch
75
- npx autopilot watch --debounce 500
76
- ```
79
+ - **Multi-model council review** — dispatch the same diff to 3+ models in parallel, synthesize agreement. Catches blind spots no single model sees.
80
+ - **Fix with test verification** — `claude-autopilot fix` runs your full test suite after every patch and reverts on failure. Safer than any tool that proposes fixes without running your tests.
81
+ - **Bug-bot auto-triage** — watches Cursor BugBot / Copilot comments on your PR, triages each (real bug vs false positive), auto-fixes confirmed bugs, dismisses noise with explanations.
82
+ - **Schema alignment rule** — ensures DB migrations, backend types, and frontend types stay in sync. Custom static rule, not something any competitor ships.
83
+ - **SARIF output + GitHub Code Scanning integration** — findings appear as annotations in the PR and in the Security tab.
77
84
 
78
- ### `autopilot autoregress`
85
+ ## Just the review layer
79
86
 
80
- Impact-aware snapshot regression tests. Only fires snapshots whose source modules were touched by the current branch.
87
+ If you don't want the full pipeline, the review subcommands are a strict superset of what `guardrail run` used to do: LLM code review over git-changed files, SARIF output, inline PR comments, auto-fix, baselines, per-finding triage, cost budgets. The legacy `guardrail` CLI remains aliased to the review subcommands through v5.x.
81
88
 
82
89
  ```bash
83
- npx autopilot autoregress run # impact-selected (default)
84
- npx autopilot autoregress run --all
85
- npx autopilot autoregress diff # show JSON diffs vs baselines
86
- npx autopilot autoregress update # overwrite baselines
87
- npx autopilot autoregress generate # LLM-generate snapshot tests for changed files
88
- npx autopilot autoregress generate --files src/foo.ts,src/bar.ts
90
+ claude-autopilot run # review changes since main
91
+ claude-autopilot run --inline-comments # post per-line PR annotations
92
+ claude-autopilot run --format sarif --output out.sarif
93
+ claude-autopilot fix --verify # LLM patch + test gate + revert on fail
89
94
  ```
90
95
 
91
- `generate` requires `OPENAI_API_KEY`.
92
-
93
- ### `autopilot hook`
96
+ > **Alpha.1 CLI note:** subcommands are flat (`run`, `scan`, `ci`, `fix`, `baseline`, `explain`, …). The grouped `claude-autopilot review <verb>` form lands in alpha.2 as an alias — flat forms continue to work indefinitely.
94
97
 
95
- Manages the `pre-push` git hook.
98
+ ## Install & requirements
96
99
 
97
100
  ```bash
98
- npx autopilot hook install # write .git/hooks/pre-push
99
- npx autopilot hook install --force # overwrite existing
100
- npx autopilot hook uninstall
101
- npx autopilot hook status
101
+ npm install -g @delegance/claude-autopilot
102
102
  ```
103
103
 
104
- Works in git worktrees.
104
+ - Node 22+
105
+ - `gh` CLI (for PR phases)
106
+ - One of: `ANTHROPIC_API_KEY` (recommended), `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `GROQ_API_KEY`
107
+ - Claude Code CLI (for skill-based phases — pipeline falls back to direct CLI invocations without it, but loses interactive checkpoints)
105
108
 
106
- ### `autopilot init`
109
+ ---
107
110
 
108
- Interactive preset picker — for when you want to choose a preset manually instead of using `setup`.
111
+ ---
109
112
 
110
- ```bash
111
- npx autopilot init
112
- ```
113
-
114
- Presets: `nextjs-supabase`, `t3`, `python-fastapi`, `rails-postgres`, `go`.
115
-
116
- ## Config (`autopilot.config.yaml`)
113
+ ## Config (`guardrail.config.yaml`)
117
114
 
118
115
  ```yaml
119
116
  configVersion: 1
120
117
  reviewEngine:
121
- adapter: auto # auto-detects best available key at runtime
122
- testCommand: npm test
118
+ adapter: auto # auto-selects best available key at runtime
119
+ testCommand: npm test # null to disable; used by `fix` verified mode
120
+
123
121
  protectedPaths:
124
- - src/core/**
125
122
  - data/deltas/**
123
+ - .github/workflows/**
124
+
126
125
  staticRules:
127
- - hardcoded-secrets
126
+ - hardcoded-secrets # Anthropic, OpenAI, Stripe, GitHub, Supabase, Twilio, SendGrid
128
127
  - npm-audit
128
+ - sql-injection # template literals / concatenation in SQL context
129
+ - missing-auth # Next.js/pages API routes with POST/PUT/DELETE, no auth pattern
130
+ - ssrf # HTTP calls with user-controlled URL
131
+ - insecure-redirect # redirect() with user-controlled target
132
+ - console-log
133
+ - todo-fixme
134
+ - large-file
135
+ - missing-tests
136
+ - package-lock-sync
137
+ - brand-tokens # opt-in: requires brand: block below
138
+
139
+ # Brand token enforcement (opt-in — omit to disable)
140
+ brand:
141
+ colorsFrom: tailwind.config.ts # auto-extract theme.colors as canonical palette
142
+ colors: # explicit palette entries (merged with colorsFrom)
143
+ - '#f97316'
144
+ - '#1a1f3a'
145
+ fonts:
146
+ - 'Inter'
147
+ - 'Geist'
148
+
149
+ policy:
150
+ failOn: critical # critical (default) | warning | note | none
151
+ newOnly: false # true = suppress findings present in .guardrail-baseline.json
152
+
153
+ cost:
154
+ maxPerRun: 0.50 # abort review phase if spend exceeds $0.50
155
+ estimateBeforeRun: false # print token estimate before LLM calls
156
+
157
+ ignore:
158
+ - src/legacy/** # suppress all findings in path
159
+ - { rule: console-log, path: scripts/** } # suppress specific rule in path
160
+
161
+ chunking:
162
+ rateLimitBackoff: exp # exp (default) | linear | none
163
+ parallelism: 3
129
164
  ```
130
165
 
131
- Full schema and preset defaults: `presets/<name>/autopilot.config.yaml`.
166
+ ### Setup Profiles
167
+
168
+ `guardrail setup --profile <name>` overlays a pre-baked rule + policy configuration on top of the detected stack preset:
169
+
170
+ | Profile | Rules | `failOn` | Best for |
171
+ |---|---|---|---|
172
+ | `security-strict` | All security rules + hygiene | `warning` | Security audits, regulated environments |
173
+ | `team` | Core security + hygiene | `critical` | Standard CI/CD on shared branches |
174
+ | `solo` | Hygiene only | `critical` | Solo projects, low-noise baseline |
132
175
 
133
176
  ### Review Engine Adapters
134
177
 
135
178
  | Adapter | Key required | Notes |
136
179
  |---|---|---|
137
- | `auto` | any below | Auto-selects best available (recommended) |
138
- | `claude` | `ANTHROPIC_API_KEY` | Opus 4.7 default |
180
+ | `auto` | any | Auto-selects best available (recommended) |
181
+ | `claude` | `ANTHROPIC_API_KEY` | Claude Opus 4.7 |
139
182
  | `gemini` | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Gemini 2.5 Pro, 1M context |
140
183
  | `codex` | `OPENAI_API_KEY` | GPT-5 Codex |
141
184
  | `openai-compatible` | configurable | Groq, Ollama, Together AI, etc. |
142
185
 
143
186
  `auto` priority: Anthropic → Gemini → OpenAI → Groq.
144
187
 
145
- **Groq example:**
188
+ **Groq (fast/free tier):**
146
189
  ```yaml
147
190
  reviewEngine:
148
191
  adapter: openai-compatible
@@ -161,44 +204,57 @@ reviewEngine:
161
204
  baseUrl: http://localhost:11434/v1
162
205
  ```
163
206
 
207
+ ---
208
+
164
209
  ## GitHub Actions
165
210
 
166
211
  ```yaml
167
212
  - uses: axledbetter/claude-autopilot/.github/actions/ci@main
168
213
  with:
169
- openai-api-key: ${{ secrets.OPENAI_API_KEY }}
214
+ anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
215
+ # Optional:
216
+ # post-comments: 'true'
217
+ # inline-comments: 'false'
218
+ # base-ref: 'main'
219
+ # sarif-output: 'guardrail.sarif'
220
+ # version: 'latest'
170
221
  ```
171
222
 
172
223
  Runs the pipeline, uploads SARIF to GitHub Code Scanning, annotates the PR diff inline.
173
224
 
174
- ## SARIF Output
225
+ ---
175
226
 
176
- ```bash
177
- npx autopilot run --format sarif --output autopilot.sarif
178
- ```
227
+ ## Typical Team Workflow
179
228
 
180
- Compatible with `github/codeql-action/upload-sarif@v3`.
229
+ ```bash
230
+ # 1. First run — establish a baseline so CI only fails on new issues
231
+ npx guardrail run --base main
232
+ npx guardrail baseline create --note "post-v2 audit"
233
+ git add .guardrail-baseline.json && git commit -m "chore: guardrail baseline"
181
234
 
182
- ## Snapshot Regression Testing
235
+ # 2. CI — only new findings block the build
236
+ npx guardrail ci --new-only --fail-on critical
183
237
 
184
- After each feature lands:
238
+ # 3. Triage false positives once, never see them again
239
+ npx guardrail triage sql-injection:src/db/raw.ts:47 false-positive --reason "internal admin only"
240
+ git add .guardrail-triage.json && git commit -m "chore: triage false positive"
185
241
 
186
- ```bash
187
- npx autopilot autoregress generate # generate baselines for changed files
242
+ # 4. Auto-fix and verify
243
+ npx guardrail fix --yes # applies patches + runs tests, reverts on failure
188
244
  ```
189
245
 
190
- Future PRs automatically fail if covered behavior diverges. The impact selector uses `git merge-base` diff + one-hop import graph expansion — only relevant snapshots run, keeping CI fast.
246
+ ---
191
247
 
192
- High-impact paths (`src/core/pipeline/**`, `src/adapters/**`, `src/core/findings/**`, `src/core/config/**`) always trigger a full run.
248
+ ## Interpreting Results
193
249
 
194
- ## Public API
250
+ **Exit 0** — pass or warnings only (at current `policy.failOn` threshold). Safe to merge.
251
+ **Exit 1** — findings at or above threshold. Fix before merging.
195
252
 
196
- ```typescript
197
- import type { Finding, RunResult, AutopilotConfig } from '@delegance/claude-autopilot';
198
- import { normalizeSnapshot } from '@delegance/claude-autopilot';
199
- ```
253
+ Findings: `critical` blocks merge · `warning` should fix · `note` informational.
200
254
 
201
- Types are available for TypeScript consumers. Runtime import requires a tsx-aware bundler (the package ships TypeScript source).
255
+ PR comments show: status badge, phase table, critical/warning findings with inline links, cost footer. Re-runs update the existing comment in place.
256
+
257
+ ---
202
258
 
203
259
  ## Architecture
204
260
 
@@ -206,11 +262,13 @@ Four pluggable adapter points:
206
262
 
207
263
  | Point | Built-in | Purpose |
208
264
  |---|---|---|
209
- | `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM code review |
210
- | `vcs-host` | `github` | PR comments + SARIF upload |
211
- | `migration-runner` | `supabase` | DB migration execution |
265
+ | `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM review |
266
+ | `vcs-host` | `github` | PR comments + SARIF |
267
+ | `migration-runner` | `supabase` | DB migrations |
212
268
  | `review-bot-parser` | `cursor` | Parse review bot comments |
213
269
 
270
+ **Monorepo:** Auto-detects npm/yarn/pnpm workspaces, Turborepo, and Nx.
271
+
214
272
  ## License
215
273
 
216
274
  MIT
@@ -0,0 +1,77 @@
1
+ // Shared launcher for both `claude-autopilot` and `guardrail` bins.
2
+ // Imported, not a bin itself. Resolves tsx, spawns src/cli/index.ts with
3
+ // the caller's argv, forwards stdio, exits with the child's status.
4
+
5
+ import { fileURLToPath } from 'node:url';
6
+ import { spawnSync } from 'node:child_process';
7
+ import * as fs from 'node:fs';
8
+ import * as path from 'node:path';
9
+ import * as os from 'node:os';
10
+
11
// Directory that contains this launcher module (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Absolute path of the TypeScript CLI entrypoint: <this dir>/../src/cli/index.ts.
const ENTRYPOINT = path.resolve(__dirname, '..', 'src', 'cli', 'index.ts');

/**
 * Resolve the `tsx` executable used to run the TypeScript entrypoint.
 *
 * Candidates are probed in order and the first one that exists on disk wins:
 *   1. `<this dir>/../node_modules/.bin/tsx`
 *   2. `<this dir>/../../../.bin/tsx`
 *      (presumably the consumer's `node_modules/.bin` when this scoped package
 *      is installed as a dependency — verify against the install layout)
 * If neither exists, the bare command name `'tsx'` is returned so that the
 * spawn call resolves it through PATH.
 *
 * @returns {string} Absolute path to a `tsx` binary, or the literal `'tsx'`.
 */
function findTsx() {
  const candidates = [
    path.resolve(__dirname, '..', 'node_modules', '.bin', 'tsx'),
    path.resolve(__dirname, '..', '..', '..', '.bin', 'tsx'),
  ];
  const existing = candidates.find((candidate) => fs.existsSync(candidate));
  return existing ?? 'tsx';
}
21
+
22
// Tracks per-terminal-session whether the deprecation notice has been shown.
// Uses a temp file keyed by parent PID plus whether stderr is a TTY or a pipe,
// so parallel CI jobs don't collide. Falls back to always-emit if the stamp
// can't be written.
const DEPRECATION_STAMP_DIR = path.join(os.tmpdir(), 'claude-autopilot');

/**
 * Report whether the deprecation notice was already shown for this session,
 * recording a stamp file as a side effect when it was not.
 *
 * The stamp is created with the exclusive `wx` flag, so checking for and
 * claiming the stamp is a single atomic step: when several launches from the
 * same parent start at once, only one of them observes "not shown yet".
 *
 * @returns {boolean} `true` if a stamp for this session already existed;
 *   `false` if this call created the stamp, or if the stamp directory or
 *   file could not be written (in which case the caller emits the notice).
 */
function hasShownDeprecation() {
  const key = `${process.ppid}-${process.stderr.isTTY ? 'tty' : 'pipe'}.stamp`;
  const stampPath = path.join(DEPRECATION_STAMP_DIR, key);

  try {
    // `recursive: true` is a no-op when the directory already exists.
    fs.mkdirSync(DEPRECATION_STAMP_DIR, { recursive: true });
  } catch {
    // Stamp directory unusable: fall back to always emitting.
    return false;
  }

  try {
    // 'wx' fails with EEXIST if the stamp is already there (atomic test-and-set).
    fs.writeFileSync(stampPath, String(Date.now()), { flag: 'wx' });
  } catch (err) {
    // EEXIST means the notice was already shown; any other failure means the
    // stamp could not be recorded, so fall back to emitting.
    return err?.code === 'EEXIST';
  }

  // Best-effort cleanup of stamps older than 1h to keep tmpdir tidy.
  try {
    const cutoff = Date.now() - 60 * 60 * 1000;
    for (const entry of fs.readdirSync(DEPRECATION_STAMP_DIR)) {
      const entryPath = path.join(DEPRECATION_STAMP_DIR, entry);
      try {
        if (fs.statSync(entryPath).mtimeMs < cutoff) fs.unlinkSync(entryPath);
      } catch { /* ignore: another process may have removed it already */ }
    }
  } catch { /* ignore: cleanup must never affect the result */ }

  return false;
}
48
+
49
/**
 * Decide whether the deprecation notice should be printed.
 *
 * The `CLAUDE_AUTOPILOT_DEPRECATION` environment variable takes precedence:
 *   - `never`  → suppress the notice (CI/automation)
 *   - `always` → print the notice on every run (deterministic testing)
 * Any other value (or unset) defers to the stamp check, which yields the
 * notice once per terminal session.
 *
 * @returns {boolean} `true` when the notice should be emitted.
 */
function shouldEmitDeprecation() {
  switch (process.env.CLAUDE_AUTOPILOT_DEPRECATION) {
    case 'never':
      return false;
    case 'always':
      return true;
    default:
      return !hasShownDeprecation();
  }
}
61
+
62
/**
 * Launch the CLI with `argv` passed through verbatim.
 *
 * When invoked under the `guardrail` name, a deprecation notice is written
 * to stderr first (subject to `shouldEmitDeprecation()`). The TypeScript
 * entrypoint is then run synchronously through `tsx` with inherited stdio,
 * and this process exits with the child's exit status.
 *
 * If the child could not be started at all (e.g. `tsx` is not installed or
 * not on PATH), a diagnostic is written to stderr before exiting with 1
 * instead of failing silently. A child terminated by a signal has no exit
 * status and also results in exit code 1.
 *
 * This function never returns: it always ends in `process.exit`.
 *
 * @param {{ name: 'claude-autopilot' | 'guardrail' }} opts
 */
export function launch(opts) {
  if (opts.name === 'guardrail' && shouldEmitDeprecation()) {
    process.stderr.write(
      '\x1b[33m[deprecated]\x1b[0m `guardrail` CLI is renamed to `claude-autopilot`. ' +
        'The `guardrail` alias works through v5.x and will be removed in v6. ' +
        'Migration guide: https://github.com/axledbetter/claude-autopilot/blob/master/docs/migration/v4-to-v5.md\n' +
        'Silence: set CLAUDE_AUTOPILOT_DEPRECATION=never\n',
    );
  }

  const tsx = findTsx();
  const result = spawnSync(tsx, [ENTRYPOINT, ...process.argv.slice(2)], { stdio: 'inherit' });

  // `error` is set when the child process could not be spawned; `status` is
  // null in that case, so without this message the user would see nothing.
  if (result.error) {
    process.stderr.write(`[${opts.name}] failed to run ${tsx}: ${result.error.message}\n`);
    process.exit(1);
  }

  process.exit(result.status ?? 1);
}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import { launch } from './_launcher.js';
3
+ launch({ name: 'claude-autopilot' });
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import { launch } from './_launcher.js';
3
+ launch({ name: 'guardrail' });
package/package.json CHANGED
@@ -1,15 +1,18 @@
1
1
  {
2
2
  "name": "@delegance/claude-autopilot",
3
- "version": "2.5.0",
3
+ "version": "5.0.0-alpha.1",
4
4
  "type": "module",
5
- "description": "Claude Code automation pipeline: spec → plan → implement → validate → PR",
5
+ "description": "Autonomous development pipeline for Claude Code: brainstorm spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
6
6
  "keywords": [
7
- "claude",
7
+ "claude-autopilot",
8
8
  "autopilot",
9
- "ai",
10
- "pipeline",
9
+ "claude-code",
10
+ "ai-agent",
11
11
  "code-review",
12
- "cli"
12
+ "llm",
13
+ "sarif",
14
+ "cli",
15
+ "pipeline"
13
16
  ],
14
17
  "license": "MIT",
15
18
  "repository": {
@@ -20,7 +23,8 @@
20
23
  "node": ">=22.0.0"
21
24
  },
22
25
  "bin": {
23
- "autopilot": "bin/autopilot.js"
26
+ "claude-autopilot": "bin/claude-autopilot.js",
27
+ "guardrail": "bin/guardrail.js"
24
28
  },
25
29
  "types": "./src/index.ts",
26
30
  "exports": {
@@ -35,6 +39,7 @@
35
39
  "skills/",
36
40
  "scripts/test-runner.mjs",
37
41
  "scripts/autoregress.ts",
42
+ "scripts/snapshots/",
38
43
  "tests/snapshots/",
39
44
  "CHANGELOG.md"
40
45
  ],
@@ -47,6 +52,7 @@
47
52
  "dependencies": {
48
53
  "@anthropic-ai/sdk": "^0.90.0",
49
54
  "@google/generative-ai": "^0.24.1",
55
+ "@modelcontextprotocol/sdk": "^1.29.0",
50
56
  "ajv": "^8",
51
57
  "dotenv": ">=16",
52
58
  "js-yaml": "^4",
@@ -56,7 +62,7 @@
56
62
  },
57
63
  "devDependencies": {
58
64
  "@types/js-yaml": "^4",
59
- "@types/node": "^22",
60
- "typescript": "^5"
65
+ "@types/node": "^25",
66
+ "typescript": "^6"
61
67
  }
62
68
  }
@@ -0,0 +1,35 @@
1
+ configVersion: 1
2
+ reviewEngine: { adapter: auto }
3
+ vcsHost: { adapter: github }
4
+ reviewBot: { adapter: cursor }
5
+ # No migrationRunner — the migrate phase will no-op with a notice pointing
6
+ # to `.claude-autopilot/stack.yaml` for users who want to wire one up.
7
+ protectedPaths:
8
+ - "**/auth/**"
9
+ - "**/payment/**"
10
+ - "**/encryption/**"
11
+ - "**/secret/**"
12
+ - "**/keys/**"
13
+ staticRules:
14
+ - hardcoded-secrets
15
+ - npm-audit
16
+ - package-lock-sync
17
+ - sql-injection
18
+ - missing-auth
19
+ - ssrf
20
+ - insecure-redirect
21
+ policy:
22
+ failOn: critical
23
+ newOnly: false
24
+ thresholds:
25
+ bugbotAutoFix: 85
26
+ bugbotProposePatch: 60
27
+ maxValidateRetries: 3
28
+ reviewStrategy: auto
29
+ chunking:
30
+ smallTierMaxTokens: 8000
31
+ partialReviewTokens: 60000
32
+ perFileMaxTokens: 32000
33
+ pipeline:
34
+ runReviewOnStaticFail: true
35
+ runReviewOnTestFail: false
@@ -0,0 +1,40 @@
1
+ A generic project with no strong framework signals detected.
2
+
3
+ This preset makes **no assumptions** about:
4
+ - Database engine or migration runner
5
+ - Type generation
6
+ - Test framework (uses whatever `npm test` / `npm run typecheck` / `npm run lint` find)
7
+ - Deployment target
8
+
9
+ It enables the core security rules that apply to most codebases — hardcoded secrets, npm audit, package-lock sync, SQL injection patterns, missing auth checks, SSRF, insecure redirects.
10
+
11
+ ## What's disabled vs stack-specific presets
12
+
13
+ - `supabase-rls-bypass` rule (Supabase-only)
14
+ - `schema-alignment` rule (requires declared migration paths)
15
+ - `migrate` phase of the pipeline no-ops with a notice
16
+
17
+ ## Wiring up migrations
18
+
19
+ If your project uses migrations, create `.claude-autopilot/stack.yaml` with:
20
+
21
+ ```yaml
22
+ migrate:
23
+ command: "prisma migrate dev" # or flyway, dbmate, tbls, golang-migrate, etc.
24
+ environments: [dev, staging, prod]
25
+ typeGeneration:
26
+ command: "prisma generate"
27
+ path: "node_modules/.prisma/client"
28
+ ```
29
+
30
+ Or pick a stack-specific preset at setup time: `claude-autopilot init --preset nextjs-supabase`.
31
+
32
+ ## Things that should flag CRITICAL (universal)
33
+
34
+ - Secrets committed to code or history
35
+ - SQL string concatenation with user input
36
+ - POST endpoints without auth checks
37
+ - SSRF via user-controlled URLs in `fetch` / `axios`
38
+ - Open redirects (user-controlled `Location` header)
39
+ - Dynamic code evaluation (`eval`, `Function` constructor) with user input
40
+ - Shell command construction with user input