@delegance/claude-autopilot 2.5.0 → 5.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/README.md +164 -106
- package/bin/_launcher.js +77 -0
- package/bin/claude-autopilot.js +3 -0
- package/bin/guardrail.js +3 -0
- package/package.json +15 -9
- package/presets/generic/guardrail.config.yaml +35 -0
- package/presets/generic/stack.md +40 -0
- package/presets/nextjs-supabase/{autopilot.config.yaml → guardrail.config.yaml} +7 -0
- package/scripts/autoregress.ts +27 -11
- package/skills/autopilot/SKILL.md +170 -0
- package/skills/claude-autopilot.md +80 -0
- package/skills/guardrail.md +39 -0
- package/skills/migrate/SKILL.md +83 -0
- package/src/adapters/council/claude.ts +41 -0
- package/src/adapters/council/openai.ts +40 -0
- package/src/adapters/council/types.ts +7 -0
- package/src/adapters/loader.ts +7 -7
- package/src/adapters/review-engine/auto.ts +2 -2
- package/src/adapters/review-engine/claude.ts +9 -11
- package/src/adapters/review-engine/codex.ts +9 -11
- package/src/adapters/review-engine/gemini.ts +9 -11
- package/src/adapters/review-engine/openai-compatible.ts +10 -12
- package/src/adapters/review-engine/parse-output.ts +32 -6
- package/src/adapters/review-engine/prompt-builder.ts +19 -0
- package/src/adapters/review-engine/types.ts +1 -1
- package/src/adapters/vcs-host/commit-status.ts +39 -0
- package/src/adapters/vcs-host/github.ts +2 -2
- package/src/cli/baseline.ts +125 -0
- package/src/cli/ci.ts +11 -8
- package/src/cli/costs.ts +2 -2
- package/src/cli/council.ts +96 -0
- package/src/cli/detector.ts +21 -5
- package/src/cli/explain.ts +197 -0
- package/src/cli/fix.ts +173 -111
- package/src/cli/hook.ts +72 -27
- package/src/cli/ignore-helper.ts +116 -0
- package/src/cli/index.ts +272 -31
- package/src/cli/init.ts +12 -12
- package/src/cli/lsp.ts +200 -0
- package/src/cli/mcp.ts +206 -0
- package/src/cli/pr-comment.ts +5 -5
- package/src/cli/pr-desc.ts +168 -0
- package/src/cli/pr-review-comments.ts +3 -3
- package/src/cli/pr.ts +76 -0
- package/src/cli/preflight.ts +15 -32
- package/src/cli/report.ts +186 -0
- package/src/cli/run.ts +140 -36
- package/src/cli/scan.ts +233 -0
- package/src/cli/setup.ts +121 -15
- package/src/cli/test-gen.ts +125 -0
- package/src/cli/triage.ts +137 -0
- package/src/cli/watch.ts +52 -31
- package/src/cli/worker.ts +109 -0
- package/src/core/cache/review-cache.ts +2 -2
- package/src/core/chunking/index.ts +2 -2
- package/src/core/config/loader.ts +10 -10
- package/src/core/config/preset-resolver.ts +6 -6
- package/src/core/config/schema.ts +103 -2
- package/src/core/config/types.ts +57 -2
- package/src/core/council/config.ts +71 -0
- package/src/core/council/context.ts +17 -0
- package/src/core/council/runner.ts +83 -0
- package/src/core/council/types.ts +45 -0
- package/src/core/detect/llm-key.ts +89 -0
- package/src/core/detect/workspaces.ts +103 -0
- package/src/core/errors.ts +4 -4
- package/src/core/fix/generator.ts +149 -0
- package/src/core/ignore/index.ts +4 -4
- package/src/core/mcp/concurrency.ts +16 -0
- package/src/core/mcp/handlers/fix-finding.ts +126 -0
- package/src/core/mcp/handlers/get-capabilities.ts +62 -0
- package/src/core/mcp/handlers/get-findings.ts +36 -0
- package/src/core/mcp/handlers/review-diff.ts +65 -0
- package/src/core/mcp/handlers/scan-files.ts +65 -0
- package/src/core/mcp/handlers/validate-fix.ts +41 -0
- package/src/core/mcp/run-store.ts +85 -0
- package/src/core/mcp/workspace.ts +35 -0
- package/src/core/persist/baseline.ts +112 -0
- package/src/core/persist/cost-log.ts +1 -1
- package/src/core/persist/findings-cache.ts +1 -1
- package/src/core/persist/triage.ts +112 -0
- package/src/core/phases/static-rules.ts +18 -5
- package/src/core/pipeline/review-phase.ts +65 -26
- package/src/core/pipeline/run.ts +42 -10
- package/src/core/runtime/lock.ts +2 -2
- package/src/core/runtime/state.ts +2 -2
- package/src/core/schema-alignment/detector.ts +59 -0
- package/src/core/schema-alignment/extractor/index.ts +24 -0
- package/src/core/schema-alignment/extractor/prisma.ts +21 -0
- package/src/core/schema-alignment/extractor/sql.ts +99 -0
- package/src/core/schema-alignment/llm-check.ts +91 -0
- package/src/core/schema-alignment/scanner.ts +107 -0
- package/src/core/schema-alignment/types.ts +43 -0
- package/src/core/shell.ts +3 -3
- package/src/core/static-rules/registry.ts +17 -8
- package/src/core/static-rules/rules/brand-tokens.ts +145 -0
- package/src/core/static-rules/rules/hardcoded-secrets.ts +27 -1
- package/src/core/static-rules/rules/insecure-redirect.ts +67 -0
- package/src/core/static-rules/rules/missing-auth.ts +70 -0
- package/src/core/static-rules/rules/schema-alignment.ts +132 -0
- package/src/core/static-rules/rules/sql-injection.ts +71 -0
- package/src/core/static-rules/rules/ssrf.ts +63 -0
- package/src/core/static-rules/tailwind-extractor.ts +38 -0
- package/src/core/test-gen/coverage-analyzer.ts +93 -0
- package/src/core/test-gen/framework-detector.ts +21 -0
- package/src/core/test-gen/test-writer.ts +33 -0
- package/src/core/ui/design-context-loader.ts +87 -0
- package/src/core/worker/client.ts +46 -0
- package/src/core/worker/lockfile.ts +38 -0
- package/src/core/worker/server.ts +81 -0
- package/src/formatters/junit.ts +52 -0
- package/src/formatters/sarif.ts +2 -2
- package/src/index.ts +1 -2
- package/tests/snapshots/baselines/src-formatters-sarif.json +4 -4
- package/tests/snapshots/index.json +3 -3
- package/tests/snapshots/src-formatters-sarif.snap.ts +1 -1
- package/tests/snapshots/src-snapshots-impact-selector.snap.ts +3 -3
- package/tests/snapshots/src-snapshots-import-scanner.snap.ts +3 -3
- package/tests/snapshots/src-snapshots-serializer.snap.ts +2 -2
- package/bin/autopilot.js +0 -20
- package/skills/autopilot.md +0 -157
- /package/presets/go/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/presets/python-fastapi/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/presets/rails-postgres/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/presets/t3/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/{src → scripts}/snapshots/impact-selector.ts +0 -0
- /package/{src → scripts}/snapshots/import-scanner.ts +0 -0
- /package/{src → scripts}/snapshots/serializer.ts +0 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,45 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [5.0.0-alpha.1] — 2026-04-24
|
|
4
|
+
|
|
5
|
+
**Package renamed: `@delegance/guardrail` → `@delegance/claude-autopilot`.**
|
|
6
|
+
|
|
7
|
+
The v4 product sold itself as "LLM code review." The real product is an end-to-end autonomous development pipeline built on Claude Code skills — brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. This alpha corrects the identity mismatch without breaking any v4 usage.
|
|
8
|
+
|
|
9
|
+
Every v4 invocation continues to work through v5.x via the preserved `guardrail` CLI alias. Migration guide: `docs/migration/v4-to-v5.md`.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **`claude-autopilot` CLI binary** — primary entrypoint (`bin/claude-autopilot.js`), co-installed with `guardrail`.
|
|
13
|
+
- **Pipeline skills bundled in the tarball** — `skills/claude-autopilot.md` (agent-loop spec), `skills/autopilot/`, `skills/migrate/`. v4.3.1 shipped only `skills/guardrail.md`; the pipeline skills existed only in-repo and weren't distributed.
|
|
14
|
+
- **`generic` preset** — no DB migration runner, uses `npm test` / `npm run typecheck` / `npm run lint` where present. Picked by `detectProject()` as the fallback when no stack signals are found (replaces the v4 behavior of claiming `nextjs-supabase` with low confidence).
|
|
15
|
+
- **v5 migration guide at `docs/migration/v4-to-v5.md`** — find/replace patterns for `package.json`, shell scripts, GitHub Actions yaml, Dockerfiles, and Claude Code skills.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- **Stack detector fallback:** plain Next.js with no Supabase signals now returns `generic`, not `nextjs-supabase (low confidence)`. Fixes the cold-start eval reviewer finding.
|
|
19
|
+
- **`PRESET_LABELS` in `setup.ts`:** adds `generic` entry.
|
|
20
|
+
- **Detector tests:** updated to assert the new `generic` fallback behavior.
|
|
21
|
+
- **`skills/guardrail.md`:** rewritten as a back-compat alias pointing at `skills/claude-autopilot.md`.
|
|
22
|
+
- **`bin/guardrail.js`:** emits a one-line deprecation notice on `stderr` on first invocation per terminal session, then forwards unchanged.
|
|
23
|
+
|
|
24
|
+
### Deferred to later alphas
|
|
25
|
+
- **alpha.2:** full CLI verb restructure (`claude-autopilot {review,pr,triage,advanced,…}`), v4 compatibility golden-test matrix, superpowers peer-dep hard-fail in `doctor`.
|
|
26
|
+
- **alpha.3:** tombstone `@delegance/guardrail@5.0.0` publish, CI smoke tests for `npx guardrail` / `npx @delegance/guardrail` / global install / GitHub Actions parity, codemod script for find/replace migration.
|
|
27
|
+
- **5.0.0 GA:** after alpha.3 soaks against delegance-app for 2+ real feature pipelines.
|
|
28
|
+
|
|
29
|
+
## [4.3.1] — 2026-04-24
|
|
30
|
+
|
|
31
|
+
### Fixed (from external cold-start review)
|
|
32
|
+
- **`parseReviewOutput` silent failure** — regex required literal `### [CRITICAL]` brackets and returned zero findings when the LLM emitted `### CRITICAL`, `### **CRITICAL**`, or `### **[CRITICAL]**` (all common Llama/GPT variants). `src/adapters/review-engine/parse-output.ts` now accepts all four formats and logs a warning when raw output is non-empty but no findings parse, so format drift never silently hides bugs again.
|
|
33
|
+
- **Pipeline short-circuit skipped LLM review** — `src/core/pipeline/run.ts` returned early on static-rules `fail`, meaning the LLM never ran on the code that most needed it (IDOR, TOCTOU, CORS, off-by-one, rate-limit gaps typically ride alongside a static-flagged issue). New default: review runs even on static-fail. Legacy behavior restored via `pipeline.runReviewOnStaticFail: false` in config.
|
|
34
|
+
- **`doctor` / `preflight` ignored 3 of 5 LLM keys** — only checked `ANTHROPIC_API_KEY` and `OPENAI_API_KEY`, so users with `GROQ_API_KEY`/`GEMINI_API_KEY`/`GOOGLE_API_KEY` set saw "No LLM API key" right after `setup` reported "detected." New shared helper `src/core/detect/llm-key.ts` is the single source of truth used by setup, scan, run, and preflight.
|
|
35
|
+
- **Stack detector mislabeled plain Next.js as "Next.js + Supabase"** — now requires actual Supabase signals (`@supabase/supabase-js`, `@supabase/ssr`, `@supabase/auth-helpers-nextjs`, `supabase/config.toml`, or `SUPABASE_*` env vars). Vanilla Next.js still uses the `nextjs-supabase` preset as a fallback but the evidence string and setup output make the fallback explicit.
|
|
36
|
+
- **`--profile team` missing security rules** — added `package-lock-sync`, `ssrf`, `insecure-redirect` to match the README's advertised coverage.
|
|
37
|
+
|
|
38
|
+
### Added
|
|
39
|
+
- `src/core/detect/llm-key.ts` — `detectLLMKey()`, `LLM_KEY_NAMES`, `LLM_KEY_HINTS`, `loadEnvFile()`.
|
|
40
|
+
- `GuardrailConfig.pipeline.runReviewOnStaticFail` / `runReviewOnTestFail` config flags.
|
|
41
|
+
- 6 parser format-variation tests covering all documented markdown variants plus the silent-drift warning path.
|
|
42
|
+
|
|
3
43
|
## [2.5.0] — 2026-04-22
|
|
4
44
|
|
|
5
45
|
### Added
|
package/README.md
CHANGED
|
@@ -1,148 +1,191 @@
|
|
|
1
1
|
# @delegance/claude-autopilot
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
## Install
|
|
3
|
+
**Autonomous development pipeline for Claude Code. Brainstorm → spec → plan → implement → migrate → validate → PR → review → merge — all from your terminal, on your codebase, with your test suite.**
|
|
6
4
|
|
|
7
5
|
```bash
|
|
8
|
-
|
|
6
|
+
claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
|
|
7
|
+
# → writes spec (reviewed by Codex) → writes plan (reviewed by Codex) →
|
|
8
|
+
# → creates branch → implements with subagents → runs migrations →
|
|
9
|
+
# → runs full test + lint + type + security gate → opens PR →
|
|
10
|
+
# → dispatches multi-model review → auto-fixes bugbot findings →
|
|
11
|
+
# → ready to merge
|
|
9
12
|
```
|
|
10
13
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
## Claude Code Skill
|
|
14
|
-
|
|
15
|
-
The package ships a ready-made Claude Code skill. After installing, copy it into your project:
|
|
14
|
+
*No hosted agent. No per-seat subscription. Runs locally on your machine, against your real repo, using your API keys. Every phase is a Claude Code skill you can intervene in, rewire, or run by itself.*
|
|
16
15
|
|
|
17
|
-
|
|
18
|
-
mkdir -p .claude/skills
|
|
19
|
-
cp node_modules/@delegance/claude-autopilot/skills/autopilot.md .claude/skills/
|
|
20
|
-
```
|
|
16
|
+
---
|
|
21
17
|
|
|
22
|
-
|
|
18
|
+
## Why this vs the alternatives
|
|
23
19
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
# One command — auto-detects project type, writes config, installs hook, runs doctor
|
|
28
|
-
npx autopilot setup
|
|
29
|
-
|
|
30
|
-
# Run your first pipeline
|
|
31
|
-
npx autopilot run
|
|
32
|
-
```
|
|
20
|
+
AI coding tools fall into three buckets. Here's where claude-autopilot sits.
|
|
33
21
|
|
|
34
|
-
|
|
22
|
+
| Tool | Shape | Hosted? | Model lock-in | Pipeline structure | You can intervene mid-flow? |
|
|
23
|
+
|---|---|---|---|---|---|
|
|
24
|
+
| **Devin** (Cognition) | Autonomous agent | Yes (SaaS, $500/mo) | Cognition's stack | Opaque | No — watch a dashboard |
|
|
25
|
+
| **GitHub Copilot Workspace** | Spec → plan → PR | Yes | Copilot only | Fixed, non-extensible | Edit the plan, that's it |
|
|
26
|
+
| **Factory Droids** | Multi-agent workflow | Yes (per-seat) | Factory's stack | Fixed | Limited |
|
|
27
|
+
| **Cursor BugBot / Copilot Review / CodeRabbit** | Async PR reviewer | Yes | Vendor's model | Single phase (review only) | N/A — post-hoc only |
|
|
28
|
+
| **Aider / Cline / Cursor agent mode** | Interactive pair programming | Local | User's choice | None — single-shot prompts | Continuous |
|
|
29
|
+
| **OpenHands / SWE-agent** | Open-ended agent framework | Local | User's choice | None — agent decides | Rare, research-grade |
|
|
30
|
+
| **claude-autopilot** | **Opinionated local pipeline** | **Local** | **Any LLM (Claude / GPT / Gemini / Groq / Ollama)** | **Fixed but rewireable, skill-per-phase** | **Every phase. All state on disk.** |
|
|
35
31
|
|
|
36
|
-
|
|
32
|
+
The architectural differences that matter most in practice:
|
|
37
33
|
|
|
38
|
-
|
|
34
|
+
1. **Multi-model by design.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Different model for each role, swap any of them. The pipeline's phases are explicit contracts, not one opaque API call.
|
|
35
|
+
2. **Your stack, not a sandbox.** It runs your `npm test`, your `prisma migrate`, your `gh pr create`, your `ruff check`. If it works in your terminal, it works in the pipeline.
|
|
36
|
+
3. **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
|
|
37
|
+
4. **Test-gated auto-revert as a first-class command.** `claude-autopilot fix --verify` patches a file, runs your full test suite, and reverts on failure. Built into the CLI, not a wrapper you write yourself.
|
|
39
38
|
|
|
40
|
-
|
|
39
|
+
## 30-second quickstart
|
|
41
40
|
|
|
42
41
|
```bash
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
```
|
|
42
|
+
# Install
|
|
43
|
+
npm install -g @delegance/claude-autopilot
|
|
46
44
|
|
|
47
|
-
|
|
45
|
+
# One-shot setup — detects stack, writes config, installs skills, sets hooks
|
|
46
|
+
npx claude-autopilot init
|
|
48
47
|
|
|
49
|
-
|
|
48
|
+
# Ship a feature end-to-end
|
|
49
|
+
claude-autopilot brainstorm "add rate limiting to the public API"
|
|
50
|
+
# Answer ~5 questions. Spec written. Codex reviews it. You approve.
|
|
51
|
+
# Claude walks the plan → implementation → migration → tests → PR → review.
|
|
52
|
+
# ~15-40 min for a typical feature.
|
|
50
53
|
|
|
51
|
-
|
|
52
|
-
|
|
54
|
+
# Or run just the review layer on an existing PR
|
|
55
|
+
claude-autopilot run --pr 123
|
|
53
56
|
```
|
|
54
57
|
|
|
55
|
-
|
|
58
|
+
## The pipeline, phase by phase
|
|
56
59
|
|
|
57
|
-
|
|
60
|
+
Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can invoke any phase directly (`/brainstorm`, `/plan`, `/migrate`, `/validate`) without running the full pipeline. You can also rewire the pipeline by editing the `autopilot` skill.
|
|
58
61
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
62
|
+
| Phase | Skill | What it does | Model role |
|
|
63
|
+
|---|---|---|---|
|
|
64
|
+
| **Brainstorm** | `brainstorming` | Turns a rough idea into an approved spec through guided questions | Claude (implementation model) |
|
|
65
|
+
| **Spec review** | `codex-review` | Second model critiques the spec before you commit to it | Codex / GPT-5 |
|
|
66
|
+
| **Plan** | `writing-plans` | Breaks spec into phased, checklist-shaped implementation plan | Claude |
|
|
67
|
+
| **Plan review** | `codex-review` | Second model critiques the plan before you execute it | Codex / GPT-5 |
|
|
68
|
+
| **Implement** | `subagent-driven-development` | Executes plan in a git worktree, one phase at a time, with per-phase tests | Claude |
|
|
69
|
+
| **Migrate** | `migrate` | Runs database migrations dev → QA → prod with per-env validation | Deterministic |
|
|
70
|
+
| **Validate** | `validate` | Static rules + tests + type check + security scan + LLM review | Any |
|
|
71
|
+
| **PR** | `commit-push-pr` | Opens the PR with auto-generated title, summary, and test plan | Claude |
|
|
72
|
+
| **Review** | `review-2pass` / `council` | Multi-model review of the diff (critical pass + informational pass) | Multiple |
|
|
73
|
+
| **Triage** | `bugbot` | Fetches automated reviewer findings, auto-fixes real bugs, dismisses false positives | Claude |
|
|
68
74
|
|
|
69
|
-
|
|
75
|
+
## What's distinctive
|
|
70
76
|
|
|
71
|
-
|
|
77
|
+
Features that are hard or impossible to find in the competitive set:
|
|
72
78
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
79
|
+
- **Multi-model council review** — dispatch the same diff to 3+ models in parallel, synthesize agreement. Catches blind spots no single model sees.
|
|
80
|
+
- **Fix with test verification** — `claude-autopilot fix` runs your full test suite after every patch and reverts on failure. Safer than any tool that proposes fixes without running your tests.
|
|
81
|
+
- **Bug-bot auto-triage** — watches Cursor BugBot / Copilot comments on your PR, triages each (real bug vs false positive), auto-fixes confirmed bugs, dismisses noise with explanations.
|
|
82
|
+
- **Schema alignment rule** — ensures DB migrations, backend types, and frontend types stay in sync. Custom static rule, not something any competitor ships.
|
|
83
|
+
- **SARIF output + GitHub Code Scanning integration** — findings appear as annotations in the PR and in the Security tab.
|
|
77
84
|
|
|
78
|
-
|
|
85
|
+
## Just the review layer
|
|
79
86
|
|
|
80
|
-
|
|
87
|
+
If you don't want the full pipeline, the review subcommands are a strict superset of what `guardrail run` used to do: LLM code review over git-changed files, SARIF output, inline PR comments, auto-fix, baselines, per-finding triage, cost budgets. The legacy `guardrail` CLI remains aliased to the review subcommands through v5.x.
|
|
81
88
|
|
|
82
89
|
```bash
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
npx autopilot autoregress generate # LLM-generate snapshot tests for changed files
|
|
88
|
-
npx autopilot autoregress generate --files src/foo.ts,src/bar.ts
|
|
90
|
+
claude-autopilot run # review changes since main
|
|
91
|
+
claude-autopilot run --inline-comments # post per-line PR annotations
|
|
92
|
+
claude-autopilot run --format sarif --output out.sarif
|
|
93
|
+
claude-autopilot fix --verify # LLM patch + test gate + revert on fail
|
|
89
94
|
```
|
|
90
95
|
|
|
91
|
-
`
|
|
92
|
-
|
|
93
|
-
### `autopilot hook`
|
|
96
|
+
> **Alpha.1 CLI note:** subcommands are flat (`run`, `scan`, `ci`, `fix`, `baseline`, `explain`, …). The grouped `claude-autopilot review <verb>` form lands in alpha.2 as an alias — flat forms continue to work indefinitely.
|
|
94
97
|
|
|
95
|
-
|
|
98
|
+
## Install & requirements
|
|
96
99
|
|
|
97
100
|
```bash
|
|
98
|
-
|
|
99
|
-
npx autopilot hook install --force # overwrite existing
|
|
100
|
-
npx autopilot hook uninstall
|
|
101
|
-
npx autopilot hook status
|
|
101
|
+
npm install -g @delegance/claude-autopilot
|
|
102
102
|
```
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
- Node 22+
|
|
105
|
+
- `gh` CLI (for PR phases)
|
|
106
|
+
- One of: `ANTHROPIC_API_KEY` (recommended), `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `GROQ_API_KEY`
|
|
107
|
+
- Claude Code CLI (for skill-based phases — pipeline falls back to direct CLI invocations without it, but loses interactive checkpoints)
|
|
105
108
|
|
|
106
|
-
|
|
109
|
+
---
|
|
107
110
|
|
|
108
|
-
|
|
111
|
+
---
|
|
109
112
|
|
|
110
|
-
|
|
111
|
-
npx autopilot init
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
Presets: `nextjs-supabase`, `t3`, `python-fastapi`, `rails-postgres`, `go`.
|
|
115
|
-
|
|
116
|
-
## Config (`autopilot.config.yaml`)
|
|
113
|
+
## Config (`guardrail.config.yaml`)
|
|
117
114
|
|
|
118
115
|
```yaml
|
|
119
116
|
configVersion: 1
|
|
120
117
|
reviewEngine:
|
|
121
|
-
adapter: auto # auto-
|
|
122
|
-
testCommand: npm test
|
|
118
|
+
adapter: auto # auto-selects best available key at runtime
|
|
119
|
+
testCommand: npm test # null to disable; used by `fix` verified mode
|
|
120
|
+
|
|
123
121
|
protectedPaths:
|
|
124
|
-
- src/core/**
|
|
125
122
|
- data/deltas/**
|
|
123
|
+
- .github/workflows/**
|
|
124
|
+
|
|
126
125
|
staticRules:
|
|
127
|
-
- hardcoded-secrets
|
|
126
|
+
- hardcoded-secrets # Anthropic, OpenAI, Stripe, GitHub, Supabase, Twilio, SendGrid
|
|
128
127
|
- npm-audit
|
|
128
|
+
- sql-injection # template literals / concatenation in SQL context
|
|
129
|
+
- missing-auth # Next.js/pages API routes with POST/PUT/DELETE, no auth pattern
|
|
130
|
+
- ssrf # HTTP calls with user-controlled URL
|
|
131
|
+
- insecure-redirect # redirect() with user-controlled target
|
|
132
|
+
- console-log
|
|
133
|
+
- todo-fixme
|
|
134
|
+
- large-file
|
|
135
|
+
- missing-tests
|
|
136
|
+
- package-lock-sync
|
|
137
|
+
- brand-tokens # opt-in: requires brand: block below
|
|
138
|
+
|
|
139
|
+
# Brand token enforcement (opt-in — omit to disable)
|
|
140
|
+
brand:
|
|
141
|
+
colorsFrom: tailwind.config.ts # auto-extract theme.colors as canonical palette
|
|
142
|
+
colors: # explicit palette entries (merged with colorsFrom)
|
|
143
|
+
- '#f97316'
|
|
144
|
+
- '#1a1f3a'
|
|
145
|
+
fonts:
|
|
146
|
+
- 'Inter'
|
|
147
|
+
- 'Geist'
|
|
148
|
+
|
|
149
|
+
policy:
|
|
150
|
+
failOn: critical # critical (default) | warning | note | none
|
|
151
|
+
newOnly: false # true = suppress findings present in .guardrail-baseline.json
|
|
152
|
+
|
|
153
|
+
cost:
|
|
154
|
+
maxPerRun: 0.50 # abort review phase if spend exceeds $0.50
|
|
155
|
+
estimateBeforeRun: false # print token estimate before LLM calls
|
|
156
|
+
|
|
157
|
+
ignore:
|
|
158
|
+
- src/legacy/** # suppress all findings in path
|
|
159
|
+
- { rule: console-log, path: scripts/** } # suppress specific rule in path
|
|
160
|
+
|
|
161
|
+
chunking:
|
|
162
|
+
rateLimitBackoff: exp # exp (default) | linear | none
|
|
163
|
+
parallelism: 3
|
|
129
164
|
```
|
|
130
165
|
|
|
131
|
-
|
|
166
|
+
### Setup Profiles
|
|
167
|
+
|
|
168
|
+
`guardrail setup --profile <name>` overlays a pre-baked rule + policy configuration on top of the detected stack preset:
|
|
169
|
+
|
|
170
|
+
| Profile | Rules | `failOn` | Best for |
|
|
171
|
+
|---|---|---|---|
|
|
172
|
+
| `security-strict` | All security rules + hygiene | `warning` | Security audits, regulated environments |
|
|
173
|
+
| `team` | Core security + hygiene | `critical` | Standard CI/CD on shared branches |
|
|
174
|
+
| `solo` | Hygiene only | `critical` | Solo projects, low-noise baseline |
|
|
132
175
|
|
|
133
176
|
### Review Engine Adapters
|
|
134
177
|
|
|
135
178
|
| Adapter | Key required | Notes |
|
|
136
179
|
|---|---|---|
|
|
137
|
-
| `auto` | any
|
|
138
|
-
| `claude` | `ANTHROPIC_API_KEY` | Opus 4.7
|
|
180
|
+
| `auto` | any | Auto-selects best available (recommended) |
|
|
181
|
+
| `claude` | `ANTHROPIC_API_KEY` | Claude Opus 4.7 |
|
|
139
182
|
| `gemini` | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Gemini 2.5 Pro, 1M context |
|
|
140
183
|
| `codex` | `OPENAI_API_KEY` | GPT-5 Codex |
|
|
141
184
|
| `openai-compatible` | configurable | Groq, Ollama, Together AI, etc. |
|
|
142
185
|
|
|
143
186
|
`auto` priority: Anthropic → Gemini → OpenAI → Groq.
|
|
144
187
|
|
|
145
|
-
**Groq
|
|
188
|
+
**Groq (fast/free tier):**
|
|
146
189
|
```yaml
|
|
147
190
|
reviewEngine:
|
|
148
191
|
adapter: openai-compatible
|
|
@@ -161,44 +204,57 @@ reviewEngine:
|
|
|
161
204
|
baseUrl: http://localhost:11434/v1
|
|
162
205
|
```
|
|
163
206
|
|
|
207
|
+
---
|
|
208
|
+
|
|
164
209
|
## GitHub Actions
|
|
165
210
|
|
|
166
211
|
```yaml
|
|
167
212
|
- uses: axledbetter/claude-autopilot/.github/actions/ci@main
|
|
168
213
|
with:
|
|
169
|
-
|
|
214
|
+
anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
215
|
+
# Optional:
|
|
216
|
+
# post-comments: 'true'
|
|
217
|
+
# inline-comments: 'false'
|
|
218
|
+
# base-ref: 'main'
|
|
219
|
+
# sarif-output: 'guardrail.sarif'
|
|
220
|
+
# version: 'latest'
|
|
170
221
|
```
|
|
171
222
|
|
|
172
223
|
Runs the pipeline, uploads SARIF to GitHub Code Scanning, annotates the PR diff inline.
|
|
173
224
|
|
|
174
|
-
|
|
225
|
+
---
|
|
175
226
|
|
|
176
|
-
|
|
177
|
-
npx autopilot run --format sarif --output autopilot.sarif
|
|
178
|
-
```
|
|
227
|
+
## Typical Team Workflow
|
|
179
228
|
|
|
180
|
-
|
|
229
|
+
```bash
|
|
230
|
+
# 1. First run — establish a baseline so CI only fails on new issues
|
|
231
|
+
npx guardrail run --base main
|
|
232
|
+
npx guardrail baseline create --note "post-v2 audit"
|
|
233
|
+
git add .guardrail-baseline.json && git commit -m "chore: guardrail baseline"
|
|
181
234
|
|
|
182
|
-
|
|
235
|
+
# 2. CI — only new findings block the build
|
|
236
|
+
npx guardrail ci --new-only --fail-on critical
|
|
183
237
|
|
|
184
|
-
|
|
238
|
+
# 3. Triage false positives once, never see them again
|
|
239
|
+
npx guardrail triage sql-injection:src/db/raw.ts:47 false-positive --reason "internal admin only"
|
|
240
|
+
git add .guardrail-triage.json && git commit -m "chore: triage false positive"
|
|
185
241
|
|
|
186
|
-
|
|
187
|
-
npx
|
|
242
|
+
# 4. Auto-fix and verify
|
|
243
|
+
npx guardrail fix --yes # applies patches + runs tests, reverts on failure
|
|
188
244
|
```
|
|
189
245
|
|
|
190
|
-
|
|
246
|
+
---
|
|
191
247
|
|
|
192
|
-
|
|
248
|
+
## Interpreting Results
|
|
193
249
|
|
|
194
|
-
|
|
250
|
+
**Exit 0** — pass or warnings only (at current `policy.failOn` threshold). Safe to merge.
|
|
251
|
+
**Exit 1** — findings at or above threshold. Fix before merging.
|
|
195
252
|
|
|
196
|
-
|
|
197
|
-
import type { Finding, RunResult, AutopilotConfig } from '@delegance/claude-autopilot';
|
|
198
|
-
import { normalizeSnapshot } from '@delegance/claude-autopilot';
|
|
199
|
-
```
|
|
253
|
+
Findings: `critical` blocks merge · `warning` should fix · `note` informational.
|
|
200
254
|
|
|
201
|
-
|
|
255
|
+
PR comments show: status badge, phase table, critical/warning findings with inline links, cost footer. Re-runs update the existing comment in place.
|
|
256
|
+
|
|
257
|
+
---
|
|
202
258
|
|
|
203
259
|
## Architecture
|
|
204
260
|
|
|
@@ -206,11 +262,13 @@ Four pluggable adapter points:
|
|
|
206
262
|
|
|
207
263
|
| Point | Built-in | Purpose |
|
|
208
264
|
|---|---|---|
|
|
209
|
-
| `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM
|
|
210
|
-
| `vcs-host` | `github` | PR comments + SARIF
|
|
211
|
-
| `migration-runner` | `supabase` | DB
|
|
265
|
+
| `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM review |
|
|
266
|
+
| `vcs-host` | `github` | PR comments + SARIF |
|
|
267
|
+
| `migration-runner` | `supabase` | DB migrations |
|
|
212
268
|
| `review-bot-parser` | `cursor` | Parse review bot comments |
|
|
213
269
|
|
|
270
|
+
**Monorepo:** Auto-detects npm/yarn/pnpm workspaces, Turborepo, and Nx.
|
|
271
|
+
|
|
214
272
|
## License
|
|
215
273
|
|
|
216
274
|
MIT
|
package/bin/_launcher.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// Shared launcher for both `claude-autopilot` and `guardrail` bins.
|
|
2
|
+
// Imported, not a bin itself. Resolves tsx, spawns src/cli/index.ts with
|
|
3
|
+
// the caller's argv, forwards stdio, exits with the child's status.
|
|
4
|
+
|
|
5
|
+
import { fileURLToPath } from 'node:url';
|
|
6
|
+
import { spawnSync } from 'node:child_process';
|
|
7
|
+
import * as fs from 'node:fs';
|
|
8
|
+
import * as path from 'node:path';
|
|
9
|
+
import * as os from 'node:os';
|
|
10
|
+
|
|
11
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
12
|
+
const ENTRYPOINT = path.resolve(__dirname, '..', 'src', 'cli', 'index.ts');
|
|
13
|
+
|
|
14
|
+
/**
 * Pick the `tsx` executable used to run the TypeScript entrypoint.
 *
 * Candidates are probed in order and the first one that exists on disk wins:
 *   1. `node_modules/.bin/tsx` inside this package,
 *   2. `.bin/tsx` three directories above this file (presumably the
 *      consumer's `node_modules/.bin` when installed as a scoped dependency).
 * If neither exists, the bare command name `tsx` is returned.
 *
 * @returns {string} Absolute path to a tsx binary, or the literal 'tsx'.
 */
function findTsx() {
  const candidates = [
    path.resolve(__dirname, '..', 'node_modules', '.bin', 'tsx'),
    path.resolve(__dirname, '..', '..', '..', '.bin', 'tsx'),
  ];
  return candidates.find((candidate) => fs.existsSync(candidate)) ?? 'tsx';
}
|
|
21
|
+
|
|
22
|
+
// Tracks per-terminal-session whether the deprecation notice has been shown.
|
|
23
|
+
// Uses a temp file keyed by parent PID + stderr's tty so parallel CI jobs don't
|
|
24
|
+
// collide. Falls back to always-emit if the stamp can't be written.
|
|
25
|
+
const DEPRECATION_STAMP_DIR = path.join(os.tmpdir(), 'claude-autopilot');
|
|
26
|
+
/**
 * Report whether the deprecation notice was already shown for this session,
 * recording a stamp file as a side effect when it was not.
 *
 * The stamp lives in DEPRECATION_STAMP_DIR and is named from the parent PID
 * plus whether stderr is a TTY. When a new stamp is written, stamps whose
 * mtime is more than one hour old are removed on a best-effort basis.
 *
 * @returns {boolean} true if a stamp already existed; false if one was just
 *   written or if any filesystem step failed (so the caller emits the notice).
 */
function hasShownDeprecation() {
  try {
    if (!fs.existsSync(DEPRECATION_STAMP_DIR)) {
      fs.mkdirSync(DEPRECATION_STAMP_DIR, { recursive: true });
    }

    const streamKind = process.stderr.isTTY ? 'tty' : 'pipe';
    const stampFile = path.join(DEPRECATION_STAMP_DIR, `${process.ppid}-${streamKind}.stamp`);
    if (fs.existsSync(stampFile)) {
      return true;
    }
    fs.writeFileSync(stampFile, String(Date.now()));

    // Best-effort cleanup of stamps older than 1h to keep tmpdir tidy.
    const oldestAllowed = Date.now() - 60 * 60 * 1000;
    fs.readdirSync(DEPRECATION_STAMP_DIR).forEach((entry) => {
      const entryPath = path.join(DEPRECATION_STAMP_DIR, entry);
      try {
        const { mtimeMs } = fs.statSync(entryPath);
        if (mtimeMs < oldestAllowed) {
          fs.unlinkSync(entryPath);
        }
      } catch { /* ignore */ }
    });
    return false;
  } catch {
    // Any failure above means "not shown", so the notice is always emitted.
    return false;
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Resolve whether the `guardrail` deprecation notice should be printed.
 *
 * The CLAUDE_AUTOPILOT_DEPRECATION environment variable takes precedence:
 *   'never'  → suppress the notice (CI/automation)
 *   'always' → print it on every invocation (deterministic testing)
 * Any other value, or no value, defers to the stamp check: the notice is
 * printed only when hasShownDeprecation() reports it has not been shown.
 *
 * @returns {boolean} true when the notice should be written.
 */
function shouldEmitDeprecation() {
  switch (process.env.CLAUDE_AUTOPILOT_DEPRECATION) {
    case 'never':
      return false;
    case 'always':
      return true;
    default:
      return !hasShownDeprecation();
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Launch the CLI with `argv` passed through verbatim.
 *
 * Runs `src/cli/index.ts` through tsx synchronously with inherited stdio and
 * terminates this process with the child's exit status. When invoked under
 * the `guardrail` name, a deprecation notice may be written to stderr first
 * (see shouldEmitDeprecation for the rules).
 *
 * This function never returns: every path ends in process.exit().
 *
 * @param {{ name: 'claude-autopilot' | 'guardrail' }} opts
 */
export function launch(opts) {
  if (opts.name === 'guardrail' && shouldEmitDeprecation()) {
    process.stderr.write(
      '\x1b[33m[deprecated]\x1b[0m `guardrail` CLI is renamed to `claude-autopilot`. ' +
        'The `guardrail` alias works through v5.x and will be removed in v6. ' +
        'Migration guide: https://github.com/axledbetter/claude-autopilot/blob/master/docs/migration/v4-to-v5.md\n' +
        'Silence: set CLAUDE_AUTOPILOT_DEPRECATION=never\n',
    );
  }

  const tsxCommand = findTsx();
  const result = spawnSync(tsxCommand, [ENTRYPOINT, ...process.argv.slice(2)], { stdio: 'inherit' });

  // spawnSync does not throw when the child cannot be started (e.g. tsx is
  // missing); it sets `error` and leaves `status` null. Surface the cause
  // instead of exiting 1 silently. The exit code is unchanged.
  if (result.error) {
    process.stderr.write(`${opts.name}: failed to run "${tsxCommand}": ${result.error.message}\n`);
    process.exit(1);
  }

  // `status` is null when the child was terminated by a signal; keep exiting 1.
  process.exit(result.status ?? 1);
}
|
package/bin/guardrail.js
ADDED
package/package.json
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@delegance/claude-autopilot",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "5.0.0-alpha.1",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"description": "Claude Code
|
|
5
|
+
"description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
|
|
6
6
|
"keywords": [
|
|
7
|
-
"claude",
|
|
7
|
+
"claude-autopilot",
|
|
8
8
|
"autopilot",
|
|
9
|
-
"
|
|
10
|
-
"
|
|
9
|
+
"claude-code",
|
|
10
|
+
"ai-agent",
|
|
11
11
|
"code-review",
|
|
12
|
-
"
|
|
12
|
+
"llm",
|
|
13
|
+
"sarif",
|
|
14
|
+
"cli",
|
|
15
|
+
"pipeline"
|
|
13
16
|
],
|
|
14
17
|
"license": "MIT",
|
|
15
18
|
"repository": {
|
|
@@ -20,7 +23,8 @@
|
|
|
20
23
|
"node": ">=22.0.0"
|
|
21
24
|
},
|
|
22
25
|
"bin": {
|
|
23
|
-
"autopilot": "bin/autopilot.js"
|
|
26
|
+
"claude-autopilot": "bin/claude-autopilot.js",
|
|
27
|
+
"guardrail": "bin/guardrail.js"
|
|
24
28
|
},
|
|
25
29
|
"types": "./src/index.ts",
|
|
26
30
|
"exports": {
|
|
@@ -35,6 +39,7 @@
|
|
|
35
39
|
"skills/",
|
|
36
40
|
"scripts/test-runner.mjs",
|
|
37
41
|
"scripts/autoregress.ts",
|
|
42
|
+
"scripts/snapshots/",
|
|
38
43
|
"tests/snapshots/",
|
|
39
44
|
"CHANGELOG.md"
|
|
40
45
|
],
|
|
@@ -47,6 +52,7 @@
|
|
|
47
52
|
"dependencies": {
|
|
48
53
|
"@anthropic-ai/sdk": "^0.90.0",
|
|
49
54
|
"@google/generative-ai": "^0.24.1",
|
|
55
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
50
56
|
"ajv": "^8",
|
|
51
57
|
"dotenv": ">=16",
|
|
52
58
|
"js-yaml": "^4",
|
|
@@ -56,7 +62,7 @@
|
|
|
56
62
|
},
|
|
57
63
|
"devDependencies": {
|
|
58
64
|
"@types/js-yaml": "^4",
|
|
59
|
-
"@types/node": "^
|
|
60
|
-
"typescript": "^
|
|
65
|
+
"@types/node": "^25",
|
|
66
|
+
"typescript": "^6"
|
|
61
67
|
}
|
|
62
68
|
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
configVersion: 1
|
|
2
|
+
reviewEngine: { adapter: auto }
|
|
3
|
+
vcsHost: { adapter: github }
|
|
4
|
+
reviewBot: { adapter: cursor }
|
|
5
|
+
# No migrationRunner — the migrate phase will no-op with a notice pointing
|
|
6
|
+
# to `.claude-autopilot/stack.yaml` for users who want to wire one up.
|
|
7
|
+
protectedPaths:
|
|
8
|
+
- "**/auth/**"
|
|
9
|
+
- "**/payment/**"
|
|
10
|
+
- "**/encryption/**"
|
|
11
|
+
- "**/secret/**"
|
|
12
|
+
- "**/keys/**"
|
|
13
|
+
staticRules:
|
|
14
|
+
- hardcoded-secrets
|
|
15
|
+
- npm-audit
|
|
16
|
+
- package-lock-sync
|
|
17
|
+
- sql-injection
|
|
18
|
+
- missing-auth
|
|
19
|
+
- ssrf
|
|
20
|
+
- insecure-redirect
|
|
21
|
+
policy:
|
|
22
|
+
failOn: critical
|
|
23
|
+
newOnly: false
|
|
24
|
+
thresholds:
|
|
25
|
+
bugbotAutoFix: 85
|
|
26
|
+
bugbotProposePatch: 60
|
|
27
|
+
maxValidateRetries: 3
|
|
28
|
+
reviewStrategy: auto
|
|
29
|
+
chunking:
|
|
30
|
+
smallTierMaxTokens: 8000
|
|
31
|
+
partialReviewTokens: 60000
|
|
32
|
+
perFileMaxTokens: 32000
|
|
33
|
+
pipeline:
|
|
34
|
+
runReviewOnStaticFail: true
|
|
35
|
+
runReviewOnTestFail: false
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
A generic project with no strong framework signals detected.
|
|
2
|
+
|
|
3
|
+
This preset makes **no assumptions** about:
|
|
4
|
+
- Database engine or migration runner
|
|
5
|
+
- Type generation
|
|
6
|
+
- Test framework (uses whatever `npm test` / `npm run typecheck` / `npm run lint` find)
|
|
7
|
+
- Deployment target
|
|
8
|
+
|
|
9
|
+
It enables the core security rules that apply to most codebases — hardcoded secrets, npm audit, SQL injection patterns, missing auth checks, SSRF, insecure redirects.
|
|
10
|
+
|
|
11
|
+
## What's disabled vs stack-specific presets
|
|
12
|
+
|
|
13
|
+
- `supabase-rls-bypass` rule (Supabase-only)
|
|
14
|
+
- `schema-alignment` rule (requires declared migration paths)
|
|
15
|
+
- `migrate` phase of the pipeline no-ops with a notice
|
|
16
|
+
|
|
17
|
+
## Wiring up migrations
|
|
18
|
+
|
|
19
|
+
If your project uses migrations, create `.claude-autopilot/stack.yaml` with:
|
|
20
|
+
|
|
21
|
+
```yaml
|
|
22
|
+
migrate:
|
|
23
|
+
command: "prisma migrate dev" # or flyway, dbmate, tbls, golang-migrate, etc.
|
|
24
|
+
environments: [dev, staging, prod]
|
|
25
|
+
typeGeneration:
|
|
26
|
+
command: "prisma generate"
|
|
27
|
+
path: "node_modules/.prisma/client"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Or pick a stack-specific preset at setup time: `claude-autopilot init --preset nextjs-supabase`.
|
|
31
|
+
|
|
32
|
+
## Things that should flag CRITICAL (universal)
|
|
33
|
+
|
|
34
|
+
- Secrets committed to code or history
|
|
35
|
+
- SQL string concatenation with user input
|
|
36
|
+
- POST endpoints without auth checks
|
|
37
|
+
- SSRF via user-controlled URLs in `fetch` / `axios`
|
|
38
|
+
- Open redirects (user-controlled `Location` header)
|
|
39
|
+
- Dynamic code evaluation (`eval`, `Function` constructor) with user input
|
|
40
|
+
- Shell command construction with user input
|