npm - @delegance/claude-autopilot - Versions diffs - 2.5.0 → 5.0.0-alpha.1 - Mend

@delegance/claude-autopilot 2.5.0 → 5.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129)

package/CHANGELOG.md +40 -0
package/README.md +164 -106
package/bin/_launcher.js +77 -0
package/bin/claude-autopilot.js +3 -0
package/bin/guardrail.js +3 -0
package/package.json +15 -9
package/presets/generic/guardrail.config.yaml +35 -0
package/presets/generic/stack.md +40 -0
package/presets/nextjs-supabase/{autopilot.config.yaml → guardrail.config.yaml} +7 -0
package/scripts/autoregress.ts +27 -11
package/skills/autopilot/SKILL.md +170 -0
package/skills/claude-autopilot.md +80 -0
package/skills/guardrail.md +39 -0
package/skills/migrate/SKILL.md +83 -0
package/src/adapters/council/claude.ts +41 -0
package/src/adapters/council/openai.ts +40 -0
package/src/adapters/council/types.ts +7 -0
package/src/adapters/loader.ts +7 -7
package/src/adapters/review-engine/auto.ts +2 -2
package/src/adapters/review-engine/claude.ts +9 -11
package/src/adapters/review-engine/codex.ts +9 -11
package/src/adapters/review-engine/gemini.ts +9 -11
package/src/adapters/review-engine/openai-compatible.ts +10 -12
package/src/adapters/review-engine/parse-output.ts +32 -6
package/src/adapters/review-engine/prompt-builder.ts +19 -0
package/src/adapters/review-engine/types.ts +1 -1
package/src/adapters/vcs-host/commit-status.ts +39 -0
package/src/adapters/vcs-host/github.ts +2 -2
package/src/cli/baseline.ts +125 -0
package/src/cli/ci.ts +11 -8
package/src/cli/costs.ts +2 -2
package/src/cli/council.ts +96 -0
package/src/cli/detector.ts +21 -5
package/src/cli/explain.ts +197 -0
package/src/cli/fix.ts +173 -111
package/src/cli/hook.ts +72 -27
package/src/cli/ignore-helper.ts +116 -0
package/src/cli/index.ts +272 -31
package/src/cli/init.ts +12 -12
package/src/cli/lsp.ts +200 -0
package/src/cli/mcp.ts +206 -0
package/src/cli/pr-comment.ts +5 -5
package/src/cli/pr-desc.ts +168 -0
package/src/cli/pr-review-comments.ts +3 -3
package/src/cli/pr.ts +76 -0
package/src/cli/preflight.ts +15 -32
package/src/cli/report.ts +186 -0
package/src/cli/run.ts +140 -36
package/src/cli/scan.ts +233 -0
package/src/cli/setup.ts +121 -15
package/src/cli/test-gen.ts +125 -0
package/src/cli/triage.ts +137 -0
package/src/cli/watch.ts +52 -31
package/src/cli/worker.ts +109 -0
package/src/core/cache/review-cache.ts +2 -2
package/src/core/chunking/index.ts +2 -2
package/src/core/config/loader.ts +10 -10
package/src/core/config/preset-resolver.ts +6 -6
package/src/core/config/schema.ts +103 -2
package/src/core/config/types.ts +57 -2
package/src/core/council/config.ts +71 -0
package/src/core/council/context.ts +17 -0
package/src/core/council/runner.ts +83 -0
package/src/core/council/types.ts +45 -0
package/src/core/detect/llm-key.ts +89 -0
package/src/core/detect/workspaces.ts +103 -0
package/src/core/errors.ts +4 -4
package/src/core/fix/generator.ts +149 -0
package/src/core/ignore/index.ts +4 -4
package/src/core/mcp/concurrency.ts +16 -0
package/src/core/mcp/handlers/fix-finding.ts +126 -0
package/src/core/mcp/handlers/get-capabilities.ts +62 -0
package/src/core/mcp/handlers/get-findings.ts +36 -0
package/src/core/mcp/handlers/review-diff.ts +65 -0
package/src/core/mcp/handlers/scan-files.ts +65 -0
package/src/core/mcp/handlers/validate-fix.ts +41 -0
package/src/core/mcp/run-store.ts +85 -0
package/src/core/mcp/workspace.ts +35 -0
package/src/core/persist/baseline.ts +112 -0
package/src/core/persist/cost-log.ts +1 -1
package/src/core/persist/findings-cache.ts +1 -1
package/src/core/persist/triage.ts +112 -0
package/src/core/phases/static-rules.ts +18 -5
package/src/core/pipeline/review-phase.ts +65 -26
package/src/core/pipeline/run.ts +42 -10
package/src/core/runtime/lock.ts +2 -2
package/src/core/runtime/state.ts +2 -2
package/src/core/schema-alignment/detector.ts +59 -0
package/src/core/schema-alignment/extractor/index.ts +24 -0
package/src/core/schema-alignment/extractor/prisma.ts +21 -0
package/src/core/schema-alignment/extractor/sql.ts +99 -0
package/src/core/schema-alignment/llm-check.ts +91 -0
package/src/core/schema-alignment/scanner.ts +107 -0
package/src/core/schema-alignment/types.ts +43 -0
package/src/core/shell.ts +3 -3
package/src/core/static-rules/registry.ts +17 -8
package/src/core/static-rules/rules/brand-tokens.ts +145 -0
package/src/core/static-rules/rules/hardcoded-secrets.ts +27 -1
package/src/core/static-rules/rules/insecure-redirect.ts +67 -0
package/src/core/static-rules/rules/missing-auth.ts +70 -0
package/src/core/static-rules/rules/schema-alignment.ts +132 -0
package/src/core/static-rules/rules/sql-injection.ts +71 -0
package/src/core/static-rules/rules/ssrf.ts +63 -0
package/src/core/static-rules/tailwind-extractor.ts +38 -0
package/src/core/test-gen/coverage-analyzer.ts +93 -0
package/src/core/test-gen/framework-detector.ts +21 -0
package/src/core/test-gen/test-writer.ts +33 -0
package/src/core/ui/design-context-loader.ts +87 -0
package/src/core/worker/client.ts +46 -0
package/src/core/worker/lockfile.ts +38 -0
package/src/core/worker/server.ts +81 -0
package/src/formatters/junit.ts +52 -0
package/src/formatters/sarif.ts +2 -2
package/src/index.ts +1 -2
package/tests/snapshots/baselines/src-formatters-sarif.json +4 -4
package/tests/snapshots/index.json +3 -3
package/tests/snapshots/src-formatters-sarif.snap.ts +1 -1
package/tests/snapshots/src-snapshots-impact-selector.snap.ts +3 -3
package/tests/snapshots/src-snapshots-import-scanner.snap.ts +3 -3
package/tests/snapshots/src-snapshots-serializer.snap.ts +2 -2
package/bin/autopilot.js +0 -20
package/skills/autopilot.md +0 -157
/package/presets/go/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
/package/presets/python-fastapi/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
/package/presets/rails-postgres/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
/package/presets/t3/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
/package/{src → scripts}/snapshots/impact-selector.ts +0 -0
/package/{src → scripts}/snapshots/import-scanner.ts +0 -0
/package/{src → scripts}/snapshots/serializer.ts +0 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,45 @@
 # Changelog
+## [5.0.0-alpha.1] — 2026-04-24
+**Package renamed: `@delegance/guardrail` → `@delegance/claude-autopilot`.**
+The v4 product sold itself as "LLM code review." The real product is an end-to-end autonomous development pipeline built on Claude Code skills — brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. This alpha corrects the identity mismatch without breaking any v4 usage.
+Every v4 invocation continues to work through v5.x via the preserved `guardrail` CLI alias. Migration guide: `docs/migration/v4-to-v5.md`.
+### Added
+- **`claude-autopilot` CLI binary** — primary entrypoint (`bin/claude-autopilot.js`), co-installed with `guardrail`.
+- **Pipeline skills bundled in the tarball** — `skills/claude-autopilot.md` (agent-loop spec), `skills/autopilot/`, `skills/migrate/`. v4.3.1 shipped only `skills/guardrail.md`; the pipeline skills existed only in-repo and weren't distributed.
+- **`generic` preset** — no DB migration runner, uses `npm test` / `npm run typecheck` / `npm run lint` where present. Picked by `detectProject()` as the fallback when no stack signals are found (replaces the v4 behavior of claiming `nextjs-supabase` with low confidence).
+- **v5 migration guide at `docs/migration/v4-to-v5.md`** — find/replace patterns for `package.json`, shell scripts, GitHub Actions yaml, Dockerfiles, and Claude Code skills.
+### Changed
+- **Stack detector fallback:** plain Next.js with no Supabase signals now returns `generic`, not `nextjs-supabase (low confidence)`. Fixes the cold-start eval reviewer finding.
+- **`PRESET_LABELS` in `setup.ts`:** adds `generic` entry.
+- **Detector tests:** updated to assert the new `generic` fallback behavior.
+- **`skills/guardrail.md`:** rewritten as a back-compat alias pointing at `skills/claude-autopilot.md`.
+- **`bin/guardrail.js`:** emits a short deprecation notice (plus a hint that `CLAUDE_AUTOPILOT_DEPRECATION=never` silences it) on `stderr` on first invocation per terminal session, then forwards unchanged.
+### Deferred to later alphas
+- **alpha.2:** full CLI verb restructure (`claude-autopilot {review,pr,triage,advanced,…}`), v4 compatibility golden-test matrix, superpowers peer-dep hard-fail in `doctor`.
+- **alpha.3:** tombstone `@delegance/guardrail@5.0.0` publish, CI smoke tests for `npx guardrail` / `npx @delegance/guardrail` / global install / GitHub Actions parity, codemod script for find/replace migration.
+- **5.0.0 GA:** after alpha.3 soaks against delegance-app for 2+ real feature pipelines.
+## [4.3.1] — 2026-04-24
+### Fixed (from external cold-start review)
+- **`parseReviewOutput` silent failure** — regex required literal `### [CRITICAL]` brackets and returned zero findings when the LLM emitted `### CRITICAL`, `### **CRITICAL**`, or `### **[CRITICAL]**` (all common Llama/GPT variants). `src/adapters/review-engine/parse-output.ts` now accepts all four formats and logs a warning when raw output is non-empty but no findings parse, so format drift never silently hides bugs again.
+- **Pipeline short-circuit skipped LLM review** — `src/core/pipeline/run.ts` returned early on static-rules `fail`, meaning the LLM never ran on the code that most needed it (IDOR, TOCTOU, CORS, off-by-one, rate-limit gaps typically ride alongside a static-flagged issue). New default: review runs even on static-fail. Legacy behavior restored via `pipeline.runReviewOnStaticFail: false` in config.
+- **`doctor` / `preflight` ignored 3 of 5 LLM keys** — only checked `ANTHROPIC_API_KEY` and `OPENAI_API_KEY`, so users with `GROQ_API_KEY`/`GEMINI_API_KEY`/`GOOGLE_API_KEY` set saw "No LLM API key" right after `setup` reported "detected." New shared helper `src/core/detect/llm-key.ts` is the single source of truth used by setup, scan, run, and preflight.
+- **Stack detector mislabeled plain Next.js as "Next.js + Supabase"** — now requires actual Supabase signals (`@supabase/supabase-js`, `@supabase/ssr`, `@supabase/auth-helpers-nextjs`, `supabase/config.toml`, or `SUPABASE_*` env vars). Vanilla Next.js still uses the `nextjs-supabase` preset as a fallback but the evidence string and setup output make the fallback explicit.
+- **`--profile team` missing security rules** — added `package-lock-sync`, `ssrf`, `insecure-redirect` to match the README's advertised coverage.
+### Added
+- `src/core/detect/llm-key.ts` — `detectLLMKey()`, `LLM_KEY_NAMES`, `LLM_KEY_HINTS`, `loadEnvFile()`.
+- `GuardrailConfig.pipeline.runReviewOnStaticFail` / `runReviewOnTestFail` config flags.
+- 6 parser format-variation tests covering all documented markdown variants plus the silent-drift warning path.
 ## [2.5.0] — 2026-04-22
 ### Added

package/README.md CHANGED Viewed

@@ -1,148 +1,191 @@
 # @delegance/claude-autopilot
-Automated code review pipeline for Claude Code. Runs static rules, an optional LLM review engine, and impact-aware snapshot regression tests — outputs SARIF for GitHub Code Scanning, inline PR annotations, and a pre-push hook for local enforcement.
-## Install
+**Autonomous development pipeline for Claude Code. Brainstorm → spec → plan → implement → migrate → validate → PR → review → merge — all from your terminal, on your codebase, with your test suite.**
 ```bash
-npm install @delegance/claude-autopilot
+claude-autopilot brainstorm "add SSO with SAML for enterprise tenants"
+# → writes spec (reviewed by Codex) → writes plan (reviewed by Codex) →
+# → creates branch → implements with subagents → runs migrations →
+# → runs full test + lint + type + security gate → opens PR →
+# → dispatches multi-model review → auto-fixes bugbot findings →
+# → ready to merge
 ```
-**Prerequisites:** Node 22+, [`gh` CLI](https://cli.github.com/) authenticated, [`claude` CLI](https://claude.ai/claude-code) (Claude Code).
-## Claude Code Skill
-The package ships a ready-made Claude Code skill. After installing, copy it into your project:
+*No hosted agent. No per-seat subscription. Runs locally on your machine, against your real repo, using your API keys. Every phase is a Claude Code skill you can intervene in, rewire, or run by itself.*
-```bash
-mkdir -p .claude/skills
-cp node_modules/@delegance/claude-autopilot/skills/autopilot.md .claude/skills/
-```
+---
-Claude will then know when and how to invoke `autopilot run`, interpret findings, and wire it into your dev pipeline automatically.
+## Why this vs the alternatives
-## Quick Start
-```bash
-# One command — auto-detects project type, writes config, installs hook, runs doctor
-npx autopilot setup
-# Run your first pipeline
-npx autopilot run
-```
+AI coding tools fall into three buckets. Here's where claude-autopilot sits.
-`setup` detects your stack (Go, Rails, FastAPI, T3, Next.js+Supabase), infers your test command, writes `autopilot.config.yaml`, installs the pre-push hook, then runs `doctor` to show anything still missing.
+| Tool | Shape | Hosted? | Model lock-in | Pipeline structure | You can intervene mid-flow? |
+|---|---|---|---|---|---|
+| **Devin** (Cognition) | Autonomous agent | Yes (SaaS, $500/mo) | Cognition's stack | Opaque | No — watch a dashboard |
+| **GitHub Copilot Workspace** | Spec → plan → PR | Yes | Copilot only | Fixed, non-extensible | Edit the plan, that's it |
+| **Factory Droids** | Multi-agent workflow | Yes (per-seat) | Factory's stack | Fixed | Limited |
+| **Cursor BugBot / Copilot Review / CodeRabbit** | Async PR reviewer | Yes | Vendor's model | Single phase (review only) | N/A — post-hoc only |
+| **Aider / Cline / Cursor agent mode** | Interactive pair programming | Local | User's choice | None — single-shot prompts | Continuous |
+| **OpenHands / SWE-agent** | Open-ended agent framework | Local | User's choice | None — agent decides | Rare, research-grade |
+| **claude-autopilot** | **Opinionated local pipeline** | **Local** | **Any LLM (Claude / GPT / Gemini / Groq / Ollama)** | **Fixed but rewireable, skill-per-phase** | **Every phase. All state on disk.** |
-## Commands
+The architectural differences that matter most in practice:
-### `autopilot setup`
+1. **Multi-model by design.** Claude writes code, Codex reviews the plan, bugbot triages PR findings. Different model for each role, swap any of them. The pipeline's phases are explicit contracts, not one opaque API call.
+2. **Your stack, not a sandbox.** It runs your `npm test`, your `prisma migrate`, your `gh pr create`, your `ruff check`. If it works in your terminal, it works in the pipeline.
+3. **Phase artifacts on disk, editable.** Every phase writes to a file you can open — `docs/specs/*.md`, `docs/plans/*.md`, a branch, a PR. Stop, edit by hand, resume, or re-run any phase in isolation.
+4. **Test-gated auto-revert as a first-class command.** `claude-autopilot fix --verify` patches a file, runs your full test suite, and reverts on failure. Built into the CLI, not a wrapper you write yourself.
-Zero-prompt setup. Auto-detects project type and configures everything.
+## 30-second quickstart
 ```bash
-npx autopilot setup            # detect, write config, install hook
-npx autopilot setup --force    # overwrite existing autopilot.config.yaml
-```
+# Install
+npm install -g @delegance/claude-autopilot
-### `autopilot doctor`
+# One-shot setup — detects stack, writes config, installs skills, sets hooks
+npx claude-autopilot init
-Checks prerequisites. Runs automatically after `setup` — also useful any time `run` behaves unexpectedly.
+# Ship a feature end-to-end
+claude-autopilot brainstorm "add rate limiting to the public API"
+# Answer ~5 questions. Spec written. Codex reviews it. You approve.
+# Claude walks the plan → implementation → migration → tests → PR → review.
+# ~15-40 min for a typical feature.
-```bash
-npx autopilot doctor
+# Or run just the review layer on an existing PR
+claude-autopilot run --pr 123
 ```
-Verifies: Node 22+, tsx, `gh` CLI auth, `claude` CLI, `OPENAI_API_KEY`, git user config, superpowers plugin. Exits 1 if blockers found. `autopilot preflight` is an alias.
+## The pipeline, phase by phase
-### `autopilot run`
+Each phase is a Claude Code skill (`.claude/skills/<name>/SKILL.md`). You can invoke any phase directly (`/brainstorm`, `/plan`, `/migrate`, `/validate`) without running the full pipeline. You can also rewire the pipeline by editing the `autopilot` skill.
-Runs the pipeline on git-changed files.
-```bash
-npx autopilot run                        # diff against HEAD~1
-npx autopilot run --base main            # diff against main
-npx autopilot run --files src/foo.ts     # explicit file list
-npx autopilot run --format sarif --output results.sarif
-npx autopilot run --dry-run
-```
+| Phase | Skill | What it does | Model role |
+|---|---|---|---|
+| **Brainstorm** | `brainstorming` | Turns a rough idea into an approved spec through guided questions | Claude (implementation model) |
+| **Spec review** | `codex-review` | Second model critiques the spec before you commit to it | Codex / GPT-5 |
+| **Plan** | `writing-plans` | Breaks spec into phased, checklist-shaped implementation plan | Claude |
+| **Plan review** | `codex-review` | Second model critiques the plan before you execute it | Codex / GPT-5 |
+| **Implement** | `subagent-driven-development` | Executes plan in a git worktree, one phase at a time, with per-phase tests | Claude |
+| **Migrate** | `migrate` | Runs database migrations dev → QA → prod with per-env validation | Deterministic |
+| **Validate** | `validate` | Static rules + tests + type check + security scan + LLM review | Any |
+| **PR** | `commit-push-pr` | Opens the PR with auto-generated title, summary, and test plan | Claude |
+| **Review** | `review-2pass` / `council` | Multi-model review of the diff (critical pass + informational pass) | Multiple |
+| **Triage** | `bugbot` | Fetches automated reviewer findings, auto-fixes real bugs, dismisses false positives | Claude |
-### `autopilot watch`
+## What's distinctive
-Re-runs on every file save.
+Features that are hard or impossible to find in the competitive set:
-```bash
-npx autopilot watch
-npx autopilot watch --debounce 500
-```
+- **Multi-model council review** — dispatch the same diff to 3+ models in parallel, synthesize agreement. Catches blind spots no single model sees.
+- **Fix with test verification** — `claude-autopilot fix` runs your full test suite after every patch and reverts on failure. Safer than any tool that proposes fixes without running your tests.
+- **Bug-bot auto-triage** — watches Cursor BugBot / Copilot comments on your PR, triages each (real bug vs false positive), auto-fixes confirmed bugs, dismisses noise with explanations.
+- **Schema alignment rule** — ensures DB migrations, backend types, and frontend types stay in sync. Custom static rule, not something any competitor ships.
+- **SARIF output + GitHub Code Scanning integration** — findings appear as annotations in the PR and in the Security tab.
-### `autopilot autoregress`
+## Just the review layer
-Impact-aware snapshot regression tests. Only fires snapshots whose source modules were touched by the current branch.
+If you don't want the full pipeline, the review subcommands are a strict superset of what `guardrail run` used to do: LLM code review over git-changed files, SARIF output, inline PR comments, auto-fix, baselines, per-finding triage, cost budgets. The legacy `guardrail` CLI remains aliased to the review subcommands through v5.x.
 ```bash
-npx autopilot autoregress run              # impact-selected (default)
-npx autopilot autoregress run --all
-npx autopilot autoregress diff             # show JSON diffs vs baselines
-npx autopilot autoregress update           # overwrite baselines
-npx autopilot autoregress generate         # LLM-generate snapshot tests for changed files
-npx autopilot autoregress generate --files src/foo.ts,src/bar.ts
+claude-autopilot run                             # review changes since main
+claude-autopilot run --inline-comments           # post per-line PR annotations
+claude-autopilot run --format sarif --output out.sarif
+claude-autopilot fix --verify                    # LLM patch + test gate + revert on fail
 ```
-`generate` requires `OPENAI_API_KEY`.
-### `autopilot hook`
+> **Alpha.1 CLI note:** subcommands are flat (`run`, `scan`, `ci`, `fix`, `baseline`, `explain`, …). The grouped `claude-autopilot review <verb>` form lands in alpha.2 as an alias — flat forms continue to work indefinitely.
-Manages the `pre-push` git hook.
+## Install & requirements
 ```bash
-npx autopilot hook install          # write .git/hooks/pre-push
-npx autopilot hook install --force  # overwrite existing
-npx autopilot hook uninstall
-npx autopilot hook status
+npm install -g @delegance/claude-autopilot
 ```
-Works in git worktrees.
+- Node 22+
+- `gh` CLI (for PR phases)
+- One of: `ANTHROPIC_API_KEY` (recommended), `OPENAI_API_KEY`, `GEMINI_API_KEY` (or `GOOGLE_API_KEY`), or `GROQ_API_KEY`
+- Claude Code CLI (for skill-based phases — pipeline falls back to direct CLI invocations without it, but loses interactive checkpoints)
-### `autopilot init`
+---
-Interactive preset picker — for when you want to choose a preset manually instead of using `setup`.
+---
-```bash
-npx autopilot init
-```
-Presets: `nextjs-supabase`, `t3`, `python-fastapi`, `rails-postgres`, `go`.
-## Config (`autopilot.config.yaml`)
+## Config (`guardrail.config.yaml`)
 ```yaml
 configVersion: 1
 reviewEngine:
-  adapter: auto        # auto-detects best available key at runtime
-testCommand: npm test
+  adapter: auto        # auto-selects best available key at runtime
+testCommand: npm test  # null to disable; used by `fix` verified mode
 protectedPaths:
-  - src/core/**
   - data/deltas/**
+  - .github/workflows/**
 staticRules:
-  - hardcoded-secrets
+  - hardcoded-secrets   # Anthropic, OpenAI, Stripe, GitHub, Supabase, Twilio, SendGrid
   - npm-audit
+  - sql-injection       # template literals / concatenation in SQL context
+  - missing-auth        # Next.js/pages API routes with POST/PUT/DELETE, no auth pattern
+  - ssrf                # HTTP calls with user-controlled URL
+  - insecure-redirect   # redirect() with user-controlled target
+  - console-log
+  - todo-fixme
+  - large-file
+  - missing-tests
+  - package-lock-sync
+  - brand-tokens        # opt-in: requires brand: block below
+# Brand token enforcement (opt-in — omit to disable)
+brand:
+  colorsFrom: tailwind.config.ts   # auto-extract theme.colors as canonical palette
+  colors:                          # explicit palette entries (merged with colorsFrom)
+    - '#f97316'
+    - '#1a1f3a'
+  fonts:
+    - 'Inter'
+    - 'Geist'
+policy:
+  failOn: critical      # critical (default) | warning | note | none
+  newOnly: false        # true = suppress findings present in .guardrail-baseline.json
+cost:
+  maxPerRun: 0.50       # abort review phase if spend exceeds $0.50
+  estimateBeforeRun: false  # print token estimate before LLM calls
+ignore:
+  - src/legacy/**                              # suppress all findings in path
+  - { rule: console-log, path: scripts/** }    # suppress specific rule in path
+chunking:
+  rateLimitBackoff: exp    # exp (default) | linear | none
+  parallelism: 3
 ```
-Full schema and preset defaults: `presets/<name>/autopilot.config.yaml`.
+### Setup Profiles
+`guardrail setup --profile <name>` overlays a pre-baked rule + policy configuration on top of the detected stack preset:
+| Profile | Rules | `failOn` | Best for |
+|---|---|---|---|
+| `security-strict` | All security rules + hygiene | `warning` | Security audits, regulated environments |
+| `team` | Core security + hygiene | `critical` | Standard CI/CD on shared branches |
+| `solo` | Hygiene only | `critical` | Solo projects, low-noise baseline |
 ### Review Engine Adapters
 | Adapter | Key required | Notes |
 |---|---|---|
-| `auto` | any below | Auto-selects best available (recommended) |
-| `claude` | `ANTHROPIC_API_KEY` | Opus 4.7 default |
+| `auto` | any | Auto-selects best available (recommended) |
+| `claude` | `ANTHROPIC_API_KEY` | Claude Opus 4.7 |
 | `gemini` | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | Gemini 2.5 Pro, 1M context |
 | `codex` | `OPENAI_API_KEY` | GPT-5 Codex |
 | `openai-compatible` | configurable | Groq, Ollama, Together AI, etc. |
 `auto` priority: Anthropic → Gemini → OpenAI → Groq.
-**Groq example:**
+**Groq (fast/free tier):**
 ```yaml
 reviewEngine:
   adapter: openai-compatible
@@ -161,44 +204,57 @@ reviewEngine:
     baseUrl: http://localhost:11434/v1
 ```
+---
 ## GitHub Actions
 ```yaml
 - uses: axledbetter/claude-autopilot/.github/actions/ci@main
   with:
-    openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+    anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
+    # Optional:
+    # post-comments: 'true'
+    # inline-comments: 'false'
+    # base-ref: 'main'
+    # sarif-output: 'guardrail.sarif'
+    # version: 'latest'
 ```
 Runs the pipeline, uploads SARIF to GitHub Code Scanning, annotates the PR diff inline.
-## SARIF Output
+---
-```bash
-npx autopilot run --format sarif --output autopilot.sarif
-```
+## Typical Team Workflow
-Compatible with `github/codeql-action/upload-sarif@v3`.
+```bash
+# 1. First run — establish a baseline so CI only fails on new issues
+npx guardrail run --base main
+npx guardrail baseline create --note "post-v2 audit"
+git add .guardrail-baseline.json && git commit -m "chore: guardrail baseline"
-## Snapshot Regression Testing
+# 2. CI — only new findings block the build
+npx guardrail ci --new-only --fail-on critical
-After each feature lands:
+# 3. Triage false positives once, never see them again
+npx guardrail triage sql-injection:src/db/raw.ts:47 false-positive --reason "internal admin only"
+git add .guardrail-triage.json && git commit -m "chore: triage false positive"
-```bash
-npx autopilot autoregress generate   # generate baselines for changed files
+# 4. Auto-fix and verify
+npx guardrail fix --yes   # applies patches + runs tests, reverts on failure
 ```
-Future PRs automatically fail if covered behavior diverges. The impact selector uses `git merge-base` diff + one-hop import graph expansion — only relevant snapshots run, keeping CI fast.
+---
-High-impact paths (`src/core/pipeline/**`, `src/adapters/**`, `src/core/findings/**`, `src/core/config/**`) always trigger a full run.
+## Interpreting Results
-## Public API
+**Exit 0** — pass or warnings only (at current `policy.failOn` threshold). Safe to merge.
+**Exit 1** — findings at or above threshold. Fix before merging.
-```typescript
-import type { Finding, RunResult, AutopilotConfig } from '@delegance/claude-autopilot';
-import { normalizeSnapshot } from '@delegance/claude-autopilot';
-```
+Findings: `critical` blocks merge · `warning` should fix · `note` informational.
-Types are available for TypeScript consumers. Runtime import requires a tsx-aware bundler (the package ships TypeScript source).
+PR comments show: status badge, phase table, critical/warning findings with inline links, cost footer. Re-runs update the existing comment in place.
+---
 ## Architecture
@@ -206,11 +262,13 @@ Four pluggable adapter points:
 | Point | Built-in | Purpose |
 |---|---|---|
-| `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM code review |
-| `vcs-host` | `github` | PR comments + SARIF upload |
-| `migration-runner` | `supabase` | DB migration execution |
+| `review-engine` | `auto`, `claude`, `gemini`, `codex`, `openai-compatible` | LLM review |
+| `vcs-host` | `github` | PR comments + SARIF |
+| `migration-runner` | `supabase` | DB migrations |
 | `review-bot-parser` | `cursor` | Parse review bot comments |
+**Monorepo:** Auto-detects npm/yarn/pnpm workspaces, Turborepo, and Nx.
 ## License
 MIT

package/bin/_launcher.js ADDED Viewed

@@ -0,0 +1,77 @@
+// Shared launcher for both `claude-autopilot` and `guardrail` bins.
+// Imported, not a bin itself. Resolves tsx, spawns src/cli/index.ts with
+// the caller's argv, forwards stdio, exits with the child's status.
+import { fileURLToPath } from 'node:url';
+import { spawnSync } from 'node:child_process';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as os from 'node:os';
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const ENTRYPOINT = path.resolve(__dirname, '..', 'src', 'cli', 'index.ts');
+function findTsx() {
+  const own = path.resolve(__dirname, '..', 'node_modules', '.bin', 'tsx');
+  if (fs.existsSync(own)) return own;
+  const consumer = path.resolve(__dirname, '..', '..', '..', '.bin', 'tsx');
+  if (fs.existsSync(consumer)) return consumer;
+  return 'tsx';
+}
+// Tracks per-terminal-session whether the deprecation notice has been shown.
+// Uses a temp file keyed by parent PID plus whether stderr is a TTY or a pipe, so
+// parallel CI jobs don't collide. Always emits if the stamp can't be written.
+const DEPRECATION_STAMP_DIR = path.join(os.tmpdir(), 'claude-autopilot');
+function hasShownDeprecation() {
+  try {
+    if (!fs.existsSync(DEPRECATION_STAMP_DIR)) {
+      fs.mkdirSync(DEPRECATION_STAMP_DIR, { recursive: true });
+    }
+    const key = `${process.ppid}-${process.stderr.isTTY ? 'tty' : 'pipe'}.stamp`;
+    const stampPath = path.join(DEPRECATION_STAMP_DIR, key);
+    if (fs.existsSync(stampPath)) return true;
+    fs.writeFileSync(stampPath, String(Date.now()));
+    // Best-effort cleanup of stamps older than 1h to keep tmpdir tidy.
+    const cutoff = Date.now() - 60 * 60 * 1000;
+    for (const f of fs.readdirSync(DEPRECATION_STAMP_DIR)) {
+      const p = path.join(DEPRECATION_STAMP_DIR, f);
+      try {
+        if (fs.statSync(p).mtimeMs < cutoff) fs.unlinkSync(p);
+      } catch { /* ignore */ }
+    }
+    return false;
+  } catch {
+    return false;
+  }
+}
+/**
+ * Decide whether to emit the deprecation notice. Order:
+ *   CLAUDE_AUTOPILOT_DEPRECATION=never   → never emit (CI/automation)
+ *   CLAUDE_AUTOPILOT_DEPRECATION=always  → always emit (deterministic testing)
+ *   otherwise                            → once per terminal session (stamp-based)
+ */
+function shouldEmitDeprecation() {
+  const override = process.env.CLAUDE_AUTOPILOT_DEPRECATION;
+  if (override === 'never') return false;
+  if (override === 'always') return true;
+  return !hasShownDeprecation();
+}
+/**
+ * Launch the CLI with `argv` passed through verbatim.
+ * @param {{ name: 'claude-autopilot' | 'guardrail' }} opts
+ */
+export function launch(opts) {
+  if (opts.name === 'guardrail' && shouldEmitDeprecation()) {
+    process.stderr.write(
+      '\x1b[33m[deprecated]\x1b[0m `guardrail` CLI is renamed to `claude-autopilot`. ' +
+      'The `guardrail` alias works through v5.x and will be removed in v6. ' +
+      'Migration guide: https://github.com/axledbetter/claude-autopilot/blob/master/docs/migration/v4-to-v5.md\n' +
+      'Silence: set CLAUDE_AUTOPILOT_DEPRECATION=never\n',
+    );
+  }
+  const result = spawnSync(findTsx(), [ENTRYPOINT, ...process.argv.slice(2)], { stdio: 'inherit' });
+  process.exit(result.status ?? 1);
+}

package/bin/claude-autopilot.js ADDED Viewed

@@ -0,0 +1,3 @@
+#!/usr/bin/env node
+import { launch } from './_launcher.js';
+launch({ name: 'claude-autopilot' });

package/bin/guardrail.js ADDED Viewed

@@ -0,0 +1,3 @@
+#!/usr/bin/env node
+import { launch } from './_launcher.js';
+launch({ name: 'guardrail' });

package/package.json CHANGED Viewed

@@ -1,15 +1,18 @@
 {
   "name": "@delegance/claude-autopilot",
-  "version": "2.5.0",
+  "version": "5.0.0-alpha.1",
   "type": "module",
-  "description": "Claude Code automation pipeline: spec → plan → implement → validate → PR",
+  "description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
   "keywords": [
-    "claude",
+    "claude-autopilot",
     "autopilot",
-    "ai",
-    "pipeline",
+    "claude-code",
+    "ai-agent",
     "code-review",
-    "cli"
+    "llm",
+    "sarif",
+    "cli",
+    "pipeline"
   ],
   "license": "MIT",
   "repository": {
@@ -20,7 +23,8 @@
     "node": ">=22.0.0"
   },
   "bin": {
-    "autopilot": "bin/autopilot.js"
+    "claude-autopilot": "bin/claude-autopilot.js",
+    "guardrail": "bin/guardrail.js"
   },
   "types": "./src/index.ts",
   "exports": {
@@ -35,6 +39,7 @@
     "skills/",
     "scripts/test-runner.mjs",
     "scripts/autoregress.ts",
+    "scripts/snapshots/",
     "tests/snapshots/",
     "CHANGELOG.md"
   ],
@@ -47,6 +52,7 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.90.0",
     "@google/generative-ai": "^0.24.1",
+    "@modelcontextprotocol/sdk": "^1.29.0",
     "ajv": "^8",
     "dotenv": ">=16",
     "js-yaml": "^4",
@@ -56,7 +62,7 @@
   },
   "devDependencies": {
     "@types/js-yaml": "^4",
-    "@types/node": "^22",
-    "typescript": "^5"
+    "@types/node": "^25",
+    "typescript": "^6"
   }
 }

package/presets/generic/guardrail.config.yaml ADDED Viewed

@@ -0,0 +1,35 @@
+configVersion: 1
+reviewEngine: { adapter: auto }
+vcsHost: { adapter: github }
+reviewBot: { adapter: cursor }
+# No migrationRunner — the migrate phase will no-op with a notice pointing
+# to `.claude-autopilot/stack.yaml` for users who want to wire one up.
+protectedPaths:
+  - "**/auth/**"
+  - "**/payment/**"
+  - "**/encryption/**"
+  - "**/secret/**"
+  - "**/keys/**"
+staticRules:
+  - hardcoded-secrets
+  - npm-audit
+  - package-lock-sync
+  - sql-injection
+  - missing-auth
+  - ssrf
+  - insecure-redirect
+policy:
+  failOn: critical
+  newOnly: false
+thresholds:
+  bugbotAutoFix: 85
+  bugbotProposePatch: 60
+  maxValidateRetries: 3
+reviewStrategy: auto
+chunking:
+  smallTierMaxTokens: 8000
+  partialReviewTokens: 60000
+  perFileMaxTokens: 32000
+pipeline:
+  runReviewOnStaticFail: true
+  runReviewOnTestFail: false

package/presets/generic/stack.md ADDED Viewed

@@ -0,0 +1,40 @@
+A generic project with no strong framework signals detected.
+This preset makes **no assumptions** about:
+- Database engine or migration runner
+- Type generation
+- Test framework (uses `npm test` / `npm run typecheck` / `npm run lint` where present)
+- Deployment target
+It enables the core security rules that apply to most codebases — hardcoded secrets, npm audit, SQL injection patterns, missing auth checks, SSRF, insecure redirects.
+## What's disabled vs stack-specific presets
+- `supabase-rls-bypass` rule (Supabase-only)
+- `schema-alignment` rule (requires declared migration paths)
+- `migrate` phase of the pipeline no-ops with a notice
+## Wiring up migrations
+If your project uses migrations, create `.claude-autopilot/stack.yaml` with:
+```yaml
+migrate:
+  command: "prisma migrate dev"      # or flyway, dbmate, golang-migrate, etc.
+  environments: [dev, staging, prod]
+  typeGeneration:
+    command: "prisma generate"
+    path: "node_modules/.prisma/client"
+```
+Or pick a stack-specific preset at setup time: `claude-autopilot init --preset nextjs-supabase`.
+## Things that should flag CRITICAL (universal)
+- Secrets committed to code or history
+- SQL string concatenation with user input
+- POST endpoints without auth checks
+- SSRF via user-controlled URLs in `fetch` / `axios`
+- Open redirects (user-controlled `Location` header)
+- Dynamic code evaluation (`eval`, `Function` constructor) with user input
+- Shell command construction with user input