@delegance/claude-autopilot 2.5.0 → 5.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/README.md +169 -106
  3. package/bin/_launcher.js +77 -0
  4. package/bin/claude-autopilot.js +3 -0
  5. package/bin/guardrail.js +3 -0
  6. package/package.json +23 -9
  7. package/presets/generic/guardrail.config.yaml +35 -0
  8. package/presets/generic/stack.md +40 -0
  9. package/presets/nextjs-supabase/{autopilot.config.yaml → guardrail.config.yaml} +7 -0
  10. package/scripts/autoregress.ts +27 -11
  11. package/skills/autopilot/SKILL.md +170 -0
  12. package/skills/claude-autopilot.md +80 -0
  13. package/skills/guardrail.md +39 -0
  14. package/skills/migrate/SKILL.md +83 -0
  15. package/src/adapters/council/claude.ts +41 -0
  16. package/src/adapters/council/openai.ts +40 -0
  17. package/src/adapters/council/types.ts +7 -0
  18. package/src/adapters/loader.ts +7 -7
  19. package/src/adapters/review-engine/auto.ts +2 -2
  20. package/src/adapters/review-engine/claude.ts +9 -11
  21. package/src/adapters/review-engine/codex.ts +9 -11
  22. package/src/adapters/review-engine/gemini.ts +9 -11
  23. package/src/adapters/review-engine/openai-compatible.ts +10 -12
  24. package/src/adapters/review-engine/parse-output.ts +32 -6
  25. package/src/adapters/review-engine/prompt-builder.ts +19 -0
  26. package/src/adapters/review-engine/types.ts +1 -1
  27. package/src/adapters/vcs-host/commit-status.ts +39 -0
  28. package/src/adapters/vcs-host/github.ts +2 -2
  29. package/src/cli/baseline.ts +125 -0
  30. package/src/cli/ci.ts +11 -8
  31. package/src/cli/costs.ts +2 -2
  32. package/src/cli/council.ts +96 -0
  33. package/src/cli/detector.ts +21 -5
  34. package/src/cli/explain.ts +197 -0
  35. package/src/cli/fix.ts +173 -111
  36. package/src/cli/hook.ts +72 -27
  37. package/src/cli/ignore-helper.ts +116 -0
  38. package/src/cli/index.ts +355 -31
  39. package/src/cli/init.ts +12 -12
  40. package/src/cli/lsp.ts +200 -0
  41. package/src/cli/mcp.ts +206 -0
  42. package/src/cli/pr-comment.ts +5 -5
  43. package/src/cli/pr-desc.ts +168 -0
  44. package/src/cli/pr-review-comments.ts +3 -3
  45. package/src/cli/pr.ts +76 -0
  46. package/src/cli/preflight.ts +109 -32
  47. package/src/cli/report.ts +186 -0
  48. package/src/cli/run.ts +140 -36
  49. package/src/cli/scan.ts +233 -0
  50. package/src/cli/setup.ts +121 -15
  51. package/src/cli/test-gen.ts +125 -0
  52. package/src/cli/triage.ts +137 -0
  53. package/src/cli/watch.ts +52 -31
  54. package/src/cli/worker.ts +109 -0
  55. package/src/core/cache/review-cache.ts +2 -2
  56. package/src/core/chunking/index.ts +2 -2
  57. package/src/core/config/loader.ts +10 -10
  58. package/src/core/config/preset-resolver.ts +6 -6
  59. package/src/core/config/schema.ts +103 -2
  60. package/src/core/config/types.ts +57 -2
  61. package/src/core/council/config.ts +71 -0
  62. package/src/core/council/context.ts +17 -0
  63. package/src/core/council/runner.ts +83 -0
  64. package/src/core/council/types.ts +45 -0
  65. package/src/core/detect/llm-key.ts +89 -0
  66. package/src/core/detect/workspaces.ts +103 -0
  67. package/src/core/errors.ts +4 -4
  68. package/src/core/fix/generator.ts +149 -0
  69. package/src/core/ignore/index.ts +4 -4
  70. package/src/core/mcp/concurrency.ts +16 -0
  71. package/src/core/mcp/handlers/fix-finding.ts +126 -0
  72. package/src/core/mcp/handlers/get-capabilities.ts +62 -0
  73. package/src/core/mcp/handlers/get-findings.ts +36 -0
  74. package/src/core/mcp/handlers/review-diff.ts +65 -0
  75. package/src/core/mcp/handlers/scan-files.ts +65 -0
  76. package/src/core/mcp/handlers/validate-fix.ts +41 -0
  77. package/src/core/mcp/run-store.ts +85 -0
  78. package/src/core/mcp/workspace.ts +35 -0
  79. package/src/core/persist/baseline.ts +112 -0
  80. package/src/core/persist/cost-log.ts +1 -1
  81. package/src/core/persist/findings-cache.ts +1 -1
  82. package/src/core/persist/triage.ts +112 -0
  83. package/src/core/phases/static-rules.ts +18 -5
  84. package/src/core/pipeline/review-phase.ts +65 -26
  85. package/src/core/pipeline/run.ts +42 -10
  86. package/src/core/runtime/lock.ts +2 -2
  87. package/src/core/runtime/state.ts +2 -2
  88. package/src/core/schema-alignment/detector.ts +59 -0
  89. package/src/core/schema-alignment/extractor/index.ts +24 -0
  90. package/src/core/schema-alignment/extractor/prisma.ts +21 -0
  91. package/src/core/schema-alignment/extractor/sql.ts +99 -0
  92. package/src/core/schema-alignment/llm-check.ts +91 -0
  93. package/src/core/schema-alignment/scanner.ts +107 -0
  94. package/src/core/schema-alignment/types.ts +43 -0
  95. package/src/core/shell.ts +3 -3
  96. package/src/core/static-rules/registry.ts +17 -8
  97. package/src/core/static-rules/rules/brand-tokens.ts +145 -0
  98. package/src/core/static-rules/rules/hardcoded-secrets.ts +27 -1
  99. package/src/core/static-rules/rules/insecure-redirect.ts +67 -0
  100. package/src/core/static-rules/rules/missing-auth.ts +70 -0
  101. package/src/core/static-rules/rules/schema-alignment.ts +132 -0
  102. package/src/core/static-rules/rules/sql-injection.ts +71 -0
  103. package/src/core/static-rules/rules/ssrf.ts +63 -0
  104. package/src/core/static-rules/tailwind-extractor.ts +38 -0
  105. package/src/core/test-gen/coverage-analyzer.ts +93 -0
  106. package/src/core/test-gen/framework-detector.ts +21 -0
  107. package/src/core/test-gen/test-writer.ts +33 -0
  108. package/src/core/ui/design-context-loader.ts +87 -0
  109. package/src/core/worker/client.ts +46 -0
  110. package/src/core/worker/lockfile.ts +38 -0
  111. package/src/core/worker/server.ts +81 -0
  112. package/src/formatters/junit.ts +52 -0
  113. package/src/formatters/sarif.ts +2 -2
  114. package/src/index.ts +1 -2
  115. package/tests/snapshots/baselines/src-formatters-sarif.json +4 -4
  116. package/tests/snapshots/index.json +3 -3
  117. package/tests/snapshots/src-formatters-sarif.snap.ts +1 -1
  118. package/tests/snapshots/src-snapshots-impact-selector.snap.ts +3 -3
  119. package/tests/snapshots/src-snapshots-import-scanner.snap.ts +3 -3
  120. package/tests/snapshots/src-snapshots-serializer.snap.ts +2 -2
  121. package/bin/autopilot.js +0 -20
  122. package/skills/autopilot.md +0 -157
  123. /package/presets/go/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  124. /package/presets/python-fastapi/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  125. /package/presets/rails-postgres/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  126. /package/presets/t3/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
  127. /package/{src → scripts}/snapshots/impact-selector.ts +0 -0
  128. /package/{src → scripts}/snapshots/import-scanner.ts +0 -0
  129. /package/{src → scripts}/snapshots/serializer.ts +0 -0
@@ -0,0 +1,40 @@
1
+ A generic project with no strong framework signals detected.
2
+
3
+ This preset makes **no assumptions** about:
4
+ - Database engine or migration runner
5
+ - Type generation
6
+ - Test framework (uses whatever `npm test` / `npm run typecheck` / `npm run lint` find)
7
+ - Deployment target
8
+
9
+ It enables the core security rules that apply to most codebases — hardcoded secrets, npm audit, SQL injection patterns, missing auth checks, SSRF, insecure redirects.
10
+
11
+ ## What's disabled vs stack-specific presets
12
+
13
+ - `supabase-rls-bypass` rule (Supabase-only)
14
+ - `schema-alignment` rule (requires declared migration paths)
15
+ - `migrate` phase of the pipeline no-ops with a notice
16
+
17
+ ## Wiring up migrations
18
+
19
+ If your project uses migrations, create `.claude-autopilot/stack.yaml` with:
20
+
21
+ ```yaml
22
+ migrate:
23
+ command: "prisma migrate dev" # or flyway, dbmate, tbls, golang-migrate, etc.
24
+ environments: [dev, staging, prod]
25
+ typeGeneration:
26
+ command: "prisma generate"
27
+ path: "node_modules/.prisma/client"
28
+ ```
29
+
30
+ Or pick a stack-specific preset at setup time: `claude-autopilot init --preset nextjs-supabase`.
31
+
32
+ ## Things that should flag CRITICAL (universal)
33
+
34
+ - Secrets committed to code or history
35
+ - SQL string concatenation with user input
36
+ - POST endpoints without auth checks
37
+ - SSRF via user-controlled URLs in `fetch` / `axios`
38
+ - Open redirects (user-controlled `Location` header)
39
+ - Dynamic code evaluation (`eval`, `Function` constructor) with user input
40
+ - Shell command construction with user input
@@ -18,6 +18,13 @@ staticRules:
18
18
  - npm-audit
19
19
  - package-lock-sync
20
20
  - supabase-rls-bypass
21
+ - sql-injection
22
+ - missing-auth
23
+ - ssrf
24
+ - insecure-redirect
25
+ policy:
26
+ failOn: critical
27
+ newOnly: false
21
28
  thresholds:
22
29
  bugbotAutoFix: 85
23
30
  bugbotProposePatch: 60
@@ -3,11 +3,11 @@
3
3
  import * as fs from 'node:fs';
4
4
  import * as path from 'node:path';
5
5
  import * as os from 'node:os';
6
- import { execSync, spawnSync } from 'node:child_process';
6
+ import { spawnSync } from 'node:child_process';
7
7
  import { fileURLToPath } from 'node:url';
8
- import { selectSnapshots } from '../src/snapshots/impact-selector.ts';
8
+ import { selectSnapshots } from './snapshots/impact-selector.ts';
9
9
  import OpenAI from 'openai';
10
- import { buildImportMap } from '../src/snapshots/import-scanner.ts';
10
+ import { buildImportMap } from './snapshots/import-scanner.ts';
11
11
 
12
12
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
13
13
  const ROOT = path.resolve(__dirname, '..');
@@ -38,11 +38,15 @@ export function diffBaselines(baselineJson: string, currentJson: string): string
38
38
 
39
39
  function getChangedFiles(since?: string): string[] | null {
40
40
  try {
41
- const base = since
42
- ? since
43
- : execSync('git merge-base origin/main HEAD', { cwd: ROOT }).toString().trim();
44
- const out = execSync(`git diff ${base} HEAD --name-only`, { cwd: ROOT }).toString();
45
- return out.trim().split('\n').filter(Boolean);
41
+ let base = since;
42
+ if (!base) {
43
+ const r = spawnSync('git', ['merge-base', 'origin/main', 'HEAD'], { cwd: ROOT, encoding: 'utf8' });
44
+ if (r.status !== 0) return null;
45
+ base = r.stdout.trim();
46
+ }
47
+ const r = spawnSync('git', ['diff', base, 'HEAD', '--name-only'], { cwd: ROOT, encoding: 'utf8' });
48
+ if (r.status !== 0) return null;
49
+ return r.stdout.trim().split('\n').filter(Boolean);
46
50
  } catch { return null; }
47
51
  }
48
52
 
@@ -146,8 +150,17 @@ function cmdUpdate(args: string[]): number {
146
150
  continue;
147
151
  }
148
152
  process.stdout.write(` ${snap} ... `);
153
+ const slug2 = path.basename(snap, '.snap.ts');
154
+ const baselinePath = path.join(BASELINES_DIR, `${slug2}.json`);
155
+ const beforeMtime = fs.existsSync(baselinePath) ? fs.statSync(baselinePath).mtimeMs : 0;
149
156
  runSnapshot(snap, true);
150
- console.log('updated');
157
+ const captured = fs.existsSync(baselinePath) && fs.statSync(baselinePath).mtimeMs > beforeMtime;
158
+ if (captured) {
159
+ console.log('updated');
160
+ } else {
161
+ console.error('CAPTURE FAILED (baseline not written)');
162
+ failed++;
163
+ }
151
164
  }
152
165
  return failed > 0 ? 1 : 0;
153
166
  }
@@ -167,7 +180,7 @@ Write a snapshot test file. Requirements:
167
180
  // @source-commit: {sourceCommit}
168
181
  // @generator-version: {version}
169
182
  2. Import the module's exported functions under test
170
- 3. Import { normalizeSnapshot } from '../../src/snapshots/serializer.ts'
183
+ 3. Import { normalizeSnapshot } from '../../scripts/snapshots/serializer.ts'
171
184
  4. Import fs from 'node:fs', describe/it from 'node:test', assert from 'node:assert/strict'
172
185
  5. Baseline loading pattern (use slug {slug}):
173
186
  const SLUG = '{slug}';
@@ -219,7 +232,10 @@ async function cmdGenerate(args: string[]): Promise<number> {
219
232
 
220
233
  const client = new OpenAI({ apiKey });
221
234
  let sourceCommit = 'unknown';
222
- try { sourceCommit = execSync('git rev-parse --short HEAD', { cwd: ROOT }).toString().trim(); } catch {}
235
+ try {
236
+ const r = spawnSync('git', ['rev-parse', '--short', 'HEAD'], { cwd: ROOT, encoding: 'utf8' });
237
+ if (r.status === 0) sourceCommit = r.stdout.trim();
238
+ } catch {}
223
239
  const generatedAt = new Date().toISOString();
224
240
 
225
241
  for (const srcFile of srcFiles) {
@@ -0,0 +1,170 @@
1
+ ---
2
+ name: autopilot
3
+ description: After spec approval, automatically execute the full pipeline — plan → implement → migrate → validate → PR → Codex review. No manual intervention required.
4
+ ---
5
+
6
+ # Autopilot — Spec to PR Pipeline
7
+
8
+ After the user approves a spec during brainstorming, this skill runs the full pipeline automatically.
9
+
10
+ ## Prerequisites
11
+
12
+ - Approved spec file at `docs/superpowers/specs/YYYY-MM-DD-<topic>-design.md`
13
+ - Superpowers plugin installed (`writing-plans`, `using-git-worktrees`, `subagent-driven-development`)
14
+ - Scripts installed and dependencies present (run step 0 preflight to verify)
15
+
16
+ ## CRITICAL: Do Not Pause
17
+
18
+ **Run the entire pipeline without stopping.** Do NOT:
19
+ - Ask "want me to continue?" between steps
20
+ - Show intermediate results or ask for confirmation
21
+ - Pause to report progress mid-pipeline
22
+ - Wait for user input between any steps
23
+
24
+ The ONLY time you stop is if a step **fails and cannot be recovered**. Otherwise, execute all steps sequentially and report ONCE at the end (Step 9).
25
+
26
+ Brief status lines like `[autopilot] Step 3: Executing plan...` are fine. Full summaries, questions, or check-ins are not.
27
+
28
+ ## Pipeline
29
+
30
+ Execute these steps in order. Do NOT pause between steps unless a step fails.
31
+
32
+ ### Step 0: Preflight
33
+
34
+ ```bash
35
+ npx tsx scripts/preflight.ts
36
+ ```
37
+
38
+ If any check **fails** (red ✗): stop and tell the user what to fix before continuing.
39
+ If checks only **warn** (yellow !): proceed — degraded steps will be noted in the final report.
40
+ If all pass: continue immediately, no user interaction needed.
41
+
42
+ ### Step 1: Write Implementation Plan
43
+
44
+ ```
45
+ Invoke: superpowers:writing-plans
46
+ Input: The approved spec file
47
+ Output: Plan at docs/superpowers/plans/YYYY-MM-DD-<topic>.md
48
+ ```
49
+
50
+ Commit the plan. Do NOT ask the user for execution choice — always use subagent-driven development.
51
+
52
+ ### Step 2: Set Up Worktree
53
+
54
+ ```
55
+ Invoke: superpowers:using-git-worktrees
56
+ Branch: feature/<topic-slug>
57
+ ```
58
+
59
+ After the worktree is created, symlink the local env file into it so scripts
60
+ (validate, Codex review, migrate) can read secrets:
61
+
62
+ ```bash
63
+ # Detect which env file the project uses
64
+ ENV_FILE=$(ls .env.local .env.dev .env.development .env 2>/dev/null | head -1)
65
+ if [ -n "$ENV_FILE" ]; then
66
+ ln -sf "$(pwd)/$ENV_FILE" ".claude/worktrees/<branch>/$ENV_FILE"
67
+ fi
68
+ ```
69
+
70
+ If no env file is found, skip the symlink and note it in the final report (the Step 0 preflight will already have flagged this).
71
+
72
+ ### Step 3: Execute Plan
73
+
74
+ ```
75
+ Invoke: superpowers:subagent-driven-development
76
+ Input: The plan file
77
+ Mode: dispatch fresh subagent per task
78
+ ```
79
+
80
+ For each task:
81
+ - Dispatch implementer subagent
82
+ - On completion: verify commit landed in worktree
83
+ - Skip formal spec/quality review to maintain speed (the validate step catches issues)
84
+ - If subagent fails to write to worktree: implement directly
85
+
86
+ ### Step 4: Auto-Migrate
87
+
88
+ For any `.sql` files created in `data/deltas/` during implementation:
89
+
90
+ ```bash
91
+ /migrate
92
+ ```
93
+
94
+ Run against **dev only** by default. Once the dev migration succeeds, do not promote further — continue with the rest of the pipeline.
95
+
96
+ Only promote to QA → prod if the user has explicitly enabled it (e.g., `AUTOPILOT_ALLOW_PROD_MIGRATIONS=true` in their env) or asked for it directly. Production migrations are irreversible — never auto-promote without a clear signal.
97
+
98
+ If migration fails, fix the SQL and retry (max 2 retries). If it still fails, stop and report.
99
+
100
+ ### Step 5: Validate
101
+
102
+ ```bash
103
+ npx tsx scripts/validate.ts --commit-autofix --allow-dirty
104
+ ```
105
+
106
+ If FAIL:
107
+ - Read the validation report at `.claude/validation-report.json`
108
+ - Fix the blocking issues
109
+ - Re-run validate
110
+ - Max 3 retry iterations
111
+
112
+ If PASS: proceed to PR.
113
+
114
+ ### Step 6: Push + Create PR
115
+
116
+ ```bash
117
+ git push -u origin <branch>
118
+ gh pr create --title "<concise title>" --body "<generated PR body with spec link, test plan>"
119
+ ```
120
+
121
+ ### Step 7: Codex PR Review
122
+
123
+ ```bash
124
+ npx tsx scripts/codex-pr-review.ts <pr-number>
125
+ ```
126
+
127
+ Posts the Codex review as a GitHub PR comment. If there are critical findings:
128
+ - Fix them on the branch
129
+ - Push
130
+ - Re-run Codex review
131
+ - Max 2 iterations
132
+
133
+ ### Step 8: Bugbot Triage + Fix
134
+
135
+ Wait 60 seconds for Cursor bugbot to post comments, then:
136
+
137
+ ```bash
138
+ npx tsx scripts/bugbot.ts --pr <pr-number>
139
+ ```
140
+
141
+ Triages each finding (real bug vs. false positive), auto-fixes real bugs, and dismisses false positives with GitHub replies. If fixes were applied:
142
+ - Push
143
+ - Wait for new bugbot comments (30s)
144
+ - Re-run /bugbot
145
+ - Max 3 rounds
146
+
147
+ ### Step 9: Report
148
+
149
+ Tell the user:
150
+ - PR URL
151
+ - Test count
152
+ - Validation verdict
153
+ - Codex review summary
154
+ - Bugbot triage summary (fixed / dismissed / needs-human)
155
+ - Any human-required items that couldn't be auto-fixed
156
+
157
+ ## Error Recovery
158
+
159
+ - **Subagent failure:** Re-dispatch with more context or implement directly
160
+ - **Migration failure:** Fix SQL, re-run /migrate
161
+ - **Validate failure:** Fix issues, re-run (max 3 retries)
162
+ - **Codex critical findings:** Fix, push, re-review (max 2 retries)
163
+ - **Bugbot findings:** /bugbot handles triage + fix automatically (max 3 rounds)
164
+ - **Unrecoverable error:** Stop, report what was completed, show remaining work
165
+
166
+ ## When NOT to Use
167
+
168
+ - During brainstorming (this runs AFTER spec approval)
169
+ - For hotfixes (too heavy — just commit and push)
170
+ - When the user wants manual control over each step
@@ -0,0 +1,80 @@
1
+ ---
2
+ name: claude-autopilot
3
+ description: Autonomous development pipeline — brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Use when the user asks to "ship", "implement", "build", or "autopilot" a feature that's past the idea stage. Runs end-to-end without pausing for check-ins.
4
+ ---
5
+
6
+ # claude-autopilot — Agent Loop
7
+
8
+ This skill drives the full claude-autopilot pipeline when a user asks Claude to ship a feature. It is an *agent loop*, not a CLI reference — the commands it invokes are an implementation detail. The skill's job is to decide which phase applies, when to pause for user approval, and when to recover from a failed phase.
9
+
10
+ ## When to invoke
11
+
12
+ - User says "ship X", "implement X", "build X", "autopilot X", or hands Claude a spec and says "go"
13
+ - User approved a spec during `/brainstorm` and the next step is implementation
14
+ - User is resuming a paused pipeline after fixing a failed phase by hand
15
+
16
+ ## When NOT to invoke
17
+
18
+ - User is still in discovery ("help me think through X") — invoke `brainstorming` first
19
+ - User wants Claude to run one specific phase only (they'll invoke that skill directly — `migrate`, `review`, `triage`, etc.)
20
+ - User is hot-fixing a bug — too heavy, just edit and push
21
+
22
+ ## The pipeline
23
+
24
+ Each phase writes its output to disk. Claude can stop, the user can edit the artifact, and Claude can resume from that phase without re-running earlier ones.
25
+
26
+ | Phase | Artifact | What Claude does | When it stops |
27
+ |---|---|---|---|
28
+ | **Brainstorm** | `docs/specs/YYYY-MM-DD-<topic>-design.md` | Invokes `brainstorming` skill to turn idea into reviewed spec | When spec is committed + user approves |
29
+ | **Spec review** | PR comment or inline notes | Invokes `codex-review` skill against the spec file | After one round unless criticals found |
30
+ | **Plan** | `docs/plans/YYYY-MM-DD-<topic>.md` | Invokes `writing-plans` to break spec into phases | When plan is committed |
31
+ | **Plan review** | Inline notes | Invokes `codex-review` skill against the plan | After one round unless criticals found |
32
+ | **Branch** | git worktree at `.claude/worktrees/<slug>` or branch on HEAD | Invokes `using-git-worktrees` or cuts branch directly | When branch exists |
33
+ | **Implement** | Git commits on the branch | Invokes `subagent-driven-development`, one subagent per plan phase | When all plan phases have landing commits |
34
+ | **Migrate** | SQL deltas applied | Invokes `migrate` skill if DB migrations exist in the branch; skips otherwise | When all environments (dev → QA → prod) are in sync |
35
+ | **Validate** | `.claude/validation-report.json` | Runs static rules + tests + typecheck + LLM review via `claude-autopilot run` | When validation passes or after 3 failed retries |
36
+ | **PR** | GitHub PR number | Invokes `commit-push-pr` or runs `gh pr create` directly | When PR is open |
37
+ | **PR review** | PR comment | Invokes `review-2pass` or `codex-pr-review` against the PR | After one round unless criticals found |
38
+ | **Triage** | Bugbot thread replies + follow-up commits | Invokes `bugbot` skill to triage reviewer findings | When all HIGH severity items are resolved or human-dismissed |
39
+
40
+ ## Core rules
41
+
42
+ 1. **Do not pause mid-pipeline.** Once past the Brainstorm gate (which is inherently interactive), execute phases end-to-end. Do not ask "want me to continue?" between phases. Do not show intermediate reports. The user gets one report at the end.
43
+ 2. **Each phase's artifact is the source of truth for the next.** If the plan file changes between phases, the implementation uses the new plan. Claude does not keep phase outputs in memory — re-read from disk.
44
+ 3. **Failure in a phase triggers recovery, not pause.**
45
+ - Migration fails → fix the SQL, re-run.
46
+ - Validation fails → read the report, fix the blockers, re-run (max 3 attempts).
47
+ - PR review finds criticals → fix on branch, push, re-review (max 2 rounds).
48
+ - Bugbot finds real bugs → fix, push, re-triage (max 3 rounds).
49
+ - Unrecoverable failure → stop, report what completed, show what remains.
50
+ 4. **Codex review is part of the loop, not optional.** The pipeline explicitly dispatches to `gpt-5.3-codex` for spec review, plan review, and PR review. This is the multi-model moat — don't skip it.
51
+ 5. **Skills are swappable.** `review-2pass` and `council` are alternative review phases — a user can configure which runs. The pipeline doesn't hardcode Claude or Codex.
52
+
53
+ ## Phase outputs
54
+
55
+ Every phase writes to a predictable path. If Claude crashes or the user stops the pipeline, the resume point is "whatever's the newest unfinished artifact."
56
+
57
+ ```
58
+ docs/
59
+ ├── specs/YYYY-MM-DD-<topic>-design.md # from Brainstorm
60
+ ├── plans/YYYY-MM-DD-<topic>.md # from Plan
61
+ └── reviews/<PR>-codex.md # from PR review (optional)
62
+ .claude/
63
+ ├── validation-report.json # from Validate
64
+ └── bugbot-state.json # from Triage
65
+ ```
66
+
67
+ ## Recovery
68
+
69
+ - **Resume mid-pipeline.** User runs `/autopilot` after fixing a failed phase. Claude reads the newest artifacts, skips completed phases, starts from the first incomplete one.
70
+ - **Skip a phase.** `/autopilot --skip migrate` — useful when the pipeline auto-detection is wrong (no migrations exist but the skill wants to run).
71
+ - **Rewire a phase.** User edits `.claude/skills/autopilot/SKILL.md` to swap `review-2pass` for `council`. Claude picks up the change on next invocation — skill is the config.
72
+
73
+ ## Why this skill exists separately from CLI subcommands
74
+
75
+ The CLI subcommands (`claude-autopilot run`, `claude-autopilot migrate`, etc.) are imperative — each does one thing. This skill is declarative — it describes the pipeline's *loop invariants* (phase order, artifact paths, recovery rules, when to pause). Claude reads this skill to decide *which* CLI subcommand to run *next*. Users who want to run one phase by hand use the CLI; users who want Claude to drive the whole pipeline invoke this skill.
76
+
77
+ See also:
78
+ - `skills/autopilot/SKILL.md` — detailed step-by-step runbook (deprecated alias for this file in v5; retained for back-compat)
79
+ - `skills/migrate/SKILL.md` — migrate phase runbook
80
+ - `skills/guardrail.md` — review phase alias (legacy; use `review` subcommand directly)
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: guardrail
3
+ description: (Legacy alias) LLM-powered code review — runs static rules + LLM review over git-changed files. As of v5, this is the review *phase* of claude-autopilot. Invoke via `claude-autopilot run` or the full pipeline via `claude-autopilot` skill.
4
+ ---
5
+
6
+ # guardrail — review phase (legacy alias)
7
+
8
+ As of `@delegance/claude-autopilot@5.0.0`, `guardrail` is the review phase of the full pipeline, not a standalone product. This skill is preserved as a back-compat alias for Claude Code agents that were configured against v4.x.
9
+
10
+ **For new configurations**, invoke `skills/claude-autopilot.md` to drive the full pipeline, or use the flat review subcommands (`run`, `scan`, `ci`, etc.) for just the review phase. Grouped syntax (`claude-autopilot review <verb>`) lands in alpha.2.
11
+
12
+ ## What it does
13
+
14
+ Static rules (`hardcoded-secrets`, `sql-injection`, `missing-auth`, `ssrf`, `insecure-redirect`, `npm-audit`, `package-lock-sync`, `console-log`, `todo-fixme`, `large-file`, `missing-tests`, `brand-tokens`, `schema-alignment`) run first, then an LLM reviewer (`claude`, `codex`, `gemini`, or `openai-compatible`) gets the code with context. Output is SARIF / JUnit / inline PR comments.
15
+
16
+ ## When to use
17
+
18
+ - Before creating a PR — `claude-autopilot run --base main`
19
+ - To audit a path without git changes — `claude-autopilot scan src/auth/`
20
+ - To ask a targeted question — `claude-autopilot scan --ask "is there an IDOR here?" src/api/`
21
+ - Inside CI — `claude-autopilot ci`
22
+ - Dev loop — `claude-autopilot watch`
23
+
24
+ ## Legacy commands that still work
25
+
26
+ All v4 `guardrail <cmd>` invocations work unchanged through v5.x. A one-line deprecation notice prints on first invocation per terminal session. Migration guide: `docs/migration/v4-to-v5.md`.
27
+
28
+ ```bash
29
+ guardrail run --base main # still works — equivalent to `claude-autopilot run --base main`
30
+ guardrail scan src/auth/ # still works
31
+ guardrail ci # still works
32
+ ```
33
+
34
+ ## What changed in v5
35
+
36
+ - `guardrail` is now one phase of a pipeline, not a standalone product.
37
+ - The full pipeline runs via the `claude-autopilot` skill or `claude-autopilot` CLI.
38
+ - Review subcommands remain flat in alpha.1 (`run`, `scan`, `ci`, `explain`, `fix`, `baseline`). The grouped `claude-autopilot review <verb>` syntax arrives in alpha.2 as an alias — flat forms will continue to work.
39
+ - The package is `@delegance/claude-autopilot` — the old `@delegance/guardrail` will be a thin tombstone forwarding to the new package in v5.0.0 GA.
@@ -0,0 +1,83 @@
1
+ ---
2
+ name: migrate
3
+ description: Run database migrations against Supabase environments (dev → QA → prod). Validates SQL, executes with ledger tracking, and auto-generates types/supabase.ts.
4
+ ---
5
+
6
+ # Database Migration
7
+
8
+ Run a migration through the dev → QA → prod pipeline with validation at each step.
9
+
10
+ ## Usage
11
+
12
+ ### 1. Identify the migration file
13
+
14
+ If a migration file is given as an argument, use it. Otherwise, find the most recently modified `.sql` file in `data/deltas/`.
15
+
16
+ ### 2. Validate (dry run on dev)
17
+
18
+ ```bash
19
+ npx tsx scripts/supabase/migrate.ts <file> --env dev --dry-run
20
+ ```
21
+
22
+ Present the validation results. If there are errors, help the user fix them before proceeding.
23
+
24
+ ### 3. Run on dev
25
+
26
+ ```bash
27
+ npx tsx scripts/supabase/migrate.ts <file> --env dev
28
+ ```
29
+
30
+ ### 4. Ask the user
31
+
32
+ > "Migration succeeded on dev. `types/supabase.ts` updated. Promote to QA?"
33
+
34
+ ### 5. Run on QA
35
+
36
+ ```bash
37
+ npx tsx scripts/supabase/migrate.ts --promote qa
38
+ ```
39
+
40
+ ### 6. Ask the user
41
+
42
+ > "Migration succeeded on QA. Promote to prod?"
43
+
44
+ ### 7. Run on prod
45
+
46
+ ```bash
47
+ npx tsx scripts/supabase/migrate.ts --promote prod --confirm-prod
48
+ ```
49
+
50
+ ### 8. Commit
51
+
52
+ After all environments are done, commit the updated `types/supabase.ts` and the migration file:
53
+
54
+ ```bash
55
+ git add types/supabase.ts data/deltas/<migration-file>
56
+ git commit -m "feat: <description of schema change>"
57
+ ```
58
+
59
+ ## Flags
60
+
61
+ | Flag | Purpose |
62
+ |------|---------|
63
+ | `--env dev\|qa\|prod` | Target environment |
64
+ | `--dry-run` | Validate only, don't execute |
65
+ | `--force` | Allow destructive operations (DROP, TRUNCATE) |
66
+ | `--confirm-prod` | Required for prod execution |
67
+ | `--promote qa\|prod` | Run missing migrations from source env |
68
+
69
+ ## Validation Checks
70
+
71
+ The system validates before every execution:
72
+ - Duplicate table/column detection
73
+ - snake_case naming enforcement
74
+ - RLS + policy required for every new table
75
+ - Destructive operation blocking (unless --force)
76
+ - Cross-env prerequisite verification
77
+ - Checksum integrity (modified files are rejected)
78
+ - Promotion chain enforcement (prod requires QA first)
79
+
80
+ ## Requirements
81
+
82
+ - `.claude/supabase-envs.json` with `dbUrl` for each env (gitignored)
83
+ - `postgres` npm package installed
@@ -0,0 +1,41 @@
1
+ import Anthropic from '@anthropic-ai/sdk';
2
+ import { GuardrailError } from '../../core/errors.ts';
3
+ import { classifyError } from '../review-engine/prompt-builder.ts';
4
+ import type { CouncilAdapter } from './types.ts';
5
+
6
+ const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;
7
+ const MAX_OUTPUT_TOKENS = 2048;
8
+
9
+ export function makeClaudeCouncilAdapter(model: string, label: string): CouncilAdapter {
10
+ return {
11
+ label,
12
+ async consult(prompt: string, context: string): Promise<string> {
13
+ const apiKey = process.env.ANTHROPIC_API_KEY;
14
+ if (!apiKey) {
15
+ throw new GuardrailError('ANTHROPIC_API_KEY not set', { code: 'auth', provider: 'claude' });
16
+ }
17
+ const client = new Anthropic({ apiKey });
18
+ let response: Anthropic.Message;
19
+ try {
20
+ response = await client.messages.create({
21
+ model,
22
+ max_tokens: MAX_OUTPUT_TOKENS,
23
+ system: SYSTEM_PROMPT,
24
+ messages: [{ role: 'user', content: `## Context\n\n${context}\n\n## Question\n\n${prompt}` }],
25
+ });
26
+ } catch (err) {
27
+ const message = err instanceof Error ? err.message : String(err);
28
+ const code = classifyError(message);
29
+ throw new GuardrailError(`Claude council call failed: ${message}`, {
30
+ code,
31
+ provider: 'claude',
32
+ retryable: code === 'rate_limit',
33
+ });
34
+ }
35
+ return response.content
36
+ .filter(b => b.type === 'text')
37
+ .map(b => (b as Anthropic.TextBlock).text)
38
+ .join('');
39
+ },
40
+ };
41
+ }
@@ -0,0 +1,40 @@
1
+ import OpenAI from 'openai';
2
+ import { GuardrailError } from '../../core/errors.ts';
3
+ import { classifyError } from '../review-engine/prompt-builder.ts';
4
+ import type { CouncilAdapter } from './types.ts';
5
+
6
/** System prompt sent with every OpenAI council consultation. */
const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;

/** Upper bound passed as `max_tokens` on each request. */
const MAX_OUTPUT_TOKENS = 2048;

/**
 * Converts a failure thrown by the OpenAI client into a GuardrailError.
 *
 * The error code comes from `classifyError` applied to the failure message;
 * the error is flagged retryable only when that code is `rate_limit`.
 */
function wrapOpenAIFailure(cause: unknown): GuardrailError {
  const message = cause instanceof Error ? cause.message : String(cause);
  const code = classifyError(message);
  return new GuardrailError(`OpenAI council call failed: ${message}`, {
    code,
    provider: 'openai',
    retryable: code === 'rate_limit',
  });
}

/**
 * Builds a council adapter that consults an OpenAI chat model.
 *
 * @param model - Model identifier forwarded to the Chat Completions API.
 * @param label - Display name exposed on the returned adapter.
 * @returns An adapter whose `consult` resolves to the first choice's message
 *   content, or an empty string when that content is absent.
 * @throws GuardrailError with code `auth` (from `consult`) when
 *   `OPENAI_API_KEY` is not set, or a classified GuardrailError when the API
 *   call itself fails.
 */
export function makeOpenAICouncilAdapter(model: string, label: string): CouncilAdapter {
  async function consult(prompt: string, context: string): Promise<string> {
    // The key is read on every call, so it is picked up at consult time
    // rather than at adapter-construction time.
    const key = process.env.OPENAI_API_KEY;
    if (!key) {
      throw new GuardrailError('OPENAI_API_KEY not set', { code: 'auth', provider: 'openai' });
    }

    const openai = new OpenAI({ apiKey: key });
    const userContent = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;

    let completion: OpenAI.ChatCompletion;
    try {
      completion = await openai.chat.completions.create({
        model,
        max_tokens: MAX_OUTPUT_TOKENS,
        messages: [
          { role: 'system', content: SYSTEM_PROMPT },
          { role: 'user', content: userContent },
        ],
      });
    } catch (err) {
      throw wrapOpenAIFailure(err);
    }

    // Only the first choice is consulted; a missing choice or null content
    // collapses to the empty string.
    const firstChoice = completion.choices[0];
    return firstChoice?.message?.content ?? '';
  }

  return { label, consult };
}
@@ -0,0 +1,7 @@
1
/**
 * Contract implemented by council adapters.
 *
 * Council adapters are factory-created (not loaded via src/adapters/loader.ts),
 * so they don't implement AdapterBase.
 */
export interface CouncilAdapter {
  /**
   * Display name used for output grouping. Distinct from the
   * machine-identifier `name` on AdapterBase.
   */
  readonly label: string;

  /**
   * Asks the adapter a question against the supplied context.
   *
   * @param prompt - The question being put to the adapter.
   * @param context - Background material accompanying the question.
   * @returns A promise resolving to the adapter's answer as a string.
   */
  consult(prompt: string, context: string): Promise<string>;
}
@@ -1,5 +1,5 @@
1
1
  import * as path from 'node:path';
2
- import { AutopilotError } from '../core/errors.ts';
2
+ import { GuardrailError } from '../core/errors.ts';
3
3
  import { checkApiVersionCompatibility, type AdapterBase } from './base.ts';
4
4
 
5
5
  export type IntegrationPoint = 'review-engine' | 'vcs-host' | 'migration-runner' | 'review-bot-parser';
@@ -42,7 +42,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
42
42
 
43
43
  if (isPathRef(ref)) {
44
44
  if (!options.unsafeAllowLocalAdapters) {
45
- throw new AutopilotError(
45
+ throw new GuardrailError(
46
46
  `Path-based adapter refs require unsafeAllowLocalAdapters:true — set this only for trusted local adapters`,
47
47
  { code: 'invalid_config', details: { point, ref } }
48
48
  );
@@ -51,7 +51,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
51
51
  } else {
52
52
  const builtin = BUILTIN_PATHS[point]?.[ref];
53
53
  if (!builtin) {
54
- throw new AutopilotError(`Unknown built-in ${point} adapter: "${ref}"`, {
54
+ throw new GuardrailError(`Unknown built-in ${point} adapter: "${ref}"`, {
55
55
  code: 'invalid_config',
56
56
  details: { point, ref, available: Object.keys(BUILTIN_PATHS[point] ?? {}) },
57
57
  });
@@ -63,7 +63,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
63
63
  try {
64
64
  mod = (await import(modulePath)) as { default?: T } | T;
65
65
  } catch (err) {
66
- throw new AutopilotError(`Failed to import adapter from ${modulePath}`, {
66
+ throw new GuardrailError(`Failed to import adapter from ${modulePath}`, {
67
67
  code: 'invalid_config',
68
68
  details: { point, ref, modulePath, cause: err instanceof Error ? err.message : String(err) },
69
69
  });
@@ -71,7 +71,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
71
71
 
72
72
  const adapter = ('default' in mod ? mod.default : mod) as T;
73
73
  if (!adapter || typeof adapter !== 'object') {
74
- throw new AutopilotError(`Adapter module did not export a valid adapter object`, {
74
+ throw new GuardrailError(`Adapter module did not export a valid adapter object`, {
75
75
  code: 'invalid_config',
76
76
  details: { point, ref, modulePath },
77
77
  });
@@ -80,7 +80,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
80
80
  validateShape(adapter, point, modulePath);
81
81
 
82
82
  if (!checkApiVersionCompatibility(adapter.apiVersion)) {
83
- throw new AutopilotError(`Adapter apiVersion ${adapter.apiVersion} incompatible with core`, {
83
+ throw new GuardrailError(`Adapter apiVersion ${adapter.apiVersion} incompatible with core`, {
84
84
  code: 'invalid_config',
85
85
  details: { point, ref, adapterApiVersion: adapter.apiVersion },
86
86
  });
@@ -99,7 +99,7 @@ function validateShape(adapter: AdapterBase, point: IntegrationPoint, modulePath
99
99
  missing.push('name/apiVersion');
100
100
  }
101
101
  if (missing.length > 0) {
102
- throw new AutopilotError(
102
+ throw new GuardrailError(
103
103
  `Adapter at ${modulePath} missing required methods: ${missing.join(', ')}`,
104
104
  { code: 'invalid_config', details: { point, modulePath, missing } }
105
105
  );
@@ -1,6 +1,6 @@
1
1
  import type { Capabilities } from '../base.ts';
2
2
  import type { ReviewEngine, ReviewInput, ReviewOutput } from './types.ts';
3
- import { AutopilotError } from '../../core/errors.ts';
3
+ import { GuardrailError } from '../../core/errors.ts';
4
4
  import { detectProviderUsage, dominantProvider, type Provider } from '../../core/detect/provider-usage.ts';
5
5
 
6
6
  interface AvailableProvider {
@@ -50,7 +50,7 @@ async function resolveAdapter(cwd: string): Promise<ReviewEngine> {
50
50
  const available = getAvailableProviders();
51
51
 
52
52
  if (available.length === 0) {
53
- throw new AutopilotError(
53
+ throw new GuardrailError(
54
54
  'No LLM API key found. Set one of: ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, GROQ_API_KEY',
55
55
  { code: 'auth', provider: 'auto' },
56
56
  );