@delegance/claude-autopilot 2.5.0 → 5.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/README.md +164 -106
- package/bin/_launcher.js +77 -0
- package/bin/claude-autopilot.js +3 -0
- package/bin/guardrail.js +3 -0
- package/package.json +15 -9
- package/presets/generic/guardrail.config.yaml +35 -0
- package/presets/generic/stack.md +40 -0
- package/presets/nextjs-supabase/{autopilot.config.yaml → guardrail.config.yaml} +7 -0
- package/scripts/autoregress.ts +27 -11
- package/skills/autopilot/SKILL.md +170 -0
- package/skills/claude-autopilot.md +80 -0
- package/skills/guardrail.md +39 -0
- package/skills/migrate/SKILL.md +83 -0
- package/src/adapters/council/claude.ts +41 -0
- package/src/adapters/council/openai.ts +40 -0
- package/src/adapters/council/types.ts +7 -0
- package/src/adapters/loader.ts +7 -7
- package/src/adapters/review-engine/auto.ts +2 -2
- package/src/adapters/review-engine/claude.ts +9 -11
- package/src/adapters/review-engine/codex.ts +9 -11
- package/src/adapters/review-engine/gemini.ts +9 -11
- package/src/adapters/review-engine/openai-compatible.ts +10 -12
- package/src/adapters/review-engine/parse-output.ts +32 -6
- package/src/adapters/review-engine/prompt-builder.ts +19 -0
- package/src/adapters/review-engine/types.ts +1 -1
- package/src/adapters/vcs-host/commit-status.ts +39 -0
- package/src/adapters/vcs-host/github.ts +2 -2
- package/src/cli/baseline.ts +125 -0
- package/src/cli/ci.ts +11 -8
- package/src/cli/costs.ts +2 -2
- package/src/cli/council.ts +96 -0
- package/src/cli/detector.ts +21 -5
- package/src/cli/explain.ts +197 -0
- package/src/cli/fix.ts +173 -111
- package/src/cli/hook.ts +72 -27
- package/src/cli/ignore-helper.ts +116 -0
- package/src/cli/index.ts +272 -31
- package/src/cli/init.ts +12 -12
- package/src/cli/lsp.ts +200 -0
- package/src/cli/mcp.ts +206 -0
- package/src/cli/pr-comment.ts +5 -5
- package/src/cli/pr-desc.ts +168 -0
- package/src/cli/pr-review-comments.ts +3 -3
- package/src/cli/pr.ts +76 -0
- package/src/cli/preflight.ts +15 -32
- package/src/cli/report.ts +186 -0
- package/src/cli/run.ts +140 -36
- package/src/cli/scan.ts +233 -0
- package/src/cli/setup.ts +121 -15
- package/src/cli/test-gen.ts +125 -0
- package/src/cli/triage.ts +137 -0
- package/src/cli/watch.ts +52 -31
- package/src/cli/worker.ts +109 -0
- package/src/core/cache/review-cache.ts +2 -2
- package/src/core/chunking/index.ts +2 -2
- package/src/core/config/loader.ts +10 -10
- package/src/core/config/preset-resolver.ts +6 -6
- package/src/core/config/schema.ts +103 -2
- package/src/core/config/types.ts +57 -2
- package/src/core/council/config.ts +71 -0
- package/src/core/council/context.ts +17 -0
- package/src/core/council/runner.ts +83 -0
- package/src/core/council/types.ts +45 -0
- package/src/core/detect/llm-key.ts +89 -0
- package/src/core/detect/workspaces.ts +103 -0
- package/src/core/errors.ts +4 -4
- package/src/core/fix/generator.ts +149 -0
- package/src/core/ignore/index.ts +4 -4
- package/src/core/mcp/concurrency.ts +16 -0
- package/src/core/mcp/handlers/fix-finding.ts +126 -0
- package/src/core/mcp/handlers/get-capabilities.ts +62 -0
- package/src/core/mcp/handlers/get-findings.ts +36 -0
- package/src/core/mcp/handlers/review-diff.ts +65 -0
- package/src/core/mcp/handlers/scan-files.ts +65 -0
- package/src/core/mcp/handlers/validate-fix.ts +41 -0
- package/src/core/mcp/run-store.ts +85 -0
- package/src/core/mcp/workspace.ts +35 -0
- package/src/core/persist/baseline.ts +112 -0
- package/src/core/persist/cost-log.ts +1 -1
- package/src/core/persist/findings-cache.ts +1 -1
- package/src/core/persist/triage.ts +112 -0
- package/src/core/phases/static-rules.ts +18 -5
- package/src/core/pipeline/review-phase.ts +65 -26
- package/src/core/pipeline/run.ts +42 -10
- package/src/core/runtime/lock.ts +2 -2
- package/src/core/runtime/state.ts +2 -2
- package/src/core/schema-alignment/detector.ts +59 -0
- package/src/core/schema-alignment/extractor/index.ts +24 -0
- package/src/core/schema-alignment/extractor/prisma.ts +21 -0
- package/src/core/schema-alignment/extractor/sql.ts +99 -0
- package/src/core/schema-alignment/llm-check.ts +91 -0
- package/src/core/schema-alignment/scanner.ts +107 -0
- package/src/core/schema-alignment/types.ts +43 -0
- package/src/core/shell.ts +3 -3
- package/src/core/static-rules/registry.ts +17 -8
- package/src/core/static-rules/rules/brand-tokens.ts +145 -0
- package/src/core/static-rules/rules/hardcoded-secrets.ts +27 -1
- package/src/core/static-rules/rules/insecure-redirect.ts +67 -0
- package/src/core/static-rules/rules/missing-auth.ts +70 -0
- package/src/core/static-rules/rules/schema-alignment.ts +132 -0
- package/src/core/static-rules/rules/sql-injection.ts +71 -0
- package/src/core/static-rules/rules/ssrf.ts +63 -0
- package/src/core/static-rules/tailwind-extractor.ts +38 -0
- package/src/core/test-gen/coverage-analyzer.ts +93 -0
- package/src/core/test-gen/framework-detector.ts +21 -0
- package/src/core/test-gen/test-writer.ts +33 -0
- package/src/core/ui/design-context-loader.ts +87 -0
- package/src/core/worker/client.ts +46 -0
- package/src/core/worker/lockfile.ts +38 -0
- package/src/core/worker/server.ts +81 -0
- package/src/formatters/junit.ts +52 -0
- package/src/formatters/sarif.ts +2 -2
- package/src/index.ts +1 -2
- package/tests/snapshots/baselines/src-formatters-sarif.json +4 -4
- package/tests/snapshots/index.json +3 -3
- package/tests/snapshots/src-formatters-sarif.snap.ts +1 -1
- package/tests/snapshots/src-snapshots-impact-selector.snap.ts +3 -3
- package/tests/snapshots/src-snapshots-import-scanner.snap.ts +3 -3
- package/tests/snapshots/src-snapshots-serializer.snap.ts +2 -2
- package/bin/autopilot.js +0 -20
- package/skills/autopilot.md +0 -157
- /package/presets/go/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/presets/python-fastapi/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/presets/rails-postgres/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/presets/t3/{autopilot.config.yaml → guardrail.config.yaml} +0 -0
- /package/{src → scripts}/snapshots/impact-selector.ts +0 -0
- /package/{src → scripts}/snapshots/import-scanner.ts +0 -0
- /package/{src → scripts}/snapshots/serializer.ts +0 -0
|
@@ -18,6 +18,13 @@ staticRules:
|
|
|
18
18
|
- npm-audit
|
|
19
19
|
- package-lock-sync
|
|
20
20
|
- supabase-rls-bypass
|
|
21
|
+
- sql-injection
|
|
22
|
+
- missing-auth
|
|
23
|
+
- ssrf
|
|
24
|
+
- insecure-redirect
|
|
25
|
+
policy:
|
|
26
|
+
failOn: critical
|
|
27
|
+
newOnly: false
|
|
21
28
|
thresholds:
|
|
22
29
|
bugbotAutoFix: 85
|
|
23
30
|
bugbotProposePatch: 60
|
package/scripts/autoregress.ts
CHANGED
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
import * as fs from 'node:fs';
|
|
4
4
|
import * as path from 'node:path';
|
|
5
5
|
import * as os from 'node:os';
|
|
6
|
-
import {
|
|
6
|
+
import { spawnSync } from 'node:child_process';
|
|
7
7
|
import { fileURLToPath } from 'node:url';
|
|
8
|
-
import { selectSnapshots } from '
|
|
8
|
+
import { selectSnapshots } from './snapshots/impact-selector.ts';
|
|
9
9
|
import OpenAI from 'openai';
|
|
10
|
-
import { buildImportMap } from '
|
|
10
|
+
import { buildImportMap } from './snapshots/import-scanner.ts';
|
|
11
11
|
|
|
12
12
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
13
|
const ROOT = path.resolve(__dirname, '..');
|
|
@@ -38,11 +38,15 @@ export function diffBaselines(baselineJson: string, currentJson: string): string
|
|
|
38
38
|
|
|
39
39
|
function getChangedFiles(since?: string): string[] | null {
|
|
40
40
|
try {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
41
|
+
let base = since;
|
|
42
|
+
if (!base) {
|
|
43
|
+
const r = spawnSync('git', ['merge-base', 'origin/main', 'HEAD'], { cwd: ROOT, encoding: 'utf8' });
|
|
44
|
+
if (r.status !== 0) return null;
|
|
45
|
+
base = r.stdout.trim();
|
|
46
|
+
}
|
|
47
|
+
const r = spawnSync('git', ['diff', base, 'HEAD', '--name-only'], { cwd: ROOT, encoding: 'utf8' });
|
|
48
|
+
if (r.status !== 0) return null;
|
|
49
|
+
return r.stdout.trim().split('\n').filter(Boolean);
|
|
46
50
|
} catch { return null; }
|
|
47
51
|
}
|
|
48
52
|
|
|
@@ -146,8 +150,17 @@ function cmdUpdate(args: string[]): number {
|
|
|
146
150
|
continue;
|
|
147
151
|
}
|
|
148
152
|
process.stdout.write(` ${snap} ... `);
|
|
153
|
+
const slug2 = path.basename(snap, '.snap.ts');
|
|
154
|
+
const baselinePath = path.join(BASELINES_DIR, `${slug2}.json`);
|
|
155
|
+
const beforeMtime = fs.existsSync(baselinePath) ? fs.statSync(baselinePath).mtimeMs : 0;
|
|
149
156
|
runSnapshot(snap, true);
|
|
150
|
-
|
|
157
|
+
const captured = fs.existsSync(baselinePath) && fs.statSync(baselinePath).mtimeMs > beforeMtime;
|
|
158
|
+
if (captured) {
|
|
159
|
+
console.log('updated');
|
|
160
|
+
} else {
|
|
161
|
+
console.error('CAPTURE FAILED (baseline not written)');
|
|
162
|
+
failed++;
|
|
163
|
+
}
|
|
151
164
|
}
|
|
152
165
|
return failed > 0 ? 1 : 0;
|
|
153
166
|
}
|
|
@@ -167,7 +180,7 @@ Write a snapshot test file. Requirements:
|
|
|
167
180
|
// @source-commit: {sourceCommit}
|
|
168
181
|
// @generator-version: {version}
|
|
169
182
|
2. Import the module's exported functions under test
|
|
170
|
-
3. Import { normalizeSnapshot } from '../../
|
|
183
|
+
3. Import { normalizeSnapshot } from '../../scripts/snapshots/serializer.ts'
|
|
171
184
|
4. Import fs from 'node:fs', describe/it from 'node:test', assert from 'node:assert/strict'
|
|
172
185
|
5. Baseline loading pattern (use slug {slug}):
|
|
173
186
|
const SLUG = '{slug}';
|
|
@@ -219,7 +232,10 @@ async function cmdGenerate(args: string[]): Promise<number> {
|
|
|
219
232
|
|
|
220
233
|
const client = new OpenAI({ apiKey });
|
|
221
234
|
let sourceCommit = 'unknown';
|
|
222
|
-
try {
|
|
235
|
+
try {
|
|
236
|
+
const r = spawnSync('git', ['rev-parse', '--short', 'HEAD'], { cwd: ROOT, encoding: 'utf8' });
|
|
237
|
+
if (r.status === 0) sourceCommit = r.stdout.trim();
|
|
238
|
+
} catch {}
|
|
223
239
|
const generatedAt = new Date().toISOString();
|
|
224
240
|
|
|
225
241
|
for (const srcFile of srcFiles) {
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: autopilot
|
|
3
|
+
description: After spec approval, automatically execute the full pipeline — plan → implement → migrate → validate → PR → Codex review. No manual intervention required.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Autopilot — Spec to PR Pipeline
|
|
7
|
+
|
|
8
|
+
After the user approves a spec during brainstorming, this skill runs the full pipeline automatically.
|
|
9
|
+
|
|
10
|
+
## Prerequisites
|
|
11
|
+
|
|
12
|
+
- Approved spec file at `docs/superpowers/specs/YYYY-MM-DD-<topic>-design.md`
|
|
13
|
+
- Superpowers plugin installed (`writing-plans`, `using-git-worktrees`, `subagent-driven-development`)
|
|
14
|
+
- Scripts installed and dependencies present (run step 0 preflight to verify)
|
|
15
|
+
|
|
16
|
+
## CRITICAL: Do Not Pause
|
|
17
|
+
|
|
18
|
+
**Run the entire pipeline without stopping.** Do NOT:
|
|
19
|
+
- Ask "want me to continue?" between steps
|
|
20
|
+
- Show intermediate results or ask for confirmation
|
|
21
|
+
- Pause to report progress mid-pipeline
|
|
22
|
+
- Wait for user input between any steps
|
|
23
|
+
|
|
24
|
+
The ONLY time you stop is if a step **fails and cannot be recovered**. Otherwise, execute all steps sequentially and report ONCE at the end (Step 9).
|
|
25
|
+
|
|
26
|
+
Brief status lines like `[autopilot] Step 3: Executing plan...` are fine. Full summaries, questions, or check-ins are not.
|
|
27
|
+
|
|
28
|
+
## Pipeline
|
|
29
|
+
|
|
30
|
+
Execute these steps in order. Do NOT pause between steps unless a step fails.
|
|
31
|
+
|
|
32
|
+
### Step 0: Preflight
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
npx tsx scripts/preflight.ts
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
If any check **fails** (red ✗): stop and tell the user what to fix before continuing.
|
|
39
|
+
If checks only **warn** (yellow !): proceed — degraded steps will be noted in the final report.
|
|
40
|
+
If all pass: continue immediately, no user interaction needed.
|
|
41
|
+
|
|
42
|
+
### Step 1: Write Implementation Plan
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
Invoke: superpowers:writing-plans
|
|
46
|
+
Input: The approved spec file
|
|
47
|
+
Output: Plan at docs/superpowers/plans/YYYY-MM-DD-<topic>.md
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Commit the plan. Do NOT ask the user for execution choice — always use subagent-driven development.
|
|
51
|
+
|
|
52
|
+
### Step 2: Set Up Worktree
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Invoke: superpowers:using-git-worktrees
|
|
56
|
+
Branch: feature/<topic-slug>
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
After the worktree is created, symlink the local env file into it so scripts
|
|
60
|
+
(validate, Codex review, migrate) can read secrets:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Detect which env file the project uses
|
|
64
|
+
ENV_FILE=$(ls .env.local .env.dev .env.development .env 2>/dev/null | head -1)
|
|
65
|
+
if [ -n "$ENV_FILE" ]; then
|
|
66
|
+
ln -sf "$(pwd)/$ENV_FILE" ".claude/worktrees/<branch>/$ENV_FILE"
|
|
67
|
+
fi
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
If no env file is found, skip the symlink and note it in the final report (the Step 0 preflight will already have flagged it).
|
|
71
|
+
|
|
72
|
+
### Step 3: Execute Plan
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
Invoke: superpowers:subagent-driven-development
|
|
76
|
+
Input: The plan file
|
|
77
|
+
Mode: dispatch fresh subagent per task
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
For each task:
|
|
81
|
+
- Dispatch implementer subagent
|
|
82
|
+
- On completion: verify commit landed in worktree
|
|
83
|
+
- Skip formal spec/quality review to maintain speed (the validate step catches issues)
|
|
84
|
+
- If subagent fails to write to worktree: implement directly
|
|
85
|
+
|
|
86
|
+
### Step 4: Auto-Migrate
|
|
87
|
+
|
|
88
|
+
For any `.sql` files created in `data/deltas/` during implementation:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
/migrate
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Run against **dev only** by default. Once dev succeeds, do not promote any further; continue with the rest of the pipeline.
|
|
95
|
+
|
|
96
|
+
Only promote to QA → prod if the user has explicitly enabled it (e.g., `AUTOPILOT_ALLOW_PROD_MIGRATIONS=true` in their env) or asked for it directly. Production migrations are irreversible — never auto-promote without a clear signal.
|
|
97
|
+
|
|
98
|
+
If migration fails, fix the SQL and retry (max 2 retries). If it still fails, stop and report.
|
|
99
|
+
|
|
100
|
+
### Step 5: Validate
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
npx tsx scripts/validate.ts --commit-autofix --allow-dirty
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
If FAIL:
|
|
107
|
+
- Read the validation report at `.claude/validation-report.json`
|
|
108
|
+
- Fix the blocking issues
|
|
109
|
+
- Re-run validate
|
|
110
|
+
- Max 3 retry iterations
|
|
111
|
+
|
|
112
|
+
If PASS: proceed to PR.
|
|
113
|
+
|
|
114
|
+
### Step 6: Push + Create PR
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
git push -u origin <branch>
|
|
118
|
+
gh pr create --title "<concise title>" --body "<generated PR body with spec link, test plan>"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Step 7: Codex PR Review
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
npx tsx scripts/codex-pr-review.ts <pr-number>
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Posts Codex review as a GitHub PR comment. If critical findings:
|
|
128
|
+
- Fix them on the branch
|
|
129
|
+
- Push
|
|
130
|
+
- Re-run Codex review
|
|
131
|
+
- Max 2 iterations
|
|
132
|
+
|
|
133
|
+
### Step 8: Bugbot Triage + Fix
|
|
134
|
+
|
|
135
|
+
Wait 60 seconds for Cursor bugbot to post comments, then:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
npx tsx scripts/bugbot.ts --pr <pr-number>
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Triages each finding (real bug vs false positive), auto-fixes real bugs, dismisses false positives with GitHub replies. If fixes applied:
|
|
142
|
+
- Push
|
|
143
|
+
- Wait for new bugbot comments (30s)
|
|
144
|
+
- Re-run /bugbot
|
|
145
|
+
- Max 3 rounds
|
|
146
|
+
|
|
147
|
+
### Step 9: Report
|
|
148
|
+
|
|
149
|
+
Tell the user:
|
|
150
|
+
- PR URL
|
|
151
|
+
- Test count
|
|
152
|
+
- Validation verdict
|
|
153
|
+
- Codex review summary
|
|
154
|
+
- Bugbot triage summary (fixed / dismissed / needs-human)
|
|
155
|
+
- Any human-required items that couldn't be auto-fixed
|
|
156
|
+
|
|
157
|
+
## Error Recovery
|
|
158
|
+
|
|
159
|
+
- **Subagent failure:** Re-dispatch with more context or implement directly
|
|
160
|
+
- **Migration failure:** Fix SQL, re-run /migrate
|
|
161
|
+
- **Validate failure:** Fix issues, re-run (max 3 retries)
|
|
162
|
+
- **Codex critical findings:** Fix, push, re-review (max 2 retries)
|
|
163
|
+
- **Bugbot findings:** /bugbot handles triage + fix automatically (max 3 rounds)
|
|
164
|
+
- **Unrecoverable error:** Stop, report what was completed, show remaining work
|
|
165
|
+
|
|
166
|
+
## When NOT to Use
|
|
167
|
+
|
|
168
|
+
- During brainstorming (this runs AFTER spec approval)
|
|
169
|
+
- For hotfixes (too heavy — just commit and push)
|
|
170
|
+
- When the user wants manual control over each step
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: claude-autopilot
|
|
3
|
+
description: Autonomous development pipeline — brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Use when the user asks to "ship", "implement", "build", or "autopilot" a feature that's past the idea stage. Runs end-to-end without pausing for check-ins.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# claude-autopilot — Agent Loop
|
|
7
|
+
|
|
8
|
+
This skill drives the full claude-autopilot pipeline when a user asks Claude to ship a feature. It is an *agent loop*, not a CLI reference — the commands it invokes are an implementation detail. The skill's job is to decide which phase applies, when to pause for user approval, and when to recover from a failed phase.
|
|
9
|
+
|
|
10
|
+
## When to invoke
|
|
11
|
+
|
|
12
|
+
- User says "ship X", "implement X", "build X", "autopilot X", or hands Claude a spec and says "go"
|
|
13
|
+
- User approved a spec during `/brainstorm` and the next step is implementation
|
|
14
|
+
- User is resuming a paused pipeline after fixing a failed phase by hand
|
|
15
|
+
|
|
16
|
+
## When NOT to invoke
|
|
17
|
+
|
|
18
|
+
- User is still in discovery ("help me think through X") — invoke `brainstorming` first
|
|
19
|
+
- User wants Claude to run one specific phase only (they'll invoke that skill directly — `migrate`, `review`, `triage`, etc.)
|
|
20
|
+
- User is hot-fixing a bug — too heavy, just edit and push
|
|
21
|
+
|
|
22
|
+
## The pipeline
|
|
23
|
+
|
|
24
|
+
Each phase writes its output to disk. Claude can stop, the user can edit the artifact, and Claude can resume from that phase without re-running earlier ones.
|
|
25
|
+
|
|
26
|
+
| Phase | Artifact | What Claude does | When it stops |
|
|
27
|
+
|---|---|---|---|
|
|
28
|
+
| **Brainstorm** | `docs/specs/YYYY-MM-DD-<topic>-design.md` | Invokes `brainstorming` skill to turn idea into reviewed spec | When spec is committed + user approves |
|
|
29
|
+
| **Spec review** | PR comment or inline notes | Invokes `codex-review` skill against the spec file | After one round unless criticals found |
|
|
30
|
+
| **Plan** | `docs/plans/YYYY-MM-DD-<topic>.md` | Invokes `writing-plans` to break spec into phases | When plan is committed |
|
|
31
|
+
| **Plan review** | Inline notes | Invokes `codex-review` skill against the plan | After one round unless criticals found |
|
|
32
|
+
| **Branch** | git worktree at `.claude/worktrees/<slug>` or branch on HEAD | Invokes `using-git-worktrees` or cuts branch directly | When branch exists |
|
|
33
|
+
| **Implement** | Git commits on the branch | Invokes `subagent-driven-development`, one subagent per plan phase | When all plan phases have landing commits |
|
|
34
|
+
| **Migrate** | SQL deltas applied | Invokes `migrate` skill if DB migrations exist in the branch; skips otherwise | When dev is migrated; promotion to QA → prod runs only if the user has explicitly enabled or requested it |
|
|
35
|
+
| **Validate** | `.claude/validation-report.json` | Runs static rules + tests + typecheck + LLM review via `claude-autopilot run` | When validation passes or after 3 failed retries |
|
|
36
|
+
| **PR** | GitHub PR number | Invokes `commit-push-pr` or runs `gh pr create` directly | When PR is open |
|
|
37
|
+
| **PR review** | PR comment | Invokes `review-2pass` or `codex-pr-review` against the PR | After one round unless criticals found |
|
|
38
|
+
| **Triage** | Bugbot thread replies + follow-up commits | Invokes `bugbot` skill to triage reviewer findings | When all HIGH severity items are resolved or human-dismissed |
|
|
39
|
+
|
|
40
|
+
## Core rules
|
|
41
|
+
|
|
42
|
+
1. **Do not pause mid-pipeline.** Once past the Brainstorm gate (which is inherently interactive), execute phases end-to-end. Do not ask "want me to continue?" between phases. Do not show intermediate reports. The user gets one report at the end.
|
|
43
|
+
2. **Each phase's artifact is the source of truth for the next.** If the plan file changes between phases, the implementation uses the new plan. Claude does not keep phase outputs in memory — re-read from disk.
|
|
44
|
+
3. **Failure in a phase triggers recovery, not pause.**
|
|
45
|
+
- Migration fails → fix the SQL, re-run.
|
|
46
|
+
- Validation fails → read the report, fix the blockers, re-run (max 3 attempts).
|
|
47
|
+
- PR review finds criticals → fix on branch, push, re-review (max 2 rounds).
|
|
48
|
+
- Bugbot finds real bugs → fix, push, re-triage (max 3 rounds).
|
|
49
|
+
- Unrecoverable failure → stop, report what completed, show what remains.
|
|
50
|
+
4. **Codex review is part of the loop, not optional.** By default, the pipeline dispatches to `gpt-5.3-codex` for spec review, plan review, and PR review. This is the multi-model moat — don't skip it.
|
|
51
|
+
5. **Skills are swappable.** `review-2pass` and `council` are alternative review phases — a user can configure which runs. The pipeline doesn't hardcode Claude or Codex.
|
|
52
|
+
|
|
53
|
+
## Phase outputs
|
|
54
|
+
|
|
55
|
+
Every phase writes to a predictable path. If Claude crashes or the user stops the pipeline, the resume point is "whatever's the newest unfinished artifact."
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
docs/
|
|
59
|
+
├── specs/YYYY-MM-DD-<topic>-design.md # from Brainstorm
|
|
60
|
+
├── plans/YYYY-MM-DD-<topic>.md # from Plan
|
|
61
|
+
└── reviews/<PR>-codex.md # from PR review (optional)
|
|
62
|
+
.claude/
|
|
63
|
+
├── validation-report.json # from Validate
|
|
64
|
+
└── bugbot-state.json # from Triage
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Recovery
|
|
68
|
+
|
|
69
|
+
- **Resume mid-pipeline.** User runs `/autopilot` after fixing a failed phase. Claude reads the newest artifacts, skips completed phases, starts from the first incomplete one.
|
|
70
|
+
- **Skip a phase.** `/autopilot --skip migrate` — useful when the pipeline's auto-detection is wrong (e.g., no migrations exist but the migrate phase still tries to run).
|
|
71
|
+
- **Rewire a phase.** User edits `.claude/skills/autopilot/SKILL.md` to swap `review-2pass` for `council`. Claude picks up the change on next invocation — skill is the config.
|
|
72
|
+
|
|
73
|
+
## Why this skill exists separately from CLI subcommands
|
|
74
|
+
|
|
75
|
+
The CLI subcommands (`claude-autopilot run`, `claude-autopilot migrate`, etc.) are imperative — each does one thing. This skill is declarative — it describes the pipeline's *loop invariants* (phase order, artifact paths, recovery rules, when to pause). Claude reads this skill to decide *which* CLI subcommand to run *next*. Users who want to run one phase by hand use the CLI; users who want Claude to drive the whole pipeline invoke this skill.
|
|
76
|
+
|
|
77
|
+
See also:
|
|
78
|
+
- `skills/autopilot/SKILL.md` — detailed step-by-step runbook (deprecated alias for this file in v5; retained for back-compat)
|
|
79
|
+
- `skills/migrate/SKILL.md` — migrate phase runbook
|
|
80
|
+
- `skills/guardrail.md` — review phase alias (legacy; use `review` subcommand directly)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: guardrail
|
|
3
|
+
description: (Legacy alias) LLM-powered code review — runs static rules + LLM review over git-changed files. As of v5, this is the review *phase* of claude-autopilot. Invoke via `claude-autopilot run` or the full pipeline via `claude-autopilot` skill.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# guardrail — review phase (legacy alias)
|
|
7
|
+
|
|
8
|
+
As of `@delegance/claude-autopilot@5.0.0`, `guardrail` is the review phase of the full pipeline, not a standalone product. This skill is preserved as a back-compat alias for Claude Code agents that were configured against v4.x.
|
|
9
|
+
|
|
10
|
+
**For new configurations**, invoke `skills/claude-autopilot.md` to drive the full pipeline, or use the flat review subcommands (`run`, `scan`, `ci`, etc.) for just the review phase. Grouped syntax (`claude-autopilot review <verb>`) lands in alpha.2.
|
|
11
|
+
|
|
12
|
+
## What it does
|
|
13
|
+
|
|
14
|
+
Static rules (`hardcoded-secrets`, `sql-injection`, `missing-auth`, `ssrf`, `insecure-redirect`, `npm-audit`, `package-lock-sync`, `console-log`, `todo-fixme`, `large-file`, `missing-tests`, `brand-tokens`, `schema-alignment`) run first, then an LLM reviewer (`claude`, `codex`, `gemini`, or `openai-compatible`) gets the code with context. Output is SARIF / JUnit / inline PR comments.
|
|
15
|
+
|
|
16
|
+
## When to use
|
|
17
|
+
|
|
18
|
+
- Before creating a PR — `claude-autopilot run --base main`
|
|
19
|
+
- To audit a path without git changes — `claude-autopilot scan src/auth/`
|
|
20
|
+
- To ask a targeted question — `claude-autopilot scan --ask "is there an IDOR here?" src/api/`
|
|
21
|
+
- Inside CI — `claude-autopilot ci`
|
|
22
|
+
- Dev loop — `claude-autopilot watch`
|
|
23
|
+
|
|
24
|
+
## Legacy commands that still work
|
|
25
|
+
|
|
26
|
+
All v4 `guardrail <cmd>` invocations work unchanged through v5.x. A one-line deprecation notice prints on first invocation per terminal session. Migration guide: `docs/migration/v4-to-v5.md`.
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
guardrail run --base main # still works — equivalent to `claude-autopilot run --base main`
|
|
30
|
+
guardrail scan src/auth/ # still works
|
|
31
|
+
guardrail ci # still works
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## What changed in v5
|
|
35
|
+
|
|
36
|
+
- `guardrail` is now one phase of a pipeline, not a standalone product.
|
|
37
|
+
- The full pipeline runs via the `claude-autopilot` skill or `claude-autopilot` CLI.
|
|
38
|
+
- Review subcommands remain flat in alpha.1 (`run`, `scan`, `ci`, `explain`, `fix`, `baseline`). The grouped `claude-autopilot review <verb>` syntax arrives in alpha.2 as an alias — flat forms will continue to work.
|
|
39
|
+
- The package is `@delegance/claude-autopilot`; the old `@delegance/guardrail` package will become a thin tombstone that forwards to the new package in v5.0.0 GA.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: migrate
|
|
3
|
+
description: Run database migrations against Supabase environments (dev → QA → prod). Validates SQL, executes with ledger tracking, and auto-generates types/supabase.ts.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Database Migration
|
|
7
|
+
|
|
8
|
+
Run a migration through the dev → QA → prod pipeline with validation at each step.
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
### 1. Identify the migration file
|
|
13
|
+
|
|
14
|
+
If a migration file is given as an argument, use it. Otherwise, find the most recently modified `.sql` file in `data/deltas/`.
|
|
15
|
+
|
|
16
|
+
### 2. Validate (dry run on dev)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npx tsx scripts/supabase/migrate.ts <file> --env dev --dry-run
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Present the validation results. If there are errors, help the user fix them before proceeding.
|
|
23
|
+
|
|
24
|
+
### 3. Run on dev
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npx tsx scripts/supabase/migrate.ts <file> --env dev
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### 4. Ask the user
|
|
31
|
+
|
|
32
|
+
> "Migration succeeded on dev. `types/supabase.ts` updated. Promote to QA?"
|
|
33
|
+
|
|
34
|
+
### 5. Run on QA
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
npx tsx scripts/supabase/migrate.ts --promote qa
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### 6. Ask the user
|
|
41
|
+
|
|
42
|
+
> "Migration succeeded on QA. Promote to prod?"
|
|
43
|
+
|
|
44
|
+
### 7. Run on prod
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
npx tsx scripts/supabase/migrate.ts --promote prod --confirm-prod
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 8. Commit
|
|
51
|
+
|
|
52
|
+
After all environments are done, commit the updated `types/supabase.ts` and the migration file:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
git add types/supabase.ts data/deltas/<migration-file>
|
|
56
|
+
git commit -m "feat: <description of schema change>"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Flags
|
|
60
|
+
|
|
61
|
+
| Flag | Purpose |
|
|
62
|
+
|------|---------|
|
|
63
|
+
| `--env dev\|qa\|prod` | Target environment |
|
|
64
|
+
| `--dry-run` | Validate only, don't execute |
|
|
65
|
+
| `--force` | Allow destructive operations (DROP, TRUNCATE) |
|
|
66
|
+
| `--confirm-prod` | Required for prod execution |
|
|
67
|
+
| `--promote qa\|prod` | Run missing migrations from source env |
|
|
68
|
+
|
|
69
|
+
## Validation Checks
|
|
70
|
+
|
|
71
|
+
The system validates before every execution:
|
|
72
|
+
- Duplicate table/column detection
|
|
73
|
+
- snake_case naming enforcement
|
|
74
|
+
- RLS + policy required for every new table
|
|
75
|
+
- Destructive operation blocking (unless --force)
|
|
76
|
+
- Cross-env prerequisite verification
|
|
77
|
+
- Checksum integrity (modified files are rejected)
|
|
78
|
+
- Promotion chain enforcement (prod requires QA first)
|
|
79
|
+
|
|
80
|
+
## Requirements
|
|
81
|
+
|
|
82
|
+
- `.claude/supabase-envs.json` with `dbUrl` for each env (gitignored)
|
|
83
|
+
- `postgres` npm package installed
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
2
|
+
import { GuardrailError } from '../../core/errors.ts';
|
|
3
|
+
import { classifyError } from '../review-engine/prompt-builder.ts';
|
|
4
|
+
import type { CouncilAdapter } from './types.ts';
|
|
5
|
+
|
|
6
|
+
/** System prompt sent with every Claude council request. */
const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;

/** Value passed as `max_tokens` on each council request. */
const MAX_OUTPUT_TOKENS = 2048;

/**
 * Builds a council adapter that answers questions through the Anthropic Messages API.
 *
 * @param model - Model identifier forwarded unchanged to `messages.create`.
 * @param label - Display name exposed on the returned adapter.
 * @returns A `CouncilAdapter` whose `consult` resolves to the concatenated text
 *   blocks of the model's reply.
 */
export function makeClaudeCouncilAdapter(model: string, label: string): CouncilAdapter {
  return {
    label,

    /**
     * Sends the context and question as a single user message.
     *
     * @throws GuardrailError with code `auth` when `ANTHROPIC_API_KEY` is unset,
     *   or with the code produced by `classifyError` when the API call fails.
     */
    async consult(prompt: string, context: string): Promise<string> {
      // The key is read on every call, so it may be set after the adapter is created.
      const apiKey = process.env.ANTHROPIC_API_KEY;
      if (!apiKey) {
        throw new GuardrailError('ANTHROPIC_API_KEY not set', { code: 'auth', provider: 'claude' });
      }

      const client = new Anthropic({ apiKey });
      const userMessage = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;

      let reply: Anthropic.Message;
      try {
        reply = await client.messages.create({
          model,
          max_tokens: MAX_OUTPUT_TOKENS,
          system: SYSTEM_PROMPT,
          messages: [{ role: 'user', content: userMessage }],
        });
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        const code = classifyError(message);
        // Only rate-limit failures are marked as retryable.
        const retryable = code === 'rate_limit';
        throw new GuardrailError(`Claude council call failed: ${message}`, {
          code,
          provider: 'claude',
          retryable,
        });
      }

      // Keep only text blocks and join them without a separator.
      const parts: string[] = [];
      for (const block of reply.content) {
        if (block.type === 'text') {
          parts.push((block as Anthropic.TextBlock).text);
        }
      }
      return parts.join('');
    },
  };
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
import { GuardrailError } from '../../core/errors.ts';
|
|
3
|
+
import { classifyError } from '../review-engine/prompt-builder.ts';
|
|
4
|
+
import type { CouncilAdapter } from './types.ts';
|
|
5
|
+
|
|
6
|
+
/** System prompt sent with every OpenAI council request. */
const SYSTEM_PROMPT = `You are a technical advisor reviewing a software design decision. Evaluate the provided context and question critically. Be direct and specific. Surface tradeoffs, risks, and your recommendation.`;

/** Value passed as `max_tokens` on each council request. */
const MAX_OUTPUT_TOKENS = 2048;

/**
 * Builds a council adapter that answers questions through the OpenAI Chat Completions API.
 *
 * @param model - Model identifier forwarded unchanged to `chat.completions.create`.
 * @param label - Display name exposed on the returned adapter.
 * @returns A `CouncilAdapter` whose `consult` resolves to the first choice's
 *   message content, or an empty string when that content is absent.
 */
export function makeOpenAICouncilAdapter(model: string, label: string): CouncilAdapter {
  return {
    label,

    /**
     * Sends the system prompt plus one user message holding the context and question.
     *
     * @throws GuardrailError with code `auth` when `OPENAI_API_KEY` is unset,
     *   or with the code produced by `classifyError` when the API call fails.
     */
    async consult(prompt: string, context: string): Promise<string> {
      // The key is read on every call, so it may be set after the adapter is created.
      const apiKey = process.env.OPENAI_API_KEY;
      if (!apiKey) {
        throw new GuardrailError('OPENAI_API_KEY not set', { code: 'auth', provider: 'openai' });
      }

      const client = new OpenAI({ apiKey });
      const userMessage = `## Context\n\n${context}\n\n## Question\n\n${prompt}`;

      let completion: OpenAI.ChatCompletion;
      try {
        completion = await client.chat.completions.create({
          model,
          max_tokens: MAX_OUTPUT_TOKENS,
          messages: [
            { role: 'system', content: SYSTEM_PROMPT },
            { role: 'user', content: userMessage },
          ],
        });
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        const code = classifyError(message);
        // Only rate-limit failures are marked as retryable.
        const retryable = code === 'rate_limit';
        throw new GuardrailError(`OpenAI council call failed: ${message}`, {
          code,
          provider: 'openai',
          retryable,
        });
      }

      // A missing choice or null content yields an empty string, not an error.
      const firstChoice = completion.choices[0];
      const answer = firstChoice?.message?.content;
      return answer ?? '';
    },
  };
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
 * Contract for a council member that can be asked a design question.
 *
 * Council adapters are factory-created (not loaded via src/adapters/loader.ts),
 * so they don't implement AdapterBase.
 */
export interface CouncilAdapter {
  /**
   * Display name for output grouping, distinct from the machine-identifier
   * `name` on AdapterBase.
   */
  readonly label: string;

  /**
   * Asks the adapter a question.
   *
   * @param prompt - The question to answer.
   * @param context - Background material supplied with the question.
   * @returns A promise resolving to the adapter's textual answer.
   */
  consult(prompt: string, context: string): Promise<string>;
}
|
package/src/adapters/loader.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as path from 'node:path';
|
|
2
|
-
import {
|
|
2
|
+
import { GuardrailError } from '../core/errors.ts';
|
|
3
3
|
import { checkApiVersionCompatibility, type AdapterBase } from './base.ts';
|
|
4
4
|
|
|
5
5
|
export type IntegrationPoint = 'review-engine' | 'vcs-host' | 'migration-runner' | 'review-bot-parser';
|
|
@@ -42,7 +42,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
|
|
|
42
42
|
|
|
43
43
|
if (isPathRef(ref)) {
|
|
44
44
|
if (!options.unsafeAllowLocalAdapters) {
|
|
45
|
-
throw new
|
|
45
|
+
throw new GuardrailError(
|
|
46
46
|
`Path-based adapter refs require unsafeAllowLocalAdapters:true — set this only for trusted local adapters`,
|
|
47
47
|
{ code: 'invalid_config', details: { point, ref } }
|
|
48
48
|
);
|
|
@@ -51,7 +51,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
|
|
|
51
51
|
} else {
|
|
52
52
|
const builtin = BUILTIN_PATHS[point]?.[ref];
|
|
53
53
|
if (!builtin) {
|
|
54
|
-
throw new
|
|
54
|
+
throw new GuardrailError(`Unknown built-in ${point} adapter: "${ref}"`, {
|
|
55
55
|
code: 'invalid_config',
|
|
56
56
|
details: { point, ref, available: Object.keys(BUILTIN_PATHS[point] ?? {}) },
|
|
57
57
|
});
|
|
@@ -63,7 +63,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
|
|
|
63
63
|
try {
|
|
64
64
|
mod = (await import(modulePath)) as { default?: T } | T;
|
|
65
65
|
} catch (err) {
|
|
66
|
-
throw new
|
|
66
|
+
throw new GuardrailError(`Failed to import adapter from ${modulePath}`, {
|
|
67
67
|
code: 'invalid_config',
|
|
68
68
|
details: { point, ref, modulePath, cause: err instanceof Error ? err.message : String(err) },
|
|
69
69
|
});
|
|
@@ -71,7 +71,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
|
|
|
71
71
|
|
|
72
72
|
const adapter = ('default' in mod ? mod.default : mod) as T;
|
|
73
73
|
if (!adapter || typeof adapter !== 'object') {
|
|
74
|
-
throw new
|
|
74
|
+
throw new GuardrailError(`Adapter module did not export a valid adapter object`, {
|
|
75
75
|
code: 'invalid_config',
|
|
76
76
|
details: { point, ref, modulePath },
|
|
77
77
|
});
|
|
@@ -80,7 +80,7 @@ export async function loadAdapter<T extends AdapterBase>(options: LoadAdapterOpt
|
|
|
80
80
|
validateShape(adapter, point, modulePath);
|
|
81
81
|
|
|
82
82
|
if (!checkApiVersionCompatibility(adapter.apiVersion)) {
|
|
83
|
-
throw new
|
|
83
|
+
throw new GuardrailError(`Adapter apiVersion ${adapter.apiVersion} incompatible with core`, {
|
|
84
84
|
code: 'invalid_config',
|
|
85
85
|
details: { point, ref, adapterApiVersion: adapter.apiVersion },
|
|
86
86
|
});
|
|
@@ -99,7 +99,7 @@ function validateShape(adapter: AdapterBase, point: IntegrationPoint, modulePath
|
|
|
99
99
|
missing.push('name/apiVersion');
|
|
100
100
|
}
|
|
101
101
|
if (missing.length > 0) {
|
|
102
|
-
throw new
|
|
102
|
+
throw new GuardrailError(
|
|
103
103
|
`Adapter at ${modulePath} missing required methods: ${missing.join(', ')}`,
|
|
104
104
|
{ code: 'invalid_config', details: { point, modulePath, missing } }
|
|
105
105
|
);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Capabilities } from '../base.ts';
|
|
2
2
|
import type { ReviewEngine, ReviewInput, ReviewOutput } from './types.ts';
|
|
3
|
-
import {
|
|
3
|
+
import { GuardrailError } from '../../core/errors.ts';
|
|
4
4
|
import { detectProviderUsage, dominantProvider, type Provider } from '../../core/detect/provider-usage.ts';
|
|
5
5
|
|
|
6
6
|
interface AvailableProvider {
|
|
@@ -50,7 +50,7 @@ async function resolveAdapter(cwd: string): Promise<ReviewEngine> {
|
|
|
50
50
|
const available = getAvailableProviders();
|
|
51
51
|
|
|
52
52
|
if (available.length === 0) {
|
|
53
|
-
throw new
|
|
53
|
+
throw new GuardrailError(
|
|
54
54
|
'No LLM API key found. Set one of: ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, GROQ_API_KEY',
|
|
55
55
|
{ code: 'auth', provider: 'auto' },
|
|
56
56
|
);
|