@delegance/claude-autopilot 5.0.4 → 5.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/adapters/review-engine/parse-output.js +42 -4
- package/dist/src/cli/detector.js +15 -2
- package/dist/src/cli/pr-desc.d.ts +4 -1
- package/dist/src/cli/pr-desc.js +61 -4
- package/dist/src/cli/setup.js +1 -0
- package/dist/src/core/pipeline/run.js +6 -1
- package/package.json +1 -1
- package/presets/generic/guardrail.config.yaml +6 -1
- package/presets/python/guardrail.config.yaml +24 -0
- package/presets/python/stack.md +30 -0
|
@@ -1,12 +1,50 @@
|
|
|
1
|
-
//
|
|
2
|
-
|
|
1
|
+
// Allowlist of code-file extensions accepted as a bare (non-backticked) file
// reference. Without this constraint the prior regex `\.[a-z]{1,6}` matched
// prose abbreviations like "e.g" and "i.e", which is how the `fix` command
// broke for users — every finding got `file: "e.g"` and the auto-fixer matched
// nothing.
//
// JS regex alternation is leftmost-first, so longer alternatives come before
// shorter ones — otherwise `file.cpp:42` could match `file.c` and the line
// number `:42` would be silently dropped. Sorted strictly by length DESC; ties
// within a length bucket are alphabetical. Tests in claude-adapter.test.ts pin
// this. The numeric comments below give the length of each bucket.
const CODE_EXT = String.raw`(?:` +
    // 10
    String.raw`dockerfile|` +
    // 7
    String.raw`graphql|` +
    // 6
    String.raw`coffee|gradle|svelte|` +
    // 5
    String.raw`astro|cmake|jsonc|proto|scala|swift|` +
    // 4
    String.raw`bash|cljs|dart|fish|html|java|json|less|sass|scss|toml|yaml|` +
    // 3
    String.raw`asm|cjs|clj|cpp|css|edn|elm|env|erl|exs|fsi|fsx|gql|hcl|hpp|htm|ini|jsx|lua|mdx|mjs|mli|nim|php|sol|sql|tsx|vue|xml|yml|zig|zsh|` +
    // 2
    String.raw`cc|cs|ex|fs|go|hs|jl|js|kt|md|mk|ml|mm|pl|pm|py|rb|rs|sc|sh|tf|ts|` +
    // 1
    String.raw`c|d|h|m|r|s` +
    String.raw`)`;
// Matches "path/to/file.ts:42" (bare with known ext) or "`path/to/file.ts`"
// (any 1-6 letter ext when explicitly backtick-wrapped). Backtick-wrapped
// accepts any extension because the LLM signaled intent; bare paths must end in
// a recognized code extension.
//
// Capture groups: 1 = backtick-wrapped path, 2 = bare path, 3 = line number
// (bare branch only). `\x60` is a backtick, escaped so it can live inside a
// template literal.
//
// The `(?!\w)` after the extension is required: length-ordering only protects
// extensions that are IN the allowlist. Without the lookahead an unlisted
// extension that merely starts with a listed one is truncated to a wrong file
// name and loses its line number (`users.csv:10` -> `users.cs`,
// `x.pyc` -> `x.py`, `x.cfg` -> `x.c`).
const FILE_REF = new RegExp(String.raw`(?:` +
    String.raw`\x60([^\x60]+\.[a-z]{1,6})\x60` +
    String.raw`|(\b[\w./\-]+\.` + CODE_EXT + String.raw`(?!\w))(?::(\d+))?` +
    String.raw`)`, 'i');
|
|
3
36
|
function extractFileRef(text) {
|
|
4
37
|
const m = text.match(FILE_REF);
|
|
5
38
|
if (!m)
|
|
6
39
|
return { file: '<unspecified>' };
|
|
7
40
|
const raw = (m[1] ?? m[2]);
|
|
8
|
-
// Skip version strings (v1.2.3)
|
|
9
|
-
|
|
41
|
+
// Skip version strings (v1.2.3), bare dotfile extensions with no path
|
|
42
|
+
// separator, and known prose abbreviations that slipped through the regex
|
|
43
|
+
// (only applicable when backtick-wrapped, since the bare branch already
|
|
44
|
+
// requires a known code extension).
|
|
45
|
+
if (/^v?\d/.test(raw) ||
|
|
46
|
+
(!raw.includes('/') && raw.startsWith('.') && raw.split('.').length === 2) ||
|
|
47
|
+
/^(?:e\.g|i\.e|etc|vs|cf|al|U\.S|U\.K)$/i.test(raw)) {
|
|
10
48
|
return { file: '<unspecified>' };
|
|
11
49
|
}
|
|
12
50
|
const line = m[3] ? parseInt(m[3], 10) : undefined;
|
package/dist/src/cli/detector.js
CHANGED
|
@@ -54,10 +54,23 @@ export function detectProject(cwd) {
|
|
|
54
54
|
}
|
|
55
55
|
const reqTxt = path.join(cwd, 'requirements.txt');
|
|
56
56
|
const pyproject = path.join(cwd, 'pyproject.toml');
|
|
57
|
-
|
|
58
|
-
(fs.existsSync(pyproject) && fileContains(pyproject, 'fastapi'))
|
|
57
|
+
const hasFastapi = (fs.existsSync(reqTxt) && fileContains(reqTxt, 'fastapi')) ||
|
|
58
|
+
(fs.existsSync(pyproject) && fileContains(pyproject, 'fastapi'));
|
|
59
|
+
if (hasFastapi) {
|
|
59
60
|
return { preset: 'python-fastapi', testCommand: 'pytest', confidence: 'high', evidence: 'found fastapi in requirements' };
|
|
60
61
|
}
|
|
62
|
+
// Generic Python — covers any pyproject.toml or requirements.txt project that
|
|
63
|
+
// isn't FastAPI. Previously fell through to the JS/Generic preset, which
|
|
64
|
+
// writes `npm test` and npm-only static rules into a Python repo — the most
|
|
65
|
+
// visible "is this thing built?" papercut for non-JS users.
|
|
66
|
+
if (fs.existsSync(pyproject) || fs.existsSync(reqTxt)) {
|
|
67
|
+
return {
|
|
68
|
+
preset: 'python',
|
|
69
|
+
testCommand: 'pytest',
|
|
70
|
+
confidence: 'high',
|
|
71
|
+
evidence: fs.existsSync(pyproject) ? 'found pyproject.toml' : 'found requirements.txt',
|
|
72
|
+
};
|
|
73
|
+
}
|
|
61
74
|
const pkgPath = path.join(cwd, 'package.json');
|
|
62
75
|
if (fs.existsSync(pkgPath)) {
|
|
63
76
|
const pkg = readJson(pkgPath);
|
|
@@ -23,7 +23,10 @@ export interface PrDescResult {
|
|
|
23
23
|
}
|
|
24
24
|
export declare function truncateDiff(diff: string, charLimit?: number): string;
|
|
25
25
|
export declare function summarizeFindings(findings: Finding[], max?: number): string;
|
|
26
|
-
export declare function parseDescription(raw: string): {
|
|
26
|
+
export declare function parseDescription(raw: string, fallback?: {
|
|
27
|
+
branchName?: string;
|
|
28
|
+
firstSummaryLine?: string;
|
|
29
|
+
}): {
|
|
27
30
|
title: string;
|
|
28
31
|
body: string;
|
|
29
32
|
};
|
package/dist/src/cli/pr-desc.js
CHANGED
|
@@ -16,13 +16,59 @@ export function summarizeFindings(findings, max = 10) {
|
|
|
16
16
|
.map(f => `- [${f.severity.toUpperCase()}] ${f.file}:${f.line ?? '?'} — ${f.message}`)
|
|
17
17
|
.join('\n');
|
|
18
18
|
}
|
|
19
|
-
export function parseDescription(raw) {
|
|
19
|
+
/**
 * Split raw model output into a PR title and body.
 *
 * @param raw - Model output, expected to look like
 *   `Title: <title>\n\n---\n<body>`.
 * @param fallback - Optional hints used only when no usable `Title:` line is
 *   present: `branchName` and `firstSummaryLine`.
 * @returns `{ title, body }`. `title` is never empty: it falls back to a
 *   branch-derived title, then the first summary bullet, then the literal
 *   "chore: update".
 */
export function parseDescription(raw, fallback) {
    // `[ \t]*`, not `\s*`: `\s` also matches newlines, so an empty `Title:`
    // line would let the capture group grab the NEXT non-empty line (typically
    // `---` or `## Summary`) as the title and bypass the fallback chain below.
    const titleMatch = raw.match(/^Title:[ \t]*(.+)$/m);
    let title = titleMatch ? titleMatch[1].trim() : '';
    // Fallback chain: branch-based conventional-commit title (`fix/foo-bar` →
    // `fix: foo bar`), then first summary bullet, then literal "chore: update".
    // The "chore: update" literal was the prior unconditional default — every
    // PR generated by this tool got the same title regardless of diff.
    if (!title) {
        title = deriveTitleFromBranch(fallback?.branchName) ??
            deriveTitleFromSummary(fallback?.firstSummaryLine) ??
            'chore: update';
    }
    // Body is everything after the `---` separator when present; otherwise the
    // whole output minus the `Title:` line. The slice offset leaves the
    // separator's trailing newline in place; `trim()` removes it.
    const sepIdx = raw.indexOf('\n\n---\n');
    const body = sepIdx !== -1 ? raw.slice(sepIdx + 5).trim() : raw.replace(/^Title:.*\n?/m, '').trim();
    return { title, body };
}
|
|
35
|
+
// Conventional-commit types recognised as a branch prefix (`<type>/<rest>`).
const CONVENTIONAL_TYPES = new Set(['feat', 'fix', 'chore', 'docs', 'refactor', 'test', 'perf', 'style', 'ci', 'build']);
/**
 * Build a PR title from a git branch name.
 *
 * `fix/cost-tracker` → `fix: cost tracker`, `feature/foo` → `feat: foo`.
 * Branches without a recognised type prefix are returned with separators
 * turned into spaces.
 *
 * @param branch - Branch name; may be undefined.
 * @returns The derived title, or null when the branch is missing, is a
 *   placeholder/trunk name, or normalizes to an empty string. Null (never '')
 *   matters because the caller chains with `??`, which only falls through on
 *   null/undefined.
 */
function deriveTitleFromBranch(branch) {
    const isPlaceholder = !branch || ['unknown', 'HEAD'].includes(branch);
    if (isPlaceholder || /^(main|master|develop)$/i.test(branch)) {
        return null;
    }
    // Runs of `-`, `_` and `/` become single spaces. `/` is included so a
    // multi-segment branch like `fix/auth/session-leak` reads
    // `fix: auth session leak` rather than `fix: auth/session-leak`.
    const toWords = (segment) => segment.replace(/[-_/]+/g, ' ').trim();
    const cut = branch.indexOf('/');
    if (cut > 0) {
        const head = branch.slice(0, cut).toLowerCase();
        let type = null;
        if (head === 'feature' || head === 'features') {
            type = 'feat';
        }
        else if (head === 'bugfix' || head === 'hotfix') {
            type = 'fix';
        }
        else if (CONVENTIONAL_TYPES.has(head)) {
            type = head;
        }
        if (type !== null) {
            const subject = toWords(branch.slice(cut + 1));
            return subject ? `${type}: ${subject}` : null;
        }
    }
    // No recognised prefix: use the whole branch name, or null if nothing is
    // left after normalizing (e.g. `_`, `---`).
    const words = toWords(branch);
    return words || null;
}
|
|
63
|
+
/**
 * Build a PR title from a summary bullet line.
 *
 * @param summaryLine - A line such as `- Add retry logic`; may be undefined.
 * @returns The line without its leading bullet markers, clipped to at most 70
 *   characters (67 plus "..."), or null when the input is missing or nothing
 *   is left after stripping. Null (never '') so the caller's `??` chain can
 *   fall through.
 */
function deriveTitleFromSummary(summaryLine) {
    if (!summaryLine)
        return null;
    // Strip leading whitespace and bullet/dash markers.
    const cleaned = summaryLine.replace(/^[\s\-*•]+/, '').trim();
    if (!cleaned)
        return null;
    // Count and cut by code point, not UTF-16 code unit: slicing the string
    // directly can split a surrogate pair (e.g. an emoji) at the cut and leave
    // a lone surrogate in the title. Identical to a plain slice for BMP text.
    const chars = Array.from(cleaned);
    return chars.length > 70 ? `${chars.slice(0, 67).join('')}...` : cleaned;
}
|
|
26
72
|
export async function runPrDesc(options) {
|
|
27
73
|
const branchName = options._branchName ?? getBranchName();
|
|
28
74
|
const diff = options._gitDiff ?? getGitDiff(options.base);
|
|
@@ -34,7 +80,12 @@ export async function runPrDesc(options) {
|
|
|
34
80
|
const prompt = buildPrompt(branchName, truncateDiff(diff), summarizeFindings(findings));
|
|
35
81
|
const engine = options._reviewEngine ?? await resolveEngine();
|
|
36
82
|
const { rawOutput } = await engine.review({ content: prompt, kind: 'pr-diff' });
|
|
37
|
-
|
|
83
|
+
// Extract first non-empty bullet from the model's Summary section as a
|
|
84
|
+
// last-resort title fallback when the model didn't emit `Title: ...`.
|
|
85
|
+
const firstSummaryLine = rawOutput.split('\n')
|
|
86
|
+
.map(l => l.trim())
|
|
87
|
+
.find(l => /^[-*•]\s/.test(l));
|
|
88
|
+
const { title, body } = parseDescription(rawOutput, { branchName, firstSummaryLine });
|
|
38
89
|
const formatted = `Title: ${title}\n\n---\n${body}`;
|
|
39
90
|
if (options.output) {
|
|
40
91
|
fs.writeFileSync(options.output, formatted, 'utf8');
|
|
@@ -81,7 +132,13 @@ function loadCachedFindings() {
|
|
|
81
132
|
}
|
|
82
133
|
}
|
|
83
134
|
function buildPrompt(branch, diff, findingsSummary) {
|
|
84
|
-
return `Generate a pull request
|
|
135
|
+
return `Generate a pull request title and description.
|
|
136
|
+
|
|
137
|
+
Output format (must follow exactly):
|
|
138
|
+
|
|
139
|
+
Title: <one-line conventional-commit title — e.g. "fix(auth): close session leak on logout" or "feat: add SSO for enterprise tenants". Do NOT use a generic placeholder like "chore: update". Derive from the actual diff.>
|
|
140
|
+
|
|
141
|
+
---
|
|
85
142
|
|
|
86
143
|
## Summary
|
|
87
144
|
<3-5 bullet points describing what changed and why>
|
package/dist/src/cli/setup.js
CHANGED
|
@@ -16,6 +16,7 @@ const PRESET_LABELS = {
|
|
|
16
16
|
't3': 'T3 Stack (Next.js + tRPC + Prisma)',
|
|
17
17
|
'rails-postgres': 'Ruby on Rails + PostgreSQL',
|
|
18
18
|
'python-fastapi': 'Python FastAPI',
|
|
19
|
+
'python': 'Python',
|
|
19
20
|
'go': 'Go + PostgreSQL',
|
|
20
21
|
'generic': 'Generic (no stack-specific assumptions)',
|
|
21
22
|
};
|
|
@@ -11,7 +11,12 @@ export async function runGuardrail(input) {
|
|
|
11
11
|
// reviewer flagged — the bugs the LLM is best at often ride alongside one a static
|
|
12
12
|
// rule already caught.
|
|
13
13
|
const runReviewOnStaticFail = pipelineCfg.runReviewOnStaticFail !== false;
|
|
14
|
-
|
|
14
|
+
// Default true (5.0.5+): when an auto-detected test command fails or is missing
|
|
15
|
+
// (e.g. `npm test` on a Python repo with no test script), skipping review made
|
|
16
|
+
// first-runs return zero useful output. Review on failing code is exactly when
|
|
17
|
+
// it's most useful. Users who explicitly set `runReviewOnTestFail: false` keep
|
|
18
|
+
// the strict behavior.
|
|
19
|
+
const runReviewOnTestFail = pipelineCfg.runReviewOnTestFail !== false;
|
|
15
20
|
// Static-rules phase — tests always run afterward, regardless of status. The
|
|
16
21
|
// runReviewOnStaticFail flag only gates the LLM review phase (matching its name);
|
|
17
22
|
// skipping tests on a static-fail would be surprising and asymmetric with
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@delegance/claude-autopilot",
|
|
3
|
-
"version": "5.0.4",
|
|
3
|
+
"version": "5.0.5",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
|
|
6
6
|
"keywords": [
|
|
@@ -32,7 +32,12 @@ chunking:
|
|
|
32
32
|
perFileMaxTokens: 32000
|
|
33
33
|
pipeline:
|
|
34
34
|
runReviewOnStaticFail: true
|
|
35
|
-
|
|
35
|
+
# Default true so that failed/missing test commands don't silently kill the
|
|
36
|
+
# LLM review phase. Most common case: user runs `setup` on a fresh repo,
|
|
37
|
+
# auto-detected test command is `npm test` but no test script exists →
|
|
38
|
+
# exit 1 → review skipped → user sees nothing useful. Review on failing
|
|
39
|
+
# code is the path where review is *most* valuable.
|
|
40
|
+
runReviewOnTestFail: true
|
|
36
41
|
# Optional: multi-model council. Uncomment + set ANTHROPIC_API_KEY and/or
|
|
37
42
|
# OPENAI_API_KEY. Models are dispatched in parallel; the synthesizer reads
|
|
38
43
|
# their responses and writes the consensus. Both APIs supported (chat-completions
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
configVersion: 1
|
|
2
|
+
reviewEngine: { adapter: auto }
|
|
3
|
+
vcsHost: { adapter: github }
|
|
4
|
+
reviewBot: { adapter: cursor }
|
|
5
|
+
protectedPaths:
|
|
6
|
+
- "**/auth/**"
|
|
7
|
+
- "**/security.py"
|
|
8
|
+
- "**/config.py"
|
|
9
|
+
- "**/settings.py"
|
|
10
|
+
- "**/secrets/**"
|
|
11
|
+
staticRules:
|
|
12
|
+
- hardcoded-secrets
|
|
13
|
+
testCommand: pytest -q
|
|
14
|
+
thresholds:
|
|
15
|
+
bugbotAutoFix: 85
|
|
16
|
+
bugbotProposePatch: 60
|
|
17
|
+
maxValidateRetries: 3
|
|
18
|
+
reviewStrategy: auto
|
|
19
|
+
chunking:
|
|
20
|
+
smallTierMaxTokens: 8000
|
|
21
|
+
perFileMaxTokens: 32000
|
|
22
|
+
pipeline:
|
|
23
|
+
runReviewOnStaticFail: true
|
|
24
|
+
runReviewOnTestFail: true
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
A Python application (general — not framework-specific). Common patterns:
|
|
2
|
+
- Python 3.10+, virtualenv or uv/poetry for deps
|
|
3
|
+
- pytest for tests, ruff or flake8 for lint, mypy for types
|
|
4
|
+
- pyproject.toml for project config (PEP 621) or requirements.txt
|
|
5
|
+
- asyncio + aiohttp / httpx for async I/O
|
|
6
|
+
- Pydantic v2 or dataclasses for data models
|
|
7
|
+
- python-dotenv or os.environ for config (no Pydantic Settings assumed)
|
|
8
|
+
|
|
9
|
+
Conventions to encourage:
|
|
10
|
+
- Type hints on public functions
|
|
11
|
+
- f-strings over .format() / %
|
|
12
|
+
- pathlib over os.path
|
|
13
|
+
- contextmanagers for resources
|
|
14
|
+
- explicit exception types, no bare `except:`
|
|
15
|
+
|
|
16
|
+
Things that should flag CRITICAL:
|
|
17
|
+
- f-string SQL: f"SELECT * FROM users WHERE id = {user_id}"
|
|
18
|
+
- Bare `except:` or `except Exception:` swallowing errors
|
|
19
|
+
- Hardcoded secrets / API keys in source files
|
|
20
|
+
- subprocess.run with shell=True on user-controlled input
|
|
21
|
+
- pickle.load on untrusted data
|
|
22
|
+
- eval / exec on user input
|
|
23
|
+
- Synchronous blocking calls inside async def (requests, time.sleep, open)
|
|
24
|
+
|
|
25
|
+
Things that should flag WARNING:
|
|
26
|
+
- Mutable default arguments (def f(x=[]) → bug)
|
|
27
|
+
- Missing type hints on public functions
|
|
28
|
+
- Broad exception catches that hide root causes
|
|
29
|
+
- print() statements left in non-CLI code (use logging)
|
|
30
|
+
- TODO / FIXME comments without owners or context
|