@delegance/claude-autopilot 5.0.4 → 5.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,50 @@
1
- // Matches "path/to/file.ts:42", "`path/to/file.ts`", or bare filenames with common extensions
2
- const FILE_REF = /(?:`([^`]+\.[a-z]{1,6})`|(\b[\w./\-]+\.[a-z]{1,6})(?::(\d+))?)/;
1
// Allowlist of code-file extensions accepted in a bare (un-backticked) file
// reference. Without this constraint a generic `\.[a-z]{1,6}` suffix matches
// prose abbreviations like "e.g" and "i.e", which is how the `fix` command
// broke for users — every finding got `file: "e.g"` and the auto-fixer matched
// nothing.
//
// JS regex alternation is leftmost-first, so longer alternatives come before
// shorter ones: sorted strictly by length DESC, ties within a length bucket
// alphabetical. Tests in claude-adapter.test.ts pin this ordering.
const CODE_EXT = '(?:' + [
    // 10
    'dockerfile',
    // 7
    'graphql',
    // 6
    'coffee|gradle|svelte',
    // 5
    'astro|cmake|jsonc|proto|scala|swift',
    // 4
    'bash|cljs|dart|fish|html|java|json|less|sass|scss|toml|yaml',
    // 3
    'asm|cjs|clj|cpp|css|edn|elm|env|erl|exs|fsi|fsx|gql|hcl|hpp|htm|ini|jsx|lua|mdx|mjs|mli|nim|php|sol|sql|tsx|vue|xml|yml|zig|zsh',
    // 2
    'cc|cs|ex|fs|go|hs|jl|js|kt|md|mk|ml|mm|pl|pm|py|rb|rs|sc|sh|tf|ts',
    // 1
    'c|d|h|m|r|s',
].join('|') + ')';
// Matches a file reference inside free text. Capture groups:
//   1 — a backtick-wrapped path such as "`path/to/file.ts`". Any 1–6 letter
//       extension is accepted here because the backticks signal intent.
//   2 — a bare path such as "path/to/file.ts"; must end in a CODE_EXT extension.
//   3 — the line number of an optional ":42" suffix (bare paths only).
//
// The `\b` after the bare-path group requires the extension to end at a word
// boundary. Without it a short extension matches the head of a longer word:
// "foo.config" was reported as `foo.c` and "README.markdown" as `README.m`.
// `\x60` is a backtick, spelled as an escape so it can live inside the
// String.raw template literals. The regex is case-insensitive.
const FILE_REF = new RegExp(String.raw`(?:` +
    String.raw`\x60([^\x60]+\.[a-z]{1,6})\x60` +
    String.raw`|(\b[\w./\-]+\.` + CODE_EXT + String.raw`)\b(?::(\d+))?` +
    String.raw`)`, 'i');
3
36
  function extractFileRef(text) {
4
37
  const m = text.match(FILE_REF);
5
38
  if (!m)
6
39
  return { file: '<unspecified>' };
7
40
  const raw = (m[1] ?? m[2]);
8
- // Skip version strings (v1.2.3) and bare dotfile extensions with no path separator
9
- if (/^v?\d/.test(raw) || (!raw.includes('/') && raw.startsWith('.') && raw.split('.').length === 2)) {
41
+ // Skip version strings (v1.2.3), bare dotfile extensions with no path
42
+ // separator, and known prose abbreviations that slipped through the regex
43
+ // (mostly when backtick-wrapped; a bare `U.S` also gets here because `s` is
44
+ // an allowed extension and FILE_REF is case-insensitive).
45
+ if (/^v?\d/.test(raw) ||
46
+ (!raw.includes('/') && raw.startsWith('.') && raw.split('.').length === 2) ||
47
+ /^(?:e\.g|i\.e|etc|vs|cf|al|U\.S|U\.K)$/i.test(raw)) {
10
48
  return { file: '<unspecified>' };
11
49
  }
12
50
  const line = m[3] ? parseInt(m[3], 10) : undefined;
@@ -54,10 +54,23 @@ export function detectProject(cwd) {
54
54
  }
55
55
  const reqTxt = path.join(cwd, 'requirements.txt');
56
56
  const pyproject = path.join(cwd, 'pyproject.toml');
57
- if ((fs.existsSync(reqTxt) && fileContains(reqTxt, 'fastapi')) ||
58
- (fs.existsSync(pyproject) && fileContains(pyproject, 'fastapi'))) {
57
+ const hasFastapi = (fs.existsSync(reqTxt) && fileContains(reqTxt, 'fastapi')) ||
58
+ (fs.existsSync(pyproject) && fileContains(pyproject, 'fastapi'));
59
+ if (hasFastapi) {
59
60
  return { preset: 'python-fastapi', testCommand: 'pytest', confidence: 'high', evidence: 'found fastapi in requirements' };
60
61
  }
62
+ // Generic Python — covers any pyproject.toml or requirements.txt project that
63
+ // isn't FastAPI. Previously fell through to the JS/Generic preset, which
64
+ // writes `npm test` and npm-only static rules into a Python repo — the most
65
+ // visible "is this thing built?" papercut for non-JS users.
66
+ if (fs.existsSync(pyproject) || fs.existsSync(reqTxt)) {
67
+ return {
68
+ preset: 'python',
69
+ testCommand: 'pytest',
70
+ confidence: 'high',
71
+ evidence: fs.existsSync(pyproject) ? 'found pyproject.toml' : 'found requirements.txt',
72
+ };
73
+ }
61
74
  const pkgPath = path.join(cwd, 'package.json');
62
75
  if (fs.existsSync(pkgPath)) {
63
76
  const pkg = readJson(pkgPath);
@@ -23,7 +23,10 @@ export interface PrDescResult {
23
23
  }
24
24
  export declare function truncateDiff(diff: string, charLimit?: number): string;
25
25
  export declare function summarizeFindings(findings: Finding[], max?: number): string;
26
- export declare function parseDescription(raw: string): {
26
+ export declare function parseDescription(raw: string, fallback?: {
27
+ branchName?: string;
28
+ firstSummaryLine?: string;
29
+ }): {
27
30
  title: string;
28
31
  body: string;
29
32
  };
@@ -16,13 +16,67 @@ export function summarizeFindings(findings, max = 10) {
16
16
  .map(f => `- [${f.severity.toUpperCase()}] ${f.file}:${f.line ?? '?'} — ${f.message}`)
17
17
  .join('\n');
18
18
  }
19
- export function parseDescription(raw) {
19
/**
 * Split raw model output into a PR title and body.
 *
 * The title comes from the first `Title: ...` line. When that line is missing
 * or empty, the fallback chain is: branch-based conventional-commit title
 * (`fix/foo-bar` → `fix: foo bar`), then the first summary bullet, then the
 * literal "chore: update".
 *
 * The body is everything after the first "\n\n---\n" separator; when there is
 * no separator, it is the raw text with the first `Title:` line removed.
 *
 * @param {string} raw Raw model output.
 * @param {{ branchName?: string, firstSummaryLine?: string }} [fallback]
 *   Optional sources for a title when the model did not emit one.
 * @returns {{ title: string, body: string }}
 */
export function parseDescription(raw, fallback) {
    // `[ \t]*`, not `\s*`: `\s` also matches newlines, so an empty `Title:`
    // line would capture the next non-blank line (typically `---`) as the
    // title and silently bypass the fallback chain below.
    const titleMatch = raw.match(/^Title:[ \t]*(.+)$/m);
    let title = titleMatch ? titleMatch[1].trim() : '';
    if (!title) {
        title = deriveTitleFromBranch(fallback?.branchName) ??
            deriveTitleFromSummary(fallback?.firstSummaryLine) ??
            'chore: update';
    }
    const sepIdx = raw.indexOf('\n\n---\n');
    // `+ 5` leaves the separator's trailing newline in place; trim() drops it.
    const body = sepIdx !== -1
        ? raw.slice(sepIdx + 5).trim()
        : raw.replace(/^Title:.*\n?/m, '').trim();
    return { title, body };
}
// Branch prefixes that are already valid conventional-commit types.
const CONVENTIONAL_TYPES = new Set(['feat', 'fix', 'chore', 'docs', 'refactor', 'test', 'perf', 'style', 'ci', 'build']);
/**
 * Derive a title from a git branch name.
 *
 * - `fix/cost-tracker` → `fix: cost tracker`; `feature/foo` → `feat: foo`;
 *   `bugfix/x` and `hotfix/x` → `fix: x`.
 * - Unknown prefix with a slash (`autopilot-test/validate-weights`) →
 *   `chore: validate weights`.
 * - No slash → the branch name with `-`, `_`, `/` runs turned into spaces.
 *
 * @param {string | undefined} branch
 * @returns {string | null} null for a missing branch, `unknown`, `HEAD`,
 *   `main`/`master`/`develop`, or a name that normalizes to nothing. The
 *   caller chains with `??`, which only falls through on null/undefined, so
 *   an empty string must never be returned.
 */
function deriveTitleFromBranch(branch) {
    if (!branch || branch === 'unknown' || branch === 'HEAD' || /^(main|master|develop)$/i.test(branch))
        return null;
    // `/` is in the class so multi-segment branches such as
    // `fix/auth/session-leak` become `fix: auth session leak`.
    const toWords = (s) => s.replace(/[-_/]+/g, ' ').trim();
    const slashIdx = branch.indexOf('/');
    if (slashIdx > 0) {
        const rawPrefix = branch.slice(0, slashIdx).toLowerCase();
        const words = toWords(branch.slice(slashIdx + 1));
        const type = rawPrefix === 'feature' || rawPrefix === 'features' ? 'feat'
            : rawPrefix === 'bugfix' || rawPrefix === 'hotfix' ? 'fix'
                : CONVENTIONAL_TYPES.has(rawPrefix) ? rawPrefix
                    : null;
        if (type) {
            return words ? `${type}: ${words}` : null;
        }
        // Unknown prefix: use the part after the first slash and default the
        // type to `chore:` so the title still has a conventional prefix.
        if (words)
            return `chore: ${words}`;
    }
    // No slash, or nothing usable after it: clean the whole branch name.
    return toWords(branch) || null;
}
/**
 * Derive a title from a summary bullet line.
 *
 * Strips leading whitespace and bullet markers (`-`, `*`, `•`). A result
 * longer than 70 characters is clipped to 67 characters plus "...".
 *
 * @param {string | undefined} summaryLine
 * @returns {string | null} null when the line is missing or empty once stripped.
 */
function deriveTitleFromSummary(summaryLine) {
    if (!summaryLine)
        return null;
    const cleaned = summaryLine.replace(/^[\s\-*•]+/, '').trim();
    if (!cleaned)
        return null;
    // Count and cut by code point, not UTF-16 unit, so clipping never splits
    // a surrogate pair (e.g. an emoji) and leaves a broken character.
    const chars = Array.from(cleaned);
    return chars.length > 70 ? `${chars.slice(0, 67).join('')}...` : cleaned;
}
26
80
  export async function runPrDesc(options) {
27
81
  const branchName = options._branchName ?? getBranchName();
28
82
  const diff = options._gitDiff ?? getGitDiff(options.base);
@@ -34,7 +88,12 @@ export async function runPrDesc(options) {
34
88
  const prompt = buildPrompt(branchName, truncateDiff(diff), summarizeFindings(findings));
35
89
  const engine = options._reviewEngine ?? await resolveEngine();
36
90
  const { rawOutput } = await engine.review({ content: prompt, kind: 'pr-diff' });
37
- const { title, body } = parseDescription(rawOutput);
91
+ // Extract first non-empty bullet from the model's Summary section as a
92
+ // last-resort title fallback when the model didn't emit `Title: ...`.
93
+ const firstSummaryLine = rawOutput.split('\n')
94
+ .map(l => l.trim())
95
+ .find(l => /^[-*•]\s/.test(l));
96
+ const { title, body } = parseDescription(rawOutput, { branchName, firstSummaryLine });
38
97
  const formatted = `Title: ${title}\n\n---\n${body}`;
39
98
  if (options.output) {
40
99
  fs.writeFileSync(options.output, formatted, 'utf8');
@@ -81,7 +140,13 @@ function loadCachedFindings() {
81
140
  }
82
141
  }
83
142
  function buildPrompt(branch, diff, findingsSummary) {
84
- return `Generate a pull request description with three sections:
143
+ return `Generate a pull request title and description.
144
+
145
+ Output format (must follow exactly):
146
+
147
+ Title: <one-line conventional-commit title — e.g. "fix(auth): close session leak on logout" or "feat: add SSO for enterprise tenants". Do NOT use a generic placeholder like "chore: update". Derive from the actual diff.>
148
+
149
+ ---
85
150
 
86
151
  ## Summary
87
152
  <3-5 bullet points describing what changed and why>
@@ -133,6 +133,19 @@ export async function runScan(options = {}) {
133
133
  cwd,
134
134
  gitSummary: focusHint,
135
135
  });
136
+ // Single-file scan fallback — when only one file was scanned, the LLM
137
+ // doesn't always repeat the file path in its findings (it knows the
138
+ // context). Backfill `<unspecified>` with the actual scan target so the
139
+ // `fix` command can match findings to real paths. Without this, a
140
+ // `claude-autopilot scan src/foo.ts` produces findings with file
141
+ // `<unspecified>` and `fix --severity all` reports "no fixable findings".
142
+ if (relFiles.length === 1) {
143
+ const onlyFile = relFiles[0];
144
+ for (const f of result.findings) {
145
+ if (!f.file || f.file === '<unspecified>')
146
+ f.file = onlyFile;
147
+ }
148
+ }
136
149
  // Apply ignore rules
137
150
  const ignoreRules = [...loadIgnoreRules(cwd), ...parseConfigIgnore(config.ignore)];
138
151
  const findings = applyIgnoreRules(result.findings, ignoreRules);
@@ -16,6 +16,7 @@ const PRESET_LABELS = {
16
16
  't3': 'T3 Stack (Next.js + tRPC + Prisma)',
17
17
  'rails-postgres': 'Ruby on Rails + PostgreSQL',
18
18
  'python-fastapi': 'Python FastAPI',
19
+ 'python': 'Python',
19
20
  'go': 'Go + PostgreSQL',
20
21
  'generic': 'Generic (no stack-specific assumptions)',
21
22
  };
@@ -111,7 +112,15 @@ export async function runSetup(options = {}) {
111
112
  throw new Error(`Preset config not found for: ${detection.preset}. Looked in:\n ${presetSearchPaths(detection.preset, cwd).join('\n ')}`);
112
113
  }
113
114
  let presetContent = await fsAsync.readFile(presetConfigPath, 'utf8');
114
- presetContent = presetContent.trimEnd() + `\ntestCommand: "${detection.testCommand}"\n`;
115
+ // Only append testCommand if the preset doesn't already declare one — several
116
+ // presets (go, python, python-fastapi, rails-postgres) ship with their own
117
+ // testCommand line. Unconditionally appending produced duplicate YAML keys
118
+ // ("testCommand" twice in the same map), which yaml parsers reject. After
119
+ // 5.0.5 that broke `setup` on Python repos: every command after setup
120
+ // hard-failed until the user manually edited the file.
121
+ if (!/^testCommand\s*:/m.test(presetContent)) {
122
+ presetContent = presetContent.trimEnd() + `\ntestCommand: "${detection.testCommand}"\n`;
123
+ }
115
124
  // Apply profile overlay if specified
116
125
  if (options.profile) {
117
126
  const profile = PROFILES[options.profile];
@@ -11,7 +11,12 @@ export async function runGuardrail(input) {
11
11
  // reviewer flagged — the bugs the LLM is best at often ride alongside one a static
12
12
  // rule already caught.
13
13
  const runReviewOnStaticFail = pipelineCfg.runReviewOnStaticFail !== false;
14
- const runReviewOnTestFail = pipelineCfg.runReviewOnTestFail === true;
14
+ // Default true (5.0.5+): when an auto-detected test command fails or is missing
15
+ // (e.g. `npm test` on a Python repo with no test script), skipping review made
16
+ // first-runs return zero useful output. Review on failing code is exactly when
17
+ // it's most useful. Users who explicitly set `runReviewOnTestFail: false` keep
18
+ // the strict behavior.
19
+ const runReviewOnTestFail = pipelineCfg.runReviewOnTestFail !== false;
15
20
  // Static-rules phase — tests always run afterward, regardless of status. The
16
21
  // runReviewOnStaticFail flag only gates the LLM review phase (matching its name);
17
22
  // skipping tests on a static-fail would be surprising and asymmetric with
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@delegance/claude-autopilot",
3
- "version": "5.0.4",
3
+ "version": "5.0.6",
4
4
  "type": "module",
5
5
  "description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
6
6
  "keywords": [
@@ -32,7 +32,12 @@ chunking:
32
32
  perFileMaxTokens: 32000
33
33
  pipeline:
34
34
  runReviewOnStaticFail: true
35
- runReviewOnTestFail: false
35
+ # Default true so that failed/missing test commands don't silently kill the
36
+ # LLM review phase. Most common case: user runs `setup` on a fresh repo,
37
+ # auto-detected test command is `npm test` but no test script exists →
38
+ # exit 1 → review skipped → user sees nothing useful. Review on failing
39
+ # code is the path where review is *most* valuable.
40
+ runReviewOnTestFail: true
36
41
  # Optional: multi-model council. Uncomment + set ANTHROPIC_API_KEY and/or
37
42
  # OPENAI_API_KEY. Models are dispatched in parallel; the synthesizer reads
38
43
  # their responses and writes the consensus. Both APIs supported (chat-completions
@@ -0,0 +1,24 @@
1
+ configVersion: 1
2
+ reviewEngine: { adapter: auto }
3
+ vcsHost: { adapter: github }
4
+ reviewBot: { adapter: cursor }
5
+ protectedPaths:
6
+ - "**/auth/**"
7
+ - "**/security.py"
8
+ - "**/config.py"
9
+ - "**/settings.py"
10
+ - "**/secrets/**"
11
+ staticRules:
12
+ - hardcoded-secrets
13
+ testCommand: pytest -q
14
+ thresholds:
15
+ bugbotAutoFix: 85
16
+ bugbotProposePatch: 60
17
+ maxValidateRetries: 3
18
+ reviewStrategy: auto
19
+ chunking:
20
+ smallTierMaxTokens: 8000
21
+ perFileMaxTokens: 32000
22
+ pipeline:
23
+ runReviewOnStaticFail: true
24
+ runReviewOnTestFail: true
@@ -0,0 +1,30 @@
1
+ A Python application (general — not framework-specific). Common patterns:
2
+ - Python 3.10+, virtualenv or uv/poetry for deps
3
+ - pytest for tests, ruff or flake8 for lint, mypy for types
4
+ - pyproject.toml for project config (PEP 621) or requirements.txt
5
+ - asyncio + aiohttp / httpx for async I/O
6
+ - Pydantic v2 or dataclasses for data models
7
+ - python-dotenv or os.environ for config (no Pydantic Settings assumed)
8
+
9
+ Conventions to encourage:
10
+ - Type hints on public functions
11
+ - f-strings over .format() / %
12
+ - pathlib over os.path
13
+ - context managers (`with` blocks) for resources
14
+ - explicit exception types, no bare `except:`
15
+
16
+ Things that should flag CRITICAL:
17
+ - f-string SQL: f"SELECT * FROM users WHERE id = {user_id}"
18
+ - Bare `except:` or `except Exception:` swallowing errors
19
+ - Hardcoded secrets / API keys in source files
20
+ - subprocess.run with shell=True on user-controlled input
21
+ - pickle.load on untrusted data
22
+ - eval / exec on user input
23
+ - Synchronous blocking calls inside async def (requests, time.sleep, open)
24
+
25
+ Things that should flag WARNING:
26
+ - Mutable default arguments (def f(x=[]) → bug)
27
+ - Missing type hints on public functions
28
+ - Broad exception catches that hide root causes
29
+ - print() statements left in non-CLI code (use logging)
30
+ - TODO / FIXME comments without owners or context