npm - @delegance/claude-autopilot - Versions diffs - 5.0.4 → 5.0.6 - Mend

@delegance/claude-autopilot 5.0.4 → 5.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/src/adapters/review-engine/parse-output.js +42 -4
package/dist/src/cli/detector.js +15 -2
package/dist/src/cli/pr-desc.d.ts +4 -1
package/dist/src/cli/pr-desc.js +69 -4
package/dist/src/cli/scan.js +13 -0
package/dist/src/cli/setup.js +10 -1
package/dist/src/core/pipeline/run.js +6 -1
package/package.json +1 -1
package/presets/generic/guardrail.config.yaml +6 -1
package/presets/python/guardrail.config.yaml +24 -0
package/presets/python/stack.md +30 -0

package/dist/src/adapters/review-engine/parse-output.js CHANGED Viewed

@@ -1,12 +1,50 @@
-// Matches "path/to/file.ts:42", "`path/to/file.ts`", or bare filenames with common extensions
-const FILE_REF = /(?:`([^`]+\.[a-z]{1,6})`|(\b[\w./\-]+\.[a-z]{1,6})(?::(\d+))?)/;
+// Allowlist of code-file extensions we'll accept as a file reference. Without
+// this constraint the prior regex `\.[a-z]{1,6}` matched prose abbreviations
+// like "e.g" and "i.e", which is how the `fix` command broke for users — every
+// finding got `file: "e.g"` and the auto-fixer matched nothing.
+//
+// JS regex alternation is leftmost-first, so longer alternatives MUST come
+// before shorter ones — otherwise `file.cpp:42` matches `file.c` and the line
+// number `:42` is silently dropped (across cpp/hpp/mdx/jsonc/dart/mm/mk/css/
+// hs/cmake/coffee and more). Sorted strictly by length DESC; ties within a
+// length bucket are alphabetical. Tests in claude-adapter.test.ts pin this.
+const CODE_EXT = String.raw `(?:` +
+    // 10
+    String.raw `dockerfile|` +
+    // 7
+    String.raw `graphql|` +
+    // 6
+    String.raw `coffee|gradle|svelte|` +
+    // 5
+    String.raw `astro|cmake|jsonc|proto|scala|swift|` +
+    // 4
+    String.raw `bash|cljs|dart|fish|html|java|json|less|sass|scss|toml|yaml|` +
+    // 3
+    String.raw `asm|cjs|clj|cpp|css|edn|elm|env|erl|exs|fsi|fsx|gql|hcl|hpp|htm|ini|jsx|lua|mdx|mjs|mli|nim|php|sol|sql|tsx|vue|xml|yml|zig|zsh|` +
+    // 2
+    String.raw `cc|cs|ex|fs|go|hs|jl|js|kt|md|mk|ml|mm|pl|pm|py|rb|rs|sc|sh|tf|ts|` +
+    // 1
+    String.raw `c|d|h|m|r|s` +
+    String.raw `)`;
+// Matches "path/to/file.ts:42" (bare, known ext only) or "`path/to/file.ts`"
+// (backtick-wrapped, any 1-6 letter ext). Backticks accept any such extension
+// because the LLM signaled intent; bare paths must be a recognized code file.
+const FILE_REF = new RegExp(String.raw `(?:` +
+    String.raw `\x60([^\x60]+\.[a-z]{1,6})\x60` +
+    String.raw `|(\b[\w./\-]+\.` + CODE_EXT + String.raw `)(?::(\d+))?` +
+    String.raw `)`, 'i');
 function extractFileRef(text) {
     const m = text.match(FILE_REF);
     if (!m)
         return { file: '<unspecified>' };
     const raw = (m[1] ?? m[2]);
-    // Skip version strings (v1.2.3) and bare dotfile extensions with no path separator
-    if (/^v?\d/.test(raw) || (!raw.includes('/') && raw.startsWith('.') && raw.split('.').length === 2)) {
+    // Skip version strings (v1.2.3), bare dotfile extensions with no path
+    // separator, and known prose abbreviations that slipped through the regex
+    // (only applicable when backtick-wrapped, since the bare branch already
+    // requires a known code extension).
+    if (/^v?\d/.test(raw) ||
+        (!raw.includes('/') && raw.startsWith('.') && raw.split('.').length === 2) ||
+        /^(?:e\.g|i\.e|etc|vs|cf|al|U\.S|U\.K)$/i.test(raw)) {
         return { file: '<unspecified>' };
     }
     const line = m[3] ? parseInt(m[3], 10) : undefined;

package/dist/src/cli/detector.js CHANGED Viewed

@@ -54,10 +54,23 @@ export function detectProject(cwd) {
     }
     const reqTxt = path.join(cwd, 'requirements.txt');
     const pyproject = path.join(cwd, 'pyproject.toml');
-    if ((fs.existsSync(reqTxt) && fileContains(reqTxt, 'fastapi')) ||
-        (fs.existsSync(pyproject) && fileContains(pyproject, 'fastapi'))) {
+    const hasFastapi = (fs.existsSync(reqTxt) && fileContains(reqTxt, 'fastapi')) ||
+        (fs.existsSync(pyproject) && fileContains(pyproject, 'fastapi'));
+    if (hasFastapi) {
         return { preset: 'python-fastapi', testCommand: 'pytest', confidence: 'high', evidence: 'found fastapi in requirements' };
     }
+    // Generic Python — covers any pyproject.toml or requirements.txt project that
+    // isn't FastAPI. Previously fell through to the JS/Generic preset, which
+    // writes `npm test` and npm-only static rules into a Python repo — the most
+    // visible "is this thing built?" papercut for non-JS users.
+    if (fs.existsSync(pyproject) || fs.existsSync(reqTxt)) {
+        return {
+            preset: 'python',
+            testCommand: 'pytest',
+            confidence: 'high',
+            evidence: fs.existsSync(pyproject) ? 'found pyproject.toml' : 'found requirements.txt',
+        };
+    }
     const pkgPath = path.join(cwd, 'package.json');
     if (fs.existsSync(pkgPath)) {
         const pkg = readJson(pkgPath);

package/dist/src/cli/pr-desc.d.ts CHANGED Viewed

@@ -23,7 +23,10 @@ export interface PrDescResult {
 }
 export declare function truncateDiff(diff: string, charLimit?: number): string;
 export declare function summarizeFindings(findings: Finding[], max?: number): string;
-export declare function parseDescription(raw: string): {
+export declare function parseDescription(raw: string, fallback?: {
+    branchName?: string;
+    firstSummaryLine?: string;
+}): {
     title: string;
     body: string;
 };

package/dist/src/cli/pr-desc.js CHANGED Viewed

@@ -16,13 +16,67 @@ export function summarizeFindings(findings, max = 10) {
         .map(f => `- [${f.severity.toUpperCase()}] ${f.file}:${f.line ?? '?'} — ${f.message}`)
         .join('\n');
 }
-export function parseDescription(raw) {
+export function parseDescription(raw, fallback) {
     const titleMatch = raw.match(/^Title:\s*(.+)$/m);
-    const title = titleMatch ? titleMatch[1].trim() : 'chore: update';
+    let title = titleMatch ? titleMatch[1].trim() : '';
+    // Fallback chain: branch-based conventional-commit title (`fix/foo-bar` →
+    // `fix: foo bar`), then first summary bullet, then literal "chore: update".
+    // The "chore: update" literal was the prior unconditional default — every
+    // PR generated by this tool got the same title regardless of diff.
+    if (!title) {
+        title = deriveTitleFromBranch(fallback?.branchName) ??
+            deriveTitleFromSummary(fallback?.firstSummaryLine) ??
+            'chore: update';
+    }
     const sepIdx = raw.indexOf('\n\n---\n');
     const body = sepIdx !== -1 ? raw.slice(sepIdx + 5).trim() : raw.replace(/^Title:.*\n?/m, '').trim();
     return { title, body };
 }
+const CONVENTIONAL_TYPES = new Set(['feat', 'fix', 'chore', 'docs', 'refactor', 'test', 'perf', 'style', 'ci', 'build']);
+function deriveTitleFromBranch(branch) {
+    if (!branch || branch === 'unknown' || branch === 'HEAD' || /^(main|master|develop)$/i.test(branch))
+        return null;
+    // `fix/cost-tracker` → `fix: cost tracker`. `feature/foo` → `feat: foo`.
+    const slashIdx = branch.indexOf('/');
+    if (slashIdx > 0) {
+        const rawPrefix = branch.slice(0, slashIdx).toLowerCase();
+        const rest = branch.slice(slashIdx + 1);
+        const prefix = rawPrefix === 'feature' || rawPrefix === 'features' ? 'feat'
+            : rawPrefix === 'bugfix' || rawPrefix === 'hotfix' ? 'fix'
+                : CONVENTIONAL_TYPES.has(rawPrefix) ? rawPrefix
+                    : null;
+        if (prefix) {
+            // Include `/` in the character class — multi-segment branches like
+            // `fix/auth/session-leak` should normalize to `fix: auth session leak`,
+            // not `fix: auth/session-leak`.
+            const cleaned = rest.replace(/[-_/]+/g, ' ').trim();
+            return cleaned ? `${prefix}: ${cleaned}` : null;
+        }
+        // Unknown prefix that contains a slash — treat the segment after the
+        // first slash as the descriptive part and default the conventional
+        // type to `chore:`. Example: `autopilot-test/validate-weights` →
+        // `chore: validate weights` rather than the prefix-less
+        // `autopilot test validate weights` (which fails commitlint and looks
+        // half-finished in PR titles).
+        const cleanedRest = rest.replace(/[-_/]+/g, ' ').trim();
+        if (cleanedRest)
+            return `chore: ${cleanedRest}`;
+    }
+    // No slash — return cleaned branch name, or null when it normalizes empty
+    // (e.g. `_`, `---`). The caller chains via `??`, which only short-circuits
+    // on null/undefined; an empty string would skip the rest of the fallback.
+    const cleaned = branch.replace(/[-_/]+/g, ' ').trim();
+    return cleaned || null;
+}
+function deriveTitleFromSummary(summaryLine) {
+    if (!summaryLine)
+        return null;
+    // Strip leading bullet/dash and clip to 70 chars (67 + "...").
+    const cleaned = summaryLine.replace(/^[\s\-*•]+/, '').trim();
+    if (!cleaned)
+        return null;
+    return cleaned.length > 70 ? `${cleaned.slice(0, 67)}...` : cleaned;
+}
 export async function runPrDesc(options) {
     const branchName = options._branchName ?? getBranchName();
     const diff = options._gitDiff ?? getGitDiff(options.base);
@@ -34,7 +88,12 @@ export async function runPrDesc(options) {
     const prompt = buildPrompt(branchName, truncateDiff(diff), summarizeFindings(findings));
     const engine = options._reviewEngine ?? await resolveEngine();
     const { rawOutput } = await engine.review({ content: prompt, kind: 'pr-diff' });
-    const { title, body } = parseDescription(rawOutput);
+    // Extract the first bullet line anywhere in the model's output (usually the
+    // first Summary bullet) as a title fallback when no `Title: ...` was emitted.
+    const firstSummaryLine = rawOutput.split('\n')
+        .map(l => l.trim())
+        .find(l => /^[-*•]\s/.test(l));
+    const { title, body } = parseDescription(rawOutput, { branchName, firstSummaryLine });
     const formatted = `Title: ${title}\n\n---\n${body}`;
     if (options.output) {
         fs.writeFileSync(options.output, formatted, 'utf8');
@@ -81,7 +140,13 @@ function loadCachedFindings() {
     }
 }
 function buildPrompt(branch, diff, findingsSummary) {
-    return `Generate a pull request description with three sections:
+    return `Generate a pull request title and description.
+Output format (must follow exactly):
+Title: <one-line conventional-commit title — e.g. "fix(auth): close session leak on logout" or "feat: add SSO for enterprise tenants". Do NOT use a generic placeholder like "chore: update". Derive from the actual diff.>
+---
 ## Summary
 <3-5 bullet points describing what changed and why>

package/dist/src/cli/scan.js CHANGED Viewed

@@ -133,6 +133,19 @@ export async function runScan(options = {}) {
         cwd,
         gitSummary: focusHint,
     });
+    // Single-file scan fallback — when only one file was scanned, the LLM
+    // doesn't always repeat the file path in its findings (it knows the
+    // context). Backfill `<unspecified>` with the actual scan target so the
+    // `fix` command can match findings to real paths. Without this, a
+    // `claude-autopilot scan src/foo.ts` produces findings with file
+    // `<unspecified>` and `fix --severity all` reports "no fixable findings".
+    if (relFiles.length === 1) {
+        const onlyFile = relFiles[0];
+        for (const f of result.findings) {
+            if (!f.file || f.file === '<unspecified>')
+                f.file = onlyFile;
+        }
+    }
     // Apply ignore rules
     const ignoreRules = [...loadIgnoreRules(cwd), ...parseConfigIgnore(config.ignore)];
     const findings = applyIgnoreRules(result.findings, ignoreRules);

package/dist/src/cli/setup.js CHANGED Viewed

@@ -16,6 +16,7 @@ const PRESET_LABELS = {
     't3': 'T3 Stack (Next.js + tRPC + Prisma)',
     'rails-postgres': 'Ruby on Rails + PostgreSQL',
     'python-fastapi': 'Python FastAPI',
+    'python': 'Python',
     'go': 'Go + PostgreSQL',
     'generic': 'Generic (no stack-specific assumptions)',
 };
@@ -111,7 +112,15 @@ export async function runSetup(options = {}) {
         throw new Error(`Preset config not found for: ${detection.preset}. Looked in:\n  ${presetSearchPaths(detection.preset, cwd).join('\n  ')}`);
     }
     let presetContent = await fsAsync.readFile(presetConfigPath, 'utf8');
-    presetContent = presetContent.trimEnd() + `\ntestCommand: "${detection.testCommand}"\n`;
+    // Only append testCommand if the preset doesn't already declare one — several
+    // presets (go, python, python-fastapi, rails-postgres) ship with their own
+    // testCommand line. Unconditionally appending produced duplicate YAML keys
+    // ("testCommand" twice in the same map), which yaml parsers reject. After
+    // 5.0.5 that broke `setup` on Python repos: every command after setup
+    // hard-failed until the user manually edited the file.
+    if (!/^testCommand\s*:/m.test(presetContent)) {
+        presetContent = presetContent.trimEnd() + `\ntestCommand: "${detection.testCommand}"\n`;
+    }
     // Apply profile overlay if specified
     if (options.profile) {
         const profile = PROFILES[options.profile];

package/dist/src/core/pipeline/run.js CHANGED Viewed

@@ -11,7 +11,12 @@ export async function runGuardrail(input) {
     // reviewer flagged — the bugs the LLM is best at often ride alongside one a static
     // rule already caught.
     const runReviewOnStaticFail = pipelineCfg.runReviewOnStaticFail !== false;
-    const runReviewOnTestFail = pipelineCfg.runReviewOnTestFail === true;
+    // Default true (5.0.5+): when an auto-detected test command fails or is missing
+    // (e.g. `npm test` on a Python repo with no test script), skipping review made
+    // first-runs return zero useful output. Review on failing code is exactly when
+    // it's most useful. Users who explicitly set `runReviewOnTestFail: false` keep
+    // the strict behavior.
+    const runReviewOnTestFail = pipelineCfg.runReviewOnTestFail !== false;
     // Static-rules phase — tests always run afterward, regardless of status. The
     // runReviewOnStaticFail flag only gates the LLM review phase (matching its name);
     // skipping tests on a static-fail would be surprising and asymmetric with

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@delegance/claude-autopilot",
-  "version": "5.0.4",
+  "version": "5.0.6",
   "type": "module",
   "description": "Autonomous development pipeline for Claude Code: brainstorm → spec → plan → implement → migrate → validate → PR → review → merge. Multi-model, local-first, every phase a skill you can intervene in.",
   "keywords": [

package/presets/generic/guardrail.config.yaml CHANGED Viewed

@@ -32,7 +32,12 @@ chunking:
   perFileMaxTokens: 32000
 pipeline:
   runReviewOnStaticFail: true
-  runReviewOnTestFail: false
+  # Default true so that failed/missing test commands don't silently kill the
+  # LLM review phase. Most common case: user runs `setup` on a fresh repo,
+  # auto-detected test command is `npm test` but no test script exists →
+  # exit 1 → review skipped → user sees nothing useful. Review on failing
+  # code is the path where review is *most* valuable.
+  runReviewOnTestFail: true
 # Optional: multi-model council. Uncomment + set ANTHROPIC_API_KEY and/or
 # OPENAI_API_KEY. Models are dispatched in parallel; the synthesizer reads
 # their responses and writes the consensus. Both APIs supported (chat-completions

package/presets/python/guardrail.config.yaml ADDED Viewed

@@ -0,0 +1,24 @@
+configVersion: 1
+reviewEngine: { adapter: auto }
+vcsHost: { adapter: github }
+reviewBot: { adapter: cursor }
+protectedPaths:
+  - "**/auth/**"
+  - "**/security.py"
+  - "**/config.py"
+  - "**/settings.py"
+  - "**/secrets/**"
+staticRules:
+  - hardcoded-secrets
+testCommand: pytest -q
+thresholds:
+  bugbotAutoFix: 85
+  bugbotProposePatch: 60
+  maxValidateRetries: 3
+reviewStrategy: auto
+chunking:
+  smallTierMaxTokens: 8000
+  perFileMaxTokens: 32000
+pipeline:
+  runReviewOnStaticFail: true
+  runReviewOnTestFail: true

package/presets/python/stack.md ADDED Viewed

@@ -0,0 +1,30 @@
+A Python application (general — not framework-specific). Common patterns:
+- Python 3.10+, virtualenv or uv/poetry for deps
+- pytest for tests, ruff or flake8 for lint, mypy for types
+- pyproject.toml for project config (PEP 621) or requirements.txt
+- asyncio + aiohttp / httpx for async I/O
+- Pydantic v2 or dataclasses for data models
+- python-dotenv or os.environ for config (no Pydantic Settings assumed)
+Conventions to encourage:
+- Type hints on public functions
+- f-strings over .format() / %
+- pathlib over os.path
+- contextmanagers for resources
+- explicit exception types, no bare `except:`
+Things that should flag CRITICAL:
+- f-string SQL: f"SELECT * FROM users WHERE id = {user_id}"
+- Bare `except:` or `except Exception:` swallowing errors
+- Hardcoded secrets / API keys in source files
+- subprocess.run with shell=True on user-controlled input
+- pickle.load on untrusted data
+- eval / exec on user input
+- Synchronous blocking calls inside async def (requests, time.sleep, open)
+Things that should flag WARNING:
+- Mutable default arguments (def f(x=[])  → bug)
+- Missing type hints on public functions
+- Broad exception catches that hide root causes
+- print() statements left in non-CLI code (use logging)
+- TODO / FIXME comments without owners or context