npm - @fermindi/pwn-cli - Versions diffs - 0.7.0 → 0.9.0 - Mend

@fermindi/pwn-cli 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/cli/backlog.js +16 -3
package/cli/batch.js +5 -0
package/package.json +1 -1
package/src/services/batch-runner.js +230 -60
package/src/services/batch-service.js +75 -2
package/templates/workspace/.ai/README.md +20 -0
package/templates/workspace/.ai/batch/prompt.md +36 -0

package/cli/backlog.js CHANGED Viewed

@@ -19,9 +19,17 @@ export default async function backlogCommand(args = []) {
   }
   const cwd = process.cwd();
-  const stories = parsePrdTasks(cwd);
+  let stories = parsePrdTasks(cwd);
   const taskFiles = listTaskFiles(cwd);
+  // Parse --filter
+  const filterIdx = args.findIndex(a => a === '--filter');
+  const filter = filterIdx !== -1 ? args[filterIdx + 1] : null;
+  if (filter) {
+    const re = new RegExp(filter, 'i');
+    stories = stories.filter(s => re.test(s.id) || re.test(s.title));
+  }
   // Read project name from prd.json
   let project = 'project';
   const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
@@ -33,11 +41,12 @@ export default async function backlogCommand(args = []) {
   }
   const noInteractive = args.includes('--no-interactive') || !process.stdout.isTTY;
+  const label = filter ? `${project} (filter: ${filter})` : project;
   if (noInteractive) {
-    printPlain({ project, stories, taskFiles });
+    printPlain({ project: label, stories, taskFiles });
   } else {
-    await startViewer({ project, stories, taskFiles });
+    await startViewer({ project: label, stories, taskFiles });
   }
 }
@@ -45,8 +54,12 @@ function showHelp() {
   console.log('📋 PWN Backlog Viewer\n');
   console.log('Usage: pwn backlog [options]\n');
   console.log('Options:');
+  console.log('  --filter <pattern>  Filter stories by ID or title (regex, case-insensitive)');
   console.log('  --no-interactive    Plain text output (for CI/piping)');
   console.log('  --help, -h          Show this help\n');
+  console.log('Examples:');
+  console.log('  pwn backlog --filter SEC        # Only SEC-* stories');
+  console.log('  pwn backlog --filter "API|AUTH"  # Stories matching API or AUTH\n');
   console.log('Keybindings (list view):');
   console.log('  ↑/k        Move up');
   console.log('  ↓/j        Move down');

package/cli/batch.js CHANGED Viewed

@@ -103,6 +103,8 @@ function parseRunOptions(args) {
       options.dryRun = true;
     } else if (arg === '--phase') {
       options.phase = args[++i];
+    } else if (arg === '--filter') {
+      options.filter = args[++i];
     } else if (arg === '--no-plan') {
       options.noPlan = true;
     } else if (arg === '--rate-limit-wait') {
@@ -410,6 +412,7 @@ function showHelp() {
   console.log('  --continue             Continue on errors');
   console.log('  --no-commit            Skip auto-commit');
   console.log('  --no-branch            Skip branch creation');
+  console.log('  --filter <pattern>     Filter tasks by ID or title (regex, case-insensitive)');
   console.log('  --no-plan              Skip planning phase (use fixed 10min timeout)');
   console.log('  --rate-limit-wait <s>  Seconds to wait on rate limit (default: 1800)');
   console.log('  --help, -h             Show this help\n');
@@ -418,6 +421,8 @@ function showHelp() {
   console.log('  pwn batch run                # Run autonomous batch loop');
   console.log('  pwn batch run --dry-run      # Preview next story');
   console.log('  pwn batch run --phase 3      # Run specific phase');
+  console.log('  pwn batch run --filter ORCH   # Run only ORCH-* tasks');
+  console.log('  pwn batch run -f "UNI|DATA"  # Run UNI or DATA tasks');
   console.log('  pwn batch run --no-plan      # Skip planning, fixed timeout');
   console.log('  pwn batch tasks              # List all task files');
   console.log('  pwn batch tasks --failed     # Show only failed tasks');

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fermindi/pwn-cli",
-  "version": "0.7.0",
+  "version": "0.9.0",
   "description": "Professional AI Workspace - Inject structured memory and automation into any project for AI-powered development",
   "type": "module",
   "bin": {

package/src/services/batch-runner.js CHANGED Viewed

@@ -3,7 +3,7 @@
  *
  * Two-phase execution model:
  * Phase 1: Planning — Claude estimates time and creates action plan
- * Phase 2: Execution — Dynamic timeout based on estimate + 5%
+ * Phase 2: Execution — Dynamic timeout based on the complexity-tier estimate + 5%
  *
  * Task files (.ai/batch/tasks/{US-ID}.json) track status per story.
  * Completed files are cleaned up at the end; failed are kept for review.
@@ -12,7 +12,6 @@
 import { spawn } from 'child_process';
 import { existsSync, readFileSync, writeFileSync, mkdirSync, createWriteStream, appendFileSync, unlinkSync, readdirSync } from 'fs';
 import { join } from 'path';
-import { tmpdir } from 'os';
 import ora from 'ora';
 import chalk from 'chalk';
 import {
@@ -23,13 +22,26 @@ import {
   loadConfig,
   commitTask,
   updateBatchState,
-  clearBatchState
+  clearBatchState,
+  getCurrentBranch,
+  createTaskBranch,
+  checkoutBranch,
+  createBatchBranch,
+  mergeBranch
 } from './batch-service.js';
 // --- Constants ---
-const RUNNER_VERSION = '2.0';
-const DEFAULT_TIMEOUT_MS = 600_000;  // 10 minutes fallback
-const MIN_TIMEOUT_MS = 120_000;      // 2 minutes minimum (claude init takes ~30s)
+const RUNNER_VERSION = '2.1';
+const DEFAULT_TIMEOUT_MS = 900_000;  // 15 minutes fallback
+const MIN_TIMEOUT_MS = 300_000;      // 5 minutes minimum (claude init ~30-40s + real work)
+// Complexity → timeout mapping (based on real-world execution data)
+// AI is bad at estimating seconds but decent at classifying complexity
+const COMPLEXITY_TIMEOUT = {
+  low:    { seconds: 300,  label: '5m' },    // config change, small fix
+  medium: { seconds: 600,  label: '10m' },   // new function, 1-3 files
+  high:   { seconds: 900,  label: '15m' },   // new module, multi-file refactor
+};
 const PLAN_TIMEOUT_MS = 120_000;     // 2 minutes for planning phase (claude init ~30s)
 const DEFAULT_RATE_LIMIT_WAIT = 1800; // 30 minutes (seconds)
 const MAX_RETRIES = 2;
@@ -99,7 +111,7 @@ async function waitForRateLimit(waitSeconds, attempt) {
 // --- Planning Phase ---
-function buildPlanPrompt(task, cwd) {
+function buildPlanPrompt(task, cwd, replanContext = null) {
   const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
   const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
   const story = prd.stories.find(s => s.id === task.id);
@@ -107,42 +119,49 @@ function buildPlanPrompt(task, cwd) {
   const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
+  let replanSection = '';
+  if (replanContext) {
+    replanSection = `
+IMPORTANT — REPLANNING: A previous attempt FAILED.
+- Previous complexity: ${replanContext.previousComplexity}
+- Failure reason: ${replanContext.failureReason}
+Re-evaluate the complexity. If it timed out, it's likely MORE complex than you initially thought.`;
+  }
   return `You are analyzing task ${task.id}: ${task.title}
 Acceptance criteria:
 ${acList}
 Notes: ${story.notes || 'None'}
+${replanSection}
 Analyze this task and respond with ONLY a JSON object (no markdown, no code fences):
 {
-  "estimated_time_seconds": <number>,
   "plan": ["step 1", "step 2", ...],
   "complexity": "low|medium|high",
   "recommended_model": "opus|sonnet|haiku",
   "files_likely_affected": ["path1", "path2"]
 }
-Base your estimate on:
-- low complexity (config change, small fix): 30-120s
-- medium complexity (new function, simple feature): 120-300s
-- high complexity (new module, multi-file feature): 300-600s
+Classify complexity:
+- "low": config change, small fix, single file tweak
+- "medium": new function, simple feature, 1-3 files
+- "high": new module, multi-file refactor, architecture change, 4+ files
 Recommend a model for execution:
-- "haiku": trivial (config change, typo, single-line fix) — ~30s
-- "sonnet": low-medium complexity (new function, simple feature, 1-3 files) — 30-300s
-- "opus": high complexity (new module, multi-file refactor, architecture) — 300-600s`;
+- "haiku": trivial (config change, typo, single-line fix)
+- "sonnet": low-medium complexity (new function, simple feature, 1-3 files)
+- "opus": high complexity (new module, multi-file refactor, architecture)`;
 }
-async function planTask(task, cwd) {
-  const prompt = buildPlanPrompt(task, cwd);
+async function planTask(task, cwd, replanContext = null) {
+  const prompt = buildPlanPrompt(task, cwd, replanContext);
   if (!prompt) return null;
-  const promptFile = join(tmpdir(), `pwn-plan-${Date.now()}.md`);
-  writeFileSync(promptFile, prompt);
   const env = { ...process.env };
-  delete env.CLAUDECODE;
+  // Remove all Claude session markers to avoid "nested session" detection
+  Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
   return new Promise((resolve) => {
     let output = '';
@@ -150,13 +169,17 @@ async function planTask(task, cwd) {
     // Planning uses --print WITHOUT --dangerously-skip-permissions (read-only)
     const child = spawn('bash', [
       '-c',
-      `claude --model opus --print -p "$(cat '${promptFile}')"`,
+      `claude --model opus --print -p "$(cat)"`,
     ], {
       cwd,
-      stdio: ['ignore', 'pipe', 'pipe'],
+      stdio: ['pipe', 'pipe', 'pipe'],
       env,
     });
+    // Pass prompt via stdin
+    child.stdin.write(prompt);
+    child.stdin.end();
     const timeoutId = setTimeout(() => {
       child.kill('SIGTERM');
     }, PLAN_TIMEOUT_MS);
@@ -166,7 +189,6 @@ async function planTask(task, cwd) {
     child.on('close', (code, signal) => {
       clearTimeout(timeoutId);
-      try { unlinkSync(promptFile); } catch {}
       if (signal) {
         console.log(chalk.dim(`    Planning killed by ${signal} (timeout=${signal === 'SIGTERM' ? 'likely' : 'no'})`));
@@ -195,7 +217,6 @@ async function planTask(task, cwd) {
     child.on('error', (err) => {
       clearTimeout(timeoutId);
-      try { unlinkSync(promptFile); } catch {}
       console.log(chalk.dim(`    Planning spawn error: ${err.message}`));
       resolve(null);
     });
@@ -204,8 +225,7 @@ async function planTask(task, cwd) {
 function computeTimeout(estimatedSeconds) {
   if (!estimatedSeconds || estimatedSeconds <= 0) return DEFAULT_TIMEOUT_MS;
-  const withMargin = Math.ceil(estimatedSeconds * 1.05) * 1000;
-  return Math.max(withMargin, MIN_TIMEOUT_MS);
+  return Math.max(Math.ceil(estimatedSeconds * 1.05) * 1000, MIN_TIMEOUT_MS);
 }
 /**
@@ -235,23 +255,45 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
   const stories = parsePrdTasks(cwd);
   const totalStories = stories.length;
   const doneAtStart = stories.filter(s => s.passes).length;
-  const maxIterations = options.maxIterations || 20;
   const phaseFilter = options.phase ? `Phase ${options.phase}` : undefined;
+  const taskFilter = options.filter || null;
+  // Count remaining eligible stories (respecting filters)
+  const doneIds = stories.filter(s => s.passes).map(s => s.id);
+  const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
+  const eligibleCount = stories.filter(s =>
+    !s.passes &&
+    s.dependencies.every(dep => doneIds.includes(dep)) &&
+    (!phaseFilter || s.phase === phaseFilter) &&
+    (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
+  ).length;
+  const maxIterations = options.maxIterations || eligibleCount;
   // --- Dry run ---
   if (options.dryRun) {
-    return dryRunPreview(cwd, phaseFilter, maxIterations);
+    return dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter);
   }
   // --- Print header ---
-  printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd);
+  printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd, taskFilter);
   // NO custom SIGINT handler — Ctrl+C uses default Node.js behavior (kills process group)
+  // --- Save original branch for isolation ---
+  const originalBranch = await getCurrentBranch(cwd);
+  // --- Create batch branch ---
+  const batchBranch = await createBatchBranch(cwd);
+  console.log(chalk.blue(`  Batch branch: ${chalk.bold(batchBranch)}`));
   // --- Main loop ---
   let iteration = 0;
   let noProgressCount = 0;
   let storiesCompleted = 0;
+  let mergedCount = 0;
+  const branchesCreated = [];
+  const mergedBranches = [];
+  const unmergedBranches = [];
   const batchStart = Date.now();
   updateBatchState({
@@ -264,7 +306,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
   while (iteration < maxIterations) {
     iteration++;
-    const task = selectNextTask(cwd, { phase: phaseFilter });
+    const task = selectNextTask(cwd, { phase: phaseFilter, filter: taskFilter });
     if (!task) {
       console.log(chalk.green('\nAll eligible stories completed!'));
       break;
@@ -282,8 +324,9 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
     if (!noPlan) {
       const existing = loadTaskFile(task.id, cwd);
-      if (existing && existing.status === 'planned' && existing.complexity !== 'unknown') {
-        // Reuse previous plan
+      const needsReplan = existing && existing.last_failure_type && existing.status !== 'completed';
+      if (existing && existing.status === 'planned' && existing.complexity !== 'unknown' && !needsReplan) {
+        // Reuse previous plan (only if it hasn't failed before)
         taskFile = existing;
         taskTimeoutMs = computeTimeout(existing.estimated_time_seconds);
         console.log(chalk.dim(`  Phase 1: Reusing plan for ${task.id} (${existing.complexity}, ~${formatDuration(existing.estimated_time_seconds)}, model: ${existing.recommended_model || 'sonnet'})`));
@@ -294,9 +337,11 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
         const planResult = await planTask(task, cwd);
         if (planResult) {
-          const estimatedSeconds = planResult.estimated_time_seconds || 600;
-          const timeoutSeconds = Math.ceil(Math.max(estimatedSeconds * 1.05, MIN_TIMEOUT_MS / 1000));
-          taskTimeoutMs = computeTimeout(estimatedSeconds);
+          const complexity = planResult.complexity || 'medium';
+          const tier = COMPLEXITY_TIMEOUT[complexity] || COMPLEXITY_TIMEOUT.medium;
+          const estimatedSeconds = tier.seconds;
+          const timeoutSeconds = Math.ceil(estimatedSeconds * 1.05);
+          taskTimeoutMs = timeoutSeconds * 1000;
           const recommendedModel = planResult.recommended_model || 'sonnet';
           taskFile = {
@@ -306,7 +351,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
             estimated_time_seconds: estimatedSeconds,
             timeout_seconds: timeoutSeconds,
             plan: planResult.plan || [],
-            complexity: planResult.complexity || 'medium',
+            complexity,
             recommended_model: recommendedModel,
             files_likely_affected: planResult.files_likely_affected || [],
             created_at: new Date().toISOString(),
@@ -316,7 +361,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
           saveTaskFile(taskFile, cwd);
           planSpinner.succeed(chalk.green(
-            `Planned: ${planResult.complexity} complexity, ~${formatDuration(estimatedSeconds)} estimated, timeout ${formatDuration(timeoutSeconds)}, model: ${recommendedModel}`
+            `Planned: ${complexity} complexity, timeout ${tier.label}, model: ${recommendedModel}`
           ));
         } else {
           // Fallback when planning fails
@@ -343,6 +388,23 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
       }
     }
+    // --- Branch isolation: create/checkout task branch ---
+    let taskBranch = null;
+    try {
+      taskBranch = await createTaskBranch(task.id, cwd);
+      branchesCreated.push(taskBranch);
+      console.log(chalk.blue(`  Branch: ${chalk.bold(taskBranch)}`));
+      // Track branch in task file
+      if (taskFile) {
+        taskFile.branch = taskBranch;
+        saveTaskFile(taskFile, cwd);
+      }
+    } catch (err) {
+      console.log(chalk.red(`  Failed to create branch feat/${task.id}: ${err.message}`));
+      console.log(chalk.yellow('  Continuing on current branch...'));
+    }
     // --- Phase 2: Execution ---
     const phaseLabel = noPlan ? '' : 'Phase 2';
     console.log(chalk.blue(`  ${noPlan ? 'Executing' : 'Phase 2: Executing'} ${task.id}...`));
@@ -358,6 +420,10 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
       }
       const prompt = buildPrompt(task.id, cwd, prdPath, promptPath, errorContext);
+      if (!prompt) {
+        console.log(chalk.red(`  Cannot build prompt for ${task.id} — skipping`));
+        break;
+      }
       const logFile = join(logDir, `${task.id}_${timestamp()}.log`);
       const estimatedSeconds = taskFile?.estimated_time_seconds || null;
@@ -367,8 +433,11 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
       // Killed by signal (user did kill or Ctrl+C) — don't retry, exit
       if (result.signal) {
         console.log(chalk.yellow(`  Killed by ${result.signal}`));
+        if (taskBranch) {
+          try { await checkoutBranch(batchBranch, cwd, { force: true }); } catch {}
+        }
         clearBatchState(cwd);
-        printSummary(cwd, iteration, storiesCompleted, batchStart);
+        printSummary(cwd, iteration, storiesCompleted, batchStart, branchesCreated, batchBranch, originalBranch, mergedCount, unmergedBranches);
         return;
       }
@@ -380,8 +449,41 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
           await waitForRateLimit(rateLimitWait, rateLimitAttempts);
           continue;
         }
-        console.log(chalk.yellow(`  Timed out after ${Math.round(taskTimeoutMs / 1000)}s`));
-        errorContext = 'Session timed out. Simplify the implementation or focus on core acceptance criteria.';
+        const prevTimeout = Math.round(taskTimeoutMs / 1000);
+        console.log(chalk.yellow(`  Timed out after ${prevTimeout}s`));
+        // --- Replan on timeout: bump complexity tier ---
+        if (taskFile) {
+          const prevComplexity = taskFile.complexity;
+          const prevEstimate = taskFile.estimated_time_seconds;
+          // Escalate complexity: low → medium → high, high stays high but doubles
+          const escalation = { low: 'medium', medium: 'high' };
+          const newComplexity = escalation[prevComplexity] || 'high';
+          const tier = COMPLEXITY_TIMEOUT[newComplexity] || COMPLEXITY_TIMEOUT.high;
+          // If already high, double the previous timeout
+          const newEstimate = prevComplexity === 'high'
+            ? prevEstimate * 2
+            : tier.seconds;
+          const newTimeout = Math.ceil(newEstimate * 1.05);
+          taskTimeoutMs = newTimeout * 1000;
+          taskFile.estimated_time_seconds = newEstimate;
+          taskFile.timeout_seconds = newTimeout;
+          taskFile.complexity = newComplexity;
+          taskFile.recommended_model = 'opus'; // upgrade model on timeout
+          taskFile.replanned_at = new Date().toISOString();
+          taskFile.replan_reason = `timeout after ${prevTimeout}s (${prevComplexity} → ${newComplexity})`;
+          saveTaskFile(taskFile, cwd);
+          console.log(chalk.blue(
+            `  Escalated: ${prevComplexity} → ${newComplexity}, timeout ${formatDuration(newTimeout)}, model: opus`
+          ));
+        }
+        errorContext = `Session timed out after ${prevTimeout}s. The task was replanned with more time. Focus on core acceptance criteria first, then iterate.`;
         retry++;
         continue;
       }
@@ -397,6 +499,14 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
         }
         console.log(chalk.yellow(`  Claude exited with code ${result.exitCode}`));
         errorContext = `Claude session failed with exit code ${result.exitCode}.`;
+        // Save error output snippet to task file for debugging
+        if (taskFile) {
+          taskFile.last_error_output = (result.output || '').slice(-2000);
+          taskFile.last_failure_type = 'crash';
+          saveTaskFile(taskFile, cwd);
+        }
         retry++;
         continue;
       }
@@ -430,16 +540,40 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
           current_task: null,
           last_completed_at: new Date().toISOString()
         }, cwd);
+        // Merge successful task branch into batch branch
+        if (taskBranch) {
+          try {
+            await checkoutBranch(batchBranch, cwd);
+            await mergeBranch(taskBranch, cwd);
+            mergedCount++;
+            mergedBranches.push(taskBranch);
+            console.log(chalk.green(`  Merged ${taskBranch} → ${batchBranch}`));
+          } catch (err) {
+            console.log(chalk.red(`  Merge failed: ${err.message}`));
+            console.log(chalk.yellow(`  Branch ${taskBranch} available for manual merge`));
+            unmergedBranches.push(taskBranch);
+          }
+        }
       } else {
         console.log(chalk.red(`  Quality gates FAILED`));
         errorContext = gatesResult.errorOutput;
+        // Save gate failure details to task file
+        if (taskFile) {
+          taskFile.last_error_output = (gatesResult.errorOutput || '').slice(-2000);
+          taskFile.last_failure_type = 'quality_gate';
+          saveTaskFile(taskFile, cwd);
+        }
         retry++;
       }
     }
     if (!storyDone) {
-      console.log(chalk.red(`  FAILED: ${task.id} after ${MAX_RETRIES} retries`));
-      appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries. Skipping.`);
+      const failureType = taskFile?.last_failure_type || 'unknown';
+      console.log(chalk.red(`  FAILED: ${task.id} after ${MAX_RETRIES} retries (${failureType})`));
+      appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries (${failureType}). Skipping.`);
       // Update task file with failure
       if (taskFile) {
@@ -452,6 +586,19 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
       noProgressCount++;
     }
+    // --- Return to batch branch (force: discard dirty state from failed tasks) ---
+    if (taskBranch) {
+      try {
+        await checkoutBranch(batchBranch, cwd, { force: true });
+        console.log(chalk.dim(`  Returned to branch: ${batchBranch}`));
+      } catch (err) {
+        console.log(chalk.red(`  Warning: failed to return to ${batchBranch}: ${err.message}`));
+      }
+      if (!storyDone) {
+        unmergedBranches.push(taskBranch);
+      }
+    }
     if (noProgressCount >= MAX_NO_PROGRESS) {
       console.log(chalk.red(`\nCIRCUIT BREAKER: ${MAX_NO_PROGRESS} consecutive failures. Stopping.`));
       break;
@@ -459,7 +606,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
   }
   clearBatchState(cwd);
-  printSummary(cwd, iteration, storiesCompleted, batchStart);
+  printSummary(cwd, iteration, storiesCompleted, batchStart, branchesCreated, batchBranch, originalBranch, mergedCount, unmergedBranches);
 }
 /**
@@ -474,23 +621,24 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
     const logStream = createWriteStream(logFile);
     const startTime = Date.now();
-    // Write prompt to temp file
-    const promptFile = join(tmpdir(), `pwn-prompt-${Date.now()}.md`);
-    writeFileSync(promptFile, prompt);
     const env = { ...process.env };
-    delete env.CLAUDECODE;
+    // Remove all Claude session markers to avoid "nested session" detection
+    Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
     const modelFlag = model ? `--model ${model} ` : '';
     const child = spawn('bash', [
       '-c',
-      `claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat '${promptFile}')"`,
+      `claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat)"`,
     ], {
       cwd,
-      stdio: ['ignore', 'pipe', 'pipe'],
+      stdio: ['pipe', 'pipe', 'pipe'],
       env,
     });
+    // Pass prompt via stdin to avoid shell escaping issues and file race conditions
+    child.stdin.write(prompt);
+    child.stdin.end();
     const modelLabel = model ? chalk.magenta(model) : chalk.dim('default');
     console.log(chalk.dim(`  Log: tail -f ${logFile}`));
     console.log(chalk.dim(`  PID: ${child.pid}  |  Model: `) + modelLabel + chalk.dim(`  |  Prompt: ${prompt.length} chars  |  Timeout: ${formatDuration(Math.round(timeoutMs / 1000))}`));
@@ -532,13 +680,14 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
       clearTimeout(timeoutId);
       clearInterval(timer);
       logStream.end();
-      try { unlinkSync(promptFile); } catch {}
       // Clear spinner line
       process.stdout.write('\r\x1b[K');
       const elapsed = Math.round((Date.now() - startTime) / 1000);
-      const timedOut = signal === 'SIGTERM' && elapsed >= Math.floor(timeoutMs / 1000) - 1;
+      // Timeout detection: SIGTERM signal, OR exit code 143 (128+SIGTERM) near the timeout boundary
+      const nearTimeout = elapsed >= Math.floor(timeoutMs / 1000) - 2;
+      const timedOut = nearTimeout && (signal === 'SIGTERM' || code === 143);
       if (signal && !timedOut) {
         console.log(chalk.dim(`  Claude killed (${signal}) after ${formatDuration(elapsed)}`));
@@ -554,7 +703,6 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
       clearTimeout(timeoutId);
       clearInterval(timer);
       logStream.end();
-      try { unlinkSync(promptFile); } catch {}
       process.stdout.write('\r\x1b[K');
       console.log(chalk.red(`  Spawn error: ${err.message}`));
       resolve({ exitCode: 1, output: '', timedOut: false, signal: null });
@@ -599,7 +747,10 @@ async function runGatesWithStatus(cwd) {
 function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
   const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
   const story = prd.stories.find(s => s.id === storyId);
-  if (!story) return '';
+  if (!story) {
+    console.log(chalk.yellow(`  Warning: story ${storyId} not found in prd.json — may have been modified during execution`));
+    return '';
+  }
   const doneIds = prd.stories.filter(s => s.passes).map(s => s.id);
   const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
@@ -632,7 +783,7 @@ function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
 /**
  * Dry run: show formatted preview of eligible tasks.
  */
-function dryRunPreview(cwd, phaseFilter, maxIterations) {
+function dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter = null) {
   const stories = parsePrdTasks(cwd);
   const total = stories.length;
   const done = stories.filter(s => s.passes).length;
@@ -642,12 +793,15 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
   console.log(`  Progress: ${chalk.green(done)}/${total} done, ${chalk.yellow(remaining)} remaining`);
   console.log(`  Max iterations: ${maxIterations}`);
   if (phaseFilter) console.log(`  Phase filter: ${chalk.blue(phaseFilter)}`);
+  if (taskFilter) console.log(`  Task filter:  ${chalk.blue(taskFilter)}`);
   const doneIds = stories.filter(s => s.passes).map(s => s.id);
+  const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
   const eligible = stories.filter(s =>
     !s.passes &&
     s.dependencies.every(dep => doneIds.includes(dep)) &&
-    (!phaseFilter || s.phase === phaseFilter)
+    (!phaseFilter || s.phase === phaseFilter) &&
+    (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
   );
   if (eligible.length === 0) {
@@ -683,7 +837,7 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
   console.log(chalk.dim('\n  Run without --dry-run to execute.\n'));
 }
-function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd()) {
+function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd(), taskFilter = null) {
   const config = loadConfig(cwd);
   const skipGates = config.skip_gates || [];
@@ -694,13 +848,14 @@ function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.
   console.log(`  Progress:       ${chalk.green(done)}/${total} done`);
   console.log(`  Planning:       ${noPlan ? chalk.yellow('disabled') : chalk.green('enabled')}`);
   if (phase) console.log(`  Phase filter:   ${chalk.blue(phase)}`);
+  if (taskFilter) console.log(`  Task filter:    ${chalk.blue(taskFilter)}`);
   if (skipGates.length > 0) {
     console.log(`  ${chalk.yellow('⚠️  Skipping gates (no tooling):')} ${skipGates.join(', ')}`);
   }
   console.log(chalk.dim('─'.repeat(40)));
 }
-function printSummary(cwd, iterations, completed, startTime) {
+function printSummary(cwd, iterations, completed, startTime, branchesCreated = [], batchBranch = null, originalBranch = null, mergedCount = 0, unmergedBranches = []) {
   const stories = parsePrdTasks(cwd);
   const total = stories.length;
   const done = stories.filter(s => s.passes).length;
@@ -737,6 +892,21 @@ function printSummary(cwd, iterations, completed, startTime) {
   if (cleaned > 0 || failedKept > 0) {
     console.log(`  Cleanup:          ${chalk.green(`${cleaned} completed`)} removed, ${failedKept > 0 ? chalk.red(`${failedKept} failed`) : '0 failed'} kept for review`);
   }
+  if (batchBranch) {
+    console.log(`  Batch branch:     ${chalk.cyan(batchBranch)}`);
+    console.log(`  Merged:           ${chalk.green(mergedCount)} tasks merged into batch branch`);
+    if (unmergedBranches.length > 0) {
+      const unique = [...new Set(unmergedBranches)];
+      console.log(`  Unmerged:         ${chalk.yellow(unique.length)} isolated: ${unique.map(b => chalk.yellow(b)).join(', ')}`);
+    }
+  }
+  if (branchesCreated.length > 0) {
+    const unique = [...new Set(branchesCreated)];
+    console.log(`  Branches:         ${unique.map(b => chalk.cyan(b)).join(', ')}`);
+  }
+  if (originalBranch) {
+    console.log(`  Original branch:  ${chalk.green(originalBranch)} (intact)`);
+  }
   console.log(chalk.dim('─'.repeat(40)));
   console.log('');
 }

package/src/services/batch-service.js CHANGED Viewed

@@ -13,6 +13,73 @@ import { getState, updateState, hasWorkspace } from '../core/state.js';
 const execAsync = promisify(exec);
+// --- Git Helpers ---
+/**
+ * Get the current git branch name
+ * @param {string} cwd - Working directory
+ * @returns {Promise<string>} Current branch name
+ */
+export async function getCurrentBranch(cwd = process.cwd()) {
+  const { stdout } = await execAsync('git branch --show-current', { cwd });
+  return stdout.trim();
+}
+/**
+ * Create and checkout a task branch from the current HEAD
+ * If branch already exists, just checkout.
+ * @param {string} taskId - Task ID (e.g. "SEC-007")
+ * @param {string} cwd - Working directory
+ * @returns {Promise<string>} Branch name
+ */
+export async function createTaskBranch(taskId, cwd = process.cwd()) {
+  const branch = `feat/${taskId}`;
+  try {
+    await execAsync(`git rev-parse --verify ${branch}`, { cwd });
+    // Branch exists (rerun) — checkout and reset to current HEAD
+    // so we don't carry stale/dirty state from previous attempt
+    const { stdout } = await execAsync('git rev-parse HEAD', { cwd });
+    const baseRef = stdout.trim();
+    await execAsync(`git checkout ${branch}`, { cwd });
+    await execAsync(`git reset --hard ${baseRef}`, { cwd });
+  } catch {
+    // Branch doesn't exist — create
+    await execAsync(`git checkout -b ${branch}`, { cwd });
+  }
+  return branch;
+}
+/**
+ * Checkout an existing branch
+ * @param {string} branch - Branch name
+ * @param {string} cwd - Working directory
+ */
+export async function checkoutBranch(branch, cwd = process.cwd(), { force = false } = {}) {
+  const flag = force ? ' --force' : '';
+  await execAsync(`git checkout${flag} ${branch}`, { cwd });
+}
+/**
+ * Create a batch branch from the current HEAD
+ * @param {string} cwd - Working directory
+ * @returns {Promise<string>} Branch name (batch/{timestamp})
+ */
+export async function createBatchBranch(cwd = process.cwd()) {
+  const ts = new Date().toISOString().replace(/[:.]/g, '').slice(0, 15);
+  const branch = `batch/${ts}`;
+  await execAsync(`git checkout -b ${branch}`, { cwd });
+  return branch;
+}
+/**
+ * Merge a source branch into the current branch
+ * @param {string} source - Source branch name
+ * @param {string} cwd - Working directory
+ */
+export async function mergeBranch(source, cwd = process.cwd()) {
+  await execAsync(`git merge ${source} --no-edit`, { cwd });
+}
 /**
  * Default batch configuration
  */
@@ -392,9 +459,14 @@ export function convertBacklogToPrd(backlogContent, projectName = 'my-project')
  * @returns {object|null} Selected task or null
  */
 export function selectNextTask(cwd = process.cwd(), options = {}) {
+  const filterRe = options.filter ? new RegExp(options.filter, 'i') : null;
   // First check active tasks for incomplete ones
   const activeTasks = parseActiveTasks(cwd);
-  const pendingActive = activeTasks.filter(t => !t.completed && !t.blockedBy);
+  const pendingActive = activeTasks.filter(t =>
+    !t.completed && !t.blockedBy &&
+    (!filterRe || filterRe.test(t.id) || filterRe.test(t.title))
+  );
   if (pendingActive.length > 0) {
     return pendingActive[0];
@@ -407,7 +479,8 @@ export function selectNextTask(cwd = process.cwd(), options = {}) {
   const eligible = stories.find(s =>
     !s.passes &&
     s.dependencies.every(dep => doneIds.includes(dep)) &&
-    (!options.phase || s.phase === options.phase)
+    (!options.phase || s.phase === options.phase) &&
+    (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
   );
   return eligible || null;

package/templates/workspace/.ai/README.md CHANGED Viewed

@@ -111,6 +111,26 @@ Defines:
 - Commit patterns
 - Completion signals
+### Writing stories for `prd.json`
+Stories run with `--dangerously-skip-permissions` — the agent has full access. Write defensively.
+**Never put these in batch stories:**
+- Destructive git ops (`git filter-repo`, `BFG`, `push --force`, history rewriting)
+- Destructive file ops (`rm -rf`, wiping directories)
+- Database ops (`DROP TABLE`, prod migrations)
+- Secret rotation (revoking keys, rotating credentials)
+- External side effects (sending emails, creating PRs, publishing packages)
+**Rule of thumb**: if a mistake needs human intervention to fix, it's not a batch story.
+**Instead**, ask the agent to **prepare and document** — write the script, the docs, the config — but let a human execute the dangerous part.
+**Always include in `notes`** what the agent must NOT do:
+```json
+"notes": "Do NOT run git-filter-repo. Do NOT modify prd.json."
+```
 ## 🤖 Agents
 ### agent/claude.md

package/templates/workspace/.ai/batch/prompt.md ADDED Viewed

@@ -0,0 +1,36 @@
+You are working on this project autonomously as part of a batch execution run.
+## Project Context
+- Read CLAUDE.md (or .ai/agents/claude.md) for full project instructions and conventions
+- Read .ai/memory/decisions.md for architectural decisions
+- Read .ai/memory/patterns.md for established patterns
+- Read .ai/batch/progress.txt for learnings from previous iterations
+## Current Task
+**{STORY_ID}**: {STORY_TITLE}
+### Acceptance Criteria
+{ACCEPTANCE_CRITERIA}
+### Notes
+{NOTES}
+### Dependencies (already implemented)
+{DEPENDENCIES}
+## Instructions
+1. Explore the codebase to understand existing patterns for similar features
+2. Implement the feature following existing conventions
+3. Write comprehensive tests (see tests/ for patterns)
+4. Run quality gates and fix any failures before committing
+5. Commit with: feat({STORY_ID}): {short description}
+6. Update .ai/tasks/active.md marking this task as done with today's date
+## Important
+- Do NOT push to remote
+- Do NOT modify unrelated files
+- Do NOT edit .ai/tasks/prd.json or batch configuration files
+- Do NOT run destructive operations (git filter-repo, rm -rf, DROP TABLE, force push)
+- Do NOT rotate secrets, revoke keys, or modify credentials — those are human tasks
+- Follow existing patterns exactly
+- If you discover useful patterns, note them for progress.txt