@fermindi/pwn-cli 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/backlog.js CHANGED
@@ -19,9 +19,17 @@ export default async function backlogCommand(args = []) {
19
19
  }
20
20
 
21
21
  const cwd = process.cwd();
22
- const stories = parsePrdTasks(cwd);
22
+ let stories = parsePrdTasks(cwd);
23
23
  const taskFiles = listTaskFiles(cwd);
24
24
 
25
+ // Parse --filter
26
+ const filterIdx = args.findIndex(a => a === '--filter');
27
+ const filter = filterIdx !== -1 ? args[filterIdx + 1] : null;
28
+ if (filter) {
29
+ const re = new RegExp(filter, 'i');
30
+ stories = stories.filter(s => re.test(s.id) || re.test(s.title));
31
+ }
32
+
25
33
  // Read project name from prd.json
26
34
  let project = 'project';
27
35
  const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
@@ -33,11 +41,12 @@ export default async function backlogCommand(args = []) {
33
41
  }
34
42
 
35
43
  const noInteractive = args.includes('--no-interactive') || !process.stdout.isTTY;
44
+ const label = filter ? `${project} (filter: ${filter})` : project;
36
45
 
37
46
  if (noInteractive) {
38
- printPlain({ project, stories, taskFiles });
47
+ printPlain({ project: label, stories, taskFiles });
39
48
  } else {
40
- await startViewer({ project, stories, taskFiles });
49
+ await startViewer({ project: label, stories, taskFiles });
41
50
  }
42
51
  }
43
52
 
@@ -45,8 +54,12 @@ function showHelp() {
45
54
  console.log('📋 PWN Backlog Viewer\n');
46
55
  console.log('Usage: pwn backlog [options]\n');
47
56
  console.log('Options:');
57
+ console.log(' --filter <pattern> Filter stories by ID or title (regex, case-insensitive)');
48
58
  console.log(' --no-interactive Plain text output (for CI/piping)');
49
59
  console.log(' --help, -h Show this help\n');
60
+ console.log('Examples:');
61
+ console.log(' pwn backlog --filter SEC # Only SEC-* stories');
62
+ console.log(' pwn backlog --filter "API|AUTH" # Stories matching API or AUTH\n');
50
63
  console.log('Keybindings (list view):');
51
64
  console.log(' ↑/k Move up');
52
65
  console.log(' ↓/j Move down');
package/cli/batch.js CHANGED
@@ -103,6 +103,8 @@ function parseRunOptions(args) {
103
103
  options.dryRun = true;
104
104
  } else if (arg === '--phase') {
105
105
  options.phase = args[++i];
106
+ } else if (arg === '--filter') {
107
+ options.filter = args[++i];
106
108
  } else if (arg === '--no-plan') {
107
109
  options.noPlan = true;
108
110
  } else if (arg === '--rate-limit-wait') {
@@ -410,6 +412,7 @@ function showHelp() {
410
412
  console.log(' --continue Continue on errors');
411
413
  console.log(' --no-commit Skip auto-commit');
412
414
  console.log(' --no-branch Skip branch creation');
415
+ console.log(' --filter <pattern> Filter tasks by ID or title (regex, case-insensitive)');
413
416
  console.log(' --no-plan Skip planning phase (use fixed 10min timeout)');
414
417
  console.log(' --rate-limit-wait <s> Seconds to wait on rate limit (default: 1800)');
415
418
  console.log(' --help, -h Show this help\n');
@@ -418,6 +421,8 @@ function showHelp() {
418
421
  console.log(' pwn batch run # Run autonomous batch loop');
419
422
  console.log(' pwn batch run --dry-run # Preview next story');
420
423
  console.log(' pwn batch run --phase 3 # Run specific phase');
424
+ console.log(' pwn batch run --filter ORCH # Run only ORCH-* tasks');
425
+ console.log(' pwn batch run -f "UNI|DATA" # Run UNI or DATA tasks');
421
426
  console.log(' pwn batch run --no-plan # Skip planning, fixed timeout');
422
427
  console.log(' pwn batch tasks # List all task files');
423
428
  console.log(' pwn batch tasks --failed # Show only failed tasks');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fermindi/pwn-cli",
3
- "version": "0.7.0",
3
+ "version": "0.9.0",
4
4
  "description": "Professional AI Workspace - Inject structured memory and automation into any project for AI-powered development",
5
5
  "type": "module",
6
6
  "bin": {
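[file header lost in extraction — the hunks from here on are NOT part of package/package.json; they belong to a JavaScript module that exports runBatch and imports from './batch-service.js' (exact path not shown)] CHANGED
@@ -3,7 +3,7 @@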
3
3
  *
4
4
  * Two-phase execution model:
5
5
  * Phase 1: Planning — Claude estimates time and creates action plan
6
- * Phase 2: Execution — Dynamic timeout based on estimate + 5%
6
+ * Phase 2: Execution — Dynamic timeout based on estimate + 20%
7
7
  *
8
8
  * Task files (.ai/batch/tasks/{US-ID}.json) track status per story.
9
9
  * Completed files are cleaned up at the end; failed are kept for review.
@@ -12,7 +12,6 @@
12
12
  import { spawn } from 'child_process';
13
13
  import { existsSync, readFileSync, writeFileSync, mkdirSync, createWriteStream, appendFileSync, unlinkSync, readdirSync } from 'fs';
14
14
  import { join } from 'path';
15
- import { tmpdir } from 'os';
16
15
  import ora from 'ora';
17
16
  import chalk from 'chalk';
18
17
  import {
@@ -23,13 +22,26 @@ import {
23
22
  loadConfig,
24
23
  commitTask,
25
24
  updateBatchState,
26
- clearBatchState
25
+ clearBatchState,
26
+ getCurrentBranch,
27
+ createTaskBranch,
28
+ checkoutBranch,
29
+ createBatchBranch,
30
+ mergeBranch
27
31
  } from './batch-service.js';
28
32
 
29
33
  // --- Constants ---
30
- const RUNNER_VERSION = '2.0';
31
- const DEFAULT_TIMEOUT_MS = 600_000; // 10 minutes fallback
32
- const MIN_TIMEOUT_MS = 120_000; // 2 minutes minimum (claude init takes ~30s)
34
+ const RUNNER_VERSION = '2.1';
35
+ const DEFAULT_TIMEOUT_MS = 900_000; // 15 minutes fallback
36
+ const MIN_TIMEOUT_MS = 300_000; // 5 minutes minimum (claude init ~30-40s + real work)
37
+
38
+ // Complexity → timeout mapping (based on real-world execution data)
39
+ // AI is bad at estimating seconds but decent at classifying complexity
40
+ const COMPLEXITY_TIMEOUT = {
41
+ low: { seconds: 300, label: '5m' }, // config change, small fix
42
+ medium: { seconds: 600, label: '10m' }, // new function, 1-3 files
43
+ high: { seconds: 900, label: '15m' }, // new module, multi-file refactor
44
+ };
33
45
  const PLAN_TIMEOUT_MS = 120_000; // 2 minutes for planning phase (claude init ~30s)
34
46
  const DEFAULT_RATE_LIMIT_WAIT = 1800; // 30 minutes (seconds)
35
47
  const MAX_RETRIES = 2;
@@ -99,7 +111,7 @@ async function waitForRateLimit(waitSeconds, attempt) {
99
111
 
100
112
  // --- Planning Phase ---
101
113
 
102
- function buildPlanPrompt(task, cwd) {
114
+ function buildPlanPrompt(task, cwd, replanContext = null) {
103
115
  const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
104
116
  const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
105
117
  const story = prd.stories.find(s => s.id === task.id);
@@ -107,42 +119,49 @@ function buildPlanPrompt(task, cwd) {
107
119
 
108
120
  const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
109
121
 
122
+ let replanSection = '';
123
+ if (replanContext) {
124
+ replanSection = `
125
+
126
+ IMPORTANT — REPLANNING: A previous attempt FAILED.
127
+ - Previous complexity: ${replanContext.previousComplexity}
128
+ - Failure reason: ${replanContext.failureReason}
129
+ Re-evaluate the complexity. If it timed out, it's likely MORE complex than you initially thought.`;
130
+ }
131
+
110
132
  return `You are analyzing task ${task.id}: ${task.title}
111
133
 
112
134
  Acceptance criteria:
113
135
  ${acList}
114
136
 
115
137
  Notes: ${story.notes || 'None'}
116
-
138
+ ${replanSection}
117
139
  Analyze this task and respond with ONLY a JSON object (no markdown, no code fences):
118
140
  {
119
- "estimated_time_seconds": <number>,
120
141
  "plan": ["step 1", "step 2", ...],
121
142
  "complexity": "low|medium|high",
122
143
  "recommended_model": "opus|sonnet|haiku",
123
144
  "files_likely_affected": ["path1", "path2"]
124
145
  }
125
146
 
126
- Base your estimate on:
127
- - low complexity (config change, small fix): 30-120s
128
- - medium complexity (new function, simple feature): 120-300s
129
- - high complexity (new module, multi-file feature): 300-600s
147
+ Classify complexity:
148
+ - "low": config change, small fix, single file tweak
149
+ - "medium": new function, simple feature, 1-3 files
150
+ - "high": new module, multi-file refactor, architecture change, 4+ files
130
151
 
131
152
  Recommend a model for execution:
132
- - "haiku": trivial (config change, typo, single-line fix) — ~30s
133
- - "sonnet": low-medium complexity (new function, simple feature, 1-3 files) — 30-300s
134
- - "opus": high complexity (new module, multi-file refactor, architecture) — 300-600s`;
153
+ - "haiku": trivial (config change, typo, single-line fix)
154
+ - "sonnet": low-medium complexity (new function, simple feature, 1-3 files)
155
+ - "opus": high complexity (new module, multi-file refactor, architecture)`;
135
156
  }
136
157
 
137
- async function planTask(task, cwd) {
138
- const prompt = buildPlanPrompt(task, cwd);
158
+ async function planTask(task, cwd, replanContext = null) {
159
+ const prompt = buildPlanPrompt(task, cwd, replanContext);
139
160
  if (!prompt) return null;
140
161
 
141
- const promptFile = join(tmpdir(), `pwn-plan-${Date.now()}.md`);
142
- writeFileSync(promptFile, prompt);
143
-
144
162
  const env = { ...process.env };
145
- delete env.CLAUDECODE;
163
+ // Remove all Claude session markers to avoid "nested session" detection
164
+ Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
146
165
 
147
166
  return new Promise((resolve) => {
148
167
  let output = '';
@@ -150,13 +169,17 @@ async function planTask(task, cwd) {
150
169
  // Planning uses --print WITHOUT --dangerously-skip-permissions (read-only)
151
170
  const child = spawn('bash', [
152
171
  '-c',
153
- `claude --model opus --print -p "$(cat '${promptFile}')"`,
172
+ `claude --model opus --print -p "$(cat)"`,
154
173
  ], {
155
174
  cwd,
156
- stdio: ['ignore', 'pipe', 'pipe'],
175
+ stdio: ['pipe', 'pipe', 'pipe'],
157
176
  env,
158
177
  });
159
178
 
179
+ // Pass prompt via stdin
180
+ child.stdin.write(prompt);
181
+ child.stdin.end();
182
+
160
183
  const timeoutId = setTimeout(() => {
161
184
  child.kill('SIGTERM');
162
185
  }, PLAN_TIMEOUT_MS);
@@ -166,7 +189,6 @@ async function planTask(task, cwd) {
166
189
 
167
190
  child.on('close', (code, signal) => {
168
191
  clearTimeout(timeoutId);
169
- try { unlinkSync(promptFile); } catch {}
170
192
 
171
193
  if (signal) {
172
194
  console.log(chalk.dim(` Planning killed by ${signal} (timeout=${signal === 'SIGTERM' ? 'likely' : 'no'})`));
@@ -195,7 +217,6 @@ async function planTask(task, cwd) {
195
217
 
196
218
  child.on('error', (err) => {
197
219
  clearTimeout(timeoutId);
198
- try { unlinkSync(promptFile); } catch {}
199
220
  console.log(chalk.dim(` Planning spawn error: ${err.message}`));
200
221
  resolve(null);
201
222
  });
@@ -204,8 +225,7 @@ async function planTask(task, cwd) {
204
225
 
205
226
  function computeTimeout(estimatedSeconds) {
206
227
  if (!estimatedSeconds || estimatedSeconds <= 0) return DEFAULT_TIMEOUT_MS;
207
- const withMargin = Math.ceil(estimatedSeconds * 1.05) * 1000;
208
- return Math.max(withMargin, MIN_TIMEOUT_MS);
228
+ return Math.max(Math.ceil(estimatedSeconds * 1.05) * 1000, MIN_TIMEOUT_MS);
209
229
  }
210
230
 
211
231
  /**
@@ -235,23 +255,45 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
235
255
  const stories = parsePrdTasks(cwd);
236
256
  const totalStories = stories.length;
237
257
  const doneAtStart = stories.filter(s => s.passes).length;
238
- const maxIterations = options.maxIterations || 20;
239
258
  const phaseFilter = options.phase ? `Phase ${options.phase}` : undefined;
259
+ const taskFilter = options.filter || null;
260
+
261
+ // Count remaining eligible stories (respecting filters)
262
+ const doneIds = stories.filter(s => s.passes).map(s => s.id);
263
+ const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
264
+ const eligibleCount = stories.filter(s =>
265
+ !s.passes &&
266
+ s.dependencies.every(dep => doneIds.includes(dep)) &&
267
+ (!phaseFilter || s.phase === phaseFilter) &&
268
+ (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
269
+ ).length;
270
+ const maxIterations = options.maxIterations || eligibleCount;
240
271
 
241
272
  // --- Dry run ---
242
273
  if (options.dryRun) {
243
- return dryRunPreview(cwd, phaseFilter, maxIterations);
274
+ return dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter);
244
275
  }
245
276
 
246
277
  // --- Print header ---
247
- printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd);
278
+ printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd, taskFilter);
248
279
 
249
280
  // NO custom SIGINT handler — Ctrl+C uses default Node.js behavior (kills process group)
250
281
 
282
+ // --- Save original branch for isolation ---
283
+ const originalBranch = await getCurrentBranch(cwd);
284
+
285
+ // --- Create batch branch ---
286
+ const batchBranch = await createBatchBranch(cwd);
287
+ console.log(chalk.blue(` Batch branch: ${chalk.bold(batchBranch)}`));
288
+
251
289
  // --- Main loop ---
252
290
  let iteration = 0;
253
291
  let noProgressCount = 0;
254
292
  let storiesCompleted = 0;
293
+ let mergedCount = 0;
294
+ const branchesCreated = [];
295
+ const mergedBranches = [];
296
+ const unmergedBranches = [];
255
297
  const batchStart = Date.now();
256
298
 
257
299
  updateBatchState({
@@ -264,7 +306,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
264
306
  while (iteration < maxIterations) {
265
307
  iteration++;
266
308
 
267
- const task = selectNextTask(cwd, { phase: phaseFilter });
309
+ const task = selectNextTask(cwd, { phase: phaseFilter, filter: taskFilter });
268
310
  if (!task) {
269
311
  console.log(chalk.green('\nAll eligible stories completed!'));
270
312
  break;
@@ -282,8 +324,9 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
282
324
  if (!noPlan) {
283
325
  const existing = loadTaskFile(task.id, cwd);
284
326
 
285
- if (existing && existing.status === 'planned' && existing.complexity !== 'unknown') {
286
- // Reuse previous plan
327
+ const needsReplan = existing && existing.last_failure_type && existing.status !== 'completed';
328
+ if (existing && existing.status === 'planned' && existing.complexity !== 'unknown' && !needsReplan) {
329
+ // Reuse previous plan (only if it hasn't failed before)
287
330
  taskFile = existing;
288
331
  taskTimeoutMs = computeTimeout(existing.estimated_time_seconds);
289
332
  console.log(chalk.dim(` Phase 1: Reusing plan for ${task.id} (${existing.complexity}, ~${formatDuration(existing.estimated_time_seconds)}, model: ${existing.recommended_model || 'sonnet'})`));
@@ -294,9 +337,11 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
294
337
  const planResult = await planTask(task, cwd);
295
338
 
296
339
  if (planResult) {
297
- const estimatedSeconds = planResult.estimated_time_seconds || 600;
298
- const timeoutSeconds = Math.ceil(Math.max(estimatedSeconds * 1.05, MIN_TIMEOUT_MS / 1000));
299
- taskTimeoutMs = computeTimeout(estimatedSeconds);
340
+ const complexity = planResult.complexity || 'medium';
341
+ const tier = COMPLEXITY_TIMEOUT[complexity] || COMPLEXITY_TIMEOUT.medium;
342
+ const estimatedSeconds = tier.seconds;
343
+ const timeoutSeconds = Math.ceil(estimatedSeconds * 1.05);
344
+ taskTimeoutMs = timeoutSeconds * 1000;
300
345
 
301
346
  const recommendedModel = planResult.recommended_model || 'sonnet';
302
347
  taskFile = {
@@ -306,7 +351,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
306
351
  estimated_time_seconds: estimatedSeconds,
307
352
  timeout_seconds: timeoutSeconds,
308
353
  plan: planResult.plan || [],
309
- complexity: planResult.complexity || 'medium',
354
+ complexity,
310
355
  recommended_model: recommendedModel,
311
356
  files_likely_affected: planResult.files_likely_affected || [],
312
357
  created_at: new Date().toISOString(),
@@ -316,7 +361,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
316
361
  saveTaskFile(taskFile, cwd);
317
362
 
318
363
  planSpinner.succeed(chalk.green(
319
- `Planned: ${planResult.complexity} complexity, ~${formatDuration(estimatedSeconds)} estimated, timeout ${formatDuration(timeoutSeconds)}, model: ${recommendedModel}`
364
+ `Planned: ${complexity} complexity, timeout ${tier.label}, model: ${recommendedModel}`
320
365
  ));
321
366
  } else {
322
367
  // Fallback when planning fails
@@ -343,6 +388,23 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
343
388
  }
344
389
  }
345
390
 
391
+ // --- Branch isolation: create/checkout task branch ---
392
+ let taskBranch = null;
393
+ try {
394
+ taskBranch = await createTaskBranch(task.id, cwd);
395
+ branchesCreated.push(taskBranch);
396
+ console.log(chalk.blue(` Branch: ${chalk.bold(taskBranch)}`));
397
+
398
+ // Track branch in task file
399
+ if (taskFile) {
400
+ taskFile.branch = taskBranch;
401
+ saveTaskFile(taskFile, cwd);
402
+ }
403
+ } catch (err) {
404
+ console.log(chalk.red(` Failed to create branch feat/${task.id}: ${err.message}`));
405
+ console.log(chalk.yellow(' Continuing on current branch...'));
406
+ }
407
+
346
408
  // --- Phase 2: Execution ---
347
409
  const phaseLabel = noPlan ? '' : 'Phase 2';
348
410
  console.log(chalk.blue(` ${noPlan ? 'Executing' : 'Phase 2: Executing'} ${task.id}...`));
@@ -358,6 +420,10 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
358
420
  }
359
421
 
360
422
  const prompt = buildPrompt(task.id, cwd, prdPath, promptPath, errorContext);
423
+ if (!prompt) {
424
+ console.log(chalk.red(` Cannot build prompt for ${task.id} — skipping`));
425
+ break;
426
+ }
361
427
  const logFile = join(logDir, `${task.id}_${timestamp()}.log`);
362
428
 
363
429
  const estimatedSeconds = taskFile?.estimated_time_seconds || null;
@@ -367,8 +433,11 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
367
433
  // Killed by signal (user did kill or Ctrl+C) — don't retry, exit
368
434
  if (result.signal) {
369
435
  console.log(chalk.yellow(` Killed by ${result.signal}`));
436
+ if (taskBranch) {
437
+ try { await checkoutBranch(batchBranch, cwd, { force: true }); } catch {}
438
+ }
370
439
  clearBatchState(cwd);
371
- printSummary(cwd, iteration, storiesCompleted, batchStart);
440
+ printSummary(cwd, iteration, storiesCompleted, batchStart, branchesCreated, batchBranch, originalBranch, mergedCount, unmergedBranches);
372
441
  return;
373
442
  }
374
443
 
@@ -380,8 +449,41 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
380
449
  await waitForRateLimit(rateLimitWait, rateLimitAttempts);
381
450
  continue;
382
451
  }
383
- console.log(chalk.yellow(` Timed out after ${Math.round(taskTimeoutMs / 1000)}s`));
384
- errorContext = 'Session timed out. Simplify the implementation or focus on core acceptance criteria.';
452
+
453
+ const prevTimeout = Math.round(taskTimeoutMs / 1000);
454
+ console.log(chalk.yellow(` Timed out after ${prevTimeout}s`));
455
+
456
+ // --- Replan on timeout: bump complexity tier ---
457
+ if (taskFile) {
458
+ const prevComplexity = taskFile.complexity;
459
+ const prevEstimate = taskFile.estimated_time_seconds;
460
+
461
+ // Escalate complexity: low → medium → high, high stays high but doubles
462
+ const escalation = { low: 'medium', medium: 'high' };
463
+ const newComplexity = escalation[prevComplexity] || 'high';
464
+ const tier = COMPLEXITY_TIMEOUT[newComplexity] || COMPLEXITY_TIMEOUT.high;
465
+
466
+ // If already high, double the previous timeout
467
+ const newEstimate = prevComplexity === 'high'
468
+ ? prevEstimate * 2
469
+ : tier.seconds;
470
+ const newTimeout = Math.ceil(newEstimate * 1.05);
471
+ taskTimeoutMs = newTimeout * 1000;
472
+
473
+ taskFile.estimated_time_seconds = newEstimate;
474
+ taskFile.timeout_seconds = newTimeout;
475
+ taskFile.complexity = newComplexity;
476
+ taskFile.recommended_model = 'opus'; // upgrade model on timeout
477
+ taskFile.replanned_at = new Date().toISOString();
478
+ taskFile.replan_reason = `timeout after ${prevTimeout}s (${prevComplexity} → ${newComplexity})`;
479
+ saveTaskFile(taskFile, cwd);
480
+
481
+ console.log(chalk.blue(
482
+ ` Escalated: ${prevComplexity} → ${newComplexity}, timeout ${formatDuration(newTimeout)}, model: opus`
483
+ ));
484
+ }
485
+
486
+ errorContext = `Session timed out after ${prevTimeout}s. The task was replanned with more time. Focus on core acceptance criteria first, then iterate.`;
385
487
  retry++;
386
488
  continue;
387
489
  }
@@ -397,6 +499,14 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
397
499
  }
398
500
  console.log(chalk.yellow(` Claude exited with code ${result.exitCode}`));
399
501
  errorContext = `Claude session failed with exit code ${result.exitCode}.`;
502
+
503
+ // Save error output snippet to task file for debugging
504
+ if (taskFile) {
505
+ taskFile.last_error_output = (result.output || '').slice(-2000);
506
+ taskFile.last_failure_type = 'crash';
507
+ saveTaskFile(taskFile, cwd);
508
+ }
509
+
400
510
  retry++;
401
511
  continue;
402
512
  }
@@ -430,16 +540,40 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
430
540
  current_task: null,
431
541
  last_completed_at: new Date().toISOString()
432
542
  }, cwd);
543
+
544
+ // Merge successful task branch into batch branch
545
+ if (taskBranch) {
546
+ try {
547
+ await checkoutBranch(batchBranch, cwd);
548
+ await mergeBranch(taskBranch, cwd);
549
+ mergedCount++;
550
+ mergedBranches.push(taskBranch);
551
+ console.log(chalk.green(` Merged ${taskBranch} → ${batchBranch}`));
552
+ } catch (err) {
553
+ console.log(chalk.red(` Merge failed: ${err.message}`));
554
+ console.log(chalk.yellow(` Branch ${taskBranch} available for manual merge`));
555
+ unmergedBranches.push(taskBranch);
556
+ }
557
+ }
433
558
  } else {
434
559
  console.log(chalk.red(` Quality gates FAILED`));
435
560
  errorContext = gatesResult.errorOutput;
561
+
562
+ // Save gate failure details to task file
563
+ if (taskFile) {
564
+ taskFile.last_error_output = (gatesResult.errorOutput || '').slice(-2000);
565
+ taskFile.last_failure_type = 'quality_gate';
566
+ saveTaskFile(taskFile, cwd);
567
+ }
568
+
436
569
  retry++;
437
570
  }
438
571
  }
439
572
 
440
573
  if (!storyDone) {
441
- console.log(chalk.red(` FAILED: ${task.id} after ${MAX_RETRIES} retries`));
442
- appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries. Skipping.`);
574
+ const failureType = taskFile?.last_failure_type || 'unknown';
575
+ console.log(chalk.red(` FAILED: ${task.id} after ${MAX_RETRIES} retries (${failureType})`));
576
+ appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries (${failureType}). Skipping.`);
443
577
 
444
578
  // Update task file with failure
445
579
  if (taskFile) {
@@ -452,6 +586,19 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
452
586
  noProgressCount++;
453
587
  }
454
588
 
589
+ // --- Return to batch branch (force: discard dirty state from failed tasks) ---
590
+ if (taskBranch) {
591
+ try {
592
+ await checkoutBranch(batchBranch, cwd, { force: true });
593
+ console.log(chalk.dim(` Returned to branch: ${batchBranch}`));
594
+ } catch (err) {
595
+ console.log(chalk.red(` Warning: failed to return to ${batchBranch}: ${err.message}`));
596
+ }
597
+ if (!storyDone) {
598
+ unmergedBranches.push(taskBranch);
599
+ }
600
+ }
601
+
455
602
  if (noProgressCount >= MAX_NO_PROGRESS) {
456
603
  console.log(chalk.red(`\nCIRCUIT BREAKER: ${MAX_NO_PROGRESS} consecutive failures. Stopping.`));
457
604
  break;
@@ -459,7 +606,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
459
606
  }
460
607
 
461
608
  clearBatchState(cwd);
462
- printSummary(cwd, iteration, storiesCompleted, batchStart);
609
+ printSummary(cwd, iteration, storiesCompleted, batchStart, branchesCreated, batchBranch, originalBranch, mergedCount, unmergedBranches);
463
610
  }
464
611
 
465
612
  /**
@@ -474,23 +621,24 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
474
621
  const logStream = createWriteStream(logFile);
475
622
  const startTime = Date.now();
476
623
 
477
- // Write prompt to temp file
478
- const promptFile = join(tmpdir(), `pwn-prompt-${Date.now()}.md`);
479
- writeFileSync(promptFile, prompt);
480
-
481
624
  const env = { ...process.env };
482
- delete env.CLAUDECODE;
625
+ // Remove all Claude session markers to avoid "nested session" detection
626
+ Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
483
627
 
484
628
  const modelFlag = model ? `--model ${model} ` : '';
485
629
  const child = spawn('bash', [
486
630
  '-c',
487
- `claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat '${promptFile}')"`,
631
+ `claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat)"`,
488
632
  ], {
489
633
  cwd,
490
- stdio: ['ignore', 'pipe', 'pipe'],
634
+ stdio: ['pipe', 'pipe', 'pipe'],
491
635
  env,
492
636
  });
493
637
 
638
+ // Pass prompt via stdin to avoid shell escaping issues and file race conditions
639
+ child.stdin.write(prompt);
640
+ child.stdin.end();
641
+
494
642
  const modelLabel = model ? chalk.magenta(model) : chalk.dim('default');
495
643
  console.log(chalk.dim(` Log: tail -f ${logFile}`));
496
644
  console.log(chalk.dim(` PID: ${child.pid} | Model: `) + modelLabel + chalk.dim(` | Prompt: ${prompt.length} chars | Timeout: ${formatDuration(Math.round(timeoutMs / 1000))}`));
@@ -532,13 +680,14 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
532
680
  clearTimeout(timeoutId);
533
681
  clearInterval(timer);
534
682
  logStream.end();
535
- try { unlinkSync(promptFile); } catch {}
536
683
 
537
684
  // Clear spinner line
538
685
  process.stdout.write('\r\x1b[K');
539
686
 
540
687
  const elapsed = Math.round((Date.now() - startTime) / 1000);
541
- const timedOut = signal === 'SIGTERM' && elapsed >= Math.floor(timeoutMs / 1000) - 1;
688
+ // Timeout detection: SIGTERM signal, OR exit code 143 (128+SIGTERM) near the timeout boundary
689
+ const nearTimeout = elapsed >= Math.floor(timeoutMs / 1000) - 2;
690
+ const timedOut = nearTimeout && (signal === 'SIGTERM' || code === 143);
542
691
 
543
692
  if (signal && !timedOut) {
544
693
  console.log(chalk.dim(` Claude killed (${signal}) after ${formatDuration(elapsed)}`));
@@ -554,7 +703,6 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
554
703
  clearTimeout(timeoutId);
555
704
  clearInterval(timer);
556
705
  logStream.end();
557
- try { unlinkSync(promptFile); } catch {}
558
706
  process.stdout.write('\r\x1b[K');
559
707
  console.log(chalk.red(` Spawn error: ${err.message}`));
560
708
  resolve({ exitCode: 1, output: '', timedOut: false, signal: null });
@@ -599,7 +747,10 @@ async function runGatesWithStatus(cwd) {
599
747
  function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
600
748
  const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
601
749
  const story = prd.stories.find(s => s.id === storyId);
602
- if (!story) return '';
750
+ if (!story) {
751
+ console.log(chalk.yellow(` Warning: story ${storyId} not found in prd.json — may have been modified during execution`));
752
+ return '';
753
+ }
603
754
 
604
755
  const doneIds = prd.stories.filter(s => s.passes).map(s => s.id);
605
756
  const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
@@ -632,7 +783,7 @@ function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
632
783
  /**
633
784
  * Dry run: show formatted preview of eligible tasks.
634
785
  */
635
- function dryRunPreview(cwd, phaseFilter, maxIterations) {
786
+ function dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter = null) {
636
787
  const stories = parsePrdTasks(cwd);
637
788
  const total = stories.length;
638
789
  const done = stories.filter(s => s.passes).length;
@@ -642,12 +793,15 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
642
793
  console.log(` Progress: ${chalk.green(done)}/${total} done, ${chalk.yellow(remaining)} remaining`);
643
794
  console.log(` Max iterations: ${maxIterations}`);
644
795
  if (phaseFilter) console.log(` Phase filter: ${chalk.blue(phaseFilter)}`);
796
+ if (taskFilter) console.log(` Task filter: ${chalk.blue(taskFilter)}`);
645
797
 
646
798
  const doneIds = stories.filter(s => s.passes).map(s => s.id);
799
+ const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
647
800
  const eligible = stories.filter(s =>
648
801
  !s.passes &&
649
802
  s.dependencies.every(dep => doneIds.includes(dep)) &&
650
- (!phaseFilter || s.phase === phaseFilter)
803
+ (!phaseFilter || s.phase === phaseFilter) &&
804
+ (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
651
805
  );
652
806
 
653
807
  if (eligible.length === 0) {
@@ -683,7 +837,7 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
683
837
  console.log(chalk.dim('\n Run without --dry-run to execute.\n'));
684
838
  }
685
839
 
686
- function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd()) {
840
+ function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd(), taskFilter = null) {
687
841
  const config = loadConfig(cwd);
688
842
  const skipGates = config.skip_gates || [];
689
843
 
@@ -694,13 +848,14 @@ function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.
694
848
  console.log(` Progress: ${chalk.green(done)}/${total} done`);
695
849
  console.log(` Planning: ${noPlan ? chalk.yellow('disabled') : chalk.green('enabled')}`);
696
850
  if (phase) console.log(` Phase filter: ${chalk.blue(phase)}`);
851
+ if (taskFilter) console.log(` Task filter: ${chalk.blue(taskFilter)}`);
697
852
  if (skipGates.length > 0) {
698
853
  console.log(` ${chalk.yellow('⚠️ Skipping gates (no tooling):')} ${skipGates.join(', ')}`);
699
854
  }
700
855
  console.log(chalk.dim('─'.repeat(40)));
701
856
  }
702
857
 
703
- function printSummary(cwd, iterations, completed, startTime) {
858
+ function printSummary(cwd, iterations, completed, startTime, branchesCreated = [], batchBranch = null, originalBranch = null, mergedCount = 0, unmergedBranches = []) {
704
859
  const stories = parsePrdTasks(cwd);
705
860
  const total = stories.length;
706
861
  const done = stories.filter(s => s.passes).length;
@@ -737,6 +892,21 @@ function printSummary(cwd, iterations, completed, startTime) {
737
892
  if (cleaned > 0 || failedKept > 0) {
738
893
  console.log(` Cleanup: ${chalk.green(`${cleaned} completed`)} removed, ${failedKept > 0 ? chalk.red(`${failedKept} failed`) : '0 failed'} kept for review`);
739
894
  }
895
+ if (batchBranch) {
896
+ console.log(` Batch branch: ${chalk.cyan(batchBranch)}`);
897
+ console.log(` Merged: ${chalk.green(mergedCount)} tasks merged into batch branch`);
898
+ if (unmergedBranches.length > 0) {
899
+ const unique = [...new Set(unmergedBranches)];
900
+ console.log(` Unmerged: ${chalk.yellow(unique.length)} isolated: ${unique.map(b => chalk.yellow(b)).join(', ')}`);
901
+ }
902
+ }
903
+ if (branchesCreated.length > 0) {
904
+ const unique = [...new Set(branchesCreated)];
905
+ console.log(` Branches: ${unique.map(b => chalk.cyan(b)).join(', ')}`);
906
+ }
907
+ if (originalBranch) {
908
+ console.log(` Original branch: ${chalk.green(originalBranch)} (intact)`);
909
+ }
740
910
  console.log(chalk.dim('─'.repeat(40)));
741
911
  console.log('');
742
912
  }
@@ -13,6 +13,73 @@ import { getState, updateState, hasWorkspace } from '../core/state.js';
13
13
 
14
14
  const execAsync = promisify(exec);
15
15
 
16
+ // --- Git Helpers ---
17
+
18
/**
 * Resolve the name of the branch currently checked out in a repository.
 * @param {string} cwd - Directory in which git is invoked
 * @returns {Promise<string>} Trimmed output of `git branch --show-current`
 */
export async function getCurrentBranch(cwd = process.cwd()) {
  const result = await execAsync('git branch --show-current', { cwd });
  const branchName = result.stdout.trim();
  return branchName;
}
27
+
28
/**
 * Create and check out the `feat/{taskId}` branch for a task, starting from
 * the commit that is currently checked out.
 *
 * If the branch already exists (a rerun of the same task) it is checked out
 * and hard-reset to that commit, so stale or dirty state from the previous
 * attempt is not carried over.
 *
 * @param {string} taskId - Task ID (e.g. "SEC-007")
 * @param {string} cwd - Working directory
 * @returns {Promise<string>} Branch name
 * @throws {Error} If taskId contains characters that are unsafe to place in
 *   a shell command, or if a git command fails
 */
export async function createTaskBranch(taskId, cwd = process.cwd()) {
  const id = String(taskId);
  // The branch name is interpolated into shell command strings below, so
  // accept only letters, digits and `. _ / -` to rule out shell metacharacters.
  if (!/^[\p{L}\p{N}._\/-]+$/u.test(id)) {
    throw new Error(`Invalid task ID for branch name: ${JSON.stringify(id)}`);
  }
  const branch = `feat/${id}`;

  // Only the existence probe is allowed to fail quietly. Failures of the
  // checkout/reset below must surface as-is instead of being mistaken for
  // "branch does not exist".
  let exists = true;
  try {
    await execAsync(`git rev-parse --verify ${branch}`, { cwd });
  } catch {
    exists = false;
  }

  if (!exists) {
    await execAsync(`git checkout -b ${branch}`, { cwd });
    return branch;
  }

  // Capture HEAD before switching: it is the base the rerun must start from.
  const { stdout } = await execAsync('git rev-parse HEAD', { cwd });
  const baseRef = stdout.trim();
  await execAsync(`git checkout ${branch}`, { cwd });
  await execAsync(`git reset --hard ${baseRef}`, { cwd });
  return branch;
}
51
+
52
/**
 * Switch to an existing branch via `git checkout`.
 * @param {string} branch - Branch name
 * @param {string} cwd - Working directory
 * @param {object} [options] - Optional settings
 * @param {boolean} [options.force=false] - Pass `--force` to `git checkout`
 * @returns {Promise<void>}
 */
export async function checkoutBranch(branch, cwd = process.cwd(), { force = false } = {}) {
  const command = force
    ? `git checkout --force ${branch}`
    : `git checkout ${branch}`;
  await execAsync(command, { cwd });
}
61
+
62
/**
 * Create and check out a batch branch from the current HEAD.
 *
 * The branch is named `batch/{timestamp}`, where the timestamp is the current
 * UTC time formatted as `YYYY-MM-DDTHHMMSS`. Seconds are included so that two
 * batches started within the same minute do not collide on the branch name.
 *
 * @param {string} cwd - Working directory
 * @returns {Promise<string>} Branch name (batch/{timestamp})
 */
export async function createBatchBranch(cwd = process.cwd()) {
  // toISOString() with ':' and '.' stripped is "YYYY-MM-DDTHHMMSSmmmZ";
  // the first 17 characters end right after the seconds.
  const ts = new Date().toISOString().replace(/[:.]/g, '').slice(0, 17);
  const branch = `batch/${ts}`;
  await execAsync(`git checkout -b ${branch}`, { cwd });
  return branch;
}
73
+
74
/**
 * Merge the given branch into whichever branch is currently checked out,
 * passing `--no-edit` to `git merge`.
 * @param {string} source - Source branch name
 * @param {string} cwd - Working directory
 * @returns {Promise<void>}
 */
export async function mergeBranch(source, cwd = process.cwd()) {
  const command = `git merge ${source} --no-edit`;
  await execAsync(command, { cwd });
}
82
+
16
83
  /**
17
84
  * Default batch configuration
18
85
  */
@@ -392,9 +459,14 @@ export function convertBacklogToPrd(backlogContent, projectName = 'my-project')
392
459
  * @returns {object|null} Selected task or null
393
460
  */
394
461
  export function selectNextTask(cwd = process.cwd(), options = {}) {
462
+ const filterRe = options.filter ? new RegExp(options.filter, 'i') : null;
463
+
395
464
  // First check active tasks for incomplete ones
396
465
  const activeTasks = parseActiveTasks(cwd);
397
- const pendingActive = activeTasks.filter(t => !t.completed && !t.blockedBy);
466
+ const pendingActive = activeTasks.filter(t =>
467
+ !t.completed && !t.blockedBy &&
468
+ (!filterRe || filterRe.test(t.id) || filterRe.test(t.title))
469
+ );
398
470
 
399
471
  if (pendingActive.length > 0) {
400
472
  return pendingActive[0];
@@ -407,7 +479,8 @@ export function selectNextTask(cwd = process.cwd(), options = {}) {
407
479
  const eligible = stories.find(s =>
408
480
  !s.passes &&
409
481
  s.dependencies.every(dep => doneIds.includes(dep)) &&
410
- (!options.phase || s.phase === options.phase)
482
+ (!options.phase || s.phase === options.phase) &&
483
+ (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
411
484
  );
412
485
 
413
486
  return eligible || null;
@@ -111,6 +111,26 @@ Defines:
111
111
  - Commit patterns
112
112
  - Completion signals
113
113
 
114
+ ### Writing stories for `prd.json`
115
+
116
+ Stories run with `--dangerously-skip-permissions` — the agent has full access. Write defensively.
117
+
118
+ **Never put these in batch stories:**
119
+ - Destructive git ops (`git filter-repo`, `BFG`, `push --force`, history rewriting)
120
+ - Destructive file ops (`rm -rf`, wiping directories)
121
+ - Database ops (`DROP TABLE`, prod migrations)
122
+ - Secret rotation (revoking keys, rotating credentials)
123
+ - External side effects (sending emails, creating PRs, publishing packages)
124
+
125
+ **Rule of thumb**: if a mistake needs human intervention to fix, it's not a batch story.
126
+
127
+ **Instead**, ask the agent to **prepare and document** — write the script, the docs, the config — but let a human execute the dangerous part.
128
+
129
+ **Always include in `notes`** what the agent must NOT do:
130
+ ```json
131
+ "notes": "Do NOT run git-filter-repo. Do NOT modify prd.json."
132
+ ```
133
+
114
134
  ## 🤖 Agents
115
135
 
116
136
  ### agent/claude.md
@@ -0,0 +1,36 @@
1
+ You are working on this project autonomously as part of a batch execution run.
2
+
3
+ ## Project Context
4
+ - Read CLAUDE.md (or .ai/agents/claude.md) for full project instructions and conventions
5
+ - Read .ai/memory/decisions.md for architectural decisions
6
+ - Read .ai/memory/patterns.md for established patterns
7
+ - Read .ai/batch/progress.txt for learnings from previous iterations
8
+
9
+ ## Current Task
10
+ **{STORY_ID}**: {STORY_TITLE}
11
+
12
+ ### Acceptance Criteria
13
+ {ACCEPTANCE_CRITERIA}
14
+
15
+ ### Notes
16
+ {NOTES}
17
+
18
+ ### Dependencies (already implemented)
19
+ {DEPENDENCIES}
20
+
21
+ ## Instructions
22
+ 1. Explore the codebase to understand existing patterns for similar features
23
+ 2. Implement the feature following existing conventions
24
+ 3. Write comprehensive tests (see tests/ for patterns)
25
+ 4. Run quality gates and fix any failures before committing
26
+ 5. Commit with: feat({STORY_ID}): {short description}
27
+ 6. Update .ai/tasks/active.md marking this task as done with today's date
28
+
29
+ ## Important
30
+ - Do NOT push to remote
31
+ - Do NOT modify unrelated files
32
+ - Do NOT edit .ai/tasks/prd.json or batch configuration files
33
+ - Do NOT run destructive operations (git filter-repo, rm -rf, DROP TABLE, force push)
34
+ - Do NOT rotate secrets, revoke keys, or modify credentials — those are human tasks
35
+ - Follow existing patterns exactly
36
+ - If you discover useful patterns, note them for progress.txt