@fermindi/pwn-cli 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/backlog.js CHANGED
@@ -19,9 +19,17 @@ export default async function backlogCommand(args = []) {
19
19
  }
20
20
 
21
21
  const cwd = process.cwd();
22
- const stories = parsePrdTasks(cwd);
22
+ let stories = parsePrdTasks(cwd);
23
23
  const taskFiles = listTaskFiles(cwd);
24
24
 
25
+ // Parse --filter
26
+ const filterIdx = args.findIndex(a => a === '--filter');
27
+ const filter = filterIdx !== -1 ? args[filterIdx + 1] : null;
28
+ if (filter) {
29
+ const re = new RegExp(filter, 'i');
30
+ stories = stories.filter(s => re.test(s.id) || re.test(s.title));
31
+ }
32
+
25
33
  // Read project name from prd.json
26
34
  let project = 'project';
27
35
  const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
@@ -33,11 +41,12 @@ export default async function backlogCommand(args = []) {
33
41
  }
34
42
 
35
43
  const noInteractive = args.includes('--no-interactive') || !process.stdout.isTTY;
44
+ const label = filter ? `${project} (filter: ${filter})` : project;
36
45
 
37
46
  if (noInteractive) {
38
- printPlain({ project, stories, taskFiles });
47
+ printPlain({ project: label, stories, taskFiles });
39
48
  } else {
40
- await startViewer({ project, stories, taskFiles });
49
+ await startViewer({ project: label, stories, taskFiles });
41
50
  }
42
51
  }
43
52
 
@@ -45,8 +54,12 @@ function showHelp() {
45
54
  console.log('📋 PWN Backlog Viewer\n');
46
55
  console.log('Usage: pwn backlog [options]\n');
47
56
  console.log('Options:');
57
+ console.log(' --filter <pattern> Filter stories by ID or title (regex, case-insensitive)');
48
58
  console.log(' --no-interactive Plain text output (for CI/piping)');
49
59
  console.log(' --help, -h Show this help\n');
60
+ console.log('Examples:');
61
+ console.log(' pwn backlog --filter SEC # Only SEC-* stories');
62
+ console.log(' pwn backlog --filter "API|AUTH" # Stories matching API or AUTH\n');
50
63
  console.log('Keybindings (list view):');
51
64
  console.log(' ↑/k Move up');
52
65
  console.log(' ↓/j Move down');
package/cli/batch.js CHANGED
@@ -103,6 +103,8 @@ function parseRunOptions(args) {
103
103
  options.dryRun = true;
104
104
  } else if (arg === '--phase') {
105
105
  options.phase = args[++i];
106
+ } else if (arg === '--filter') {
107
+ options.filter = args[++i];
106
108
  } else if (arg === '--no-plan') {
107
109
  options.noPlan = true;
108
110
  } else if (arg === '--rate-limit-wait') {
@@ -410,6 +412,7 @@ function showHelp() {
410
412
  console.log(' --continue Continue on errors');
411
413
  console.log(' --no-commit Skip auto-commit');
412
414
  console.log(' --no-branch Skip branch creation');
415
+ console.log(' --filter <pattern> Filter tasks by ID or title (regex, case-insensitive)');
413
416
  console.log(' --no-plan Skip planning phase (use fixed 10min timeout)');
414
417
  console.log(' --rate-limit-wait <s> Seconds to wait on rate limit (default: 1800)');
415
418
  console.log(' --help, -h Show this help\n');
@@ -418,6 +421,8 @@ function showHelp() {
418
421
  console.log(' pwn batch run # Run autonomous batch loop');
419
422
  console.log(' pwn batch run --dry-run # Preview next story');
420
423
  console.log(' pwn batch run --phase 3 # Run specific phase');
424
+ console.log(' pwn batch run --filter ORCH # Run only ORCH-* tasks');
425
+ console.log(' pwn batch run --filter "UNI|DATA" # Run UNI or DATA tasks');
421
426
  console.log(' pwn batch run --no-plan # Skip planning, fixed timeout');
422
427
  console.log(' pwn batch tasks # List all task files');
423
428
  console.log(' pwn batch tasks --failed # Show only failed tasks');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fermindi/pwn-cli",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "description": "Professional AI Workspace - Inject structured memory and automation into any project for AI-powered development",
5
5
  "type": "module",
6
6
  "bin": {
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * Two-phase execution model:
5
5
  * Phase 1: Planning — Claude estimates time and creates action plan
6
- * Phase 2: Execution — Dynamic timeout based on estimate + 5%
6
+ * Phase 2: Execution — Dynamic timeout from complexity tier + 5% margin
7
7
  *
8
8
  * Task files (.ai/batch/tasks/{US-ID}.json) track status per story.
9
9
  * Completed files are cleaned up at the end; failed are kept for review.
@@ -12,7 +12,6 @@
12
12
  import { spawn } from 'child_process';
13
13
  import { existsSync, readFileSync, writeFileSync, mkdirSync, createWriteStream, appendFileSync, unlinkSync, readdirSync } from 'fs';
14
14
  import { join } from 'path';
15
- import { tmpdir } from 'os';
16
15
  import ora from 'ora';
17
16
  import chalk from 'chalk';
18
17
  import {
@@ -27,9 +26,17 @@ import {
27
26
  } from './batch-service.js';
28
27
 
29
28
  // --- Constants ---
30
- const RUNNER_VERSION = '2.0';
31
- const DEFAULT_TIMEOUT_MS = 600_000; // 10 minutes fallback
32
- const MIN_TIMEOUT_MS = 120_000; // 2 minutes minimum (claude init takes ~30s)
29
+ const RUNNER_VERSION = '2.1';
30
+ const DEFAULT_TIMEOUT_MS = 900_000; // 15 minutes fallback
31
+ const MIN_TIMEOUT_MS = 300_000; // 5 minutes minimum (claude init ~30-40s + real work)
32
+
33
+ // Complexity → timeout mapping (based on real-world execution data)
34
+ // AI is bad at estimating seconds but decent at classifying complexity
35
+ const COMPLEXITY_TIMEOUT = {
36
+ low: { seconds: 300, label: '5m' }, // config change, small fix
37
+ medium: { seconds: 600, label: '10m' }, // new function, 1-3 files
38
+ high: { seconds: 900, label: '15m' }, // new module, multi-file refactor
39
+ };
33
40
  const PLAN_TIMEOUT_MS = 120_000; // 2 minutes for planning phase (claude init ~30s)
34
41
  const DEFAULT_RATE_LIMIT_WAIT = 1800; // 30 minutes (seconds)
35
42
  const MAX_RETRIES = 2;
@@ -99,7 +106,7 @@ async function waitForRateLimit(waitSeconds, attempt) {
99
106
 
100
107
  // --- Planning Phase ---
101
108
 
102
- function buildPlanPrompt(task, cwd) {
109
+ function buildPlanPrompt(task, cwd, replanContext = null) {
103
110
  const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
104
111
  const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
105
112
  const story = prd.stories.find(s => s.id === task.id);
@@ -107,42 +114,49 @@ function buildPlanPrompt(task, cwd) {
107
114
 
108
115
  const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
109
116
 
117
+ let replanSection = '';
118
+ if (replanContext) {
119
+ replanSection = `
120
+
121
+ IMPORTANT — REPLANNING: A previous attempt FAILED.
122
+ - Previous complexity: ${replanContext.previousComplexity}
123
+ - Failure reason: ${replanContext.failureReason}
124
+ Re-evaluate the complexity. If it timed out, it's likely MORE complex than you initially thought.`;
125
+ }
126
+
110
127
  return `You are analyzing task ${task.id}: ${task.title}
111
128
 
112
129
  Acceptance criteria:
113
130
  ${acList}
114
131
 
115
132
  Notes: ${story.notes || 'None'}
116
-
133
+ ${replanSection}
117
134
  Analyze this task and respond with ONLY a JSON object (no markdown, no code fences):
118
135
  {
119
- "estimated_time_seconds": <number>,
120
136
  "plan": ["step 1", "step 2", ...],
121
137
  "complexity": "low|medium|high",
122
138
  "recommended_model": "opus|sonnet|haiku",
123
139
  "files_likely_affected": ["path1", "path2"]
124
140
  }
125
141
 
126
- Base your estimate on:
127
- - low complexity (config change, small fix): 30-120s
128
- - medium complexity (new function, simple feature): 120-300s
129
- - high complexity (new module, multi-file feature): 300-600s
142
+ Classify complexity:
143
+ - "low": config change, small fix, single file tweak
144
+ - "medium": new function, simple feature, 1-3 files
145
+ - "high": new module, multi-file refactor, architecture change, 4+ files
130
146
 
131
147
  Recommend a model for execution:
132
- - "haiku": trivial (config change, typo, single-line fix) — ~30s
133
- - "sonnet": low-medium complexity (new function, simple feature, 1-3 files) — 30-300s
134
- - "opus": high complexity (new module, multi-file refactor, architecture) — 300-600s`;
148
+ - "haiku": trivial (config change, typo, single-line fix)
149
+ - "sonnet": low-medium complexity (new function, simple feature, 1-3 files)
150
+ - "opus": high complexity (new module, multi-file refactor, architecture)`;
135
151
  }
136
152
 
137
- async function planTask(task, cwd) {
138
- const prompt = buildPlanPrompt(task, cwd);
153
+ async function planTask(task, cwd, replanContext = null) {
154
+ const prompt = buildPlanPrompt(task, cwd, replanContext);
139
155
  if (!prompt) return null;
140
156
 
141
- const promptFile = join(tmpdir(), `pwn-plan-${Date.now()}.md`);
142
- writeFileSync(promptFile, prompt);
143
-
144
157
  const env = { ...process.env };
145
- delete env.CLAUDECODE;
158
+ // Remove all Claude session markers to avoid "nested session" detection
159
+ Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
146
160
 
147
161
  return new Promise((resolve) => {
148
162
  let output = '';
@@ -150,13 +164,17 @@ async function planTask(task, cwd) {
150
164
  // Planning uses --print WITHOUT --dangerously-skip-permissions (read-only)
151
165
  const child = spawn('bash', [
152
166
  '-c',
153
- `claude --model opus --print -p "$(cat '${promptFile}')"`,
167
+ `claude --model opus --print -p "$(cat)"`,
154
168
  ], {
155
169
  cwd,
156
- stdio: ['ignore', 'pipe', 'pipe'],
170
+ stdio: ['pipe', 'pipe', 'pipe'],
157
171
  env,
158
172
  });
159
173
 
174
+ // Pass prompt via stdin
175
+ child.stdin.write(prompt);
176
+ child.stdin.end();
177
+
160
178
  const timeoutId = setTimeout(() => {
161
179
  child.kill('SIGTERM');
162
180
  }, PLAN_TIMEOUT_MS);
@@ -166,7 +184,6 @@ async function planTask(task, cwd) {
166
184
 
167
185
  child.on('close', (code, signal) => {
168
186
  clearTimeout(timeoutId);
169
- try { unlinkSync(promptFile); } catch {}
170
187
 
171
188
  if (signal) {
172
189
  console.log(chalk.dim(` Planning killed by ${signal} (timeout=${signal === 'SIGTERM' ? 'likely' : 'no'})`));
@@ -195,7 +212,6 @@ async function planTask(task, cwd) {
195
212
 
196
213
  child.on('error', (err) => {
197
214
  clearTimeout(timeoutId);
198
- try { unlinkSync(promptFile); } catch {}
199
215
  console.log(chalk.dim(` Planning spawn error: ${err.message}`));
200
216
  resolve(null);
201
217
  });
@@ -204,8 +220,7 @@ async function planTask(task, cwd) {
204
220
 
205
221
  function computeTimeout(estimatedSeconds) {
206
222
  if (!estimatedSeconds || estimatedSeconds <= 0) return DEFAULT_TIMEOUT_MS;
207
- const withMargin = Math.ceil(estimatedSeconds * 1.05) * 1000;
208
- return Math.max(withMargin, MIN_TIMEOUT_MS);
223
+ return Math.max(Math.ceil(estimatedSeconds * 1.05) * 1000, MIN_TIMEOUT_MS);
209
224
  }
210
225
 
211
226
  /**
@@ -235,16 +250,27 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
235
250
  const stories = parsePrdTasks(cwd);
236
251
  const totalStories = stories.length;
237
252
  const doneAtStart = stories.filter(s => s.passes).length;
238
- const maxIterations = options.maxIterations || 20;
239
253
  const phaseFilter = options.phase ? `Phase ${options.phase}` : undefined;
254
+ const taskFilter = options.filter || null;
255
+
256
+ // Count remaining eligible stories (respecting filters)
257
+ const doneIds = stories.filter(s => s.passes).map(s => s.id);
258
+ const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
259
+ const eligibleCount = stories.filter(s =>
260
+ !s.passes &&
261
+ s.dependencies.every(dep => doneIds.includes(dep)) &&
262
+ (!phaseFilter || s.phase === phaseFilter) &&
263
+ (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
264
+ ).length;
265
+ const maxIterations = options.maxIterations || eligibleCount;
240
266
 
241
267
  // --- Dry run ---
242
268
  if (options.dryRun) {
243
- return dryRunPreview(cwd, phaseFilter, maxIterations);
269
+ return dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter);
244
270
  }
245
271
 
246
272
  // --- Print header ---
247
- printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd);
273
+ printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd, taskFilter);
248
274
 
249
275
  // NO custom SIGINT handler — Ctrl+C uses default Node.js behavior (kills process group)
250
276
 
@@ -264,7 +290,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
264
290
  while (iteration < maxIterations) {
265
291
  iteration++;
266
292
 
267
- const task = selectNextTask(cwd, { phase: phaseFilter });
293
+ const task = selectNextTask(cwd, { phase: phaseFilter, filter: taskFilter });
268
294
  if (!task) {
269
295
  console.log(chalk.green('\nAll eligible stories completed!'));
270
296
  break;
@@ -282,8 +308,9 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
282
308
  if (!noPlan) {
283
309
  const existing = loadTaskFile(task.id, cwd);
284
310
 
285
- if (existing && existing.status === 'planned' && existing.complexity !== 'unknown') {
286
- // Reuse previous plan
311
+ const needsReplan = existing && existing.last_failure_type && existing.status !== 'completed';
312
+ if (existing && existing.status === 'planned' && existing.complexity !== 'unknown' && !needsReplan) {
313
+ // Reuse previous plan (only if it hasn't failed before)
287
314
  taskFile = existing;
288
315
  taskTimeoutMs = computeTimeout(existing.estimated_time_seconds);
289
316
  console.log(chalk.dim(` Phase 1: Reusing plan for ${task.id} (${existing.complexity}, ~${formatDuration(existing.estimated_time_seconds)}, model: ${existing.recommended_model || 'sonnet'})`));
@@ -294,9 +321,11 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
294
321
  const planResult = await planTask(task, cwd);
295
322
 
296
323
  if (planResult) {
297
- const estimatedSeconds = planResult.estimated_time_seconds || 600;
298
- const timeoutSeconds = Math.ceil(Math.max(estimatedSeconds * 1.05, MIN_TIMEOUT_MS / 1000));
299
- taskTimeoutMs = computeTimeout(estimatedSeconds);
324
+ const complexity = planResult.complexity || 'medium';
325
+ const tier = COMPLEXITY_TIMEOUT[complexity] || COMPLEXITY_TIMEOUT.medium;
326
+ const estimatedSeconds = tier.seconds;
327
+ const timeoutSeconds = Math.ceil(estimatedSeconds * 1.05);
328
+ taskTimeoutMs = timeoutSeconds * 1000;
300
329
 
301
330
  const recommendedModel = planResult.recommended_model || 'sonnet';
302
331
  taskFile = {
@@ -306,7 +335,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
306
335
  estimated_time_seconds: estimatedSeconds,
307
336
  timeout_seconds: timeoutSeconds,
308
337
  plan: planResult.plan || [],
309
- complexity: planResult.complexity || 'medium',
338
+ complexity,
310
339
  recommended_model: recommendedModel,
311
340
  files_likely_affected: planResult.files_likely_affected || [],
312
341
  created_at: new Date().toISOString(),
@@ -316,7 +345,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
316
345
  saveTaskFile(taskFile, cwd);
317
346
 
318
347
  planSpinner.succeed(chalk.green(
319
- `Planned: ${planResult.complexity} complexity, ~${formatDuration(estimatedSeconds)} estimated, timeout ${formatDuration(timeoutSeconds)}, model: ${recommendedModel}`
348
+ `Planned: ${complexity} complexity, timeout ${tier.label}, model: ${recommendedModel}`
320
349
  ));
321
350
  } else {
322
351
  // Fallback when planning fails
@@ -358,6 +387,10 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
358
387
  }
359
388
 
360
389
  const prompt = buildPrompt(task.id, cwd, prdPath, promptPath, errorContext);
390
+ if (!prompt) {
391
+ console.log(chalk.red(` Cannot build prompt for ${task.id} — skipping`));
392
+ break;
393
+ }
361
394
  const logFile = join(logDir, `${task.id}_${timestamp()}.log`);
362
395
 
363
396
  const estimatedSeconds = taskFile?.estimated_time_seconds || null;
@@ -380,8 +413,41 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
380
413
  await waitForRateLimit(rateLimitWait, rateLimitAttempts);
381
414
  continue;
382
415
  }
383
- console.log(chalk.yellow(` Timed out after ${Math.round(taskTimeoutMs / 1000)}s`));
384
- errorContext = 'Session timed out. Simplify the implementation or focus on core acceptance criteria.';
416
+
417
+ const prevTimeout = Math.round(taskTimeoutMs / 1000);
418
+ console.log(chalk.yellow(` Timed out after ${prevTimeout}s`));
419
+
420
+ // --- Replan on timeout: bump complexity tier ---
421
+ if (taskFile) {
422
+ const prevComplexity = taskFile.complexity;
423
+ const prevEstimate = taskFile.estimated_time_seconds;
424
+
425
+ // Escalate complexity: low → medium → high, high stays high but doubles
426
+ const escalation = { low: 'medium', medium: 'high' };
427
+ const newComplexity = escalation[prevComplexity] || 'high';
428
+ const tier = COMPLEXITY_TIMEOUT[newComplexity] || COMPLEXITY_TIMEOUT.high;
429
+
430
+ // If already high, double the previous timeout
431
+ const newEstimate = prevComplexity === 'high'
432
+ ? prevEstimate * 2
433
+ : tier.seconds;
434
+ const newTimeout = Math.ceil(newEstimate * 1.05);
435
+ taskTimeoutMs = newTimeout * 1000;
436
+
437
+ taskFile.estimated_time_seconds = newEstimate;
438
+ taskFile.timeout_seconds = newTimeout;
439
+ taskFile.complexity = newComplexity;
440
+ taskFile.recommended_model = 'opus'; // upgrade model on timeout
441
+ taskFile.replanned_at = new Date().toISOString();
442
+ taskFile.replan_reason = `timeout after ${prevTimeout}s (${prevComplexity} → ${newComplexity})`;
443
+ saveTaskFile(taskFile, cwd);
444
+
445
+ console.log(chalk.blue(
446
+ ` Escalated: ${prevComplexity} → ${newComplexity}, timeout ${formatDuration(newTimeout)}, model: opus`
447
+ ));
448
+ }
449
+
450
+ errorContext = `Session timed out after ${prevTimeout}s. The task was replanned with more time. Focus on core acceptance criteria first, then iterate.`;
385
451
  retry++;
386
452
  continue;
387
453
  }
@@ -397,6 +463,14 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
397
463
  }
398
464
  console.log(chalk.yellow(` Claude exited with code ${result.exitCode}`));
399
465
  errorContext = `Claude session failed with exit code ${result.exitCode}.`;
466
+
467
+ // Save error output snippet to task file for debugging
468
+ if (taskFile) {
469
+ taskFile.last_error_output = (result.output || '').slice(-2000);
470
+ taskFile.last_failure_type = 'crash';
471
+ saveTaskFile(taskFile, cwd);
472
+ }
473
+
400
474
  retry++;
401
475
  continue;
402
476
  }
@@ -433,13 +507,22 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
433
507
  } else {
434
508
  console.log(chalk.red(` Quality gates FAILED`));
435
509
  errorContext = gatesResult.errorOutput;
510
+
511
+ // Save gate failure details to task file
512
+ if (taskFile) {
513
+ taskFile.last_error_output = (gatesResult.errorOutput || '').slice(-2000);
514
+ taskFile.last_failure_type = 'quality_gate';
515
+ saveTaskFile(taskFile, cwd);
516
+ }
517
+
436
518
  retry++;
437
519
  }
438
520
  }
439
521
 
440
522
  if (!storyDone) {
441
- console.log(chalk.red(` FAILED: ${task.id} after ${MAX_RETRIES} retries`));
442
- appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries. Skipping.`);
523
+ const failureType = taskFile?.last_failure_type || 'unknown';
524
+ console.log(chalk.red(` FAILED: ${task.id} after ${MAX_RETRIES} retries (${failureType})`));
525
+ appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries (${failureType}). Skipping.`);
443
526
 
444
527
  // Update task file with failure
445
528
  if (taskFile) {
@@ -474,23 +557,24 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
474
557
  const logStream = createWriteStream(logFile);
475
558
  const startTime = Date.now();
476
559
 
477
- // Write prompt to temp file
478
- const promptFile = join(tmpdir(), `pwn-prompt-${Date.now()}.md`);
479
- writeFileSync(promptFile, prompt);
480
-
481
560
  const env = { ...process.env };
482
- delete env.CLAUDECODE;
561
+ // Remove all Claude session markers to avoid "nested session" detection
562
+ Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
483
563
 
484
564
  const modelFlag = model ? `--model ${model} ` : '';
485
565
  const child = spawn('bash', [
486
566
  '-c',
487
- `claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat '${promptFile}')"`,
567
+ `claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat)"`,
488
568
  ], {
489
569
  cwd,
490
- stdio: ['ignore', 'pipe', 'pipe'],
570
+ stdio: ['pipe', 'pipe', 'pipe'],
491
571
  env,
492
572
  });
493
573
 
574
+ // Pass prompt via stdin to avoid shell escaping issues and file race conditions
575
+ child.stdin.write(prompt);
576
+ child.stdin.end();
577
+
494
578
  const modelLabel = model ? chalk.magenta(model) : chalk.dim('default');
495
579
  console.log(chalk.dim(` Log: tail -f ${logFile}`));
496
580
  console.log(chalk.dim(` PID: ${child.pid} | Model: `) + modelLabel + chalk.dim(` | Prompt: ${prompt.length} chars | Timeout: ${formatDuration(Math.round(timeoutMs / 1000))}`));
@@ -532,13 +616,14 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
532
616
  clearTimeout(timeoutId);
533
617
  clearInterval(timer);
534
618
  logStream.end();
535
- try { unlinkSync(promptFile); } catch {}
536
619
 
537
620
  // Clear spinner line
538
621
  process.stdout.write('\r\x1b[K');
539
622
 
540
623
  const elapsed = Math.round((Date.now() - startTime) / 1000);
541
- const timedOut = signal === 'SIGTERM' && elapsed >= Math.floor(timeoutMs / 1000) - 1;
624
+ // Timeout detection: SIGTERM signal, OR exit code 143 (128+SIGTERM) near the timeout boundary
625
+ const nearTimeout = elapsed >= Math.floor(timeoutMs / 1000) - 2;
626
+ const timedOut = nearTimeout && (signal === 'SIGTERM' || code === 143);
542
627
 
543
628
  if (signal && !timedOut) {
544
629
  console.log(chalk.dim(` Claude killed (${signal}) after ${formatDuration(elapsed)}`));
@@ -554,7 +639,6 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
554
639
  clearTimeout(timeoutId);
555
640
  clearInterval(timer);
556
641
  logStream.end();
557
- try { unlinkSync(promptFile); } catch {}
558
642
  process.stdout.write('\r\x1b[K');
559
643
  console.log(chalk.red(` Spawn error: ${err.message}`));
560
644
  resolve({ exitCode: 1, output: '', timedOut: false, signal: null });
@@ -599,7 +683,10 @@ async function runGatesWithStatus(cwd) {
599
683
  function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
600
684
  const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
601
685
  const story = prd.stories.find(s => s.id === storyId);
602
- if (!story) return '';
686
+ if (!story) {
687
+ console.log(chalk.yellow(` Warning: story ${storyId} not found in prd.json — may have been modified during execution`));
688
+ return '';
689
+ }
603
690
 
604
691
  const doneIds = prd.stories.filter(s => s.passes).map(s => s.id);
605
692
  const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
@@ -632,7 +719,7 @@ function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
632
719
  /**
633
720
  * Dry run: show formatted preview of eligible tasks.
634
721
  */
635
- function dryRunPreview(cwd, phaseFilter, maxIterations) {
722
+ function dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter = null) {
636
723
  const stories = parsePrdTasks(cwd);
637
724
  const total = stories.length;
638
725
  const done = stories.filter(s => s.passes).length;
@@ -642,12 +729,15 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
642
729
  console.log(` Progress: ${chalk.green(done)}/${total} done, ${chalk.yellow(remaining)} remaining`);
643
730
  console.log(` Max iterations: ${maxIterations}`);
644
731
  if (phaseFilter) console.log(` Phase filter: ${chalk.blue(phaseFilter)}`);
732
+ if (taskFilter) console.log(` Task filter: ${chalk.blue(taskFilter)}`);
645
733
 
646
734
  const doneIds = stories.filter(s => s.passes).map(s => s.id);
735
+ const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
647
736
  const eligible = stories.filter(s =>
648
737
  !s.passes &&
649
738
  s.dependencies.every(dep => doneIds.includes(dep)) &&
650
- (!phaseFilter || s.phase === phaseFilter)
739
+ (!phaseFilter || s.phase === phaseFilter) &&
740
+ (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
651
741
  );
652
742
 
653
743
  if (eligible.length === 0) {
@@ -683,7 +773,7 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
683
773
  console.log(chalk.dim('\n Run without --dry-run to execute.\n'));
684
774
  }
685
775
 
686
- function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd()) {
776
+ function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd(), taskFilter = null) {
687
777
  const config = loadConfig(cwd);
688
778
  const skipGates = config.skip_gates || [];
689
779
 
@@ -694,6 +784,7 @@ function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.
694
784
  console.log(` Progress: ${chalk.green(done)}/${total} done`);
695
785
  console.log(` Planning: ${noPlan ? chalk.yellow('disabled') : chalk.green('enabled')}`);
696
786
  if (phase) console.log(` Phase filter: ${chalk.blue(phase)}`);
787
+ if (taskFilter) console.log(` Task filter: ${chalk.blue(taskFilter)}`);
697
788
  if (skipGates.length > 0) {
698
789
  console.log(` ${chalk.yellow('⚠️ Skipping gates (no tooling):')} ${skipGates.join(', ')}`);
699
790
  }
@@ -392,9 +392,14 @@ export function convertBacklogToPrd(backlogContent, projectName = 'my-project')
392
392
  * @returns {object|null} Selected task or null
393
393
  */
394
394
  export function selectNextTask(cwd = process.cwd(), options = {}) {
395
+ const filterRe = options.filter ? new RegExp(options.filter, 'i') : null;
396
+
395
397
  // First check active tasks for incomplete ones
396
398
  const activeTasks = parseActiveTasks(cwd);
397
- const pendingActive = activeTasks.filter(t => !t.completed && !t.blockedBy);
399
+ const pendingActive = activeTasks.filter(t =>
400
+ !t.completed && !t.blockedBy &&
401
+ (!filterRe || filterRe.test(t.id) || filterRe.test(t.title))
402
+ );
398
403
 
399
404
  if (pendingActive.length > 0) {
400
405
  return pendingActive[0];
@@ -407,7 +412,8 @@ export function selectNextTask(cwd = process.cwd(), options = {}) {
407
412
  const eligible = stories.find(s =>
408
413
  !s.passes &&
409
414
  s.dependencies.every(dep => doneIds.includes(dep)) &&
410
- (!options.phase || s.phase === options.phase)
415
+ (!options.phase || s.phase === options.phase) &&
416
+ (!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
411
417
  );
412
418
 
413
419
  return eligible || null;
@@ -111,6 +111,26 @@ Defines:
111
111
  - Commit patterns
112
112
  - Completion signals
113
113
 
114
+ ### Writing stories for `prd.json`
115
+
116
+ Stories run with `--dangerously-skip-permissions` — the agent has full access. Write defensively.
117
+
118
+ **Never put these in batch stories:**
119
+ - Destructive git ops (`git filter-repo`, `BFG`, `push --force`, history rewriting)
120
+ - Destructive file ops (`rm -rf`, wiping directories)
121
+ - Database ops (`DROP TABLE`, prod migrations)
122
+ - Secret rotation (revoking keys, rotating credentials)
123
+ - External side effects (sending emails, creating PRs, publishing packages)
124
+
125
+ **Rule of thumb**: if a mistake needs human intervention to fix, it's not a batch story.
126
+
127
+ **Instead**, ask the agent to **prepare and document** — write the script, the docs, the config — but let a human execute the dangerous part.
128
+
129
+ **Always include in `notes`** what the agent must NOT do:
130
+ ```json
131
+ "notes": "Do NOT run git-filter-repo. Do NOT modify prd.json."
132
+ ```
133
+
114
134
  ## 🤖 Agents
115
135
 
116
136
  ### agent/claude.md
@@ -0,0 +1,36 @@
1
+ You are working on this project autonomously as part of a batch execution run.
2
+
3
+ ## Project Context
4
+ - Read CLAUDE.md (or .ai/agents/claude.md) for full project instructions and conventions
5
+ - Read .ai/memory/decisions.md for architectural decisions
6
+ - Read .ai/memory/patterns.md for established patterns
7
+ - Read .ai/batch/progress.txt for learnings from previous iterations
8
+
9
+ ## Current Task
10
+ **{STORY_ID}**: {STORY_TITLE}
11
+
12
+ ### Acceptance Criteria
13
+ {ACCEPTANCE_CRITERIA}
14
+
15
+ ### Notes
16
+ {NOTES}
17
+
18
+ ### Dependencies (already implemented)
19
+ {DEPENDENCIES}
20
+
21
+ ## Instructions
22
+ 1. Explore the codebase to understand existing patterns for similar features
23
+ 2. Implement the feature following existing conventions
24
+ 3. Write comprehensive tests (see tests/ for patterns)
25
+ 4. Run quality gates and fix any failures before committing
26
+ 5. Commit with: feat({STORY_ID}): {short description}
27
+ 6. Update .ai/tasks/active.md marking this task as done with today's date
28
+
29
+ ## Important
30
+ - Do NOT push to remote
31
+ - Do NOT modify unrelated files
32
+ - Do NOT edit .ai/tasks/prd.json or batch configuration files
33
+ - Do NOT run destructive operations (git filter-repo, rm -rf, DROP TABLE, force push)
34
+ - Do NOT rotate secrets, revoke keys, or modify credentials — those are human tasks
35
+ - Follow existing patterns exactly
36
+ - If you discover useful patterns, note them for progress.txt