@fermindi/pwn-cli 0.7.0 → 0.8.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/cli/backlog.js
CHANGED
|
@@ -19,9 +19,17 @@ export default async function backlogCommand(args = []) {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
const cwd = process.cwd();
|
|
22
|
-
|
|
22
|
+
let stories = parsePrdTasks(cwd);
|
|
23
23
|
const taskFiles = listTaskFiles(cwd);
|
|
24
24
|
|
|
25
|
+
// Parse --filter
|
|
26
|
+
const filterIdx = args.findIndex(a => a === '--filter');
|
|
27
|
+
const filter = filterIdx !== -1 ? args[filterIdx + 1] : null;
|
|
28
|
+
if (filter) {
|
|
29
|
+
const re = new RegExp(filter, 'i');
|
|
30
|
+
stories = stories.filter(s => re.test(s.id) || re.test(s.title));
|
|
31
|
+
}
|
|
32
|
+
|
|
25
33
|
// Read project name from prd.json
|
|
26
34
|
let project = 'project';
|
|
27
35
|
const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
|
|
@@ -33,11 +41,12 @@ export default async function backlogCommand(args = []) {
|
|
|
33
41
|
}
|
|
34
42
|
|
|
35
43
|
const noInteractive = args.includes('--no-interactive') || !process.stdout.isTTY;
|
|
44
|
+
const label = filter ? `${project} (filter: ${filter})` : project;
|
|
36
45
|
|
|
37
46
|
if (noInteractive) {
|
|
38
|
-
printPlain({ project, stories, taskFiles });
|
|
47
|
+
printPlain({ project: label, stories, taskFiles });
|
|
39
48
|
} else {
|
|
40
|
-
await startViewer({ project, stories, taskFiles });
|
|
49
|
+
await startViewer({ project: label, stories, taskFiles });
|
|
41
50
|
}
|
|
42
51
|
}
|
|
43
52
|
|
|
@@ -45,8 +54,12 @@ function showHelp() {
|
|
|
45
54
|
console.log('📋 PWN Backlog Viewer\n');
|
|
46
55
|
console.log('Usage: pwn backlog [options]\n');
|
|
47
56
|
console.log('Options:');
|
|
57
|
+
console.log(' --filter <pattern> Filter stories by ID or title (regex, case-insensitive)');
|
|
48
58
|
console.log(' --no-interactive Plain text output (for CI/piping)');
|
|
49
59
|
console.log(' --help, -h Show this help\n');
|
|
60
|
+
console.log('Examples:');
|
|
61
|
+
console.log(' pwn backlog --filter SEC # Only SEC-* stories');
|
|
62
|
+
console.log(' pwn backlog --filter "API|AUTH" # Stories matching API or AUTH\n');
|
|
50
63
|
console.log('Keybindings (list view):');
|
|
51
64
|
console.log(' ↑/k Move up');
|
|
52
65
|
console.log(' ↓/j Move down');
|
package/cli/batch.js
CHANGED
|
@@ -103,6 +103,8 @@ function parseRunOptions(args) {
|
|
|
103
103
|
options.dryRun = true;
|
|
104
104
|
} else if (arg === '--phase') {
|
|
105
105
|
options.phase = args[++i];
|
|
106
|
+
} else if (arg === '--filter') {
|
|
107
|
+
options.filter = args[++i];
|
|
106
108
|
} else if (arg === '--no-plan') {
|
|
107
109
|
options.noPlan = true;
|
|
108
110
|
} else if (arg === '--rate-limit-wait') {
|
|
@@ -410,6 +412,7 @@ function showHelp() {
|
|
|
410
412
|
console.log(' --continue Continue on errors');
|
|
411
413
|
console.log(' --no-commit Skip auto-commit');
|
|
412
414
|
console.log(' --no-branch Skip branch creation');
|
|
415
|
+
console.log(' --filter <pattern> Filter tasks by ID or title (regex, case-insensitive)');
|
|
413
416
|
console.log(' --no-plan Skip planning phase (use fixed 10min timeout)');
|
|
414
417
|
console.log(' --rate-limit-wait <s> Seconds to wait on rate limit (default: 1800)');
|
|
415
418
|
console.log(' --help, -h Show this help\n');
|
|
@@ -418,6 +421,8 @@ function showHelp() {
|
|
|
418
421
|
console.log(' pwn batch run # Run autonomous batch loop');
|
|
419
422
|
console.log(' pwn batch run --dry-run # Preview next story');
|
|
420
423
|
console.log(' pwn batch run --phase 3 # Run specific phase');
|
|
424
|
+
console.log(' pwn batch run --filter ORCH # Run only ORCH-* tasks');
|
|
425
|
+
console.log(' pwn batch run -f "UNI|DATA" # Run UNI or DATA tasks');
|
|
421
426
|
console.log(' pwn batch run --no-plan # Skip planning, fixed timeout');
|
|
422
427
|
console.log(' pwn batch tasks # List all task files');
|
|
423
428
|
console.log(' pwn batch tasks --failed # Show only failed tasks');
|
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Two-phase execution model:
|
|
5
5
|
* Phase 1: Planning — Claude estimates time and creates action plan
|
|
6
|
-
* Phase 2: Execution — Dynamic timeout based on estimate +
|
|
6
|
+
* Phase 2: Execution — Dynamic timeout based on the complexity-tier estimate + 5% margin
|
|
7
7
|
*
|
|
8
8
|
* Task files (.ai/batch/tasks/{US-ID}.json) track status per story.
|
|
9
9
|
* Completed files are cleaned up at the end; failed are kept for review.
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
import { spawn } from 'child_process';
|
|
13
13
|
import { existsSync, readFileSync, writeFileSync, mkdirSync, createWriteStream, appendFileSync, unlinkSync, readdirSync } from 'fs';
|
|
14
14
|
import { join } from 'path';
|
|
15
|
-
import { tmpdir } from 'os';
|
|
16
15
|
import ora from 'ora';
|
|
17
16
|
import chalk from 'chalk';
|
|
18
17
|
import {
|
|
@@ -27,9 +26,17 @@ import {
|
|
|
27
26
|
} from './batch-service.js';
|
|
28
27
|
|
|
29
28
|
// --- Constants ---
|
|
30
|
-
const RUNNER_VERSION = '2.
|
|
31
|
-
const DEFAULT_TIMEOUT_MS =
|
|
32
|
-
const MIN_TIMEOUT_MS =
|
|
29
|
+
const RUNNER_VERSION = '2.1';
|
|
30
|
+
const DEFAULT_TIMEOUT_MS = 900_000; // 15 minutes fallback
|
|
31
|
+
const MIN_TIMEOUT_MS = 300_000; // 5 minutes minimum (claude init ~30-40s + real work)
|
|
32
|
+
|
|
33
|
+
// Complexity → timeout mapping (based on real-world execution data)
|
|
34
|
+
// AI is bad at estimating seconds but decent at classifying complexity
|
|
35
|
+
const COMPLEXITY_TIMEOUT = {
|
|
36
|
+
low: { seconds: 300, label: '5m' }, // config change, small fix
|
|
37
|
+
medium: { seconds: 600, label: '10m' }, // new function, 1-3 files
|
|
38
|
+
high: { seconds: 900, label: '15m' }, // new module, multi-file refactor
|
|
39
|
+
};
|
|
33
40
|
const PLAN_TIMEOUT_MS = 120_000; // 2 minutes for planning phase (claude init ~30s)
|
|
34
41
|
const DEFAULT_RATE_LIMIT_WAIT = 1800; // 30 minutes (seconds)
|
|
35
42
|
const MAX_RETRIES = 2;
|
|
@@ -99,7 +106,7 @@ async function waitForRateLimit(waitSeconds, attempt) {
|
|
|
99
106
|
|
|
100
107
|
// --- Planning Phase ---
|
|
101
108
|
|
|
102
|
-
function buildPlanPrompt(task, cwd) {
|
|
109
|
+
function buildPlanPrompt(task, cwd, replanContext = null) {
|
|
103
110
|
const prdPath = join(cwd, '.ai', 'tasks', 'prd.json');
|
|
104
111
|
const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
|
|
105
112
|
const story = prd.stories.find(s => s.id === task.id);
|
|
@@ -107,42 +114,49 @@ function buildPlanPrompt(task, cwd) {
|
|
|
107
114
|
|
|
108
115
|
const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
|
|
109
116
|
|
|
117
|
+
let replanSection = '';
|
|
118
|
+
if (replanContext) {
|
|
119
|
+
replanSection = `
|
|
120
|
+
|
|
121
|
+
IMPORTANT — REPLANNING: A previous attempt FAILED.
|
|
122
|
+
- Previous complexity: ${replanContext.previousComplexity}
|
|
123
|
+
- Failure reason: ${replanContext.failureReason}
|
|
124
|
+
Re-evaluate the complexity. If it timed out, it's likely MORE complex than you initially thought.`;
|
|
125
|
+
}
|
|
126
|
+
|
|
110
127
|
return `You are analyzing task ${task.id}: ${task.title}
|
|
111
128
|
|
|
112
129
|
Acceptance criteria:
|
|
113
130
|
${acList}
|
|
114
131
|
|
|
115
132
|
Notes: ${story.notes || 'None'}
|
|
116
|
-
|
|
133
|
+
${replanSection}
|
|
117
134
|
Analyze this task and respond with ONLY a JSON object (no markdown, no code fences):
|
|
118
135
|
{
|
|
119
|
-
"estimated_time_seconds": <number>,
|
|
120
136
|
"plan": ["step 1", "step 2", ...],
|
|
121
137
|
"complexity": "low|medium|high",
|
|
122
138
|
"recommended_model": "opus|sonnet|haiku",
|
|
123
139
|
"files_likely_affected": ["path1", "path2"]
|
|
124
140
|
}
|
|
125
141
|
|
|
126
|
-
|
|
127
|
-
- low
|
|
128
|
-
- medium
|
|
129
|
-
- high
|
|
142
|
+
Classify complexity:
|
|
143
|
+
- "low": config change, small fix, single file tweak
|
|
144
|
+
- "medium": new function, simple feature, 1-3 files
|
|
145
|
+
- "high": new module, multi-file refactor, architecture change, 4+ files
|
|
130
146
|
|
|
131
147
|
Recommend a model for execution:
|
|
132
|
-
- "haiku": trivial (config change, typo, single-line fix)
|
|
133
|
-
- "sonnet": low-medium complexity (new function, simple feature, 1-3 files)
|
|
134
|
-
- "opus": high complexity (new module, multi-file refactor, architecture)
|
|
148
|
+
- "haiku": trivial (config change, typo, single-line fix)
|
|
149
|
+
- "sonnet": low-medium complexity (new function, simple feature, 1-3 files)
|
|
150
|
+
- "opus": high complexity (new module, multi-file refactor, architecture)`;
|
|
135
151
|
}
|
|
136
152
|
|
|
137
|
-
async function planTask(task, cwd) {
|
|
138
|
-
const prompt = buildPlanPrompt(task, cwd);
|
|
153
|
+
async function planTask(task, cwd, replanContext = null) {
|
|
154
|
+
const prompt = buildPlanPrompt(task, cwd, replanContext);
|
|
139
155
|
if (!prompt) return null;
|
|
140
156
|
|
|
141
|
-
const promptFile = join(tmpdir(), `pwn-plan-${Date.now()}.md`);
|
|
142
|
-
writeFileSync(promptFile, prompt);
|
|
143
|
-
|
|
144
157
|
const env = { ...process.env };
|
|
145
|
-
|
|
158
|
+
// Remove all Claude session markers to avoid "nested session" detection
|
|
159
|
+
Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
|
|
146
160
|
|
|
147
161
|
return new Promise((resolve) => {
|
|
148
162
|
let output = '';
|
|
@@ -150,13 +164,17 @@ async function planTask(task, cwd) {
|
|
|
150
164
|
// Planning uses --print WITHOUT --dangerously-skip-permissions (read-only)
|
|
151
165
|
const child = spawn('bash', [
|
|
152
166
|
'-c',
|
|
153
|
-
`claude --model opus --print -p "$(cat
|
|
167
|
+
`claude --model opus --print -p "$(cat)"`,
|
|
154
168
|
], {
|
|
155
169
|
cwd,
|
|
156
|
-
stdio: ['
|
|
170
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
157
171
|
env,
|
|
158
172
|
});
|
|
159
173
|
|
|
174
|
+
// Pass prompt via stdin
|
|
175
|
+
child.stdin.write(prompt);
|
|
176
|
+
child.stdin.end();
|
|
177
|
+
|
|
160
178
|
const timeoutId = setTimeout(() => {
|
|
161
179
|
child.kill('SIGTERM');
|
|
162
180
|
}, PLAN_TIMEOUT_MS);
|
|
@@ -166,7 +184,6 @@ async function planTask(task, cwd) {
|
|
|
166
184
|
|
|
167
185
|
child.on('close', (code, signal) => {
|
|
168
186
|
clearTimeout(timeoutId);
|
|
169
|
-
try { unlinkSync(promptFile); } catch {}
|
|
170
187
|
|
|
171
188
|
if (signal) {
|
|
172
189
|
console.log(chalk.dim(` Planning killed by ${signal} (timeout=${signal === 'SIGTERM' ? 'likely' : 'no'})`));
|
|
@@ -195,7 +212,6 @@ async function planTask(task, cwd) {
|
|
|
195
212
|
|
|
196
213
|
child.on('error', (err) => {
|
|
197
214
|
clearTimeout(timeoutId);
|
|
198
|
-
try { unlinkSync(promptFile); } catch {}
|
|
199
215
|
console.log(chalk.dim(` Planning spawn error: ${err.message}`));
|
|
200
216
|
resolve(null);
|
|
201
217
|
});
|
|
@@ -204,8 +220,7 @@ async function planTask(task, cwd) {
|
|
|
204
220
|
|
|
205
221
|
function computeTimeout(estimatedSeconds) {
|
|
206
222
|
if (!estimatedSeconds || estimatedSeconds <= 0) return DEFAULT_TIMEOUT_MS;
|
|
207
|
-
|
|
208
|
-
return Math.max(withMargin, MIN_TIMEOUT_MS);
|
|
223
|
+
return Math.max(Math.ceil(estimatedSeconds * 1.05) * 1000, MIN_TIMEOUT_MS);
|
|
209
224
|
}
|
|
210
225
|
|
|
211
226
|
/**
|
|
@@ -235,16 +250,27 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
235
250
|
const stories = parsePrdTasks(cwd);
|
|
236
251
|
const totalStories = stories.length;
|
|
237
252
|
const doneAtStart = stories.filter(s => s.passes).length;
|
|
238
|
-
const maxIterations = options.maxIterations || 20;
|
|
239
253
|
const phaseFilter = options.phase ? `Phase ${options.phase}` : undefined;
|
|
254
|
+
const taskFilter = options.filter || null;
|
|
255
|
+
|
|
256
|
+
// Count remaining eligible stories (respecting filters)
|
|
257
|
+
const doneIds = stories.filter(s => s.passes).map(s => s.id);
|
|
258
|
+
const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
|
|
259
|
+
const eligibleCount = stories.filter(s =>
|
|
260
|
+
!s.passes &&
|
|
261
|
+
s.dependencies.every(dep => doneIds.includes(dep)) &&
|
|
262
|
+
(!phaseFilter || s.phase === phaseFilter) &&
|
|
263
|
+
(!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
|
|
264
|
+
).length;
|
|
265
|
+
const maxIterations = options.maxIterations || eligibleCount;
|
|
240
266
|
|
|
241
267
|
// --- Dry run ---
|
|
242
268
|
if (options.dryRun) {
|
|
243
|
-
return dryRunPreview(cwd, phaseFilter, maxIterations);
|
|
269
|
+
return dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter);
|
|
244
270
|
}
|
|
245
271
|
|
|
246
272
|
// --- Print header ---
|
|
247
|
-
printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd);
|
|
273
|
+
printHeader(maxIterations, phaseFilter, totalStories, doneAtStart, noPlan, cwd, taskFilter);
|
|
248
274
|
|
|
249
275
|
// NO custom SIGINT handler — Ctrl+C uses default Node.js behavior (kills process group)
|
|
250
276
|
|
|
@@ -264,7 +290,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
264
290
|
while (iteration < maxIterations) {
|
|
265
291
|
iteration++;
|
|
266
292
|
|
|
267
|
-
const task = selectNextTask(cwd, { phase: phaseFilter });
|
|
293
|
+
const task = selectNextTask(cwd, { phase: phaseFilter, filter: taskFilter });
|
|
268
294
|
if (!task) {
|
|
269
295
|
console.log(chalk.green('\nAll eligible stories completed!'));
|
|
270
296
|
break;
|
|
@@ -282,8 +308,9 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
282
308
|
if (!noPlan) {
|
|
283
309
|
const existing = loadTaskFile(task.id, cwd);
|
|
284
310
|
|
|
285
|
-
|
|
286
|
-
|
|
311
|
+
const needsReplan = existing && existing.last_failure_type && existing.status !== 'completed';
|
|
312
|
+
if (existing && existing.status === 'planned' && existing.complexity !== 'unknown' && !needsReplan) {
|
|
313
|
+
// Reuse previous plan (only if it hasn't failed before)
|
|
287
314
|
taskFile = existing;
|
|
288
315
|
taskTimeoutMs = computeTimeout(existing.estimated_time_seconds);
|
|
289
316
|
console.log(chalk.dim(` Phase 1: Reusing plan for ${task.id} (${existing.complexity}, ~${formatDuration(existing.estimated_time_seconds)}, model: ${existing.recommended_model || 'sonnet'})`));
|
|
@@ -294,9 +321,11 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
294
321
|
const planResult = await planTask(task, cwd);
|
|
295
322
|
|
|
296
323
|
if (planResult) {
|
|
297
|
-
const
|
|
298
|
-
const
|
|
299
|
-
|
|
324
|
+
const complexity = planResult.complexity || 'medium';
|
|
325
|
+
const tier = COMPLEXITY_TIMEOUT[complexity] || COMPLEXITY_TIMEOUT.medium;
|
|
326
|
+
const estimatedSeconds = tier.seconds;
|
|
327
|
+
const timeoutSeconds = Math.ceil(estimatedSeconds * 1.05);
|
|
328
|
+
taskTimeoutMs = timeoutSeconds * 1000;
|
|
300
329
|
|
|
301
330
|
const recommendedModel = planResult.recommended_model || 'sonnet';
|
|
302
331
|
taskFile = {
|
|
@@ -306,7 +335,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
306
335
|
estimated_time_seconds: estimatedSeconds,
|
|
307
336
|
timeout_seconds: timeoutSeconds,
|
|
308
337
|
plan: planResult.plan || [],
|
|
309
|
-
complexity
|
|
338
|
+
complexity,
|
|
310
339
|
recommended_model: recommendedModel,
|
|
311
340
|
files_likely_affected: planResult.files_likely_affected || [],
|
|
312
341
|
created_at: new Date().toISOString(),
|
|
@@ -316,7 +345,7 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
316
345
|
saveTaskFile(taskFile, cwd);
|
|
317
346
|
|
|
318
347
|
planSpinner.succeed(chalk.green(
|
|
319
|
-
`Planned: ${
|
|
348
|
+
`Planned: ${complexity} complexity, timeout ${tier.label}, model: ${recommendedModel}`
|
|
320
349
|
));
|
|
321
350
|
} else {
|
|
322
351
|
// Fallback when planning fails
|
|
@@ -358,6 +387,10 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
358
387
|
}
|
|
359
388
|
|
|
360
389
|
const prompt = buildPrompt(task.id, cwd, prdPath, promptPath, errorContext);
|
|
390
|
+
if (!prompt) {
|
|
391
|
+
console.log(chalk.red(` Cannot build prompt for ${task.id} — skipping`));
|
|
392
|
+
break;
|
|
393
|
+
}
|
|
361
394
|
const logFile = join(logDir, `${task.id}_${timestamp()}.log`);
|
|
362
395
|
|
|
363
396
|
const estimatedSeconds = taskFile?.estimated_time_seconds || null;
|
|
@@ -380,8 +413,41 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
380
413
|
await waitForRateLimit(rateLimitWait, rateLimitAttempts);
|
|
381
414
|
continue;
|
|
382
415
|
}
|
|
383
|
-
|
|
384
|
-
|
|
416
|
+
|
|
417
|
+
const prevTimeout = Math.round(taskTimeoutMs / 1000);
|
|
418
|
+
console.log(chalk.yellow(` Timed out after ${prevTimeout}s`));
|
|
419
|
+
|
|
420
|
+
// --- Replan on timeout: bump complexity tier ---
|
|
421
|
+
if (taskFile) {
|
|
422
|
+
const prevComplexity = taskFile.complexity;
|
|
423
|
+
const prevEstimate = taskFile.estimated_time_seconds;
|
|
424
|
+
|
|
425
|
+
// Escalate complexity: low → medium → high, high stays high but doubles
|
|
426
|
+
const escalation = { low: 'medium', medium: 'high' };
|
|
427
|
+
const newComplexity = escalation[prevComplexity] || 'high';
|
|
428
|
+
const tier = COMPLEXITY_TIMEOUT[newComplexity] || COMPLEXITY_TIMEOUT.high;
|
|
429
|
+
|
|
430
|
+
// If already high, double the previous timeout
|
|
431
|
+
const newEstimate = prevComplexity === 'high'
|
|
432
|
+
? prevEstimate * 2
|
|
433
|
+
: tier.seconds;
|
|
434
|
+
const newTimeout = Math.ceil(newEstimate * 1.05);
|
|
435
|
+
taskTimeoutMs = newTimeout * 1000;
|
|
436
|
+
|
|
437
|
+
taskFile.estimated_time_seconds = newEstimate;
|
|
438
|
+
taskFile.timeout_seconds = newTimeout;
|
|
439
|
+
taskFile.complexity = newComplexity;
|
|
440
|
+
taskFile.recommended_model = 'opus'; // upgrade model on timeout
|
|
441
|
+
taskFile.replanned_at = new Date().toISOString();
|
|
442
|
+
taskFile.replan_reason = `timeout after ${prevTimeout}s (${prevComplexity} → ${newComplexity})`;
|
|
443
|
+
saveTaskFile(taskFile, cwd);
|
|
444
|
+
|
|
445
|
+
console.log(chalk.blue(
|
|
446
|
+
` Escalated: ${prevComplexity} → ${newComplexity}, timeout ${formatDuration(newTimeout)}, model: opus`
|
|
447
|
+
));
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
errorContext = `Session timed out after ${prevTimeout}s. The task was replanned with more time. Focus on core acceptance criteria first, then iterate.`;
|
|
385
451
|
retry++;
|
|
386
452
|
continue;
|
|
387
453
|
}
|
|
@@ -397,6 +463,14 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
397
463
|
}
|
|
398
464
|
console.log(chalk.yellow(` Claude exited with code ${result.exitCode}`));
|
|
399
465
|
errorContext = `Claude session failed with exit code ${result.exitCode}.`;
|
|
466
|
+
|
|
467
|
+
// Save error output snippet to task file for debugging
|
|
468
|
+
if (taskFile) {
|
|
469
|
+
taskFile.last_error_output = (result.output || '').slice(-2000);
|
|
470
|
+
taskFile.last_failure_type = 'crash';
|
|
471
|
+
saveTaskFile(taskFile, cwd);
|
|
472
|
+
}
|
|
473
|
+
|
|
400
474
|
retry++;
|
|
401
475
|
continue;
|
|
402
476
|
}
|
|
@@ -433,13 +507,22 @@ export async function runBatch(options = {}, cwd = process.cwd()) {
|
|
|
433
507
|
} else {
|
|
434
508
|
console.log(chalk.red(` Quality gates FAILED`));
|
|
435
509
|
errorContext = gatesResult.errorOutput;
|
|
510
|
+
|
|
511
|
+
// Save gate failure details to task file
|
|
512
|
+
if (taskFile) {
|
|
513
|
+
taskFile.last_error_output = (gatesResult.errorOutput || '').slice(-2000);
|
|
514
|
+
taskFile.last_failure_type = 'quality_gate';
|
|
515
|
+
saveTaskFile(taskFile, cwd);
|
|
516
|
+
}
|
|
517
|
+
|
|
436
518
|
retry++;
|
|
437
519
|
}
|
|
438
520
|
}
|
|
439
521
|
|
|
440
522
|
if (!storyDone) {
|
|
441
|
-
|
|
442
|
-
|
|
523
|
+
const failureType = taskFile?.last_failure_type || 'unknown';
|
|
524
|
+
console.log(chalk.red(` FAILED: ${task.id} after ${MAX_RETRIES} retries (${failureType})`));
|
|
525
|
+
appendProgress(progressPath, task.id, `FAILED after ${MAX_RETRIES} retries (${failureType}). Skipping.`);
|
|
443
526
|
|
|
444
527
|
// Update task file with failure
|
|
445
528
|
if (taskFile) {
|
|
@@ -474,23 +557,24 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
|
|
|
474
557
|
const logStream = createWriteStream(logFile);
|
|
475
558
|
const startTime = Date.now();
|
|
476
559
|
|
|
477
|
-
// Write prompt to temp file
|
|
478
|
-
const promptFile = join(tmpdir(), `pwn-prompt-${Date.now()}.md`);
|
|
479
|
-
writeFileSync(promptFile, prompt);
|
|
480
|
-
|
|
481
560
|
const env = { ...process.env };
|
|
482
|
-
|
|
561
|
+
// Remove all Claude session markers to avoid "nested session" detection
|
|
562
|
+
Object.keys(env).forEach(k => { if (k.startsWith('CLAUDE')) delete env[k]; });
|
|
483
563
|
|
|
484
564
|
const modelFlag = model ? `--model ${model} ` : '';
|
|
485
565
|
const child = spawn('bash', [
|
|
486
566
|
'-c',
|
|
487
|
-
`claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat
|
|
567
|
+
`claude ${modelFlag}--print --dangerously-skip-permissions -p "$(cat)"`,
|
|
488
568
|
], {
|
|
489
569
|
cwd,
|
|
490
|
-
stdio: ['
|
|
570
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
491
571
|
env,
|
|
492
572
|
});
|
|
493
573
|
|
|
574
|
+
// Pass prompt via stdin to avoid shell escaping issues and file race conditions
|
|
575
|
+
child.stdin.write(prompt);
|
|
576
|
+
child.stdin.end();
|
|
577
|
+
|
|
494
578
|
const modelLabel = model ? chalk.magenta(model) : chalk.dim('default');
|
|
495
579
|
console.log(chalk.dim(` Log: tail -f ${logFile}`));
|
|
496
580
|
console.log(chalk.dim(` PID: ${child.pid} | Model: `) + modelLabel + chalk.dim(` | Prompt: ${prompt.length} chars | Timeout: ${formatDuration(Math.round(timeoutMs / 1000))}`));
|
|
@@ -532,13 +616,14 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
|
|
|
532
616
|
clearTimeout(timeoutId);
|
|
533
617
|
clearInterval(timer);
|
|
534
618
|
logStream.end();
|
|
535
|
-
try { unlinkSync(promptFile); } catch {}
|
|
536
619
|
|
|
537
620
|
// Clear spinner line
|
|
538
621
|
process.stdout.write('\r\x1b[K');
|
|
539
622
|
|
|
540
623
|
const elapsed = Math.round((Date.now() - startTime) / 1000);
|
|
541
|
-
|
|
624
|
+
// Timeout detection: SIGTERM signal, OR exit code 143 (128+SIGTERM) near the timeout boundary
|
|
625
|
+
const nearTimeout = elapsed >= Math.floor(timeoutMs / 1000) - 2;
|
|
626
|
+
const timedOut = nearTimeout && (signal === 'SIGTERM' || code === 143);
|
|
542
627
|
|
|
543
628
|
if (signal && !timedOut) {
|
|
544
629
|
console.log(chalk.dim(` Claude killed (${signal}) after ${formatDuration(elapsed)}`));
|
|
@@ -554,7 +639,6 @@ function spawnClaude(prompt, task, iteration, maxIter, done, total, phase, logFi
|
|
|
554
639
|
clearTimeout(timeoutId);
|
|
555
640
|
clearInterval(timer);
|
|
556
641
|
logStream.end();
|
|
557
|
-
try { unlinkSync(promptFile); } catch {}
|
|
558
642
|
process.stdout.write('\r\x1b[K');
|
|
559
643
|
console.log(chalk.red(` Spawn error: ${err.message}`));
|
|
560
644
|
resolve({ exitCode: 1, output: '', timedOut: false, signal: null });
|
|
@@ -599,7 +683,10 @@ async function runGatesWithStatus(cwd) {
|
|
|
599
683
|
function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
|
|
600
684
|
const prd = JSON.parse(readFileSync(prdPath, 'utf8'));
|
|
601
685
|
const story = prd.stories.find(s => s.id === storyId);
|
|
602
|
-
if (!story)
|
|
686
|
+
if (!story) {
|
|
687
|
+
console.log(chalk.yellow(` Warning: story ${storyId} not found in prd.json — may have been modified during execution`));
|
|
688
|
+
return '';
|
|
689
|
+
}
|
|
603
690
|
|
|
604
691
|
const doneIds = prd.stories.filter(s => s.passes).map(s => s.id);
|
|
605
692
|
const acList = (story.acceptance_criteria || []).map(ac => `- ${ac}`).join('\n') || 'None';
|
|
@@ -632,7 +719,7 @@ function buildPrompt(storyId, cwd, prdPath, promptPath, extraContext) {
|
|
|
632
719
|
/**
|
|
633
720
|
* Dry run: show formatted preview of eligible tasks.
|
|
634
721
|
*/
|
|
635
|
-
function dryRunPreview(cwd, phaseFilter, maxIterations) {
|
|
722
|
+
function dryRunPreview(cwd, phaseFilter, maxIterations, taskFilter = null) {
|
|
636
723
|
const stories = parsePrdTasks(cwd);
|
|
637
724
|
const total = stories.length;
|
|
638
725
|
const done = stories.filter(s => s.passes).length;
|
|
@@ -642,12 +729,15 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
|
|
|
642
729
|
console.log(` Progress: ${chalk.green(done)}/${total} done, ${chalk.yellow(remaining)} remaining`);
|
|
643
730
|
console.log(` Max iterations: ${maxIterations}`);
|
|
644
731
|
if (phaseFilter) console.log(` Phase filter: ${chalk.blue(phaseFilter)}`);
|
|
732
|
+
if (taskFilter) console.log(` Task filter: ${chalk.blue(taskFilter)}`);
|
|
645
733
|
|
|
646
734
|
const doneIds = stories.filter(s => s.passes).map(s => s.id);
|
|
735
|
+
const filterRe = taskFilter ? new RegExp(taskFilter, 'i') : null;
|
|
647
736
|
const eligible = stories.filter(s =>
|
|
648
737
|
!s.passes &&
|
|
649
738
|
s.dependencies.every(dep => doneIds.includes(dep)) &&
|
|
650
|
-
(!phaseFilter || s.phase === phaseFilter)
|
|
739
|
+
(!phaseFilter || s.phase === phaseFilter) &&
|
|
740
|
+
(!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
|
|
651
741
|
);
|
|
652
742
|
|
|
653
743
|
if (eligible.length === 0) {
|
|
@@ -683,7 +773,7 @@ function dryRunPreview(cwd, phaseFilter, maxIterations) {
|
|
|
683
773
|
console.log(chalk.dim('\n Run without --dry-run to execute.\n'));
|
|
684
774
|
}
|
|
685
775
|
|
|
686
|
-
function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd()) {
|
|
776
|
+
function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.cwd(), taskFilter = null) {
|
|
687
777
|
const config = loadConfig(cwd);
|
|
688
778
|
const skipGates = config.skip_gates || [];
|
|
689
779
|
|
|
@@ -694,6 +784,7 @@ function printHeader(maxIter, phase, total, done, noPlan = false, cwd = process.
|
|
|
694
784
|
console.log(` Progress: ${chalk.green(done)}/${total} done`);
|
|
695
785
|
console.log(` Planning: ${noPlan ? chalk.yellow('disabled') : chalk.green('enabled')}`);
|
|
696
786
|
if (phase) console.log(` Phase filter: ${chalk.blue(phase)}`);
|
|
787
|
+
if (taskFilter) console.log(` Task filter: ${chalk.blue(taskFilter)}`);
|
|
697
788
|
if (skipGates.length > 0) {
|
|
698
789
|
console.log(` ${chalk.yellow('⚠️ Skipping gates (no tooling):')} ${skipGates.join(', ')}`);
|
|
699
790
|
}
|
|
@@ -392,9 +392,14 @@ export function convertBacklogToPrd(backlogContent, projectName = 'my-project')
|
|
|
392
392
|
* @returns {object|null} Selected task or null
|
|
393
393
|
*/
|
|
394
394
|
export function selectNextTask(cwd = process.cwd(), options = {}) {
|
|
395
|
+
const filterRe = options.filter ? new RegExp(options.filter, 'i') : null;
|
|
396
|
+
|
|
395
397
|
// First check active tasks for incomplete ones
|
|
396
398
|
const activeTasks = parseActiveTasks(cwd);
|
|
397
|
-
const pendingActive = activeTasks.filter(t =>
|
|
399
|
+
const pendingActive = activeTasks.filter(t =>
|
|
400
|
+
!t.completed && !t.blockedBy &&
|
|
401
|
+
(!filterRe || filterRe.test(t.id) || filterRe.test(t.title))
|
|
402
|
+
);
|
|
398
403
|
|
|
399
404
|
if (pendingActive.length > 0) {
|
|
400
405
|
return pendingActive[0];
|
|
@@ -407,7 +412,8 @@ export function selectNextTask(cwd = process.cwd(), options = {}) {
|
|
|
407
412
|
const eligible = stories.find(s =>
|
|
408
413
|
!s.passes &&
|
|
409
414
|
s.dependencies.every(dep => doneIds.includes(dep)) &&
|
|
410
|
-
(!options.phase || s.phase === options.phase)
|
|
415
|
+
(!options.phase || s.phase === options.phase) &&
|
|
416
|
+
(!filterRe || filterRe.test(s.id) || filterRe.test(s.title))
|
|
411
417
|
);
|
|
412
418
|
|
|
413
419
|
return eligible || null;
|
|
@@ -111,6 +111,26 @@ Defines:
|
|
|
111
111
|
- Commit patterns
|
|
112
112
|
- Completion signals
|
|
113
113
|
|
|
114
|
+
### Writing stories for `prd.json`
|
|
115
|
+
|
|
116
|
+
Stories run with `--dangerously-skip-permissions` — the agent has full access. Write defensively.
|
|
117
|
+
|
|
118
|
+
**Never put these in batch stories:**
|
|
119
|
+
- Destructive git ops (`git filter-repo`, `BFG`, `push --force`, history rewriting)
|
|
120
|
+
- Destructive file ops (`rm -rf`, wiping directories)
|
|
121
|
+
- Database ops (`DROP TABLE`, prod migrations)
|
|
122
|
+
- Secret rotation (revoking keys, rotating credentials)
|
|
123
|
+
- External side effects (sending emails, creating PRs, publishing packages)
|
|
124
|
+
|
|
125
|
+
**Rule of thumb**: if a mistake needs human intervention to fix, it's not a batch story.
|
|
126
|
+
|
|
127
|
+
**Instead**, ask the agent to **prepare and document** — write the script, the docs, the config — but let a human execute the dangerous part.
|
|
128
|
+
|
|
129
|
+
**Always include in `notes`** what the agent must NOT do:
|
|
130
|
+
```json
|
|
131
|
+
"notes": "Do NOT run git-filter-repo. Do NOT modify prd.json."
|
|
132
|
+
```
|
|
133
|
+
|
|
114
134
|
## 🤖 Agents
|
|
115
135
|
|
|
116
136
|
### agent/claude.md
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
You are working on this project autonomously as part of a batch execution run.
|
|
2
|
+
|
|
3
|
+
## Project Context
|
|
4
|
+
- Read CLAUDE.md (or .ai/agents/claude.md) for full project instructions and conventions
|
|
5
|
+
- Read .ai/memory/decisions.md for architectural decisions
|
|
6
|
+
- Read .ai/memory/patterns.md for established patterns
|
|
7
|
+
- Read .ai/batch/progress.txt for learnings from previous iterations
|
|
8
|
+
|
|
9
|
+
## Current Task
|
|
10
|
+
**{STORY_ID}**: {STORY_TITLE}
|
|
11
|
+
|
|
12
|
+
### Acceptance Criteria
|
|
13
|
+
{ACCEPTANCE_CRITERIA}
|
|
14
|
+
|
|
15
|
+
### Notes
|
|
16
|
+
{NOTES}
|
|
17
|
+
|
|
18
|
+
### Dependencies (already implemented)
|
|
19
|
+
{DEPENDENCIES}
|
|
20
|
+
|
|
21
|
+
## Instructions
|
|
22
|
+
1. Explore the codebase to understand existing patterns for similar features
|
|
23
|
+
2. Implement the feature following existing conventions
|
|
24
|
+
3. Write comprehensive tests (see tests/ for patterns)
|
|
25
|
+
4. Run quality gates and fix any failures before committing
|
|
26
|
+
5. Commit with: feat({STORY_ID}): {short description}
|
|
27
|
+
6. Update .ai/tasks/active.md marking this task as done with today's date
|
|
28
|
+
|
|
29
|
+
## Important
|
|
30
|
+
- Do NOT push to remote
|
|
31
|
+
- Do NOT modify unrelated files
|
|
32
|
+
- Do NOT edit .ai/tasks/prd.json or batch configuration files
|
|
33
|
+
- Do NOT run destructive operations (git filter-repo, rm -rf, DROP TABLE, force push)
|
|
34
|
+
- Do NOT rotate secrets, revoke keys, or modify credentials — those are human tasks
|
|
35
|
+
- Follow existing patterns exactly
|
|
36
|
+
- If you discover useful patterns, note them for progress.txt
|