@hyperdrive.bot/bmad-workflow 1.0.11 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/epics/create.d.ts +3 -0
- package/dist/commands/epics/create.js +15 -4
- package/dist/commands/stories/create.d.ts +3 -0
- package/dist/commands/stories/create.js +12 -4
- package/dist/commands/stories/develop.d.ts +3 -0
- package/dist/commands/stories/develop.js +12 -9
- package/dist/commands/stories/qa.d.ts +3 -0
- package/dist/commands/stories/qa.js +37 -11
- package/dist/commands/workflow.d.ts +3 -0
- package/dist/commands/workflow.js +15 -0
- package/dist/models/workflow-config.d.ts +15 -0
- package/dist/services/orchestration/workflow-orchestrator.js +32 -11
- package/dist/utils/retry.d.ts +29 -0
- package/dist/utils/retry.js +48 -0
- package/dist/utils/shared-flags.d.ts +3 -0
- package/dist/utils/shared-flags.js +15 -0
- package/package.json +1 -1
|
@@ -40,6 +40,9 @@ export default class EpicsCreate extends Command {
|
|
|
40
40
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
41
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
42
42
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
44
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
46
|
};
|
|
44
47
|
/**
|
|
45
48
|
* AI agent runner service (Claude or Gemini)
|
|
@@ -22,6 +22,7 @@ import * as colors from '../../utils/colors.js';
|
|
|
22
22
|
import { ValidationError } from '../../utils/errors.js';
|
|
23
23
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
24
24
|
import { createSpinner } from '../../utils/progress.js';
|
|
25
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
25
26
|
import { agentFlags } from '../../utils/shared-flags.js';
|
|
26
27
|
/**
|
|
27
28
|
* Epics Create Command
|
|
@@ -136,10 +137,13 @@ export default class EpicsCreate extends Command {
|
|
|
136
137
|
epicDir,
|
|
137
138
|
epics: toCreate,
|
|
138
139
|
interval: flags.interval,
|
|
140
|
+
maxRetries: flags['max-retries'],
|
|
139
141
|
prdPath: args['prd-path'],
|
|
140
142
|
prefix: flags.prefix || '',
|
|
141
143
|
references: flags.reference,
|
|
144
|
+
retryBackoff: flags['retry-backoff'],
|
|
142
145
|
task: flags.task,
|
|
146
|
+
timeout: flags.timeout,
|
|
143
147
|
});
|
|
144
148
|
// Display summary
|
|
145
149
|
this.displaySummary({
|
|
@@ -208,7 +212,7 @@ export default class EpicsCreate extends Command {
|
|
|
208
212
|
* Create epic files using Claude AI agents
|
|
209
213
|
*/
|
|
210
214
|
async createEpics(options) {
|
|
211
|
-
const { agent, epicDir, epics, interval, prdPath, prefix, references, task } = options;
|
|
215
|
+
const { agent, epicDir, epics, interval, maxRetries, prdPath, prefix, references, retryBackoff, task, timeout } = options;
|
|
212
216
|
const results = [];
|
|
213
217
|
/* eslint-disable no-await-in-loop */
|
|
214
218
|
for (let i = 0; i < epics.length; i++) {
|
|
@@ -217,10 +221,13 @@ export default class EpicsCreate extends Command {
|
|
|
217
221
|
agent,
|
|
218
222
|
epic,
|
|
219
223
|
epicDir,
|
|
224
|
+
maxRetries,
|
|
220
225
|
prdPath,
|
|
221
226
|
prefix,
|
|
222
227
|
references,
|
|
228
|
+
retryBackoff,
|
|
223
229
|
task,
|
|
230
|
+
timeout,
|
|
224
231
|
});
|
|
225
232
|
results.push(result);
|
|
226
233
|
// Wait interval if not last epic
|
|
@@ -235,7 +242,7 @@ export default class EpicsCreate extends Command {
|
|
|
235
242
|
* Create a single epic file
|
|
236
243
|
*/
|
|
237
244
|
async createSingleEpic(options) {
|
|
238
|
-
const { agent, epic, epicDir, prdPath, prefix, references, task } = options;
|
|
245
|
+
const { agent, epic, epicDir, maxRetries, prdPath, prefix, references, retryBackoff, task, timeout } = options;
|
|
239
246
|
const fileName = this.generateEpicFileName(epic, prefix);
|
|
240
247
|
const filePath = path.join(epicDir, fileName);
|
|
241
248
|
const spinner = createSpinner(`Creating Epic ${epic.number}: ${epic.title}...`);
|
|
@@ -262,10 +269,14 @@ export default class EpicsCreate extends Command {
|
|
|
262
269
|
});
|
|
263
270
|
// Step 3: Run Claude AI agent to populate epic content sections
|
|
264
271
|
spinner.text = `Populating epic ${epic.number} with AI agent...`;
|
|
265
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
272
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
266
273
|
agentType: 'sm',
|
|
267
274
|
references,
|
|
268
|
-
timeout:
|
|
275
|
+
timeout: timeout ?? 2_700_000,
|
|
276
|
+
}, {
|
|
277
|
+
backoffMs: retryBackoff,
|
|
278
|
+
logger: this.logger,
|
|
279
|
+
maxRetries,
|
|
269
280
|
});
|
|
270
281
|
if (!result.success) {
|
|
271
282
|
throw new Error(result.errors || 'Claude agent failed to populate epic');
|
|
@@ -38,6 +38,9 @@ export default class StoriesCreateCommand extends Command {
|
|
|
38
38
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
39
39
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
40
40
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
42
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
44
|
};
|
|
42
45
|
private agentRunner;
|
|
43
46
|
private batchProcessor;
|
|
@@ -24,6 +24,7 @@ import * as colors from '../../utils/colors.js';
|
|
|
24
24
|
import { ValidationError } from '../../utils/errors.js';
|
|
25
25
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
26
26
|
import { createSpinner } from '../../utils/progress.js';
|
|
27
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
27
28
|
import { agentFlags } from '../../utils/shared-flags.js';
|
|
28
29
|
/**
|
|
29
30
|
* Stories Create Command
|
|
@@ -220,10 +221,13 @@ export default class StoriesCreateCommand extends Command {
|
|
|
220
221
|
const processor = async (story) => this.createStory({
|
|
221
222
|
agent: flags.agent,
|
|
222
223
|
epicPath,
|
|
224
|
+
maxRetries: flags['max-retries'],
|
|
223
225
|
prefix: flags.prefix,
|
|
224
226
|
references: flags.reference,
|
|
227
|
+
retryBackoff: flags['retry-backoff'],
|
|
225
228
|
story,
|
|
226
229
|
task: flags.task,
|
|
230
|
+
timeout: flags.timeout,
|
|
227
231
|
});
|
|
228
232
|
// Progress callback for batch updates
|
|
229
233
|
const onProgress = (info) => {
|
|
@@ -266,7 +270,7 @@ export default class StoriesCreateCommand extends Command {
|
|
|
266
270
|
* Create a single story file
|
|
267
271
|
*/
|
|
268
272
|
async createStory(options) {
|
|
269
|
-
const { agent, epicPath, prefix, references, story, task } = options;
|
|
273
|
+
const { agent, epicPath, maxRetries, prefix, references, retryBackoff, story, task, timeout } = options;
|
|
270
274
|
const storyDir = this.pathResolver.getStoryDir();
|
|
271
275
|
const filename = this.generateStoryFilename(story, prefix);
|
|
272
276
|
const filePath = path.join(storyDir, filename);
|
|
@@ -290,10 +294,14 @@ export default class StoriesCreateCommand extends Command {
|
|
|
290
294
|
storyFilePath: absolutePath,
|
|
291
295
|
task,
|
|
292
296
|
});
|
|
293
|
-
// Step 3: Run Claude agent
|
|
294
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
297
|
+
// Step 3: Run Claude agent with retry on timeout/killed
|
|
298
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
295
299
|
agentType: 'sm',
|
|
296
|
-
timeout:
|
|
300
|
+
timeout: timeout ?? 2_700_000,
|
|
301
|
+
}, {
|
|
302
|
+
backoffMs: retryBackoff,
|
|
303
|
+
logger: this.logger,
|
|
304
|
+
maxRetries,
|
|
297
305
|
});
|
|
298
306
|
// Step 4: Verify file was updated
|
|
299
307
|
if (result.success) {
|
|
@@ -40,6 +40,9 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
40
40
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
41
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
42
42
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
44
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
46
|
};
|
|
44
47
|
private agentRunner;
|
|
45
48
|
private fileManager;
|
|
@@ -25,12 +25,8 @@ import { StoryParserFactory } from '../../services/parsers/story-parser-factory.
|
|
|
25
25
|
import * as colors from '../../utils/colors.js';
|
|
26
26
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
27
27
|
import { createSpinner } from '../../utils/progress.js';
|
|
28
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
28
29
|
import { agentFlags } from '../../utils/shared-flags.js';
|
|
29
|
-
/**
|
|
30
|
-
* Agent timeout in milliseconds (30 minutes)
|
|
31
|
-
* Dev agents need sufficient time for comprehensive implementation
|
|
32
|
-
*/
|
|
33
|
-
const DEV_AGENT_TIMEOUT_MS = 1_800_000;
|
|
34
30
|
/**
|
|
35
31
|
* Stories Develop Command
|
|
36
32
|
*
|
|
@@ -251,10 +247,13 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
251
247
|
const result = await this.developStory({
|
|
252
248
|
agent: flags.agent,
|
|
253
249
|
cwd: flags.cwd,
|
|
250
|
+
maxRetries: flags['max-retries'],
|
|
254
251
|
references: flags.reference,
|
|
252
|
+
retryBackoff: flags['retry-backoff'],
|
|
255
253
|
storyMetadata,
|
|
256
254
|
storyPath,
|
|
257
255
|
task: flags.task,
|
|
256
|
+
timeout: flags.timeout,
|
|
258
257
|
});
|
|
259
258
|
results.push(result);
|
|
260
259
|
if (result.success) {
|
|
@@ -275,7 +274,7 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
275
274
|
* Develop a single story
|
|
276
275
|
*/
|
|
277
276
|
async developStory(options) {
|
|
278
|
-
const { agent, cwd, references, storyMetadata, storyPath, task } = options;
|
|
277
|
+
const { agent, cwd, maxRetries, references, retryBackoff, storyMetadata, storyPath, task, timeout } = options;
|
|
279
278
|
const storyNumber = isEpicStory(storyMetadata) ? storyMetadata.number : storyMetadata.id;
|
|
280
279
|
this.logger.info({ storyNumber, storyPath }, 'Starting story development');
|
|
281
280
|
try {
|
|
@@ -305,11 +304,15 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
305
304
|
storyPath,
|
|
306
305
|
task,
|
|
307
306
|
});
|
|
308
|
-
// Step 4: Run Claude dev agent
|
|
307
|
+
// Step 4: Run Claude dev agent with retry on timeout/killed
|
|
309
308
|
this.logger.info({ storyNumber }, 'Running Claude dev agent');
|
|
310
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
309
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
311
310
|
agentType: 'dev',
|
|
312
|
-
timeout: DEV_AGENT_TIMEOUT_MS,
|
|
311
|
+
timeout: timeout ?? 2_700_000,
|
|
312
|
+
}, {
|
|
313
|
+
backoffMs: retryBackoff,
|
|
314
|
+
logger: this.logger,
|
|
315
|
+
maxRetries,
|
|
313
316
|
});
|
|
314
317
|
if (!result.success) {
|
|
315
318
|
throw new Error(`Dev agent failed: ${result.errors}`);
|
|
@@ -36,6 +36,9 @@ export default class StoriesQaCommand extends Command {
|
|
|
36
36
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
37
37
|
'qa-prompt': import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
38
38
|
reference: import("@oclif/core/interfaces").OptionFlag<string[] | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
39
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
40
|
+
'agent-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
39
42
|
};
|
|
40
43
|
private agentRunner;
|
|
41
44
|
private fileManager;
|
|
@@ -23,11 +23,7 @@ import { StoryParserFactory } from '../../services/parsers/story-parser-factory.
|
|
|
23
23
|
import * as colors from '../../utils/colors.js';
|
|
24
24
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
25
25
|
import { createSpinner } from '../../utils/progress.js';
|
|
26
|
-
/**
|
|
27
|
-
* Agent timeout in milliseconds (30 minutes)
|
|
28
|
-
* QA and Dev agents need longer timeouts for comprehensive analysis
|
|
29
|
-
*/
|
|
30
|
-
const AGENT_TIMEOUT_MS = 1_800_000;
|
|
26
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
31
27
|
/**
|
|
32
28
|
* Stories QA Command
|
|
33
29
|
*
|
|
@@ -89,6 +85,21 @@ export default class StoriesQaCommand extends Command {
|
|
|
89
85
|
description: 'Additional context files for agents',
|
|
90
86
|
multiple: true,
|
|
91
87
|
}),
|
|
88
|
+
timeout: Flags.integer({
|
|
89
|
+
default: 2_700_000,
|
|
90
|
+
description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
|
|
91
|
+
helpGroup: 'Resilience',
|
|
92
|
+
}),
|
|
93
|
+
'agent-retries': Flags.integer({
|
|
94
|
+
default: 0,
|
|
95
|
+
description: 'Max retries for timeout/killed agent failures (0 = no retry)',
|
|
96
|
+
helpGroup: 'Resilience',
|
|
97
|
+
}),
|
|
98
|
+
'retry-backoff': Flags.integer({
|
|
99
|
+
default: 5000,
|
|
100
|
+
description: 'Backoff delay between retries in milliseconds',
|
|
101
|
+
helpGroup: 'Resilience',
|
|
102
|
+
}),
|
|
92
103
|
};
|
|
93
104
|
// Service instances
|
|
94
105
|
agentRunner;
|
|
@@ -446,10 +457,17 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
|
|
|
446
457
|
try {
|
|
447
458
|
// Phase 1: Initial QA Deep Dive
|
|
448
459
|
this.log(colors.info(' Phase 1: QA Deep Dive Review...'));
|
|
460
|
+
const agentTimeout = flags.timeout ?? 2_700_000;
|
|
461
|
+
const agentRetries = flags['agent-retries'];
|
|
462
|
+
const retryBackoff = flags['retry-backoff'];
|
|
449
463
|
const qaPrompt = this.buildQaPrompt(storyPath, flags['qa-prompt'], flags.reference);
|
|
450
|
-
const qaResult = await this.agentRunner.runAgent(qaPrompt, {
|
|
464
|
+
const qaResult = await runAgentWithRetry(this.agentRunner, qaPrompt, {
|
|
451
465
|
agentType: 'tea',
|
|
452
|
-
timeout: AGENT_TIMEOUT_MS,
|
|
466
|
+
timeout: agentTimeout,
|
|
467
|
+
}, {
|
|
468
|
+
backoffMs: retryBackoff,
|
|
469
|
+
logger: this.logger,
|
|
470
|
+
maxRetries: agentRetries,
|
|
453
471
|
});
|
|
454
472
|
if (!qaResult.success) {
|
|
455
473
|
throw new Error(`QA agent failed: ${qaResult.errors}`);
|
|
@@ -465,9 +483,13 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
|
|
|
465
483
|
// Run Dev agent to fix issues (sequential retry loop by design)
|
|
466
484
|
const devPrompt = this.buildDevFixPrompt(storyPath, flags['dev-prompt'], flags.reference);
|
|
467
485
|
// eslint-disable-next-line no-await-in-loop
|
|
468
|
-
const devResult = await this.agentRunner.runAgent(devPrompt, {
|
|
486
|
+
const devResult = await runAgentWithRetry(this.agentRunner, devPrompt, {
|
|
469
487
|
agentType: 'dev',
|
|
470
|
-
timeout: AGENT_TIMEOUT_MS,
|
|
488
|
+
timeout: agentTimeout,
|
|
489
|
+
}, {
|
|
490
|
+
backoffMs: retryBackoff,
|
|
491
|
+
logger: this.logger,
|
|
492
|
+
maxRetries: agentRetries,
|
|
471
493
|
});
|
|
472
494
|
if (!devResult.success) {
|
|
473
495
|
this.logger.warn({ errors: devResult.errors, retriesUsed }, 'Dev fix-forward failed, continuing...');
|
|
@@ -476,9 +498,13 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
|
|
|
476
498
|
// Phase 3: Re-run QA to validate fixes
|
|
477
499
|
this.log(colors.info(` Phase 3: QA Re-validation (Retry ${retriesUsed})...`));
|
|
478
500
|
// eslint-disable-next-line no-await-in-loop
|
|
479
|
-
const reQaResult = await this.agentRunner.runAgent(qaPrompt, {
|
|
501
|
+
const reQaResult = await runAgentWithRetry(this.agentRunner, qaPrompt, {
|
|
480
502
|
agentType: 'tea',
|
|
481
|
-
timeout: AGENT_TIMEOUT_MS,
|
|
503
|
+
timeout: agentTimeout,
|
|
504
|
+
}, {
|
|
505
|
+
backoffMs: retryBackoff,
|
|
506
|
+
logger: this.logger,
|
|
507
|
+
maxRetries: agentRetries,
|
|
482
508
|
});
|
|
483
509
|
if (!reQaResult.success) {
|
|
484
510
|
this.logger.warn({ errors: reQaResult.errors, retriesUsed }, 'QA re-validation failed, continuing...');
|
|
@@ -42,6 +42,9 @@ export default class Workflow extends Command {
|
|
|
42
42
|
'skip-epics': import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
43
43
|
'skip-stories': import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
44
44
|
'story-interval': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
46
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
47
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
48
|
verbose: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
46
49
|
};
|
|
47
50
|
private cancelled;
|
|
@@ -118,6 +118,18 @@ export default class Workflow extends Command {
|
|
|
118
118
|
default: 60,
|
|
119
119
|
description: 'Seconds between story development',
|
|
120
120
|
}),
|
|
121
|
+
timeout: Flags.integer({
|
|
122
|
+
default: 2_700_000,
|
|
123
|
+
description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
|
|
124
|
+
}),
|
|
125
|
+
'max-retries': Flags.integer({
|
|
126
|
+
default: 0,
|
|
127
|
+
description: 'Max retries for timeout/killed agent failures (0 = no retry)',
|
|
128
|
+
}),
|
|
129
|
+
'retry-backoff': Flags.integer({
|
|
130
|
+
default: 5000,
|
|
131
|
+
description: 'Backoff delay between retries in milliseconds',
|
|
132
|
+
}),
|
|
121
133
|
verbose: Flags.boolean({
|
|
122
134
|
char: 'v',
|
|
123
135
|
default: false,
|
|
@@ -154,6 +166,7 @@ export default class Workflow extends Command {
|
|
|
154
166
|
dryRun: flags['dry-run'],
|
|
155
167
|
epicInterval: flags['epic-interval'],
|
|
156
168
|
input: args.input,
|
|
169
|
+
maxRetries: flags['max-retries'],
|
|
157
170
|
model: flags.model,
|
|
158
171
|
parallel: flags.parallel,
|
|
159
172
|
pipeline: flags.pipeline,
|
|
@@ -164,10 +177,12 @@ export default class Workflow extends Command {
|
|
|
164
177
|
qaPrompt: flags['qa-prompt'],
|
|
165
178
|
qaRetries: flags['qa-retries'],
|
|
166
179
|
references: flags.reference || [],
|
|
180
|
+
retryBackoffMs: flags['retry-backoff'],
|
|
167
181
|
skipDev: flags['skip-dev'],
|
|
168
182
|
skipEpics: flags['skip-epics'],
|
|
169
183
|
skipStories: flags['skip-stories'],
|
|
170
184
|
storyInterval: flags['story-interval'],
|
|
185
|
+
timeout: flags.timeout,
|
|
171
186
|
verbose: flags.verbose,
|
|
172
187
|
};
|
|
173
188
|
// Log configuration if verbose
|
|
@@ -147,6 +147,21 @@ export interface WorkflowConfig {
|
|
|
147
147
|
* @default 60
|
|
148
148
|
*/
|
|
149
149
|
storyInterval: number;
|
|
150
|
+
/**
|
|
151
|
+
* Agent execution timeout in milliseconds
|
|
152
|
+
* @default 2_700_000 (45 minutes)
|
|
153
|
+
*/
|
|
154
|
+
timeout?: number;
|
|
155
|
+
/**
|
|
156
|
+
* Maximum retries for timeout/killed agent failures
|
|
157
|
+
* @default 0 (no retries, backward compatible)
|
|
158
|
+
*/
|
|
159
|
+
maxRetries?: number;
|
|
160
|
+
/**
|
|
161
|
+
* Backoff delay between retries in milliseconds
|
|
162
|
+
* @default 5000 (5 seconds)
|
|
163
|
+
*/
|
|
164
|
+
retryBackoffMs?: number;
|
|
150
165
|
/**
|
|
151
166
|
* Detailed output mode
|
|
152
167
|
*
|
|
@@ -45,6 +45,7 @@
|
|
|
45
45
|
*/
|
|
46
46
|
import { isEpicStory } from '../../models/story.js';
|
|
47
47
|
import { ParserError, ValidationError } from '../../utils/errors.js';
|
|
48
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
48
49
|
import { PrdFixer } from '../parsers/prd-fixer.js';
|
|
49
50
|
import { FileScaffolder } from '../scaffolding/file-scaffolder.js';
|
|
50
51
|
import { BatchProcessor } from './batch-processor.js';
|
|
@@ -491,11 +492,15 @@ Write output to: ${outputPath}`;
|
|
|
491
492
|
prompt += '\n';
|
|
492
493
|
}
|
|
493
494
|
prompt += '*yolo mode*\n';
|
|
494
|
-
// Execute dev agent
|
|
495
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
495
|
+
// Execute dev agent with retry on timeout/killed
|
|
496
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
496
497
|
agentType: 'dev',
|
|
497
498
|
references: config.references,
|
|
498
|
-
timeout:
|
|
499
|
+
timeout: config.timeout ?? 2_700_000,
|
|
500
|
+
}, {
|
|
501
|
+
backoffMs: config.retryBackoffMs,
|
|
502
|
+
logger: this.logger,
|
|
503
|
+
maxRetries: config.maxRetries,
|
|
499
504
|
});
|
|
500
505
|
if (result.success) {
|
|
501
506
|
// Update story status to Done
|
|
@@ -688,10 +693,14 @@ Write output to: ${outputPath}`;
|
|
|
688
693
|
prompt += '\n';
|
|
689
694
|
}
|
|
690
695
|
prompt += '*yolo mode*\n';
|
|
691
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
696
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
692
697
|
agentType: 'dev',
|
|
693
698
|
references: config.references,
|
|
694
|
-
timeout:
|
|
699
|
+
timeout: config.timeout ?? 2_700_000,
|
|
700
|
+
}, {
|
|
701
|
+
backoffMs: config.retryBackoffMs,
|
|
702
|
+
logger: this.logger,
|
|
703
|
+
maxRetries: config.maxRetries,
|
|
695
704
|
});
|
|
696
705
|
if (result.success) {
|
|
697
706
|
// Update story status to Done
|
|
@@ -898,10 +907,14 @@ Write output to: ${outputPath}`;
|
|
|
898
907
|
}, 'Claude Prompt (Epic)');
|
|
899
908
|
}
|
|
900
909
|
// Step 3: Run Claude agent to populate content sections
|
|
901
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
910
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
902
911
|
agentType: 'architect',
|
|
903
912
|
references: config.references,
|
|
904
|
-
timeout:
|
|
913
|
+
timeout: config.timeout ?? 2_700_000,
|
|
914
|
+
}, {
|
|
915
|
+
backoffMs: config.retryBackoffMs,
|
|
916
|
+
logger: this.logger,
|
|
917
|
+
maxRetries: config.maxRetries,
|
|
905
918
|
});
|
|
906
919
|
// Log output if verbose
|
|
907
920
|
if (config.verbose) {
|
|
@@ -1457,10 +1470,14 @@ Write output to: ${outputPath}`;
|
|
|
1457
1470
|
}, 'Claude Prompt (Story)');
|
|
1458
1471
|
}
|
|
1459
1472
|
// Step 4: Run Claude agent to populate content sections
|
|
1460
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
1473
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
1461
1474
|
agentType: 'sm',
|
|
1462
1475
|
references: config.references,
|
|
1463
|
-
timeout:
|
|
1476
|
+
timeout: config.timeout ?? 2_700_000,
|
|
1477
|
+
}, {
|
|
1478
|
+
backoffMs: config.retryBackoffMs,
|
|
1479
|
+
logger: this.logger,
|
|
1480
|
+
maxRetries: config.maxRetries,
|
|
1464
1481
|
});
|
|
1465
1482
|
// Log output if verbose
|
|
1466
1483
|
if (config.verbose) {
|
|
@@ -1684,10 +1701,14 @@ Write output to: ${outputPath}`;
|
|
|
1684
1701
|
}, 'Claude Prompt (Story)');
|
|
1685
1702
|
}
|
|
1686
1703
|
// Step 4: Run Claude agent to populate content sections
|
|
1687
|
-
const result = await this.agentRunner.runAgent(prompt, {
|
|
1704
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
1688
1705
|
agentType: 'sm',
|
|
1689
1706
|
references: config.references,
|
|
1690
|
-
timeout:
|
|
1707
|
+
timeout: config.timeout ?? 2_700_000,
|
|
1708
|
+
}, {
|
|
1709
|
+
backoffMs: config.retryBackoffMs,
|
|
1710
|
+
logger: this.logger,
|
|
1711
|
+
maxRetries: config.maxRetries,
|
|
1691
1712
|
});
|
|
1692
1713
|
// Log output if verbose
|
|
1693
1714
|
if (config.verbose) {
|
package/dist/utils/retry.d.ts
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
* transient failures gracefully.
|
|
6
6
|
*/
|
|
7
7
|
import type pino from 'pino';
|
|
8
|
+
import type { AgentOptions, AgentResult } from '../models/index.js';
|
|
9
|
+
import type { AIProviderRunner } from '../services/agents/agent-runner.js';
|
|
8
10
|
/**
|
|
9
11
|
* Options for configuring retry behavior
|
|
10
12
|
*/
|
|
@@ -112,3 +114,30 @@ export declare class RetryStrategy {
|
|
|
112
114
|
* })
|
|
113
115
|
*/
|
|
114
116
|
export declare function isClaudeCliRetryable(error: Error): boolean;
|
|
117
|
+
/**
 * Retry settings accepted by runAgentWithRetry. Every field is optional.
 */
export interface RunAgentWithRetryOptions {
    /** Maximum number of retry attempts; 0 disables retrying and keeps the pre-existing behavior. Default: 0 */
    maxRetries?: number;
    /** Delay between retries in milliseconds. Default: 5000 (kept short because the process has already waited out the timeout) */
    backoffMs?: number;
    /** Logger that receives retry messages */
    logger?: pino.Logger;
}
|
|
128
|
+
/**
 * Run an AI agent, retrying automatically on timeout/killed failures.
 *
 * runAgent() reports failure through the returned AgentResult, whereas
 * RetryStrategy retries operations that throw. This function adapts one to
 * the other:
 * - with maxRetries <= 0 it simply delegates to runAgent (no retry overhead);
 * - otherwise each attempt is wrapped in a throwing closure for RetryStrategy;
 * - once retries are exhausted the last AgentResult is handed back to the caller.
 *
 * @param agentRunner - AI provider runner that executes the agent
 * @param prompt - Prompt text to send
 * @param options - Agent execution options (timeout, agentType, etc.)
 * @param retryOptions - Retry configuration
 * @returns Promise resolving to the AgentResult (intended to follow runAgent's
 *   contract of reporting failure through the result rather than by throwing)
 */
export declare function runAgentWithRetry(
    agentRunner: AIProviderRunner,
    prompt: string,
    options: Omit<AgentOptions, 'prompt'>,
    retryOptions?: RunAgentWithRetryOptions,
): Promise<AgentResult>;
|
package/dist/utils/retry.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Provides configurable retry logic with exponential backoff for handling
|
|
5
5
|
* transient failures gracefully.
|
|
6
6
|
*/
|
|
7
|
+
import { AgentError } from './errors.js';
|
|
7
8
|
import { createLogger } from './logger.js';
|
|
8
9
|
/**
|
|
9
10
|
* Retry strategy implementation with configurable exponential backoff
|
|
@@ -158,3 +159,50 @@ export function isClaudeCliRetryable(error) {
|
|
|
158
159
|
// Default to not retryable if we can't determine exit code
|
|
159
160
|
return false;
|
|
160
161
|
}
|
|
162
|
+
/**
 * Execute an AI agent with automatic retry on timeout/killed failures.
 *
 * Bridges a contract gap: runAgent() reports failure through
 * AgentResult.success, while RetryStrategy retries operations that throw.
 * 1. When maxRetries <= 0 the call is delegated straight to runAgent, so the
 *    behavior without retries is unchanged.
 * 2. Otherwise each attempt runs inside RetryStrategy; an unsuccessful result
 *    is turned into an AgentError so the strategy (via isClaudeCliRetryable)
 *    can decide whether to try again.
 * 3. When the strategy gives up, the most recent AgentResult is returned so
 *    callers can keep inspecting result.success / result.errors.
 *
 * @param agentRunner - The AI provider runner to execute
 * @param prompt - The prompt to send
 * @param options - Agent execution options (timeout, agentType, etc.)
 * @param retryOptions - Retry configuration (backoffMs, logger, maxRetries)
 * @returns The AgentResult of the last attempt that produced one
 * @throws Re-throws the underlying error only when no attempt produced an
 *   AgentResult at all (for example runAgent itself threw), rather than
 *   resolving to undefined and failing later at the caller's result.success check
 */
export async function runAgentWithRetry(agentRunner, prompt, options, retryOptions = {}) {
    const maxRetries = retryOptions.maxRetries ?? 0;
    // Short-circuit: no retries configured — preserve exact existing behavior
    if (maxRetries <= 0) {
        return agentRunner.runAgent(prompt, options);
    }
    // Most recent result seen by any attempt; stays undefined until runAgent resolves once
    let lastResult;
    const strategy = new RetryStrategy({
        backoffMs: retryOptions.backoffMs ?? 5_000,
        backoffMultiplier: 1.5,
        isRetryable: isClaudeCliRetryable,
        logger: retryOptions.logger,
        maxRetries,
    });
    try {
        return await strategy.execute(async () => {
            lastResult = await agentRunner.runAgent(prompt, options);
            if (!lastResult.success) {
                // Convert the failed result into an exception so RetryStrategy can act on it
                throw new AgentError(`Agent ${lastResult.agentType} failed with exit code ${lastResult.exitCode}`, {
                    agentType: lastResult.agentType,
                    exitCode: lastResult.exitCode,
                });
            }
            return lastResult;
        });
    }
    catch (error) {
        // No AgentResult was ever captured: surface the real error instead of
        // handing callers undefined.
        if (lastResult === undefined) {
            throw error;
        }
        // All retries exhausted or non-retryable failure — return the last AgentResult
        // so callers handle it exactly like a plain runAgent failure.
        return lastResult;
    }
}
|
|
@@ -26,4 +26,7 @@ export declare const agentFlags: {
|
|
|
26
26
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
27
27
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
28
28
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
29
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
30
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
31
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
29
32
|
};
|
|
@@ -44,4 +44,19 @@ export const agentFlags = {
|
|
|
44
44
|
description: 'Override which task command to execute (e.g., develop-story, draft, review-implementation). Defaults to command-appropriate task.',
|
|
45
45
|
helpGroup: 'Agent Customization',
|
|
46
46
|
}),
|
|
47
|
+
timeout: Flags.integer({
|
|
48
|
+
default: 2_700_000,
|
|
49
|
+
description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
|
|
50
|
+
helpGroup: 'Resilience',
|
|
51
|
+
}),
|
|
52
|
+
'max-retries': Flags.integer({
|
|
53
|
+
default: 0,
|
|
54
|
+
description: 'Max retries for timeout/killed agent failures (0 = no retry)',
|
|
55
|
+
helpGroup: 'Resilience',
|
|
56
|
+
}),
|
|
57
|
+
'retry-backoff': Flags.integer({
|
|
58
|
+
default: 5000,
|
|
59
|
+
description: 'Backoff delay between retries in milliseconds',
|
|
60
|
+
helpGroup: 'Resilience',
|
|
61
|
+
}),
|
|
47
62
|
};
|
package/package.json
CHANGED