@hyperdrive.bot/bmad-workflow 1.0.11 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/epics/create.d.ts +3 -0
- package/dist/commands/epics/create.js +15 -4
- package/dist/commands/stories/create.d.ts +3 -0
- package/dist/commands/stories/create.js +12 -4
- package/dist/commands/stories/develop.d.ts +3 -0
- package/dist/commands/stories/develop.js +12 -9
- package/dist/commands/stories/qa.d.ts +3 -0
- package/dist/commands/stories/qa.js +37 -11
- package/dist/commands/workflow.d.ts +3 -0
- package/dist/commands/workflow.js +15 -0
- package/dist/models/workflow-config.d.ts +15 -0
- package/dist/services/orchestration/workflow-orchestrator.d.ts +3 -1
- package/dist/services/orchestration/workflow-orchestrator.js +44 -20
- package/dist/utils/retry.d.ts +29 -0
- package/dist/utils/retry.js +48 -0
- package/dist/utils/shared-flags.d.ts +3 -0
- package/dist/utils/shared-flags.js +15 -0
- package/package.json +1 -1
|
@@ -40,6 +40,9 @@ export default class EpicsCreate extends Command {
|
|
|
40
40
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
41
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
42
42
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
44
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
46
|
};
|
|
44
47
|
/**
|
|
45
48
|
* AI agent runner service (Claude or Gemini)
|
|
@@ -22,6 +22,7 @@ import * as colors from '../../utils/colors.js';
|
|
|
22
22
|
import { ValidationError } from '../../utils/errors.js';
|
|
23
23
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
24
24
|
import { createSpinner } from '../../utils/progress.js';
|
|
25
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
25
26
|
import { agentFlags } from '../../utils/shared-flags.js';
|
|
26
27
|
/**
|
|
27
28
|
* Epics Create Command
|
|
@@ -136,10 +137,13 @@ export default class EpicsCreate extends Command {
|
|
|
136
137
|
epicDir,
|
|
137
138
|
epics: toCreate,
|
|
138
139
|
interval: flags.interval,
|
|
140
|
+
maxRetries: flags['max-retries'],
|
|
139
141
|
prdPath: args['prd-path'],
|
|
140
142
|
prefix: flags.prefix || '',
|
|
141
143
|
references: flags.reference,
|
|
144
|
+
retryBackoff: flags['retry-backoff'],
|
|
142
145
|
task: flags.task,
|
|
146
|
+
timeout: flags.timeout,
|
|
143
147
|
});
|
|
144
148
|
// Display summary
|
|
145
149
|
this.displaySummary({
|
|
@@ -208,7 +212,7 @@ export default class EpicsCreate extends Command {
|
|
|
208
212
|
* Create epic files using Claude AI agents
|
|
209
213
|
*/
|
|
210
214
|
async createEpics(options) {
|
|
211
|
-
const { agent, epicDir, epics, interval, prdPath, prefix, references, task } = options;
|
|
215
|
+
const { agent, epicDir, epics, interval, maxRetries, prdPath, prefix, references, retryBackoff, task, timeout } = options;
|
|
212
216
|
const results = [];
|
|
213
217
|
/* eslint-disable no-await-in-loop */
|
|
214
218
|
for (let i = 0; i < epics.length; i++) {
|
|
@@ -217,10 +221,13 @@ export default class EpicsCreate extends Command {
|
|
|
217
221
|
agent,
|
|
218
222
|
epic,
|
|
219
223
|
epicDir,
|
|
224
|
+
maxRetries,
|
|
220
225
|
prdPath,
|
|
221
226
|
prefix,
|
|
222
227
|
references,
|
|
228
|
+
retryBackoff,
|
|
223
229
|
task,
|
|
230
|
+
timeout,
|
|
224
231
|
});
|
|
225
232
|
results.push(result);
|
|
226
233
|
// Wait interval if not last epic
|
|
@@ -235,7 +242,7 @@ export default class EpicsCreate extends Command {
|
|
|
235
242
|
* Create a single epic file
|
|
236
243
|
*/
|
|
237
244
|
async createSingleEpic(options) {
|
|
238
|
-
const { agent, epic, epicDir, prdPath, prefix, references, task } = options;
|
|
245
|
+
const { agent, epic, epicDir, maxRetries, prdPath, prefix, references, retryBackoff, task, timeout } = options;
|
|
239
246
|
const fileName = this.generateEpicFileName(epic, prefix);
|
|
240
247
|
const filePath = path.join(epicDir, fileName);
|
|
241
248
|
const spinner = createSpinner(`Creating Epic ${epic.number}: ${epic.title}...`);
|
|
@@ -262,10 +269,14 @@ export default class EpicsCreate extends Command {
|
|
|
262
269
|
});
|
|
263
270
|
// Step 3: Run Claude AI agent to populate epic content sections
|
|
264
271
|
spinner.text = `Populating epic ${epic.number} with AI agent...`;
|
|
265
|
-
const result = await this.agentRunner
|
|
272
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
266
273
|
agentType: 'sm',
|
|
267
274
|
references,
|
|
268
|
-
timeout:
|
|
275
|
+
timeout: timeout ?? 2_700_000,
|
|
276
|
+
}, {
|
|
277
|
+
backoffMs: retryBackoff,
|
|
278
|
+
logger: this.logger,
|
|
279
|
+
maxRetries,
|
|
269
280
|
});
|
|
270
281
|
if (!result.success) {
|
|
271
282
|
throw new Error(result.errors || 'Claude agent failed to populate epic');
|
|
@@ -38,6 +38,9 @@ export default class StoriesCreateCommand extends Command {
|
|
|
38
38
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
39
39
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
40
40
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
42
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
44
|
};
|
|
42
45
|
private agentRunner;
|
|
43
46
|
private batchProcessor;
|
|
@@ -24,6 +24,7 @@ import * as colors from '../../utils/colors.js';
|
|
|
24
24
|
import { ValidationError } from '../../utils/errors.js';
|
|
25
25
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
26
26
|
import { createSpinner } from '../../utils/progress.js';
|
|
27
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
27
28
|
import { agentFlags } from '../../utils/shared-flags.js';
|
|
28
29
|
/**
|
|
29
30
|
* Stories Create Command
|
|
@@ -220,10 +221,13 @@ export default class StoriesCreateCommand extends Command {
|
|
|
220
221
|
const processor = async (story) => this.createStory({
|
|
221
222
|
agent: flags.agent,
|
|
222
223
|
epicPath,
|
|
224
|
+
maxRetries: flags['max-retries'],
|
|
223
225
|
prefix: flags.prefix,
|
|
224
226
|
references: flags.reference,
|
|
227
|
+
retryBackoff: flags['retry-backoff'],
|
|
225
228
|
story,
|
|
226
229
|
task: flags.task,
|
|
230
|
+
timeout: flags.timeout,
|
|
227
231
|
});
|
|
228
232
|
// Progress callback for batch updates
|
|
229
233
|
const onProgress = (info) => {
|
|
@@ -266,7 +270,7 @@ export default class StoriesCreateCommand extends Command {
|
|
|
266
270
|
* Create a single story file
|
|
267
271
|
*/
|
|
268
272
|
async createStory(options) {
|
|
269
|
-
const { agent, epicPath, prefix, references, story, task } = options;
|
|
273
|
+
const { agent, epicPath, maxRetries, prefix, references, retryBackoff, story, task, timeout } = options;
|
|
270
274
|
const storyDir = this.pathResolver.getStoryDir();
|
|
271
275
|
const filename = this.generateStoryFilename(story, prefix);
|
|
272
276
|
const filePath = path.join(storyDir, filename);
|
|
@@ -290,10 +294,14 @@ export default class StoriesCreateCommand extends Command {
|
|
|
290
294
|
storyFilePath: absolutePath,
|
|
291
295
|
task,
|
|
292
296
|
});
|
|
293
|
-
// Step 3: Run Claude agent
|
|
294
|
-
const result = await this.agentRunner
|
|
297
|
+
// Step 3: Run Claude agent with retry on timeout/killed
|
|
298
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
295
299
|
agentType: 'sm',
|
|
296
|
-
timeout:
|
|
300
|
+
timeout: timeout ?? 2_700_000,
|
|
301
|
+
}, {
|
|
302
|
+
backoffMs: retryBackoff,
|
|
303
|
+
logger: this.logger,
|
|
304
|
+
maxRetries,
|
|
297
305
|
});
|
|
298
306
|
// Step 4: Verify file was updated
|
|
299
307
|
if (result.success) {
|
|
@@ -40,6 +40,9 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
40
40
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
41
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
42
42
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
44
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
43
46
|
};
|
|
44
47
|
private agentRunner;
|
|
45
48
|
private fileManager;
|
|
@@ -25,12 +25,8 @@ import { StoryParserFactory } from '../../services/parsers/story-parser-factory.
|
|
|
25
25
|
import * as colors from '../../utils/colors.js';
|
|
26
26
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
27
27
|
import { createSpinner } from '../../utils/progress.js';
|
|
28
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
28
29
|
import { agentFlags } from '../../utils/shared-flags.js';
|
|
29
|
-
/**
|
|
30
|
-
* Agent timeout in milliseconds (30 minutes)
|
|
31
|
-
* Dev agents need sufficient time for comprehensive implementation
|
|
32
|
-
*/
|
|
33
|
-
const DEV_AGENT_TIMEOUT_MS = 1_800_000;
|
|
34
30
|
/**
|
|
35
31
|
* Stories Develop Command
|
|
36
32
|
*
|
|
@@ -251,10 +247,13 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
251
247
|
const result = await this.developStory({
|
|
252
248
|
agent: flags.agent,
|
|
253
249
|
cwd: flags.cwd,
|
|
250
|
+
maxRetries: flags['max-retries'],
|
|
254
251
|
references: flags.reference,
|
|
252
|
+
retryBackoff: flags['retry-backoff'],
|
|
255
253
|
storyMetadata,
|
|
256
254
|
storyPath,
|
|
257
255
|
task: flags.task,
|
|
256
|
+
timeout: flags.timeout,
|
|
258
257
|
});
|
|
259
258
|
results.push(result);
|
|
260
259
|
if (result.success) {
|
|
@@ -275,7 +274,7 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
275
274
|
* Develop a single story
|
|
276
275
|
*/
|
|
277
276
|
async developStory(options) {
|
|
278
|
-
const { agent, cwd, references, storyMetadata, storyPath, task } = options;
|
|
277
|
+
const { agent, cwd, maxRetries, references, retryBackoff, storyMetadata, storyPath, task, timeout } = options;
|
|
279
278
|
const storyNumber = isEpicStory(storyMetadata) ? storyMetadata.number : storyMetadata.id;
|
|
280
279
|
this.logger.info({ storyNumber, storyPath }, 'Starting story development');
|
|
281
280
|
try {
|
|
@@ -305,11 +304,15 @@ export default class StoriesDevelopCommand extends Command {
|
|
|
305
304
|
storyPath,
|
|
306
305
|
task,
|
|
307
306
|
});
|
|
308
|
-
// Step 4: Run Claude dev agent
|
|
307
|
+
// Step 4: Run Claude dev agent with retry on timeout/killed
|
|
309
308
|
this.logger.info({ storyNumber }, 'Running Claude dev agent');
|
|
310
|
-
const result = await this.agentRunner
|
|
309
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
311
310
|
agentType: 'dev',
|
|
312
|
-
timeout:
|
|
311
|
+
timeout: timeout ?? 2_700_000,
|
|
312
|
+
}, {
|
|
313
|
+
backoffMs: retryBackoff,
|
|
314
|
+
logger: this.logger,
|
|
315
|
+
maxRetries,
|
|
313
316
|
});
|
|
314
317
|
if (!result.success) {
|
|
315
318
|
throw new Error(`Dev agent failed: ${result.errors}`);
|
|
@@ -36,6 +36,9 @@ export default class StoriesQaCommand extends Command {
|
|
|
36
36
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
37
37
|
'qa-prompt': import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
38
38
|
reference: import("@oclif/core/interfaces").OptionFlag<string[] | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
39
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
40
|
+
'agent-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
41
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
39
42
|
};
|
|
40
43
|
private agentRunner;
|
|
41
44
|
private fileManager;
|
|
@@ -23,11 +23,7 @@ import { StoryParserFactory } from '../../services/parsers/story-parser-factory.
|
|
|
23
23
|
import * as colors from '../../utils/colors.js';
|
|
24
24
|
import { createLogger, generateCorrelationId } from '../../utils/logger.js';
|
|
25
25
|
import { createSpinner } from '../../utils/progress.js';
|
|
26
|
-
|
|
27
|
-
* Agent timeout in milliseconds (30 minutes)
|
|
28
|
-
* QA and Dev agents need longer timeouts for comprehensive analysis
|
|
29
|
-
*/
|
|
30
|
-
const AGENT_TIMEOUT_MS = 1_800_000;
|
|
26
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
31
27
|
/**
|
|
32
28
|
* Stories QA Command
|
|
33
29
|
*
|
|
@@ -89,6 +85,21 @@ export default class StoriesQaCommand extends Command {
|
|
|
89
85
|
description: 'Additional context files for agents',
|
|
90
86
|
multiple: true,
|
|
91
87
|
}),
|
|
88
|
+
timeout: Flags.integer({
|
|
89
|
+
default: 2_700_000,
|
|
90
|
+
description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
|
|
91
|
+
helpGroup: 'Resilience',
|
|
92
|
+
}),
|
|
93
|
+
'agent-retries': Flags.integer({
|
|
94
|
+
default: 0,
|
|
95
|
+
description: 'Max retries for timeout/killed agent failures (0 = no retry)',
|
|
96
|
+
helpGroup: 'Resilience',
|
|
97
|
+
}),
|
|
98
|
+
'retry-backoff': Flags.integer({
|
|
99
|
+
default: 5000,
|
|
100
|
+
description: 'Backoff delay between retries in milliseconds',
|
|
101
|
+
helpGroup: 'Resilience',
|
|
102
|
+
}),
|
|
92
103
|
};
|
|
93
104
|
// Service instances
|
|
94
105
|
agentRunner;
|
|
@@ -446,10 +457,17 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
|
|
|
446
457
|
try {
|
|
447
458
|
// Phase 1: Initial QA Deep Dive
|
|
448
459
|
this.log(colors.info(' Phase 1: QA Deep Dive Review...'));
|
|
460
|
+
const agentTimeout = flags.timeout ?? 2_700_000;
|
|
461
|
+
const agentRetries = flags['agent-retries'];
|
|
462
|
+
const retryBackoff = flags['retry-backoff'];
|
|
449
463
|
const qaPrompt = this.buildQaPrompt(storyPath, flags['qa-prompt'], flags.reference);
|
|
450
|
-
const qaResult = await this.agentRunner
|
|
464
|
+
const qaResult = await runAgentWithRetry(this.agentRunner, qaPrompt, {
|
|
451
465
|
agentType: 'tea',
|
|
452
|
-
timeout:
|
|
466
|
+
timeout: agentTimeout,
|
|
467
|
+
}, {
|
|
468
|
+
backoffMs: retryBackoff,
|
|
469
|
+
logger: this.logger,
|
|
470
|
+
maxRetries: agentRetries,
|
|
453
471
|
});
|
|
454
472
|
if (!qaResult.success) {
|
|
455
473
|
throw new Error(`QA agent failed: ${qaResult.errors}`);
|
|
@@ -465,9 +483,13 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
|
|
|
465
483
|
// Run Dev agent to fix issues (sequential retry loop by design)
|
|
466
484
|
const devPrompt = this.buildDevFixPrompt(storyPath, flags['dev-prompt'], flags.reference);
|
|
467
485
|
// eslint-disable-next-line no-await-in-loop
|
|
468
|
-
const devResult = await this.agentRunner
|
|
486
|
+
const devResult = await runAgentWithRetry(this.agentRunner, devPrompt, {
|
|
469
487
|
agentType: 'dev',
|
|
470
|
-
timeout:
|
|
488
|
+
timeout: agentTimeout,
|
|
489
|
+
}, {
|
|
490
|
+
backoffMs: retryBackoff,
|
|
491
|
+
logger: this.logger,
|
|
492
|
+
maxRetries: agentRetries,
|
|
471
493
|
});
|
|
472
494
|
if (!devResult.success) {
|
|
473
495
|
this.logger.warn({ errors: devResult.errors, retriesUsed }, 'Dev fix-forward failed, continuing...');
|
|
@@ -476,9 +498,13 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
|
|
|
476
498
|
// Phase 3: Re-run QA to validate fixes
|
|
477
499
|
this.log(colors.info(` Phase 3: QA Re-validation (Retry ${retriesUsed})...`));
|
|
478
500
|
// eslint-disable-next-line no-await-in-loop
|
|
479
|
-
const reQaResult = await this.agentRunner
|
|
501
|
+
const reQaResult = await runAgentWithRetry(this.agentRunner, qaPrompt, {
|
|
480
502
|
agentType: 'tea',
|
|
481
|
-
timeout:
|
|
503
|
+
timeout: agentTimeout,
|
|
504
|
+
}, {
|
|
505
|
+
backoffMs: retryBackoff,
|
|
506
|
+
logger: this.logger,
|
|
507
|
+
maxRetries: agentRetries,
|
|
482
508
|
});
|
|
483
509
|
if (!reQaResult.success) {
|
|
484
510
|
this.logger.warn({ errors: reQaResult.errors, retriesUsed }, 'QA re-validation failed, continuing...');
|
|
@@ -42,6 +42,9 @@ export default class Workflow extends Command {
|
|
|
42
42
|
'skip-epics': import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
43
43
|
'skip-stories': import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
44
44
|
'story-interval': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
46
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
47
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
45
48
|
verbose: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
46
49
|
};
|
|
47
50
|
private cancelled;
|
|
@@ -118,6 +118,18 @@ export default class Workflow extends Command {
|
|
|
118
118
|
default: 60,
|
|
119
119
|
description: 'Seconds between story development',
|
|
120
120
|
}),
|
|
121
|
+
timeout: Flags.integer({
|
|
122
|
+
default: 2_700_000,
|
|
123
|
+
description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
|
|
124
|
+
}),
|
|
125
|
+
'max-retries': Flags.integer({
|
|
126
|
+
default: 0,
|
|
127
|
+
description: 'Max retries for timeout/killed agent failures (0 = no retry)',
|
|
128
|
+
}),
|
|
129
|
+
'retry-backoff': Flags.integer({
|
|
130
|
+
default: 5000,
|
|
131
|
+
description: 'Backoff delay between retries in milliseconds',
|
|
132
|
+
}),
|
|
121
133
|
verbose: Flags.boolean({
|
|
122
134
|
char: 'v',
|
|
123
135
|
default: false,
|
|
@@ -154,6 +166,7 @@ export default class Workflow extends Command {
|
|
|
154
166
|
dryRun: flags['dry-run'],
|
|
155
167
|
epicInterval: flags['epic-interval'],
|
|
156
168
|
input: args.input,
|
|
169
|
+
maxRetries: flags['max-retries'],
|
|
157
170
|
model: flags.model,
|
|
158
171
|
parallel: flags.parallel,
|
|
159
172
|
pipeline: flags.pipeline,
|
|
@@ -164,10 +177,12 @@ export default class Workflow extends Command {
|
|
|
164
177
|
qaPrompt: flags['qa-prompt'],
|
|
165
178
|
qaRetries: flags['qa-retries'],
|
|
166
179
|
references: flags.reference || [],
|
|
180
|
+
retryBackoffMs: flags['retry-backoff'],
|
|
167
181
|
skipDev: flags['skip-dev'],
|
|
168
182
|
skipEpics: flags['skip-epics'],
|
|
169
183
|
skipStories: flags['skip-stories'],
|
|
170
184
|
storyInterval: flags['story-interval'],
|
|
185
|
+
timeout: flags.timeout,
|
|
171
186
|
verbose: flags.verbose,
|
|
172
187
|
};
|
|
173
188
|
// Log configuration if verbose
|
|
@@ -147,6 +147,21 @@ export interface WorkflowConfig {
|
|
|
147
147
|
* @default 60
|
|
148
148
|
*/
|
|
149
149
|
storyInterval: number;
|
|
150
|
+
/**
|
|
151
|
+
* Agent execution timeout in milliseconds
|
|
152
|
+
* @default 2_700_000 (45 minutes)
|
|
153
|
+
*/
|
|
154
|
+
timeout?: number;
|
|
155
|
+
/**
|
|
156
|
+
* Maximum retries for timeout/killed agent failures
|
|
157
|
+
* @default 0 (no retries, backward compatible)
|
|
158
|
+
*/
|
|
159
|
+
maxRetries?: number;
|
|
160
|
+
/**
|
|
161
|
+
* Backoff delay between retries in milliseconds
|
|
162
|
+
* @default 5000 (5 seconds)
|
|
163
|
+
*/
|
|
164
|
+
retryBackoffMs?: number;
|
|
150
165
|
/**
|
|
151
166
|
* Detailed output mode
|
|
152
167
|
*
|
|
@@ -390,10 +390,11 @@ export declare class WorkflowOrchestrator {
|
|
|
390
390
|
/**
|
|
391
391
|
* Execute QA phase
|
|
392
392
|
*
|
|
393
|
-
* Runs QA workflow on
|
|
393
|
+
* Runs QA workflow on stories matching the workflow prefix.
|
|
394
394
|
* Dynamically imports and delegates to StoriesQaCommand.
|
|
395
395
|
*
|
|
396
396
|
* @param config - Workflow configuration
|
|
397
|
+
* @param detection - Input detection result for prefix resolution
|
|
397
398
|
* @returns PhaseResult with success count, failures, and duration
|
|
398
399
|
* @private
|
|
399
400
|
*/
|
|
@@ -402,6 +403,7 @@ export declare class WorkflowOrchestrator {
|
|
|
402
403
|
* Execute QA phase if needed
|
|
403
404
|
*
|
|
404
405
|
* @param config - Workflow configuration
|
|
406
|
+
* @param detection - Input detection result for prefix resolution
|
|
405
407
|
* @param devPhase - Dev phase result
|
|
406
408
|
* @param shouldExecute - Whether to execute QA phase
|
|
407
409
|
* @returns QA phase result
|
|
@@ -45,6 +45,7 @@
|
|
|
45
45
|
*/
|
|
46
46
|
import { isEpicStory } from '../../models/story.js';
|
|
47
47
|
import { ParserError, ValidationError } from '../../utils/errors.js';
|
|
48
|
+
import { runAgentWithRetry } from '../../utils/retry.js';
|
|
48
49
|
import { PrdFixer } from '../parsers/prd-fixer.js';
|
|
49
50
|
import { FileScaffolder } from '../scaffolding/file-scaffolder.js';
|
|
50
51
|
import { BatchProcessor } from './batch-processor.js';
|
|
@@ -111,7 +112,7 @@ export class WorkflowOrchestrator {
|
|
|
111
112
|
return this.buildFailureResult(startTime, epicPhase);
|
|
112
113
|
}
|
|
113
114
|
const { devPhase, storyPhase } = await this.executeStoryAndDevPhases(config, detection, epicPhase, phaseFlags, startTime);
|
|
114
|
-
const qaPhase = await this.executeQaPhaseIfNeeded(config, devPhase, phaseFlags.shouldExecuteQaPhase);
|
|
115
|
+
const qaPhase = await this.executeQaPhaseIfNeeded(config, detection, devPhase, phaseFlags.shouldExecuteQaPhase);
|
|
115
116
|
return this.buildSuccessResult(startTime, epicPhase, storyPhase, devPhase, qaPhase);
|
|
116
117
|
}
|
|
117
118
|
/**
|
|
@@ -491,11 +492,15 @@ Write output to: ${outputPath}`;
|
|
|
491
492
|
prompt += '\n';
|
|
492
493
|
}
|
|
493
494
|
prompt += '*yolo mode*\n';
|
|
494
|
-
// Execute dev agent
|
|
495
|
-
const result = await this.agentRunner
|
|
495
|
+
// Execute dev agent with retry on timeout/killed
|
|
496
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
496
497
|
agentType: 'dev',
|
|
497
498
|
references: config.references,
|
|
498
|
-
timeout:
|
|
499
|
+
timeout: config.timeout ?? 2_700_000,
|
|
500
|
+
}, {
|
|
501
|
+
backoffMs: config.retryBackoffMs,
|
|
502
|
+
logger: this.logger,
|
|
503
|
+
maxRetries: config.maxRetries,
|
|
499
504
|
});
|
|
500
505
|
if (result.success) {
|
|
501
506
|
// Update story status to Done
|
|
@@ -688,10 +693,14 @@ Write output to: ${outputPath}`;
|
|
|
688
693
|
prompt += '\n';
|
|
689
694
|
}
|
|
690
695
|
prompt += '*yolo mode*\n';
|
|
691
|
-
const result = await this.agentRunner
|
|
696
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
692
697
|
agentType: 'dev',
|
|
693
698
|
references: config.references,
|
|
694
|
-
timeout:
|
|
699
|
+
timeout: config.timeout ?? 2_700_000,
|
|
700
|
+
}, {
|
|
701
|
+
backoffMs: config.retryBackoffMs,
|
|
702
|
+
logger: this.logger,
|
|
703
|
+
maxRetries: config.maxRetries,
|
|
695
704
|
});
|
|
696
705
|
if (result.success) {
|
|
697
706
|
// Update story status to Done
|
|
@@ -898,10 +907,14 @@ Write output to: ${outputPath}`;
|
|
|
898
907
|
}, 'Claude Prompt (Epic)');
|
|
899
908
|
}
|
|
900
909
|
// Step 3: Run Claude agent to populate content sections
|
|
901
|
-
const result = await this.agentRunner
|
|
910
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
902
911
|
agentType: 'architect',
|
|
903
912
|
references: config.references,
|
|
904
|
-
timeout:
|
|
913
|
+
timeout: config.timeout ?? 2_700_000,
|
|
914
|
+
}, {
|
|
915
|
+
backoffMs: config.retryBackoffMs,
|
|
916
|
+
logger: this.logger,
|
|
917
|
+
maxRetries: config.maxRetries,
|
|
905
918
|
});
|
|
906
919
|
// Log output if verbose
|
|
907
920
|
if (config.verbose) {
|
|
@@ -1113,14 +1126,15 @@ Write output to: ${outputPath}`;
|
|
|
1113
1126
|
/**
|
|
1114
1127
|
* Execute QA phase
|
|
1115
1128
|
*
|
|
1116
|
-
* Runs QA workflow on
|
|
1129
|
+
* Runs QA workflow on stories matching the workflow prefix.
|
|
1117
1130
|
* Dynamically imports and delegates to StoriesQaCommand.
|
|
1118
1131
|
*
|
|
1119
1132
|
* @param config - Workflow configuration
|
|
1133
|
+
* @param detection - Input detection result for prefix resolution
|
|
1120
1134
|
* @returns PhaseResult with success count, failures, and duration
|
|
1121
1135
|
* @private
|
|
1122
1136
|
*/
|
|
1123
|
-
async executeQaPhase(config) {
|
|
1137
|
+
async executeQaPhase(config, detection) {
|
|
1124
1138
|
const startTime = Date.now();
|
|
1125
1139
|
const failures = [];
|
|
1126
1140
|
let successCount = 0;
|
|
@@ -1130,8 +1144,9 @@ Write output to: ${outputPath}`;
|
|
|
1130
1144
|
try {
|
|
1131
1145
|
// Get QA story directory
|
|
1132
1146
|
const qaStoryDir = await this.pathResolver.getQaStoryDir();
|
|
1133
|
-
// Find
|
|
1134
|
-
const
|
|
1147
|
+
// Find stories matching the workflow prefix
|
|
1148
|
+
const prefix = this.resolvePrefix(config, detection);
|
|
1149
|
+
const storyPattern = `${prefix}-story-*.md`;
|
|
1135
1150
|
const storyFiles = await this.fileManager.listFiles(qaStoryDir, storyPattern);
|
|
1136
1151
|
if (storyFiles.length === 0) {
|
|
1137
1152
|
this.logger.info('No stories found in QA folder, skipping QA phase');
|
|
@@ -1146,9 +1161,9 @@ Write output to: ${outputPath}`;
|
|
|
1146
1161
|
this.logger.info({ storyCount: storyFiles.length }, 'Found stories for QA phase');
|
|
1147
1162
|
// Dynamically import QA command to avoid circular dependencies
|
|
1148
1163
|
const { default: StoriesQaCommand } = await import('../../commands/stories/qa.js');
|
|
1149
|
-
// Process each story through QA
|
|
1164
|
+
// Process each story through QA (storyFile is already an absolute path from listFiles)
|
|
1150
1165
|
for (const storyFile of storyFiles) {
|
|
1151
|
-
const storyPath =
|
|
1166
|
+
const storyPath = storyFile;
|
|
1152
1167
|
this.logger.info({ storyPath }, 'Running QA workflow for story');
|
|
1153
1168
|
try {
|
|
1154
1169
|
// Build args for QA command
|
|
@@ -1216,12 +1231,13 @@ Write output to: ${outputPath}`;
|
|
|
1216
1231
|
* Execute QA phase if needed
|
|
1217
1232
|
*
|
|
1218
1233
|
* @param config - Workflow configuration
|
|
1234
|
+
* @param detection - Input detection result for prefix resolution
|
|
1219
1235
|
* @param devPhase - Dev phase result
|
|
1220
1236
|
* @param shouldExecute - Whether to execute QA phase
|
|
1221
1237
|
* @returns QA phase result
|
|
1222
1238
|
* @private
|
|
1223
1239
|
*/
|
|
1224
|
-
async executeQaPhaseIfNeeded(config, devPhase, shouldExecute) {
|
|
1240
|
+
async executeQaPhaseIfNeeded(config, detection, devPhase, shouldExecute) {
|
|
1225
1241
|
if (!shouldExecute || !devPhase || devPhase.success === 0) {
|
|
1226
1242
|
return this.createSkippedPhaseResult('qa');
|
|
1227
1243
|
}
|
|
@@ -1229,7 +1245,7 @@ Write output to: ${outputPath}`;
|
|
|
1229
1245
|
this.logger.info('[DRY RUN] Would execute QA phase');
|
|
1230
1246
|
return this.createSkippedPhaseResult('qa');
|
|
1231
1247
|
}
|
|
1232
|
-
return this.executeQaPhase(config);
|
|
1248
|
+
return this.executeQaPhase(config, detection);
|
|
1233
1249
|
}
|
|
1234
1250
|
/**
|
|
1235
1251
|
* Execute dev phase in sequential mode
|
|
@@ -1457,10 +1473,14 @@ Write output to: ${outputPath}`;
|
|
|
1457
1473
|
}, 'Claude Prompt (Story)');
|
|
1458
1474
|
}
|
|
1459
1475
|
// Step 4: Run Claude agent to populate content sections
|
|
1460
|
-
const result = await this.agentRunner
|
|
1476
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
1461
1477
|
agentType: 'sm',
|
|
1462
1478
|
references: config.references,
|
|
1463
|
-
timeout:
|
|
1479
|
+
timeout: config.timeout ?? 2_700_000,
|
|
1480
|
+
}, {
|
|
1481
|
+
backoffMs: config.retryBackoffMs,
|
|
1482
|
+
logger: this.logger,
|
|
1483
|
+
maxRetries: config.maxRetries,
|
|
1464
1484
|
});
|
|
1465
1485
|
// Log output if verbose
|
|
1466
1486
|
if (config.verbose) {
|
|
@@ -1684,10 +1704,14 @@ Write output to: ${outputPath}`;
|
|
|
1684
1704
|
}, 'Claude Prompt (Story)');
|
|
1685
1705
|
}
|
|
1686
1706
|
// Step 4: Run Claude agent to populate content sections
|
|
1687
|
-
const result = await this.agentRunner
|
|
1707
|
+
const result = await runAgentWithRetry(this.agentRunner, prompt, {
|
|
1688
1708
|
agentType: 'sm',
|
|
1689
1709
|
references: config.references,
|
|
1690
|
-
timeout:
|
|
1710
|
+
timeout: config.timeout ?? 2_700_000,
|
|
1711
|
+
}, {
|
|
1712
|
+
backoffMs: config.retryBackoffMs,
|
|
1713
|
+
logger: this.logger,
|
|
1714
|
+
maxRetries: config.maxRetries,
|
|
1691
1715
|
});
|
|
1692
1716
|
// Log output if verbose
|
|
1693
1717
|
if (config.verbose) {
|
package/dist/utils/retry.d.ts
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
* transient failures gracefully.
|
|
6
6
|
*/
|
|
7
7
|
import type pino from 'pino';
|
|
8
|
+
import type { AgentOptions, AgentResult } from '../models/index.js';
|
|
9
|
+
import type { AIProviderRunner } from '../services/agents/agent-runner.js';
|
|
8
10
|
/**
|
|
9
11
|
* Options for configuring retry behavior
|
|
10
12
|
*/
|
|
@@ -112,3 +114,30 @@ export declare class RetryStrategy {
|
|
|
112
114
|
* })
|
|
113
115
|
*/
|
|
114
116
|
export declare function isClaudeCliRetryable(error: Error): boolean;
|
|
117
|
+
/**
|
|
118
|
+
* Options for runAgentWithRetry
|
|
119
|
+
*/
|
|
120
|
+
export interface RunAgentWithRetryOptions {
|
|
121
|
+
/** Backoff delay between retries in ms. Default: 5000 (short because process already waited for timeout) */
|
|
122
|
+
backoffMs?: number;
|
|
123
|
+
/** Logger for retry messages */
|
|
124
|
+
logger?: pino.Logger;
|
|
125
|
+
/** Max retry attempts. 0 = no retry (preserves existing behavior). Default: 0 */
|
|
126
|
+
maxRetries?: number;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Execute an AI agent with automatic retry on timeout/killed failures
|
|
130
|
+
*
|
|
131
|
+
* Bridges the contract gap: runAgent() returns AgentResult (never throws),
|
|
132
|
+
* but RetryStrategy expects operations that throw. This adapter:
|
|
133
|
+
* 1. Wraps runAgent in a throwing closure for RetryStrategy
|
|
134
|
+
* 2. Catches exhausted retries and returns the last AgentResult (never throws)
|
|
135
|
+
* 3. Short-circuits to plain runAgent when maxRetries <= 0 (zero overhead)
|
|
136
|
+
*
|
|
137
|
+
* @param agentRunner - The AI provider runner to execute
|
|
138
|
+
* @param prompt - The prompt to send
|
|
139
|
+
* @param options - Agent execution options without the prompt field (timeout, agentType, etc.)
|
|
140
|
+
* @param retryOptions - Optional retry configuration (backoffMs, logger, maxRetries); omitting it means no retries
|
|
141
|
+
* @returns Promise resolving to the AgentResult of the last attempt (same contract as runAgent)
|
|
142
|
+
*/
|
|
143
|
+
export declare function runAgentWithRetry(agentRunner: AIProviderRunner, prompt: string, options: Omit<AgentOptions, 'prompt'>, retryOptions?: RunAgentWithRetryOptions): Promise<AgentResult>;
|
package/dist/utils/retry.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Provides configurable retry logic with exponential backoff for handling
|
|
5
5
|
* transient failures gracefully.
|
|
6
6
|
*/
|
|
7
|
+
import { AgentError } from './errors.js';
|
|
7
8
|
import { createLogger } from './logger.js';
|
|
8
9
|
/**
|
|
9
10
|
* Retry strategy implementation with configurable exponential backoff
|
|
@@ -158,3 +159,50 @@ export function isClaudeCliRetryable(error) {
|
|
|
158
159
|
// Default to not retryable if we can't determine exit code
|
|
159
160
|
return false;
|
|
160
161
|
}
|
|
162
|
+
/**
 * Execute an AI agent with automatic retry on timeout/killed failures.
 *
 * Bridges the contract gap: runAgent() reports failure through the returned
 * AgentResult, but RetryStrategy expects operations that throw. This adapter:
 * 1. Wraps runAgent in a throwing closure for RetryStrategy
 * 2. Catches exhausted retries and returns the last AgentResult
 * 3. Short-circuits to plain runAgent when maxRetries <= 0 (zero overhead)
 *
 * @param agentRunner - The AI provider runner to execute
 * @param prompt - The prompt to send
 * @param options - Agent execution options (timeout, agentType, etc.)
 * @param retryOptions - Retry configuration (backoffMs, logger, maxRetries)
 * @returns The AgentResult of the last attempt, whether it succeeded or failed
 * @throws Only when no AgentResult was ever captured, i.e. runAgent itself
 *   rejected (or resolved with a nullish value) instead of reporting a failed
 *   result. The original error is rethrown rather than resolving with
 *   undefined, matching how the maxRetries <= 0 path propagates a rejection.
 */
export async function runAgentWithRetry(agentRunner, prompt, options, retryOptions = {}) {
    const maxRetries = retryOptions.maxRetries ?? 0;
    // Short-circuit: no retries configured — preserve exact existing behavior
    if (maxRetries <= 0) {
        return agentRunner.runAgent(prompt, options);
    }
    // Most recent result produced by runAgent; stays undefined until an
    // attempt actually resolves.
    let lastResult;
    const strategy = new RetryStrategy({
        backoffMs: retryOptions.backoffMs ?? 5_000,
        backoffMultiplier: 1.5,
        isRetryable: isClaudeCliRetryable,
        logger: retryOptions.logger,
        maxRetries,
    });
    try {
        return await strategy.execute(async () => {
            lastResult = await agentRunner.runAgent(prompt, options);
            if (!lastResult.success) {
                // Convert the failed result into an exception so RetryStrategy
                // can decide (via isRetryable) whether to run another attempt.
                throw new AgentError(`Agent ${lastResult.agentType} failed with exit code ${lastResult.exitCode}`, {
                    agentType: lastResult.agentType,
                    exitCode: lastResult.exitCode,
                });
            }
            return lastResult;
        });
    }
    catch (error) {
        // No result was ever captured: there is nothing valid to hand back, and
        // resolving with undefined would break callers reading result.success
        // while hiding the real failure. Surface the original error instead.
        if (lastResult == null) {
            throw error;
        }
        // All retries exhausted or non-retryable failure — return the last
        // AgentResult so failures are reported the same way runAgent reports them.
        return lastResult;
    }
}
|
|
@@ -26,4 +26,7 @@ export declare const agentFlags: {
|
|
|
26
26
|
model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
27
27
|
provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
|
|
28
28
|
task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
|
|
29
|
+
timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
30
|
+
'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
31
|
+
'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
|
|
29
32
|
};
|
|
@@ -44,4 +44,19 @@ export const agentFlags = {
|
|
|
44
44
|
description: 'Override which task command to execute (e.g., develop-story, draft, review-implementation). Defaults to command-appropriate task.',
|
|
45
45
|
helpGroup: 'Agent Customization',
|
|
46
46
|
}),
|
|
47
|
+
timeout: Flags.integer({
|
|
48
|
+
default: 2_700_000,
|
|
49
|
+
description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
|
|
50
|
+
helpGroup: 'Resilience',
|
|
51
|
+
}),
|
|
52
|
+
'max-retries': Flags.integer({
|
|
53
|
+
default: 0,
|
|
54
|
+
description: 'Max retries for timeout/killed agent failures (0 = no retry)',
|
|
55
|
+
helpGroup: 'Resilience',
|
|
56
|
+
}),
|
|
57
|
+
'retry-backoff': Flags.integer({
|
|
58
|
+
default: 5000,
|
|
59
|
+
description: 'Backoff delay between retries in milliseconds',
|
|
60
|
+
helpGroup: 'Resilience',
|
|
61
|
+
}),
|
|
47
62
|
};
|
package/package.json
CHANGED