@hyperdrive.bot/bmad-workflow 1.0.10 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -203,7 +203,7 @@ export default class Decompose extends Command {
203
203
  }
204
204
  }
205
205
  this.log(colors.bold('\n⚙️ Phase 3: Executing task graph...\n'));
206
- const executor = new DependencyGraphExecutor(taskGraph, createAgentRunner(flags.provider, this.logger), new BatchProcessor(flags['max-parallel'], 0, this.logger), this.fileManager, this.logger, flags.cwd);
206
+ const executor = new DependencyGraphExecutor(taskGraph, createAgentRunner(flags.provider, this.logger), new BatchProcessor(flags['max-parallel'], 0, this.logger), this.fileManager, this.logger, flags.cwd, flags.model);
207
207
  const executionResult = await executor.execute((layerIndex, totalLayers, layerSize) => {
208
208
  this.log(colors.info(`\n🔄 Starting Layer ${layerIndex + 1}/${totalLayers} (${layerSize} task${layerSize > 1 ? 's' : ''} in parallel)`));
209
209
  }, (taskId, _layerIndex, _taskIndex, _totalTasks) => {
@@ -40,6 +40,9 @@ export default class EpicsCreate extends Command {
40
40
  model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
41
41
  provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
42
42
  task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
43
+ timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
44
+ 'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
45
+ 'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
43
46
  };
44
47
  /**
45
48
  * AI agent runner service (Claude or Gemini)
@@ -22,6 +22,7 @@ import * as colors from '../../utils/colors.js';
22
22
  import { ValidationError } from '../../utils/errors.js';
23
23
  import { createLogger, generateCorrelationId } from '../../utils/logger.js';
24
24
  import { createSpinner } from '../../utils/progress.js';
25
+ import { runAgentWithRetry } from '../../utils/retry.js';
25
26
  import { agentFlags } from '../../utils/shared-flags.js';
26
27
  /**
27
28
  * Epics Create Command
@@ -136,10 +137,13 @@ export default class EpicsCreate extends Command {
136
137
  epicDir,
137
138
  epics: toCreate,
138
139
  interval: flags.interval,
140
+ maxRetries: flags['max-retries'],
139
141
  prdPath: args['prd-path'],
140
142
  prefix: flags.prefix || '',
141
143
  references: flags.reference,
144
+ retryBackoff: flags['retry-backoff'],
142
145
  task: flags.task,
146
+ timeout: flags.timeout,
143
147
  });
144
148
  // Display summary
145
149
  this.displaySummary({
@@ -208,7 +212,7 @@ export default class EpicsCreate extends Command {
208
212
  * Create epic files using Claude AI agents
209
213
  */
210
214
  async createEpics(options) {
211
- const { agent, epicDir, epics, interval, prdPath, prefix, references, task } = options;
215
+ const { agent, epicDir, epics, interval, maxRetries, prdPath, prefix, references, retryBackoff, task, timeout } = options;
212
216
  const results = [];
213
217
  /* eslint-disable no-await-in-loop */
214
218
  for (let i = 0; i < epics.length; i++) {
@@ -217,10 +221,13 @@ export default class EpicsCreate extends Command {
217
221
  agent,
218
222
  epic,
219
223
  epicDir,
224
+ maxRetries,
220
225
  prdPath,
221
226
  prefix,
222
227
  references,
228
+ retryBackoff,
223
229
  task,
230
+ timeout,
224
231
  });
225
232
  results.push(result);
226
233
  // Wait interval if not last epic
@@ -235,7 +242,7 @@ export default class EpicsCreate extends Command {
235
242
  * Create a single epic file
236
243
  */
237
244
  async createSingleEpic(options) {
238
- const { agent, epic, epicDir, prdPath, prefix, references, task } = options;
245
+ const { agent, epic, epicDir, maxRetries, prdPath, prefix, references, retryBackoff, task, timeout } = options;
239
246
  const fileName = this.generateEpicFileName(epic, prefix);
240
247
  const filePath = path.join(epicDir, fileName);
241
248
  const spinner = createSpinner(`Creating Epic ${epic.number}: ${epic.title}...`);
@@ -262,10 +269,14 @@ export default class EpicsCreate extends Command {
262
269
  });
263
270
  // Step 3: Run Claude AI agent to populate epic content sections
264
271
  spinner.text = `Populating epic ${epic.number} with AI agent...`;
265
- const result = await this.agentRunner.runAgent(prompt, {
272
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
266
273
  agentType: 'sm',
267
274
  references,
268
- timeout: 1_800_000, // 30 minutes
275
+ timeout: timeout ?? 2_700_000,
276
+ }, {
277
+ backoffMs: retryBackoff,
278
+ logger: this.logger,
279
+ maxRetries,
269
280
  });
270
281
  if (!result.success) {
271
282
  throw new Error(result.errors || 'Claude agent failed to populate epic');
@@ -38,6 +38,9 @@ export default class StoriesCreateCommand extends Command {
38
38
  model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
39
39
  provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
40
40
  task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
41
+ timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
42
+ 'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
43
+ 'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
41
44
  };
42
45
  private agentRunner;
43
46
  private batchProcessor;
@@ -24,6 +24,7 @@ import * as colors from '../../utils/colors.js';
24
24
  import { ValidationError } from '../../utils/errors.js';
25
25
  import { createLogger, generateCorrelationId } from '../../utils/logger.js';
26
26
  import { createSpinner } from '../../utils/progress.js';
27
+ import { runAgentWithRetry } from '../../utils/retry.js';
27
28
  import { agentFlags } from '../../utils/shared-flags.js';
28
29
  /**
29
30
  * Stories Create Command
@@ -220,10 +221,13 @@ export default class StoriesCreateCommand extends Command {
220
221
  const processor = async (story) => this.createStory({
221
222
  agent: flags.agent,
222
223
  epicPath,
224
+ maxRetries: flags['max-retries'],
223
225
  prefix: flags.prefix,
224
226
  references: flags.reference,
227
+ retryBackoff: flags['retry-backoff'],
225
228
  story,
226
229
  task: flags.task,
230
+ timeout: flags.timeout,
227
231
  });
228
232
  // Progress callback for batch updates
229
233
  const onProgress = (info) => {
@@ -266,7 +270,7 @@ export default class StoriesCreateCommand extends Command {
266
270
  * Create a single story file
267
271
  */
268
272
  async createStory(options) {
269
- const { agent, epicPath, prefix, references, story, task } = options;
273
+ const { agent, epicPath, maxRetries, prefix, references, retryBackoff, story, task, timeout } = options;
270
274
  const storyDir = this.pathResolver.getStoryDir();
271
275
  const filename = this.generateStoryFilename(story, prefix);
272
276
  const filePath = path.join(storyDir, filename);
@@ -290,10 +294,14 @@ export default class StoriesCreateCommand extends Command {
290
294
  storyFilePath: absolutePath,
291
295
  task,
292
296
  });
293
- // Step 3: Run Claude agent
294
- const result = await this.agentRunner.runAgent(prompt, {
297
+ // Step 3: Run Claude agent with retry on timeout/killed
298
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
295
299
  agentType: 'sm',
296
- timeout: 1_800_000, // 30 minutes
300
+ timeout: timeout ?? 2_700_000,
301
+ }, {
302
+ backoffMs: retryBackoff,
303
+ logger: this.logger,
304
+ maxRetries,
297
305
  });
298
306
  // Step 4: Verify file was updated
299
307
  if (result.success) {
@@ -40,6 +40,9 @@ export default class StoriesDevelopCommand extends Command {
40
40
  model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
41
41
  provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
42
42
  task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
43
+ timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
44
+ 'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
45
+ 'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
43
46
  };
44
47
  private agentRunner;
45
48
  private fileManager;
@@ -25,12 +25,8 @@ import { StoryParserFactory } from '../../services/parsers/story-parser-factory.
25
25
  import * as colors from '../../utils/colors.js';
26
26
  import { createLogger, generateCorrelationId } from '../../utils/logger.js';
27
27
  import { createSpinner } from '../../utils/progress.js';
28
+ import { runAgentWithRetry } from '../../utils/retry.js';
28
29
  import { agentFlags } from '../../utils/shared-flags.js';
29
- /**
30
- * Agent timeout in milliseconds (30 minutes)
31
- * Dev agents need sufficient time for comprehensive implementation
32
- */
33
- const DEV_AGENT_TIMEOUT_MS = 1_800_000;
34
30
  /**
35
31
  * Stories Develop Command
36
32
  *
@@ -251,10 +247,13 @@ export default class StoriesDevelopCommand extends Command {
251
247
  const result = await this.developStory({
252
248
  agent: flags.agent,
253
249
  cwd: flags.cwd,
250
+ maxRetries: flags['max-retries'],
254
251
  references: flags.reference,
252
+ retryBackoff: flags['retry-backoff'],
255
253
  storyMetadata,
256
254
  storyPath,
257
255
  task: flags.task,
256
+ timeout: flags.timeout,
258
257
  });
259
258
  results.push(result);
260
259
  if (result.success) {
@@ -275,7 +274,7 @@ export default class StoriesDevelopCommand extends Command {
275
274
  * Develop a single story
276
275
  */
277
276
  async developStory(options) {
278
- const { agent, cwd, references, storyMetadata, storyPath, task } = options;
277
+ const { agent, cwd, maxRetries, references, retryBackoff, storyMetadata, storyPath, task, timeout } = options;
279
278
  const storyNumber = isEpicStory(storyMetadata) ? storyMetadata.number : storyMetadata.id;
280
279
  this.logger.info({ storyNumber, storyPath }, 'Starting story development');
281
280
  try {
@@ -305,11 +304,15 @@ export default class StoriesDevelopCommand extends Command {
305
304
  storyPath,
306
305
  task,
307
306
  });
308
- // Step 4: Run Claude dev agent
307
+ // Step 4: Run Claude dev agent with retry on timeout/killed
309
308
  this.logger.info({ storyNumber }, 'Running Claude dev agent');
310
- const result = await this.agentRunner.runAgent(prompt, {
309
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
311
310
  agentType: 'dev',
312
- timeout: DEV_AGENT_TIMEOUT_MS,
311
+ timeout: timeout ?? 2_700_000,
312
+ }, {
313
+ backoffMs: retryBackoff,
314
+ logger: this.logger,
315
+ maxRetries,
313
316
  });
314
317
  if (!result.success) {
315
318
  throw new Error(`Dev agent failed: ${result.errors}`);
@@ -36,6 +36,9 @@ export default class StoriesQaCommand extends Command {
36
36
  provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
37
37
  'qa-prompt': import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
38
38
  reference: import("@oclif/core/interfaces").OptionFlag<string[] | undefined, import("@oclif/core/interfaces").CustomOptions>;
39
+ timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
40
+ 'agent-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
41
+ 'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
39
42
  };
40
43
  private agentRunner;
41
44
  private fileManager;
@@ -23,11 +23,7 @@ import { StoryParserFactory } from '../../services/parsers/story-parser-factory.
23
23
  import * as colors from '../../utils/colors.js';
24
24
  import { createLogger, generateCorrelationId } from '../../utils/logger.js';
25
25
  import { createSpinner } from '../../utils/progress.js';
26
- /**
27
- * Agent timeout in milliseconds (30 minutes)
28
- * QA and Dev agents need longer timeouts for comprehensive analysis
29
- */
30
- const AGENT_TIMEOUT_MS = 1_800_000;
26
+ import { runAgentWithRetry } from '../../utils/retry.js';
31
27
  /**
32
28
  * Stories QA Command
33
29
  *
@@ -89,6 +85,21 @@ export default class StoriesQaCommand extends Command {
89
85
  description: 'Additional context files for agents',
90
86
  multiple: true,
91
87
  }),
88
+ timeout: Flags.integer({
89
+ default: 2_700_000,
90
+ description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
91
+ helpGroup: 'Resilience',
92
+ }),
93
+ 'agent-retries': Flags.integer({
94
+ default: 0,
95
+ description: 'Max retries for timeout/killed agent failures (0 = no retry)',
96
+ helpGroup: 'Resilience',
97
+ }),
98
+ 'retry-backoff': Flags.integer({
99
+ default: 5000,
100
+ description: 'Backoff delay between retries in milliseconds',
101
+ helpGroup: 'Resilience',
102
+ }),
92
103
  };
93
104
  // Service instances
94
105
  agentRunner;
@@ -446,10 +457,17 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
446
457
  try {
447
458
  // Phase 1: Initial QA Deep Dive
448
459
  this.log(colors.info(' Phase 1: QA Deep Dive Review...'));
460
+ const agentTimeout = flags.timeout ?? 2_700_000;
461
+ const agentRetries = flags['agent-retries'];
462
+ const retryBackoff = flags['retry-backoff'];
449
463
  const qaPrompt = this.buildQaPrompt(storyPath, flags['qa-prompt'], flags.reference);
450
- const qaResult = await this.agentRunner.runAgent(qaPrompt, {
464
+ const qaResult = await runAgentWithRetry(this.agentRunner, qaPrompt, {
451
465
  agentType: 'tea',
452
- timeout: AGENT_TIMEOUT_MS,
466
+ timeout: agentTimeout,
467
+ }, {
468
+ backoffMs: retryBackoff,
469
+ logger: this.logger,
470
+ maxRetries: agentRetries,
453
471
  });
454
472
  if (!qaResult.success) {
455
473
  throw new Error(`QA agent failed: ${qaResult.errors}`);
@@ -465,9 +483,13 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
465
483
  // Run Dev agent to fix issues (sequential retry loop by design)
466
484
  const devPrompt = this.buildDevFixPrompt(storyPath, flags['dev-prompt'], flags.reference);
467
485
  // eslint-disable-next-line no-await-in-loop
468
- const devResult = await this.agentRunner.runAgent(devPrompt, {
486
+ const devResult = await runAgentWithRetry(this.agentRunner, devPrompt, {
469
487
  agentType: 'dev',
470
- timeout: AGENT_TIMEOUT_MS,
488
+ timeout: agentTimeout,
489
+ }, {
490
+ backoffMs: retryBackoff,
491
+ logger: this.logger,
492
+ maxRetries: agentRetries,
471
493
  });
472
494
  if (!devResult.success) {
473
495
  this.logger.warn({ errors: devResult.errors, retriesUsed }, 'Dev fix-forward failed, continuing...');
@@ -476,9 +498,13 @@ ${result.finalGate === 'PASS' ? '3. [x] Final QA Validation - PASSED' : result.f
476
498
  // Phase 3: Re-run QA to validate fixes
477
499
  this.log(colors.info(` Phase 3: QA Re-validation (Retry ${retriesUsed})...`));
478
500
  // eslint-disable-next-line no-await-in-loop
479
- const reQaResult = await this.agentRunner.runAgent(qaPrompt, {
501
+ const reQaResult = await runAgentWithRetry(this.agentRunner, qaPrompt, {
480
502
  agentType: 'tea',
481
- timeout: AGENT_TIMEOUT_MS,
503
+ timeout: agentTimeout,
504
+ }, {
505
+ backoffMs: retryBackoff,
506
+ logger: this.logger,
507
+ maxRetries: agentRetries,
482
508
  });
483
509
  if (!reQaResult.success) {
484
510
  this.logger.warn({ errors: reQaResult.errors, retriesUsed }, 'QA re-validation failed, continuing...');
@@ -42,6 +42,9 @@ export default class Workflow extends Command {
42
42
  'skip-epics': import("@oclif/core/interfaces").BooleanFlag<boolean>;
43
43
  'skip-stories': import("@oclif/core/interfaces").BooleanFlag<boolean>;
44
44
  'story-interval': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
45
+ timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
46
+ 'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
47
+ 'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
45
48
  verbose: import("@oclif/core/interfaces").BooleanFlag<boolean>;
46
49
  };
47
50
  private cancelled;
@@ -118,6 +118,18 @@ export default class Workflow extends Command {
118
118
  default: 60,
119
119
  description: 'Seconds between story development',
120
120
  }),
121
+ timeout: Flags.integer({
122
+ default: 2_700_000,
123
+ description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
124
+ }),
125
+ 'max-retries': Flags.integer({
126
+ default: 0,
127
+ description: 'Max retries for timeout/killed agent failures (0 = no retry)',
128
+ }),
129
+ 'retry-backoff': Flags.integer({
130
+ default: 5000,
131
+ description: 'Backoff delay between retries in milliseconds',
132
+ }),
121
133
  verbose: Flags.boolean({
122
134
  char: 'v',
123
135
  default: false,
@@ -154,6 +166,7 @@ export default class Workflow extends Command {
154
166
  dryRun: flags['dry-run'],
155
167
  epicInterval: flags['epic-interval'],
156
168
  input: args.input,
169
+ maxRetries: flags['max-retries'],
157
170
  model: flags.model,
158
171
  parallel: flags.parallel,
159
172
  pipeline: flags.pipeline,
@@ -164,10 +177,12 @@ export default class Workflow extends Command {
164
177
  qaPrompt: flags['qa-prompt'],
165
178
  qaRetries: flags['qa-retries'],
166
179
  references: flags.reference || [],
180
+ retryBackoffMs: flags['retry-backoff'],
167
181
  skipDev: flags['skip-dev'],
168
182
  skipEpics: flags['skip-epics'],
169
183
  skipStories: flags['skip-stories'],
170
184
  storyInterval: flags['story-interval'],
185
+ timeout: flags.timeout,
171
186
  verbose: flags.verbose,
172
187
  };
173
188
  // Log configuration if verbose
@@ -147,6 +147,21 @@ export interface WorkflowConfig {
147
147
  * @default 60
148
148
  */
149
149
  storyInterval: number;
150
+ /**
151
+ * Agent execution timeout in milliseconds
152
+ * @default 2_700_000 (45 minutes)
153
+ */
154
+ timeout?: number;
155
+ /**
156
+ * Maximum retries for timeout/killed agent failures
157
+ * @default 0 (no retries, backward compatible)
158
+ */
159
+ maxRetries?: number;
160
+ /**
161
+ * Backoff delay between retries in milliseconds
162
+ * @default 5000 (5 seconds)
163
+ */
164
+ retryBackoffMs?: number;
150
165
  /**
151
166
  * Detailed output mode
152
167
  *
@@ -27,8 +27,9 @@ export declare class DependencyGraphExecutor {
27
27
  private readonly cwd?;
28
28
  private readonly fileManager;
29
29
  private readonly logger;
30
+ private readonly model?;
30
31
  private readonly taskGraph;
31
- constructor(taskGraph: TaskGraph, agentRunner: AIProviderRunner, batchProcessor: BatchProcessor, fileManager: FileManager, logger: pino.Logger, cwd?: string);
32
+ constructor(taskGraph: TaskGraph, agentRunner: AIProviderRunner, batchProcessor: BatchProcessor, fileManager: FileManager, logger: pino.Logger, cwd?: string, model?: string);
32
33
  /**
33
34
  * Execute the entire task graph
34
35
  *
@@ -14,15 +14,17 @@ export class DependencyGraphExecutor {
14
14
  cwd;
15
15
  fileManager;
16
16
  logger;
17
+ model;
17
18
  taskGraph;
18
19
  // eslint-disable-next-line max-params -- Constructor dependencies will be refactored to config object in future
19
- constructor(taskGraph, agentRunner, batchProcessor, fileManager, logger, cwd) {
20
+ constructor(taskGraph, agentRunner, batchProcessor, fileManager, logger, cwd, model) {
20
21
  this.taskGraph = taskGraph;
21
22
  this.agentRunner = agentRunner;
22
23
  this.batchProcessor = batchProcessor;
23
24
  this.fileManager = fileManager;
24
25
  this.logger = logger;
25
26
  this.cwd = cwd;
27
+ this.model = model;
26
28
  }
27
29
  /**
28
30
  * Execute the entire task graph
@@ -306,6 +308,7 @@ Use the file at the path above to document:
306
308
  // Execute agent
307
309
  const result = await this.agentRunner.runAgent(fullPrompt, {
308
310
  agentType,
311
+ model: this.model,
309
312
  references: task.targetFiles, // Pass target files as references
310
313
  timeout: task.estimatedMinutes * 60 * 1000 * 1.5, // 1.5x estimated time as buffer
311
314
  });
@@ -507,6 +507,7 @@ YOUR RESPONSE MUST START WITH "masterPrompt: |" - NO OTHER TEXT ALLOWED!`;
507
507
  this.logger.info('Asking Claude to fix YAML errors');
508
508
  const fixResult = await this.agentRunner.runAgent(fixPrompt, {
509
509
  agentType: 'architect',
510
+ model: options.model,
510
511
  timeout: 60_000, // 1 minute for fix
511
512
  });
512
513
  if (!fixResult.success) {
@@ -45,6 +45,7 @@
45
45
  */
46
46
  import { isEpicStory } from '../../models/story.js';
47
47
  import { ParserError, ValidationError } from '../../utils/errors.js';
48
+ import { runAgentWithRetry } from '../../utils/retry.js';
48
49
  import { PrdFixer } from '../parsers/prd-fixer.js';
49
50
  import { FileScaffolder } from '../scaffolding/file-scaffolder.js';
50
51
  import { BatchProcessor } from './batch-processor.js';
@@ -491,11 +492,15 @@ Write output to: ${outputPath}`;
491
492
  prompt += '\n';
492
493
  }
493
494
  prompt += '*yolo mode*\n';
494
- // Execute dev agent
495
- const result = await this.agentRunner.runAgent(prompt, {
495
+ // Execute dev agent with retry on timeout/killed
496
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
496
497
  agentType: 'dev',
497
498
  references: config.references,
498
- timeout: 1_800_000, // 30 minutes
499
+ timeout: config.timeout ?? 2_700_000,
500
+ }, {
501
+ backoffMs: config.retryBackoffMs,
502
+ logger: this.logger,
503
+ maxRetries: config.maxRetries,
499
504
  });
500
505
  if (result.success) {
501
506
  // Update story status to Done
@@ -688,10 +693,14 @@ Write output to: ${outputPath}`;
688
693
  prompt += '\n';
689
694
  }
690
695
  prompt += '*yolo mode*\n';
691
- const result = await this.agentRunner.runAgent(prompt, {
696
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
692
697
  agentType: 'dev',
693
698
  references: config.references,
694
- timeout: 1_800_000, // 30 minutes
699
+ timeout: config.timeout ?? 2_700_000,
700
+ }, {
701
+ backoffMs: config.retryBackoffMs,
702
+ logger: this.logger,
703
+ maxRetries: config.maxRetries,
695
704
  });
696
705
  if (result.success) {
697
706
  // Update story status to Done
@@ -898,10 +907,14 @@ Write output to: ${outputPath}`;
898
907
  }, 'Claude Prompt (Epic)');
899
908
  }
900
909
  // Step 3: Run Claude agent to populate content sections
901
- const result = await this.agentRunner.runAgent(prompt, {
910
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
902
911
  agentType: 'architect',
903
912
  references: config.references,
904
- timeout: 1_800_000, // 30 minutes
913
+ timeout: config.timeout ?? 2_700_000,
914
+ }, {
915
+ backoffMs: config.retryBackoffMs,
916
+ logger: this.logger,
917
+ maxRetries: config.maxRetries,
905
918
  });
906
919
  // Log output if verbose
907
920
  if (config.verbose) {
@@ -1457,10 +1470,14 @@ Write output to: ${outputPath}`;
1457
1470
  }, 'Claude Prompt (Story)');
1458
1471
  }
1459
1472
  // Step 4: Run Claude agent to populate content sections
1460
- const result = await this.agentRunner.runAgent(prompt, {
1473
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
1461
1474
  agentType: 'sm',
1462
1475
  references: config.references,
1463
- timeout: 1_800_000, // 30 minutes
1476
+ timeout: config.timeout ?? 2_700_000,
1477
+ }, {
1478
+ backoffMs: config.retryBackoffMs,
1479
+ logger: this.logger,
1480
+ maxRetries: config.maxRetries,
1464
1481
  });
1465
1482
  // Log output if verbose
1466
1483
  if (config.verbose) {
@@ -1684,10 +1701,14 @@ Write output to: ${outputPath}`;
1684
1701
  }, 'Claude Prompt (Story)');
1685
1702
  }
1686
1703
  // Step 4: Run Claude agent to populate content sections
1687
- const result = await this.agentRunner.runAgent(prompt, {
1704
+ const result = await runAgentWithRetry(this.agentRunner, prompt, {
1688
1705
  agentType: 'sm',
1689
1706
  references: config.references,
1690
- timeout: 1_800_000, // 30 minutes
1707
+ timeout: config.timeout ?? 2_700_000,
1708
+ }, {
1709
+ backoffMs: config.retryBackoffMs,
1710
+ logger: this.logger,
1711
+ maxRetries: config.maxRetries,
1691
1712
  });
1692
1713
  // Log output if verbose
1693
1714
  if (config.verbose) {
@@ -5,6 +5,8 @@
5
5
  * transient failures gracefully.
6
6
  */
7
7
  import type pino from 'pino';
8
+ import type { AgentOptions, AgentResult } from '../models/index.js';
9
+ import type { AIProviderRunner } from '../services/agents/agent-runner.js';
8
10
  /**
9
11
  * Options for configuring retry behavior
10
12
  */
@@ -112,3 +114,30 @@ export declare class RetryStrategy {
112
114
  * })
113
115
  */
114
116
  export declare function isClaudeCliRetryable(error: Error): boolean;
117
+ /**
118
+ * Options for runAgentWithRetry
119
+ */
120
+ export interface RunAgentWithRetryOptions {
121
+ /** Backoff delay between retries in ms. Default: 5000 (short because process already waited for timeout) */
122
+ backoffMs?: number;
123
+ /** Logger for retry messages */
124
+ logger?: pino.Logger;
125
+ /** Max retry attempts. 0 = no retry (preserves existing behavior). Default: 0 */
126
+ maxRetries?: number;
127
+ }
128
+ /**
129
+ * Execute an AI agent with automatic retry on timeout/killed failures
130
+ *
131
+ * Bridges the contract gap: runAgent() returns AgentResult (never throws),
132
+ * but RetryStrategy expects operations that throw. This adapter:
133
+ * 1. Wraps runAgent in a throwing closure for RetryStrategy
134
+ * 2. Catches exhausted retries and returns the last AgentResult (never throws)
135
+ * 3. Short-circuits to plain runAgent when maxRetries <= 0 (zero overhead)
136
+ *
137
+ * @param agentRunner - The AI provider runner to execute
138
+ * @param prompt - The prompt to send
139
+ * @param options - Agent execution options (timeout, agentType, etc.)
140
+ * @param retryOptions - Retry configuration
141
+ * @returns AgentResult (same contract as runAgent — never throws)
142
+ */
143
+ export declare function runAgentWithRetry(agentRunner: AIProviderRunner, prompt: string, options: Omit<AgentOptions, 'prompt'>, retryOptions?: RunAgentWithRetryOptions): Promise<AgentResult>;
@@ -4,6 +4,7 @@
4
4
  * Provides configurable retry logic with exponential backoff for handling
5
5
  * transient failures gracefully.
6
6
  */
7
+ import { AgentError } from './errors.js';
7
8
  import { createLogger } from './logger.js';
8
9
  /**
9
10
  * Retry strategy implementation with configurable exponential backoff
@@ -158,3 +159,50 @@ export function isClaudeCliRetryable(error) {
158
159
  // Default to not retryable if we can't determine exit code
159
160
  return false;
160
161
  }
162
+ /**
163
+ * Execute an AI agent with automatic retry on timeout/killed failures
164
+ *
165
+ * Bridges the contract gap: runAgent() returns AgentResult (never throws),
166
+ * but RetryStrategy expects operations that throw. This adapter:
167
+ * 1. Wraps runAgent in a throwing closure for RetryStrategy
168
+ * 2. Catches exhausted retries and returns the last AgentResult (never throws)
169
+ * 3. Short-circuits to plain runAgent when maxRetries <= 0 (zero overhead)
170
+ *
171
+ * @param agentRunner - The AI provider runner to execute
172
+ * @param prompt - The prompt to send
173
+ * @param options - Agent execution options (timeout, agentType, etc.)
174
+ * @param retryOptions - Retry configuration
175
+ * @returns AgentResult (same contract as runAgent — never throws)
176
+ */
177
+ export async function runAgentWithRetry(agentRunner, prompt, options, retryOptions = {}) {
178
+ const maxRetries = retryOptions.maxRetries ?? 0;
179
+ // Short-circuit: no retries configured — preserve exact existing behavior
180
+ if (maxRetries <= 0) {
181
+ return agentRunner.runAgent(prompt, options);
182
+ }
183
+ let lastResult;
184
+ const strategy = new RetryStrategy({
185
+ backoffMs: retryOptions.backoffMs ?? 5_000,
186
+ backoffMultiplier: 1.5,
187
+ isRetryable: isClaudeCliRetryable,
188
+ logger: retryOptions.logger,
189
+ maxRetries,
190
+ });
191
+ try {
192
+ return await strategy.execute(async () => {
193
+ lastResult = await agentRunner.runAgent(prompt, options);
194
+ if (!lastResult.success) {
195
+ throw new AgentError(`Agent ${lastResult.agentType} failed with exit code ${lastResult.exitCode}`, {
196
+ agentType: lastResult.agentType,
197
+ exitCode: lastResult.exitCode,
198
+ });
199
+ }
200
+ return lastResult;
201
+ });
202
+ }
203
+ catch {
204
+ // All retries exhausted or non-retryable error — return the last AgentResult
205
+ // This preserves the contract: runAgentWithRetry never throws, just like runAgent
206
+ return lastResult;
207
+ }
208
+ }
@@ -26,4 +26,7 @@ export declare const agentFlags: {
26
26
  model: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
27
27
  provider: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
28
28
  task: import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
29
+ timeout: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
30
+ 'max-retries': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
31
+ 'retry-backoff': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
29
32
  };
@@ -44,4 +44,19 @@ export const agentFlags = {
44
44
  description: 'Override which task command to execute (e.g., develop-story, draft, review-implementation). Defaults to command-appropriate task.',
45
45
  helpGroup: 'Agent Customization',
46
46
  }),
47
+ timeout: Flags.integer({
48
+ default: 2_700_000,
49
+ description: 'Agent execution timeout in milliseconds (default: 2700000 = 45 minutes)',
50
+ helpGroup: 'Resilience',
51
+ }),
52
+ 'max-retries': Flags.integer({
53
+ default: 0,
54
+ description: 'Max retries for timeout/killed agent failures (0 = no retry)',
55
+ helpGroup: 'Resilience',
56
+ }),
57
+ 'retry-backoff': Flags.integer({
58
+ default: 5000,
59
+ description: 'Backoff delay between retries in milliseconds',
60
+ helpGroup: 'Resilience',
61
+ }),
47
62
  };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@hyperdrive.bot/bmad-workflow",
3
3
  "description": "AI-driven development workflow orchestration CLI for BMAD projects",
4
- "version": "1.0.10",
4
+ "version": "1.0.12",
5
5
  "author": {
6
6
  "name": "DevSquad",
7
7
  "email": "marcelo@devsquad.email",