agent-relay 3.1.16 → 3.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  2. package/bin/agent-relay-broker-darwin-x64 +0 -0
  3. package/bin/agent-relay-broker-linux-arm64 +0 -0
  4. package/bin/agent-relay-broker-linux-x64 +0 -0
  5. package/dist/index.cjs +573 -32
  6. package/package.json +8 -8
  7. package/packages/acp-bridge/package.json +2 -2
  8. package/packages/config/package.json +1 -1
  9. package/packages/hooks/package.json +4 -4
  10. package/packages/memory/package.json +2 -2
  11. package/packages/openclaw/package.json +2 -2
  12. package/packages/policy/package.json +2 -2
  13. package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts +16 -0
  14. package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts.map +1 -0
  15. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js +640 -0
  16. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js.map +1 -0
  17. package/packages/sdk/dist/client.d.ts +2 -0
  18. package/packages/sdk/dist/client.d.ts.map +1 -1
  19. package/packages/sdk/dist/client.js +2 -0
  20. package/packages/sdk/dist/client.js.map +1 -1
  21. package/packages/sdk/dist/protocol.d.ts +4 -0
  22. package/packages/sdk/dist/protocol.d.ts.map +1 -1
  23. package/packages/sdk/dist/workflows/cli.js +10 -0
  24. package/packages/sdk/dist/workflows/cli.js.map +1 -1
  25. package/packages/sdk/dist/workflows/runner.d.ts +31 -0
  26. package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
  27. package/packages/sdk/dist/workflows/runner.js +542 -31
  28. package/packages/sdk/dist/workflows/runner.js.map +1 -1
  29. package/packages/sdk/dist/workflows/trajectory.d.ts +22 -1
  30. package/packages/sdk/dist/workflows/trajectory.d.ts.map +1 -1
  31. package/packages/sdk/dist/workflows/trajectory.js +55 -8
  32. package/packages/sdk/dist/workflows/trajectory.js.map +1 -1
  33. package/packages/sdk/dist/workflows/validator.d.ts.map +1 -1
  34. package/packages/sdk/dist/workflows/validator.js +29 -0
  35. package/packages/sdk/dist/workflows/validator.js.map +1 -1
  36. package/packages/sdk/package.json +2 -2
  37. package/packages/sdk/src/__tests__/e2e-owner-review.test.ts +778 -0
  38. package/packages/sdk/src/__tests__/workflow-runner.test.ts +484 -9
  39. package/packages/sdk/src/client.ts +4 -0
  40. package/packages/sdk/src/protocol.ts +4 -0
  41. package/packages/sdk/src/workflows/README.md +11 -0
  42. package/packages/sdk/src/workflows/cli.ts +10 -0
  43. package/packages/sdk/src/workflows/runner.ts +714 -33
  44. package/packages/sdk/src/workflows/trajectory.ts +89 -8
  45. package/packages/sdk/src/workflows/validator.ts +29 -0
  46. package/packages/sdk-py/pyproject.toml +1 -1
  47. package/packages/telemetry/package.json +1 -1
  48. package/packages/trajectory/package.json +2 -2
  49. package/packages/user-directory/package.json +2 -2
  50. package/packages/utils/package.json +2 -2
  51. package/relay-snippets/agent-relay-snippet.md +12 -0
@@ -72,7 +72,22 @@ export type WorkflowEvent =
72
72
  | { type: 'run:failed'; runId: string; error: string }
73
73
  | { type: 'run:cancelled'; runId: string }
74
74
  | { type: 'step:started'; runId: string; stepName: string }
75
+ | {
76
+ type: 'step:owner-assigned';
77
+ runId: string;
78
+ stepName: string;
79
+ ownerName: string;
80
+ specialistName: string;
81
+ }
75
82
  | { type: 'step:completed'; runId: string; stepName: string; output?: string }
83
+ | {
84
+ type: 'step:review-completed';
85
+ runId: string;
86
+ stepName: string;
87
+ reviewerName: string;
88
+ decision: 'approved' | 'rejected';
89
+ }
90
+ | { type: 'step:owner-timeout'; runId: string; stepName: string; ownerName: string }
76
91
  | { type: 'step:failed'; runId: string; stepName: string; error: string }
77
92
  | { type: 'step:skipped'; runId: string; stepName: string }
78
93
  | { type: 'step:retrying'; runId: string; stepName: string; attempt: number }
@@ -127,6 +142,30 @@ interface StepState {
127
142
  agent?: Agent;
128
143
  }
129
144
 
145
/** The agent definitions taking part in a single supervised step. */
interface SupervisedStep {
  /** Agent that carries out the step's task. */
  specialist: AgentDefinition;
  /** Agent accountable for declaring the step complete. */
  owner: AgentDefinition;
  /** Agent that runs the post-step review, when one was resolved. */
  reviewer?: AgentDefinition;
}

/** Payload handed to `SpawnAndWaitOptions.onSpawned`. */
interface SpawnedAgentInfo {
  /** Name the caller asked for. */
  requestedName: string;
  /** Name the agent actually runs under. */
  actualName: string;
  /** Handle to the spawned agent. */
  agent: Agent;
}

/** Optional hooks and naming tweaks accepted by `spawnAndWait`. */
interface SpawnAndWaitOptions {
  /** Suffix for the spawned agent's name (callers in this file pass 'worker' or 'owner'). */
  agentNameSuffix?: string;
  /** Called with the spawn details; may be synchronous or return a promise. */
  onSpawned?: (info: SpawnedAgentInfo) => void | Promise<void>;
  /** Called with each output chunk together with the emitting agent's name. */
  onChunk?: (info: { agentName: string; chunk: string }) => void;
}

/** Bookkeeping record for an agent taking part in a supervised owner flow. */
interface SupervisedRuntimeAgent {
  /** Name of the workflow step the agent belongs to. */
  stepName: string;
  /** Whether the agent acts as the step's owner or its specialist. */
  role: 'owner' | 'specialist';
  /** Configured (logical) agent name, as opposed to the runtime name. */
  logicalName: string;
}
168
+
130
169
  // ── CLI resolution ───────────────────────────────────────────────────────────
131
170
 
132
171
  /**
@@ -203,6 +242,8 @@ export class WorkflowRunner {
203
242
  private readonly lastIdleLog = new Map<string, number>();
204
243
  /** Tracks last logged activity type per agent to avoid duplicate status lines. */
205
244
  private readonly lastActivity = new Map<string, string>();
245
+ /** Runtime-name lookup for agents participating in supervised owner flows. */
246
+ private readonly supervisedRuntimeAgents = new Map<string, SupervisedRuntimeAgent>();
206
247
  /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
207
248
  private resolvedPaths = new Map<string, string>();
208
249
 
@@ -1375,6 +1416,16 @@ export class WorkflowRunner {
1375
1416
  const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
1376
1417
  const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
1377
1418
  this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
1419
+
1420
+ const supervision = this.supervisedRuntimeAgents.get(msg.from);
1421
+ if (supervision?.role === 'owner') {
1422
+ void this.trajectory?.ownerMonitoringEvent(
1423
+ supervision.stepName,
1424
+ supervision.logicalName,
1425
+ `Messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
1426
+ { to: msg.to, text: msg.text }
1427
+ );
1428
+ }
1378
1429
  };
1379
1430
 
1380
1431
  this.relay.onAgentSpawned = (agent) => {
@@ -1504,6 +1555,7 @@ export class WorkflowRunner {
1504
1555
  }
1505
1556
  this.lastIdleLog.clear();
1506
1557
  this.lastActivity.clear();
1558
+ this.supervisedRuntimeAgents.clear();
1507
1559
 
1508
1560
  this.log('Shutting down broker...');
1509
1561
  await this.relay?.shutdown();
@@ -2209,12 +2261,29 @@ export class WorkflowRunner {
2209
2261
  if (!rawAgentDef) {
2210
2262
  throw new Error(`Agent "${agentName}" not found in config`);
2211
2263
  }
2212
- const agentDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
2213
-
2214
- const maxRetries = step.retries ?? agentDef.constraints?.retries ?? errorHandling?.maxRetries ?? 0;
2264
+ const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
2265
+ const usesOwnerFlow = specialistDef.interactive !== false;
2266
+ const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
2267
+ const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
2268
+ const supervised: SupervisedStep = {
2269
+ specialist: specialistDef,
2270
+ owner: ownerDef,
2271
+ reviewer: reviewDef,
2272
+ };
2273
+ const usesDedicatedOwner = usesOwnerFlow && ownerDef.name !== specialistDef.name;
2274
+
2275
+ const maxRetries =
2276
+ step.retries ??
2277
+ ownerDef.constraints?.retries ??
2278
+ specialistDef.constraints?.retries ??
2279
+ errorHandling?.maxRetries ??
2280
+ 0;
2215
2281
  const retryDelay = errorHandling?.retryDelayMs ?? 1000;
2216
2282
  const timeoutMs =
2217
- step.timeoutMs ?? agentDef.constraints?.timeoutMs ?? this.currentConfig?.swarm?.timeoutMs;
2283
+ step.timeoutMs ??
2284
+ ownerDef.constraints?.timeoutMs ??
2285
+ specialistDef.constraints?.timeoutMs ??
2286
+ this.currentConfig?.swarm?.timeoutMs;
2218
2287
 
2219
2288
  let lastError: string | undefined;
2220
2289
 
@@ -2243,8 +2312,25 @@ export class WorkflowRunner {
2243
2312
  updatedAt: new Date().toISOString(),
2244
2313
  });
2245
2314
  this.emit({ type: 'step:started', runId, stepName: step.name });
2246
- this.postToChannel(`**[${step.name}]** Started (agent: ${agentDef.name})`);
2247
- await this.trajectory?.stepStarted(step, agentDef.name);
2315
+ this.postToChannel(
2316
+ `**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`
2317
+ );
2318
+ await this.trajectory?.stepStarted(step, ownerDef.name, {
2319
+ role: usesDedicatedOwner ? 'owner' : 'specialist',
2320
+ owner: ownerDef.name,
2321
+ specialist: specialistDef.name,
2322
+ reviewer: reviewDef?.name,
2323
+ });
2324
+ if (usesDedicatedOwner) {
2325
+ await this.trajectory?.stepSupervisionAssigned(step, supervised);
2326
+ }
2327
+ this.emit({
2328
+ type: 'step:owner-assigned',
2329
+ runId,
2330
+ stepName: step.name,
2331
+ ownerName: ownerDef.name,
2332
+ specialistName: specialistDef.name,
2333
+ });
2248
2334
 
2249
2335
  // Resolve step-output variables (e.g. {{steps.plan.output}}) at execution time
2250
2336
  const stepOutputContext = this.buildStepOutputContext(stepStates, runId);
@@ -2252,60 +2338,110 @@ export class WorkflowRunner {
2252
2338
 
2253
2339
  // If this is an interactive agent, append awareness of non-interactive workers
2254
2340
  // so the lead knows not to message them and to use step output chaining instead
2255
- if (agentDef.interactive !== false) {
2341
+ if (specialistDef.interactive !== false || ownerDef.interactive !== false) {
2256
2342
  const nonInteractiveInfo = this.buildNonInteractiveAwareness(agentMap, stepStates);
2257
2343
  if (nonInteractiveInfo) {
2258
2344
  resolvedTask += nonInteractiveInfo;
2259
2345
  }
2260
2346
  }
2261
2347
 
2262
- // Apply step-level workdir override to agent definition if present
2263
- let effectiveAgentDef = agentDef;
2264
- if (step.workdir) {
2265
- const stepWorkdir = this.resolveStepWorkdir(step);
2266
- if (stepWorkdir) {
2267
- effectiveAgentDef = { ...agentDef, cwd: stepWorkdir, workdir: undefined };
2348
+ // Apply step-level workdir override to agent definitions if present
2349
+ const applyStepWorkdir = (def: AgentDefinition): AgentDefinition => {
2350
+ if (step.workdir) {
2351
+ const stepWorkdir = this.resolveStepWorkdir(step);
2352
+ if (stepWorkdir) {
2353
+ return { ...def, cwd: stepWorkdir, workdir: undefined };
2354
+ }
2268
2355
  }
2356
+ return def;
2357
+ };
2358
+ const effectiveSpecialist = applyStepWorkdir(specialistDef);
2359
+ const effectiveOwner = applyStepWorkdir(ownerDef);
2360
+
2361
+ let specialistOutput: string;
2362
+ let ownerOutput: string;
2363
+ let ownerElapsed: number;
2364
+
2365
+ if (usesDedicatedOwner) {
2366
+ const result = await this.executeSupervisedAgentStep(
2367
+ step,
2368
+ { specialist: effectiveSpecialist, owner: effectiveOwner, reviewer: reviewDef },
2369
+ resolvedTask,
2370
+ timeoutMs
2371
+ );
2372
+ specialistOutput = result.specialistOutput;
2373
+ ownerOutput = result.ownerOutput;
2374
+ ownerElapsed = result.ownerElapsed;
2375
+ } else {
2376
+ const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
2377
+
2378
+ this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
2379
+ const resolvedStep = { ...step, task: ownerTask };
2380
+ const ownerStartTime = Date.now();
2381
+ const output = this.executor
2382
+ ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
2383
+ : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
2384
+ ownerElapsed = Date.now() - ownerStartTime;
2385
+ this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
2386
+ if (usesOwnerFlow) {
2387
+ this.assertOwnerCompletionMarker(step, output, ownerTask);
2388
+ }
2389
+ specialistOutput = output;
2390
+ ownerOutput = output;
2269
2391
  }
2270
2392
 
2271
- // Spawn agent via AgentRelay
2272
- this.log(`[${step.name}] Spawning agent "${effectiveAgentDef.name}" (cli: ${effectiveAgentDef.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
2273
- const resolvedStep = { ...step, task: resolvedTask };
2274
- const output = this.executor
2275
- ? await this.executor.executeAgentStep(resolvedStep, effectiveAgentDef, resolvedTask, timeoutMs)
2276
- : await this.spawnAndWait(effectiveAgentDef, resolvedStep, timeoutMs);
2277
- this.log(`[${step.name}] Agent "${agentDef.name}" exited`);
2278
-
2279
2393
  // Run verification if configured
2280
2394
  if (step.verification) {
2281
- this.runVerification(step.verification, output, step.name, resolvedTask);
2395
+ this.runVerification(step.verification, specialistOutput, step.name, resolvedTask);
2396
+ }
2397
+
2398
+ // Every interactive step gets a review pass; pick a dedicated reviewer when available.
2399
+ let combinedOutput = specialistOutput;
2400
+ if (usesOwnerFlow && reviewDef) {
2401
+ const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
2402
+ const reviewOutput = await this.runStepReviewGate(
2403
+ step,
2404
+ resolvedTask,
2405
+ specialistOutput,
2406
+ ownerOutput,
2407
+ ownerDef,
2408
+ reviewDef,
2409
+ remainingMs
2410
+ );
2411
+ combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
2282
2412
  }
2283
2413
 
2284
2414
  // Mark completed
2285
2415
  state.row.status = 'completed';
2286
- state.row.output = output;
2416
+ state.row.output = combinedOutput;
2287
2417
  state.row.completedAt = new Date().toISOString();
2288
2418
  await this.db.updateStep(state.row.id, {
2289
2419
  status: 'completed',
2290
- output,
2420
+ output: combinedOutput,
2291
2421
  completedAt: state.row.completedAt,
2292
2422
  updatedAt: new Date().toISOString(),
2293
2423
  });
2294
2424
 
2295
2425
  // Persist step output to disk so it survives restarts and is inspectable
2296
- await this.persistStepOutput(runId, step.name, output);
2426
+ await this.persistStepOutput(runId, step.name, combinedOutput);
2297
2427
 
2298
- this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2299
- await this.trajectory?.stepCompleted(step, output, attempt + 1);
2428
+ this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput });
2429
+ await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
2300
2430
  return;
2301
2431
  } catch (err) {
2302
2432
  lastError = err instanceof Error ? err.message : String(err);
2433
+ const ownerTimedOut = usesDedicatedOwner
2434
+ ? /\bowner timed out\b/i.test(lastError)
2435
+ : /\btimed out\b/i.test(lastError) && !lastError.includes(`${step.name}-review`);
2436
+ if (ownerTimedOut) {
2437
+ this.emit({ type: 'step:owner-timeout', runId, stepName: step.name, ownerName: ownerDef.name });
2438
+ }
2303
2439
  }
2304
2440
  }
2305
2441
 
2306
2442
  // All retries exhausted — record root-cause diagnosis and mark failed
2307
2443
  const nonInteractive =
2308
- agentDef.interactive === false || ['worker', 'reviewer', 'analyst'].includes(agentDef.preset ?? '');
2444
+ ownerDef.interactive === false || ['worker', 'reviewer', 'analyst'].includes(ownerDef.preset ?? '');
2309
2445
  const verificationValue =
2310
2446
  typeof step.verification === 'object' && 'value' in step.verification
2311
2447
  ? String(step.verification.value)
@@ -2322,6 +2458,543 @@ export class WorkflowRunner {
2322
2458
  );
2323
2459
  }
2324
2460
 
2461
/**
 * Appends the "STEP OWNER CONTRACT" footer to a step's task prompt.
 *
 * Non-interactive owners (`interactive === false`) get the task back unchanged.
 * When the owner and the specialist are different agents, the contract also
 * names the intended specialist.
 *
 * @param step - Step whose name is embedded in the completion marker.
 * @param resolvedTask - Task text the contract is appended to.
 * @param ownerDef - Agent that owns the step.
 * @param specialistDef - Agent intended to do the work.
 * @returns The task text, with the contract appended for interactive owners.
 */
private injectStepOwnerContract(
  step: WorkflowStep,
  resolvedTask: string,
  ownerDef: AgentDefinition,
  specialistDef: AgentDefinition
): string {
  if (ownerDef.interactive === false) {
    return resolvedTask;
  }

  const contractLines: string[] = [
    'STEP OWNER CONTRACT:',
    `- You are the accountable owner for step "${step.name}".`,
  ];

  // Only mention the specialist when it is a different agent from the owner.
  if (ownerDef.name !== specialistDef.name) {
    contractLines.push(
      `- Specialist intended for this step: "${specialistDef.name}" (${specialistDef.role ?? specialistDef.cli}).`
    );
  }

  contractLines.push(
    '- If you delegate, you must still verify completion yourself.',
    `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}`,
    '- Then self-terminate immediately with /exit.'
  );

  return `${resolvedTask}\n\n---\n${contractLines.join('\n')}`;
}
2483
+
2484
/**
 * Builds the prompt given to a dedicated step owner that supervises a worker.
 *
 * The prompt names the worker (logical and runtime name), restates the task,
 * lists ways to verify completion, optionally adds a verification-gate line,
 * and ends with the exact completion marker the owner must output.
 *
 * @param step - Step being supervised; supplies the name and verification rule.
 * @param originalTask - Task text given to the worker.
 * @param supervised - Agent definitions taking part in the step.
 * @param workerRuntimeName - Name the worker actually runs under.
 * @returns The supervisor prompt.
 */
private buildOwnerSupervisorTask(
  step: WorkflowStep,
  originalTask: string,
  supervised: SupervisedStep,
  workerRuntimeName: string
): string {
  const gateLine = this.buildSupervisorVerificationGuide(step.verification);
  // Falls back to a placeholder when no workflow channel is set.
  const channelRef = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';

  const sections: string[] = [
    `You are the step owner/supervisor for step "${step.name}".\n\n`,
    `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelRef}\n`,
    `Task: ${originalTask}\n\n`,
    'Your job: Monitor the worker and determine when the task is complete.\n\n',
    'How to verify completion:\n',
    `- Watch ${channelRef} for the worker's progress messages and mirrored PTY output\n`,
    '- Check file changes: run `git diff --stat` or inspect expected files directly\n',
    `- Ask the worker directly on ${channelRef} if you need a status update\n`,
    gateLine,
    "\nWhen you're satisfied the work is done correctly:\n",
    `Output exactly: STEP_COMPLETE:${step.name}`,
  ];
  return sections.join('');
}
2506
+
2507
/**
 * Renders a step's verification rule as one bullet line for the supervisor prompt.
 *
 * @param verification - The step's verification rule, if any.
 * @returns A newline-terminated bullet for the four recognised rule types
 *          (`output_contains`, `file_exists`, `exit_code`, `custom`), or an
 *          empty string when there is no rule or the type is not recognised.
 */
private buildSupervisorVerificationGuide(verification?: VerificationCheck): string {
  if (!verification) {
    return '';
  }
  if (verification.type === 'output_contains') {
    return `- Verification gate: confirm the worker output contains ${JSON.stringify(verification.value)}\n`;
  }
  if (verification.type === 'file_exists') {
    return `- Verification gate: confirm the file exists at ${JSON.stringify(verification.value)}\n`;
  }
  if (verification.type === 'exit_code') {
    return `- Verification gate: confirm the worker exits with code ${JSON.stringify(verification.value)}\n`;
  }
  if (verification.type === 'custom') {
    return `- Verification gate: apply the custom verification rule ${JSON.stringify(verification.value)}\n`;
  }
  return '';
}
2522
+
2523
/**
 * Runs a step with a dedicated owner supervising a specialist ("worker").
 *
 * With an executor configured, both agents are started through
 * `executor.executeAgentStep`. Otherwise the specialist is spawned first via
 * `spawnAndWait`, and the owner is spawned once the specialist's runtime name
 * is known so the supervisor prompt can reference it.
 *
 * @param step - The workflow step being executed.
 * @param supervised - Specialist, owner and optional reviewer definitions.
 * @param resolvedTask - Task text for the specialist.
 * @param timeoutMs - Timeout passed to each agent execution, if any.
 * @returns The specialist's output, the owner's output, and the owner's elapsed time in ms.
 * @throws Error when the owner's output lacks the completion marker, when either
 *         agent fails, or with an "owner timed out" message when the owner's own
 *         run times out on the spawn path.
 */
private async executeSupervisedAgentStep(
  step: WorkflowStep,
  supervised: SupervisedStep,
  resolvedTask: string,
  timeoutMs?: number
): Promise<{ specialistOutput: string; ownerOutput: string; ownerElapsed: number }> {
  if (this.executor) {
    const supervisorTask = this.buildOwnerSupervisorTask(
      step,
      resolvedTask,
      supervised,
      supervised.specialist.name
    );
    const specialistStep = { ...step, task: resolvedTask };
    const ownerStep: WorkflowStep = {
      ...step,
      name: `${step.name}-owner`,
      agent: supervised.owner.name,
      task: supervisorTask,
    };

    this.log(
      `[${step.name}] Spawning specialist "${supervised.specialist.name}" and owner "${supervised.owner.name}"`
    );
    const specialistPromise = this.executor.executeAgentStep(
      specialistStep,
      supervised.specialist,
      resolvedTask,
      timeoutMs
    );
    // Guard against unhandled rejection if owner fails before specialist settles
    const specialistSettled = specialistPromise.catch(() => undefined);

    try {
      const ownerStartTime = Date.now();
      const ownerOutput = await this.executor.executeAgentStep(
        ownerStep,
        supervised.owner,
        supervisorTask,
        timeoutMs
      );
      const ownerElapsed = Date.now() - ownerStartTime;

      this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
      const specialistOutput = await specialistPromise;
      return { specialistOutput, ownerOutput, ownerElapsed };
    } catch (error) {
      // Let the specialist settle before surfacing the failure.
      await specialistSettled;
      throw error;
    }
  }

  let workerHandle: Agent | undefined;
  let workerRuntimeName = supervised.specialist.name;
  let workerSpawned = false;
  let workerReleased = false;
  let resolveWorkerSpawn!: () => void;
  let rejectWorkerSpawn!: (error: unknown) => void;
  // Settles once the worker has spawned, failed to spawn, or already finished.
  const workerReady = new Promise<void>((resolve, reject) => {
    resolveWorkerSpawn = resolve;
    rejectWorkerSpawn = reject;
  });

  const specialistStep = { ...step, task: resolvedTask };
  this.log(
    `[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`
  );
  const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
    agentNameSuffix: 'worker',
    onSpawned: ({ actualName, agent }) => {
      workerHandle = agent;
      workerRuntimeName = actualName;
      this.supervisedRuntimeAgents.set(actualName, {
        stepName: step.name,
        role: 'specialist',
        logicalName: supervised.specialist.name,
      });
      if (!workerSpawned) {
        workerSpawned = true;
        resolveWorkerSpawn();
      }
    },
    onChunk: ({ agentName, chunk }) => {
      this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
    },
  }).then(
    (output) => {
      // If the worker finished without onSpawned ever firing, unblock the
      // owner phase anyway so `await workerReady` cannot hang forever.
      if (!workerSpawned) {
        workerSpawned = true;
        resolveWorkerSpawn();
      }
      return output;
    },
    (error: unknown) => {
      if (!workerSpawned) {
        workerSpawned = true;
        rejectWorkerSpawn(error);
      }
      throw error;
    }
  );

  // Rejection-safe view of the worker, used when we only need to wait for it.
  const workerSettled = workerPromise.catch(() => undefined);
  workerPromise
    .then((output) => {
      workerReleased = true;
      this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
      if (step.verification?.type === 'output_contains' && output.includes(step.verification.value)) {
        this.postToChannel(
          `**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`
        );
      }
    })
    .catch((error) => {
      const message = error instanceof Error ? error.message : String(error);
      this.postToChannel(
        `**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`
      );
    });

  await workerReady;

  const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, workerRuntimeName);
  const ownerStep: WorkflowStep = {
    ...step,
    name: `${step.name}-owner`,
    agent: supervised.owner.name,
    task: supervisorTask,
  };

  this.log(`[${step.name}] Spawning owner "${supervised.owner.name}" (cli: ${supervised.owner.cli})`);
  const ownerStartTime = Date.now();
  // True once the owner's own run has returned; failures after that point
  // (missing marker, worker failure or timeout) are not owner timeouts.
  let ownerFinished = false;

  try {
    const ownerOutput = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
      agentNameSuffix: 'owner',
      onSpawned: ({ actualName }) => {
        this.supervisedRuntimeAgents.set(actualName, {
          stepName: step.name,
          role: 'owner',
          logicalName: supervised.owner.name,
        });
      },
      onChunk: ({ chunk }) => {
        void this.recordOwnerMonitoringChunk(step, supervised.owner, chunk);
      },
    });
    const ownerElapsed = Date.now() - ownerStartTime;
    ownerFinished = true;
    this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
    this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);

    const specialistOutput = await workerPromise;
    return { specialistOutput, ownerOutput, ownerElapsed };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Stop a still-running worker so it does not outlive the failed step.
    if (!workerReleased && workerHandle) {
      await workerHandle.release().catch(() => undefined);
    }
    await workerSettled;
    // Only relabel as an owner timeout when the owner's run itself failed;
    // a specialist timeout after the owner finished keeps its original error.
    if (!ownerFinished && /\btimed out\b/i.test(message)) {
      throw new Error(`Step "${step.name}" owner timed out after ${timeoutMs ?? 'unknown'}ms`);
    }
    throw error;
  }
}
2679
+
2680
/**
 * Mirrors an agent's output chunk to the workflow channel.
 *
 * ANSI sequences are stripped, lines are trimmed, and blank lines are dropped.
 * At most the first three remaining lines are posted, each cut to 280 characters.
 *
 * @param stepName - Step the agent belongs to; used as the message prefix.
 * @param roleLabel - Human-readable role shown before the agent name.
 * @param agentName - Name of the agent that produced the chunk.
 * @param chunk - Raw output chunk.
 */
private forwardAgentChunkToChannel(
  stepName: string,
  roleLabel: string,
  agentName: string,
  chunk: string
): void {
  const maxLines = 3;
  const maxLineLength = 280;

  let forwarded = 0;
  for (const rawLine of WorkflowRunner.stripAnsi(chunk).split('\n')) {
    const text = rawLine.trim();
    if (!text) {
      continue;
    }
    this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${text.slice(0, maxLineLength)}`);
    forwarded += 1;
    if (forwarded === maxLines) {
      break;
    }
  }
}
2695
+
2696
/**
 * Scans an owner output chunk for known supervision activity and records one
 * trajectory monitoring event per match.
 *
 * Matching runs on the ANSI-stripped chunk, case-insensitively. Each event
 * carries the first 240 characters of that stripped chunk.
 *
 * @param step - Step being supervised.
 * @param ownerDef - Owner agent whose output is being inspected.
 * @param chunk - Raw output chunk from the owner.
 */
private async recordOwnerMonitoringChunk(
  step: WorkflowStep,
  ownerDef: AgentDefinition,
  chunk: string
): Promise<void> {
  const stripped = WorkflowRunner.stripAnsi(chunk);

  // Pattern → description, checked in this order.
  const signals: ReadonlyArray<readonly [RegExp, string]> = [
    [/git diff --stat/i, 'Checked git diff stats'],
    [/\bls -la\b/i, 'Listed files for verification'],
    [/status update\?/i, 'Asked the worker for a status update'],
    [/STEP_COMPLETE:/i, 'Declared the step complete'],
  ];
  const observed = signals
    .filter(([pattern]) => pattern.test(stripped))
    .map(([, description]) => description);

  // Events are recorded one at a time, in the order above.
  for (const description of observed) {
    await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, description, {
      output: stripped.slice(0, 240),
    });
  }
}
2714
+
2715
/**
 * Picks the agent that should own a step run by `specialistDef`.
 *
 * A non-interactive specialist owns its own step. Otherwise the owner is the
 * best-ranked other interactive agent whose name or role mentions one of
 * `WorkflowRunner.HUB_ROLES` as a whole word. Ranking prefers lead, then
 * coordinator, supervisor, orchestrator, hub, then any other hub role; ties
 * are broken by name. With no such agent, the specialist owns the step.
 *
 * @param specialistDef - Agent assigned to do the step's work.
 * @param agentMap - All configured agents, keyed by name.
 * @returns The owner definition (possibly the specialist itself).
 */
private resolveAutoStepOwner(
  specialistDef: AgentDefinition,
  agentMap: Map<string, AgentDefinition>
): AgentDefinition {
  if (specialistDef.interactive === false) {
    return specialistDef;
  }

  const mentionsWord = (word: string, text: string, flags = ''): boolean =>
    new RegExp(`\\b${word}\\b`, flags).test(text);

  const isHubLike = (def: AgentDefinition): boolean => {
    const fields = [def.name.toLowerCase(), def.role?.toLowerCase() ?? ''];
    return fields.some((field) =>
      [...WorkflowRunner.HUB_ROLES].some((hubRole) => mentionsWord(hubRole, field, 'i'))
    );
  };

  // Keyword → score, checked from highest score to lowest.
  const rankedKeywords: ReadonlyArray<readonly [string, number]> = [
    ['lead', 6],
    ['coordinator', 5],
    ['supervisor', 4],
    ['orchestrator', 3],
    ['hub', 2],
  ];
  const rank = (def: AgentDefinition): number => {
    const role = def.role?.toLowerCase() ?? '';
    const name = def.name.toLowerCase();
    for (const [keyword, score] of rankedKeywords) {
      if (mentionsWord(keyword, role) || mentionsWord(keyword, name)) {
        return score;
      }
    }
    return isHubLike(def) ? 1 : 0;
  };

  const pool: AgentDefinition[] = [];
  for (const rawDef of agentMap.values()) {
    const def = WorkflowRunner.resolveAgentDef(rawDef);
    if (def.interactive !== false && def.name !== specialistDef.name && isHubLike(def)) {
      pool.push(def);
    }
  }
  pool.sort((a, b) => rank(b) - rank(a) || a.name.localeCompare(b.name));

  return pool[0] ?? specialistDef;
}
2746
+
2747
/**
 * Picks the agent that reviews a step owned by `ownerDef`.
 *
 * Selection order:
 *  1. The best-scoring agent other than the owner that looks like a reviewer:
 *     `reviewer` preset, then "review" in role or name, then "verifier" or
 *     "qa" in role, then "critic" in role. Ties are broken by name.
 *  2. The first other agent that is not marked non-interactive.
 *  3. The owner itself (self-review). No warning is logged here.
 *
 * @param ownerDef - Owner of the step under review.
 * @param agentMap - All configured agents, keyed by name.
 * @returns The reviewer definition (possibly the owner itself).
 */
private resolveAutoReviewAgent(
  ownerDef: AgentDefinition,
  agentMap: Map<string, AgentDefinition>
): AgentDefinition {
  const defs = [...agentMap.values()].map((rawDef) => WorkflowRunner.resolveAgentDef(rawDef));

  // 0 means "not a reviewer"; higher scores are stronger reviewer signals.
  const score = (def: AgentDefinition): number => {
    if (def.preset === 'reviewer') return 5;
    const role = def.role?.toLowerCase() ?? '';
    if (role.includes('review') || def.name.toLowerCase().includes('review')) return 4;
    if (role.includes('verifier') || role.includes('qa')) return 3;
    if (role.includes('critic')) return 2;
    return 0;
  };

  const others = defs.filter((def) => def.name !== ownerDef.name);

  const [bestReviewer] = others
    .filter((def) => score(def) > 0)
    .sort((a, b) => score(b) - score(a) || a.name.localeCompare(b.name));
  if (bestReviewer) {
    return bestReviewer;
  }

  // No dedicated reviewer: use any other interactive agent, else self-review.
  const fallback = others.find((def) => def.interactive !== false);
  return fallback ?? ownerDef;
}
2784
+
2785
/**
 * Verifies that the owner's output contains the `STEP_COMPLETE:<step name>` marker.
 *
 * When the injected task text contained the marker and the output appears to
 * echo that prompt, one occurrence is not enough: a second, non-overlapping
 * occurrence is required so the echoed prompt does not count as completion.
 *
 * @param step - Step whose name forms the marker.
 * @param output - Captured output of the owner agent.
 * @param injectedTaskText - Prompt text that was sent to the owner.
 * @throws Error when the marker is absent, or when only the echoed occurrence is present.
 */
private assertOwnerCompletionMarker(step: WorkflowStep, output: string, injectedTaskText: string): void {
  const marker = `STEP_COMPLETE:${step.name}`;

  const firstAt = output.indexOf(marker);
  if (firstAt < 0) {
    throw new Error(`Step "${step.name}" owner completion marker missing: "${marker}"`);
  }

  // Heuristic: these phrases come from the owner contract / supervisor prompt.
  const promptEchoed =
    output.includes('STEP OWNER CONTRACT') || output.includes('Output exactly: STEP_COMPLETE:');
  if (!injectedTaskText.includes(marker) || !promptEchoed) {
    return;
  }

  // A later occurrence must start at or after the end of the first one.
  if (output.lastIndexOf(marker) < firstAt + marker.length) {
    throw new Error(`Step "${step.name}" owner completion marker missing in agent response: "${marker}"`);
  }
}
2803
+
2804
/**
 * Run the review gate for a workflow step and return the reviewer's output.
 *
 * A review prompt is assembled from the step objective, the specialist output
 * and the owner's verification notes (each output clipped to a bounded
 * head/tail snippet). The reviewer must answer with an explicit
 * `REVIEW_DECISION`; anything else is treated as a malformed review.
 *
 * Two execution paths exist:
 * - When `this.executor` is set, the executor runs the review step and the
 *   decision is parsed from the output it returns.
 * - Otherwise the reviewer is spawned through `spawnAndWait`. Streamed chunks
 *   are parsed as they arrive so that the review-completed event is emitted
 *   and the reviewer handle is released as soon as a decision is seen.
 *
 * @param step - The workflow step under review; its name labels messages and events.
 * @param resolvedTask - The step objective included verbatim in the review prompt.
 * @param specialistOutput - Output of the specialist, clipped before inclusion.
 * @param ownerOutput - Owner verification notes, clipped before inclusion.
 * @param ownerDef - Definition of the step owner (only its name is used here).
 * @param reviewerDef - Definition of the agent that performs the review.
 * @param timeoutMs - Safety timeout for the review; defaults to 600 000 ms.
 * @returns The reviewer's output when the review is approved.
 * @throws Error when the review response has no parsable decision, when the
 *   reviewer rejects the step, or when the safety timeout fires.
 */
private async runStepReviewGate(
  step: WorkflowStep,
  resolvedTask: string,
  specialistOutput: string,
  ownerOutput: string,
  ownerDef: AgentDefinition,
  reviewerDef: AgentDefinition,
  timeoutMs?: number
): Promise<string> {
  const reviewSnippetMax = 12_000;

  // Keep the first and last halves of an oversized output and note how much
  // was dropped in between, so the prompt stays bounded.
  const clipForReview = (text: string): string => {
    if (text.length <= reviewSnippetMax) return text;
    const head = Math.floor(reviewSnippetMax / 2);
    const tail = reviewSnippetMax - head;
    const omitted = text.length - head - tail;
    return (
      `${text.slice(0, head)}\n` +
      `...[truncated ${omitted} chars for review]...\n` +
      `${text.slice(text.length - tail)}`
    );
  };

  const specialistSnippet = clipForReview(specialistOutput);
  const ownerSnippet = clipForReview(ownerOutput);

  const reviewTask =
    `Review workflow step "${step.name}" for completion and safe handoff.\n` +
    `Step owner: ${ownerDef.name}\n` +
    `Original objective:\n${resolvedTask}\n\n` +
    `Specialist output:\n${specialistSnippet}\n\n` +
    `Owner verification notes:\n${ownerSnippet}\n\n` +
    `Return exactly:\n` +
    `REVIEW_DECISION: APPROVE or REJECT\n` +
    `REVIEW_REASON: <one sentence>\n` +
    `Then output /exit.`;

  const safetyTimeoutMs = timeoutMs ?? 600_000;
  const reviewStep: WorkflowStep = {
    name: `${step.name}-review`,
    type: 'agent',
    agent: reviewerDef.name,
    task: reviewTask,
  };

  await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
  this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);

  // Records the decision in the trajectory and emits the review-completed event.
  const emitReviewCompleted = async (decision: 'approved' | 'rejected', reason?: string) => {
    await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
    this.emit({
      type: 'step:review-completed',
      runId: this.currentRunId ?? '',
      stepName: step.name,
      reviewerName: reviewerDef.name,
      decision,
    });
  };

  // Executor path: the executor returns the complete output in one piece.
  if (this.executor) {
    const reviewOutput = await this.executor.executeAgentStep(
      reviewStep,
      reviewerDef,
      reviewTask,
      safetyTimeoutMs
    );
    const parsed = this.parseReviewDecision(reviewOutput);
    if (!parsed) {
      throw new Error(
        `Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)`
      );
    }
    await emitReviewCompleted(parsed.decision, parsed.reason);
    if (parsed.decision === 'rejected') {
      throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`);
    }
    this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``);
    return reviewOutput;
  }

  // Spawn path: state shared between the streaming callbacks and this method.
  let reviewerHandle: Agent | undefined;
  let reviewerReleased = false;
  let reviewOutput = '';
  let completedReview:
    | { decision: 'approved' | 'rejected'; reason?: string }
    | undefined;
  let reviewCompletionPromise: Promise<void> | undefined;
  const reviewCompletionStarted = { value: false };

  // Runs at most once: record the decision, emit the event, release the reviewer.
  const startReviewCompletion = (parsed: { decision: 'approved' | 'rejected'; reason?: string }) => {
    if (reviewCompletionStarted.value) return;
    reviewCompletionStarted.value = true;
    completedReview = parsed;
    const completion = (async () => {
      await emitReviewCompleted(parsed.decision, parsed.reason);
      if (reviewerHandle && !reviewerReleased) {
        reviewerReleased = true;
        await reviewerHandle.release().catch(() => undefined);
      }
    })();
    // This promise is created inside a stream callback and is only awaited
    // after spawnAndWait settles. Attach a no-op handler so an early rejection
    // (or a spawnAndWait failure that skips the await) is not reported as an
    // unhandled rejection. Awaiting `reviewCompletionPromise` below still
    // rethrows the original error.
    void completion.catch(() => undefined);
    reviewCompletionPromise = completion;
  };

  try {
    reviewOutput = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
      onSpawned: ({ agent }) => {
        reviewerHandle = agent;
      },
      onChunk: ({ chunk }) => {
        // Accumulate ANSI-stripped output and look for a decision on every chunk.
        const nextOutput = reviewOutput + WorkflowRunner.stripAnsi(chunk);
        reviewOutput = nextOutput;
        const parsed = this.parseReviewDecision(nextOutput);
        if (parsed) {
          startReviewCompletion(parsed);
        }
      },
    });
    await reviewCompletionPromise;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    if (/\btimed out\b/i.test(message)) {
      this.log(`[${step.name}] Review safety backstop timeout fired after ${safetyTimeoutMs}ms`);
      throw new Error(
        `Step "${step.name}" review safety backstop timed out after ${safetyTimeoutMs}ms`
      );
    }
    throw error;
  }

  // No decision was seen while streaming: parse the final output instead.
  if (!completedReview) {
    const parsed = this.parseReviewDecision(reviewOutput);
    if (!parsed) {
      throw new Error(
        `Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)`
      );
    }
    completedReview = parsed;
    await emitReviewCompleted(parsed.decision, parsed.reason);
  }

  if (completedReview.decision === 'rejected') {
    throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`);
  }

  this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``);
  return reviewOutput;
}
2955
+
2956
/**
 * Extract the reviewer's decision (and optional reason) from review output.
 *
 * The review prompt itself contains the template line
 * `REVIEW_DECISION: APPROVE or REJECT`, and that prompt may be echoed back in
 * the captured output. The template line is never counted as a decision;
 * otherwise an echoed prompt with no answer yet would parse as an approval.
 *
 * When the output looks like it contains the echoed prompt and several
 * candidates remain, the last one is used; otherwise the first one is used.
 *
 * Note: a partial chunk that ends right after `REVIEW_DECISION: APPROVE`
 * (before ` or REJECT` arrives) cannot be told apart from a real answer here.
 *
 * @param reviewOutput - Text produced by the reviewer, possibly partial.
 * @returns The parsed decision with an optional reason, or `null` when no
 *   decision line is present.
 */
private parseReviewDecision(
  reviewOutput: string
): { decision: 'approved' | 'rejected'; reason?: string } | null {
  // The negative lookahead rejects the instruction template
  // ("APPROVE or REJECT") so only an actual verdict is matched.
  const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)(?!\s+or\s+REJECT)/gi;
  const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
  if (decisionMatches.length === 0) {
    return null;
  }

  const outputLikelyContainsEchoedPrompt =
    reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
  const decisionMatch =
    outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
      ? decisionMatches[decisionMatches.length - 1]
      : decisionMatches[0];
  const decision = decisionMatch?.[1]?.toUpperCase();
  if (decision !== 'APPROVE' && decision !== 'REJECT') {
    return null;
  }

  const reasonPattern = /REVIEW_REASON:\s*(.+)/gi;
  const reasonMatches = [...reviewOutput.matchAll(reasonPattern)];
  const reasonMatch =
    outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
      ? reasonMatches[reasonMatches.length - 1]
      : reasonMatches[0];
  const reason = reasonMatch?.[1]?.trim();

  return {
    decision: decision === 'APPROVE' ? 'approved' : 'rejected',
    // The prompt's "<one sentence>" placeholder is not a real reason.
    reason: reason && reason !== '<one sentence>' ? reason : undefined,
  };
}
2989
+
2990
/**
 * Merge a step's output with the reviewer's output into one string.
 *
 * Trailing whitespace is removed from the step output and the review output
 * is trimmed on both ends. With no review text the step output is returned
 * alone; with no step output only the `REVIEW_OUTPUT` section is returned;
 * otherwise the two are joined by a `---` separator.
 *
 * @param stepOutput - Output produced by the step itself.
 * @param reviewOutput - Output produced by the reviewer.
 * @returns The combined text.
 */
private combineStepAndReviewOutput(stepOutput: string, reviewOutput: string): string {
  const reviewText = reviewOutput.trim();
  const stepText = stepOutput.trimEnd();

  if (reviewText.length === 0) {
    return stepText;
  }

  const reviewSection = `REVIEW_OUTPUT\n${reviewText}\n`;
  if (stepText.length === 0) {
    return reviewSection;
  }
  return `${stepText}\n\n---\n${reviewSection}`;
}
2997
+
2325
2998
  /**
2326
2999
  * Build the CLI command and arguments for a non-interactive agent execution.
2327
3000
  * Each CLI has a specific flag for one-shot prompt mode.
@@ -2596,7 +3269,8 @@ export class WorkflowRunner {
2596
3269
  private async spawnAndWait(
2597
3270
  agentDef: AgentDefinition,
2598
3271
  step: WorkflowStep,
2599
- timeoutMs?: number
3272
+ timeoutMs?: number,
3273
+ options: SpawnAndWaitOptions = {}
2600
3274
  ): Promise<string> {
2601
3275
  // Branch: non-interactive agents run as simple subprocesses
2602
3276
  if (agentDef.interactive === false) {
@@ -2607,15 +3281,17 @@ export class WorkflowRunner {
2607
3281
  throw new Error('AgentRelay not initialized');
2608
3282
  }
2609
3283
 
2610
- // Deterministic name: step name + first 8 chars of run ID.
2611
- let agentName = `${step.name}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
3284
+ // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
3285
+ const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
3286
+ let agentName = requestedName;
2612
3287
 
2613
3288
  // Only inject delegation guidance for lead/coordinator agents, not spokes/workers.
2614
3289
  // In non-hub patterns (pipeline, dag, etc.) every agent is autonomous so they all get it.
2615
3290
  const role = agentDef.role?.toLowerCase() ?? '';
2616
3291
  const nameLC = agentDef.name.toLowerCase();
2617
3292
  const isHub =
2618
- WorkflowRunner.HUB_ROLES.has(nameLC) || [...WorkflowRunner.HUB_ROLES].some((r) => role.includes(r));
3293
+ WorkflowRunner.HUB_ROLES.has(nameLC) ||
3294
+ [...WorkflowRunner.HUB_ROLES].some((r) => new RegExp(`\\b${r}\\b`).test(role));
2619
3295
  const pattern = this.currentConfig?.swarm.pattern;
2620
3296
  const isHubPattern = pattern && WorkflowRunner.HUB_PATTERNS.has(pattern);
2621
3297
  const delegationGuidance =
@@ -2651,6 +3327,7 @@ export class WorkflowRunner {
2651
3327
  // Write raw output (with ANSI codes) to log file so dashboard's
2652
3328
  // XTermLogViewer can render colors/formatting natively via xterm.js
2653
3329
  logStream.write(chunk);
3330
+ options.onChunk?.({ agentName, chunk });
2654
3331
  });
2655
3332
 
2656
3333
  const agentChannels = this.channel ? [this.channel] : agentDef.channels;
@@ -2705,12 +3382,15 @@ export class WorkflowRunner {
2705
3382
  const stripped = WorkflowRunner.stripAnsi(chunk);
2706
3383
  this.ptyOutputBuffers.get(agent.name)?.push(stripped);
2707
3384
  newLogStream.write(chunk);
3385
+ options.onChunk?.({ agentName: agent.name, chunk });
2708
3386
  });
2709
3387
  }
2710
3388
 
2711
3389
  agentName = agent.name;
2712
3390
  }
2713
3391
 
3392
+ await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
3393
+
2714
3394
  // Register in workers.json so `agents:kill` can find this agent
2715
3395
  let workerPid: number | undefined;
2716
3396
  try {
@@ -2791,6 +3471,7 @@ export class WorkflowRunner {
2791
3471
  this.ptyLogStreams.delete(agentName);
2792
3472
  }
2793
3473
  this.unregisterWorker(agentName);
3474
+ this.supervisedRuntimeAgents.delete(agentName);
2794
3475
  }
2795
3476
 
2796
3477
  let output: string;
@@ -2975,7 +3656,7 @@ export class WorkflowRunner {
2975
3656
 
2976
3657
  if (
2977
3658
  WorkflowRunner.HUB_ROLES.has(nameLC) ||
2978
- [...WorkflowRunner.HUB_ROLES].some((r) => role.includes(r))
3659
+ [...WorkflowRunner.HUB_ROLES].some((r) => new RegExp(`\\b${r}\\b`).test(role))
2979
3660
  ) {
2980
3661
  // Found a hub candidate — check if we have a live handle
2981
3662
  const handle = this.activeAgentHandles.get(agentDef.name);