agent-relay 3.1.16 → 3.1.17

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/bin/agent-relay-broker-linux-arm64 +0 -0
  2. package/dist/index.cjs +565 -32
  3. package/package.json +8 -8
  4. package/packages/acp-bridge/package.json +2 -2
  5. package/packages/config/package.json +1 -1
  6. package/packages/hooks/package.json +4 -4
  7. package/packages/memory/package.json +2 -2
  8. package/packages/openclaw/package.json +2 -2
  9. package/packages/policy/package.json +2 -2
  10. package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts +16 -0
  11. package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts.map +1 -0
  12. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js +640 -0
  13. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js.map +1 -0
  14. package/packages/sdk/dist/workflows/cli.js +10 -0
  15. package/packages/sdk/dist/workflows/cli.js.map +1 -1
  16. package/packages/sdk/dist/workflows/runner.d.ts +31 -0
  17. package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
  18. package/packages/sdk/dist/workflows/runner.js +534 -31
  19. package/packages/sdk/dist/workflows/runner.js.map +1 -1
  20. package/packages/sdk/dist/workflows/trajectory.d.ts +22 -1
  21. package/packages/sdk/dist/workflows/trajectory.d.ts.map +1 -1
  22. package/packages/sdk/dist/workflows/trajectory.js +55 -8
  23. package/packages/sdk/dist/workflows/trajectory.js.map +1 -1
  24. package/packages/sdk/dist/workflows/validator.d.ts.map +1 -1
  25. package/packages/sdk/dist/workflows/validator.js +29 -0
  26. package/packages/sdk/dist/workflows/validator.js.map +1 -1
  27. package/packages/sdk/package.json +2 -2
  28. package/packages/sdk/src/__tests__/e2e-owner-review.test.ts +778 -0
  29. package/packages/sdk/src/__tests__/workflow-runner.test.ts +484 -9
  30. package/packages/sdk/src/workflows/README.md +11 -0
  31. package/packages/sdk/src/workflows/cli.ts +10 -0
  32. package/packages/sdk/src/workflows/runner.ts +706 -33
  33. package/packages/sdk/src/workflows/trajectory.ts +89 -8
  34. package/packages/sdk/src/workflows/validator.ts +29 -0
  35. package/packages/sdk-py/pyproject.toml +1 -1
  36. package/packages/telemetry/package.json +1 -1
  37. package/packages/trajectory/package.json +2 -2
  38. package/packages/user-directory/package.json +2 -2
  39. package/packages/utils/package.json +2 -2
@@ -6,6 +6,9 @@
6
6
  */
7
7
 
8
8
  import { describe, it, expect, vi, beforeEach } from 'vitest';
9
+ import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from 'node:fs';
10
+ import os from 'node:os';
11
+ import path from 'node:path';
9
12
  import type { WorkflowDb } from '../workflows/runner.js';
10
13
  import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js';
11
14
 
@@ -56,6 +59,7 @@ vi.mock('@relaycast/sdk', () => ({
56
59
 
57
60
  let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>;
58
61
  let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>;
62
+ let mockSpawnOutputs: string[] = [];
59
63
 
60
64
  const mockAgent = {
61
65
  name: 'test-agent-abc',
@@ -73,15 +77,48 @@ const mockHuman = {
73
77
  sendMessage: vi.fn().mockResolvedValue(undefined),
74
78
  };
75
79
 
80
+ const defaultSpawnPtyImplementation = async ({
81
+ name,
82
+ task,
83
+ }: {
84
+ name: string;
85
+ task?: string;
86
+ }) => {
87
+ const queued = mockSpawnOutputs.shift();
88
+ const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim();
89
+ const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT');
90
+ const output =
91
+ queued ??
92
+ (isReview
93
+ ? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n'
94
+ : stepComplete
95
+ ? `STEP_COMPLETE:${stepComplete}\n`
96
+ : 'STEP_COMPLETE:unknown\n');
97
+
98
+ queueMicrotask(() => {
99
+ if (typeof mockRelayInstance.onWorkerOutput === 'function') {
100
+ mockRelayInstance.onWorkerOutput({ name, chunk: output });
101
+ }
102
+ });
103
+
104
+ return { ...mockAgent, name };
105
+ };
106
+
107
+ const mockRelayInstance = {
108
+ spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation),
109
+ human: vi.fn().mockReturnValue(mockHuman),
110
+ shutdown: vi.fn().mockResolvedValue(undefined),
111
+ onBrokerStderr: vi.fn().mockReturnValue(() => {}),
112
+ onWorkerOutput: null as ((frame: { name: string; chunk: string }) => void) | null,
113
+ onMessageReceived: null as any,
114
+ onAgentSpawned: null as any,
115
+ onAgentExited: null as any,
116
+ onAgentIdle: null as any,
117
+ listAgentsRaw: vi.fn().mockResolvedValue([]),
118
+ };
119
+
76
120
  vi.mock('../relay.js', () => ({
77
- AgentRelay: vi.fn().mockImplementation(() => ({
78
- spawnPty: vi.fn().mockResolvedValue(mockAgent),
79
- human: vi.fn().mockReturnValue(mockHuman),
80
- shutdown: vi.fn().mockResolvedValue(undefined),
81
- onBrokerStderr: vi.fn().mockReturnValue(() => {}),
82
- onWorkerOutput: null,
83
- listAgentsRaw: vi.fn().mockResolvedValue([]),
84
- })),
121
+ AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance),
85
122
  }));
86
123
 
87
124
  // Import after mocking
@@ -145,6 +182,41 @@ function never<T>(): Promise<T> {
145
182
  return new Promise(() => {});
146
183
  }
147
184
 
185
+ type WorkflowStepOverride = Partial<NonNullable<RelayYamlConfig['workflows']>[number]['steps'][number]>;
186
+
187
+ function makeSupervisedConfig(stepOverrides: WorkflowStepOverride = {}): RelayYamlConfig {
188
+ return makeConfig({
189
+ agents: [
190
+ { name: 'specialist', cli: 'claude', role: 'engineer' },
191
+ { name: 'team-lead', cli: 'claude', role: 'lead coordinator' },
192
+ { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
193
+ ],
194
+ workflows: [
195
+ {
196
+ name: 'default',
197
+ steps: [
198
+ {
199
+ name: 'step-1',
200
+ agent: 'specialist',
201
+ task: 'Implement the requested change',
202
+ ...stepOverrides,
203
+ },
204
+ ],
205
+ },
206
+ ],
207
+ });
208
+ }
209
+
210
+ function readCompletedTrajectoryFile(dir: string): any {
211
+ const completedDir = path.join(dir, '.trajectories', 'completed');
212
+ if (!existsSync(completedDir)) return null;
213
+
214
+ const jsonFile = readdirSync(completedDir).find((file) => file.endsWith('.json'));
215
+ if (!jsonFile) return null;
216
+
217
+ return JSON.parse(readFileSync(path.join(completedDir, jsonFile), 'utf-8'));
218
+ }
219
+
148
220
  // ── Tests ────────────────────────────────────────────────────────────────────
149
221
 
150
222
  describe('WorkflowRunner', () => {
@@ -155,6 +227,10 @@ describe('WorkflowRunner', () => {
155
227
  vi.clearAllMocks();
156
228
  waitForExitFn = vi.fn().mockResolvedValue('exited');
157
229
  waitForIdleFn = vi.fn().mockImplementation(() => never());
230
+ mockSpawnOutputs = [];
231
+ mockAgent.release.mockResolvedValue(undefined);
232
+ mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation);
233
+ mockRelayInstance.onWorkerOutput = null;
158
234
  db = makeDb();
159
235
  runner = new WorkflowRunner({ db, workspaceId: 'ws-test' });
160
236
  });
@@ -304,7 +380,7 @@ agents:
304
380
 
305
381
  expect(db.insertRun).toHaveBeenCalledTimes(1);
306
382
  expect(db.insertStep).toHaveBeenCalledTimes(2);
307
- expect(run.status).toBe('completed');
383
+ expect(run.status, run.error).toBe('completed');
308
384
  });
309
385
 
310
386
  it('should throw when workflow not found', async () => {
@@ -344,11 +420,410 @@ agents:
344
420
  expect(startedSteps).toHaveLength(2);
345
421
  });
346
422
 
423
+ it('should emit owner assignment and review completion events for interactive steps', async () => {
424
+ const events: Array<{ type: string; stepName?: string }> = [];
425
+ runner.on((event) =>
426
+ events.push({ type: event.type, stepName: 'stepName' in event ? event.stepName : undefined })
427
+ );
428
+
429
+ await runner.execute(makeConfig(), 'default');
430
+
431
+ const ownerAssigned = events.filter((e) => e.type === 'step:owner-assigned');
432
+ const reviewCompleted = events.filter((e) => e.type === 'step:review-completed');
433
+ expect(ownerAssigned).toHaveLength(2);
434
+ expect(reviewCompleted).toHaveLength(2);
435
+ });
436
+
437
+ it('should prioritize lead owner when multiple hub-role candidates exist', async () => {
438
+ const ownerAssignments: string[] = [];
439
+ runner.on((event) => {
440
+ if (event.type === 'step:owner-assigned') ownerAssignments.push(event.ownerName);
441
+ });
442
+
443
+ const config = makeConfig({
444
+ agents: [
445
+ { name: 'specialist', cli: 'claude', role: 'engineer' },
446
+ { name: 'coord-1', cli: 'claude', role: 'coordinator' },
447
+ { name: 'lead-1', cli: 'claude', role: 'lead' },
448
+ { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
449
+ ],
450
+ workflows: [
451
+ {
452
+ name: 'default',
453
+ steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
454
+ },
455
+ ],
456
+ });
457
+
458
+ const run = await runner.execute(config, 'default');
459
+ expect(run.status).toBe('completed');
460
+ expect(ownerAssignments).toEqual(['lead-1']);
461
+ }, 15000);
462
+
463
+ it('should not treat github role text as hub owner signal', async () => {
464
+ const ownerAssignments: string[] = [];
465
+ runner.on((event) => {
466
+ if (event.type === 'step:owner-assigned') ownerAssignments.push(event.ownerName);
467
+ });
468
+
469
+ const config = makeConfig({
470
+ agents: [
471
+ { name: 'specialist', cli: 'claude', role: 'engineer' },
472
+ { name: 'github-agent', cli: 'claude', role: 'github actions agent' },
473
+ { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
474
+ ],
475
+ workflows: [
476
+ {
477
+ name: 'default',
478
+ steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
479
+ },
480
+ ],
481
+ });
482
+
483
+ const run = await runner.execute(config, 'default');
484
+ expect(run.status).toBe('completed');
485
+ expect(ownerAssignments).toEqual(['specialist']);
486
+ });
487
+
488
+ it('should not elect github-role agent as owner (hub word-boundary)', async () => {
489
+ const ownerAssignments: Array<{ owner: string; specialist: string }> = [];
490
+ runner.on((event) => {
491
+ if (event.type === 'step:owner-assigned') {
492
+ ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName });
493
+ }
494
+ });
495
+
496
+ const config = makeConfig({
497
+ agents: [
498
+ { name: 'specialist', cli: 'claude', role: 'engineer' },
499
+ { name: 'github-bot', cli: 'claude', role: 'github integration' },
500
+ { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
501
+ ],
502
+ workflows: [
503
+ {
504
+ name: 'default',
505
+ steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
506
+ },
507
+ ],
508
+ });
509
+
510
+ const run = await runner.execute(config, 'default');
511
+ expect(run.status).toBe('completed');
512
+ // github-bot should NOT be elected as owner (role contains "hub" substring but not word)
513
+ expect(ownerAssignments[0].owner).not.toBe('github-bot');
514
+ // specialist should be its own owner since no hub-role agent exists
515
+ expect(ownerAssignments[0].owner).toBe('specialist');
516
+ }, 15000);
517
+
518
+ it('should parse REJECT from PTY-echoed review output', async () => {
519
+ const events: Array<{ type: string; decision?: string }> = [];
520
+ runner.on((event) => {
521
+ if (event.type === 'step:review-completed') {
522
+ events.push({ type: event.type, decision: event.decision });
523
+ }
524
+ });
525
+
526
+ // Simulate PTY output that echoes the review prompt before the actual response
527
+ const echoedPrompt =
528
+ 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: <one sentence>\n';
529
+ const actualResponse = 'REVIEW_DECISION: REJECT\nREVIEW_REASON: code has bugs\n';
530
+ mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', echoedPrompt + actualResponse];
531
+
532
+ const run = await runner.execute(makeConfig(), 'default');
533
+ expect(run.status).toBe('failed');
534
+ expect(run.error).toContain('review rejected');
535
+ // Should parse REJECT from actual response, not APPROVE from echoed instruction
536
+ expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
537
+ }, 15000);
538
+
347
539
  it('should resolve variables during execution', async () => {
348
540
  const config = makeConfig();
349
541
  config.workflows![0].steps[0].task = 'Build {{feature}}';
350
542
  const run = await runner.execute(config, 'default', { feature: 'auth' });
543
+ expect(run.status, run.error).toBe('completed');
544
+ });
545
+
546
+ it('should fail when owner response does not include completion marker', async () => {
547
+ mockSpawnOutputs = ['Owner completed work but forgot sentinel\n'];
548
+ const run = await runner.execute(makeConfig(), 'default');
549
+ expect(run.status).toBe('failed');
550
+ expect(run.error).toContain('owner completion marker');
551
+ });
552
+
553
+ it('should run specialist work in a separate process and mirror worker output to the channel', async () => {
554
+ mockSpawnOutputs = [
555
+ 'worker progress update\nworker finished\n',
556
+ 'Observed worker progress on the channel\nSTEP_COMPLETE:step-1\n',
557
+ 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n',
558
+ ];
559
+
560
+ const run = await runner.execute(makeSupervisedConfig(), 'default');
561
+
562
+ expect(run.status).toBe('completed');
563
+ const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls;
564
+ expect(spawnCalls[0][0].name).toContain('step-1-worker');
565
+ expect(spawnCalls[1][0].name).toContain('step-1-owner');
566
+ expect(spawnCalls[0][0].task).not.toContain('STEP_COMPLETE:step-1');
567
+ expect(spawnCalls[1][0].task).toContain('You are the step owner/supervisor for step "step-1".');
568
+ expect(spawnCalls[1][0].task).toContain('runtime: step-1-worker');
569
+
570
+ const channelMessages = (mockRelaycastAgent.send as any).mock.calls.map(
571
+ ([, text]: [string, string]) => text
572
+ );
573
+ expect(channelMessages.some((text: string) => text.includes('Worker `step-1-worker'))).toBe(true);
574
+ expect(channelMessages.some((text: string) => text.includes('worker finished'))).toBe(true);
575
+ });
576
+
577
+ it('should let the owner complete after checking file-based artifacts', async () => {
578
+ const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-owner-file-'));
579
+ const artifact = path.join(tmpDir, 'artifact.txt');
580
+ writeFileSync(artifact, 'done\n', 'utf-8');
581
+ runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir });
582
+
583
+ try {
584
+ mockSpawnOutputs = [
585
+ 'worker wrote artifact\n',
586
+ 'Bash(git diff --stat)\nSTEP_COMPLETE:step-1\n',
587
+ 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: artifact verified\n',
588
+ ];
589
+
590
+ const run = await runner.execute(
591
+ makeSupervisedConfig({ verification: { type: 'file_exists', value: 'artifact.txt' } }),
592
+ 'default'
593
+ );
594
+
595
+ expect(run.status).toBe('completed');
596
+ const ownerTask = (mockRelayInstance.spawnPty as any).mock.calls[1][0].task as string;
597
+ expect(ownerTask).toContain('Verification gate: confirm the file exists at "artifact.txt"');
598
+ } finally {
599
+ rmSync(tmpDir, { recursive: true, force: true });
600
+ }
601
+ });
602
+
603
+ it('should keep specialist output for chaining even when the owner signals later', async () => {
604
+ mockSpawnOutputs = [
605
+ 'specialist deliverable\n',
606
+ 'Worker already exited; artifacts look correct\nSTEP_COMPLETE:step-1\n',
607
+ 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: handoff is safe\n',
608
+ ];
609
+
610
+ const run = await runner.execute(makeSupervisedConfig(), 'default');
611
+ expect(run.status).toBe('completed');
612
+
613
+ const stepRows = await db.getStepsByRunId(run.id);
614
+ expect(stepRows[0].output).toContain('specialist deliverable');
615
+ expect(stepRows[0].output).not.toContain('Worker already exited; artifacts look correct');
616
+ });
617
+
618
+ it('should fail closed when review response is malformed', async () => {
619
+ mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', 'REVIEW_REASON: looks fine\n'];
620
+ const run = await runner.execute(makeConfig(), 'default');
621
+ expect(run.status).toBe('failed');
622
+ expect(run.error).toContain('review response malformed');
623
+ });
624
+
625
+ it('should fail when review explicitly rejects step output', async () => {
626
+ const events: Array<{ type: string; decision?: string }> = [];
627
+ runner.on((event) => {
628
+ if (event.type === 'step:review-completed') {
629
+ events.push({
630
+ type: event.type,
631
+ decision: event.decision,
632
+ });
633
+ }
634
+ });
635
+
636
+ mockSpawnOutputs = [
637
+ 'STEP_COMPLETE:step-1\n',
638
+ 'REVIEW_DECISION: REJECT\nREVIEW_REASON: missing checks\n',
639
+ ];
640
+ const run = await runner.execute(makeConfig(), 'default');
641
+ expect(run.status).toBe('failed');
642
+ expect(run.error).toContain('review rejected');
643
+ expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
644
+ });
645
+
646
+ it('should parse final review decision when PTY output echoes review instructions', async () => {
647
+ const events: Array<{ type: string; decision?: string }> = [];
648
+ runner.on((event) => {
649
+ if (event.type === 'step:review-completed') {
650
+ events.push({
651
+ type: event.type,
652
+ decision: event.decision,
653
+ });
654
+ }
655
+ });
656
+
657
+ mockSpawnOutputs = [
658
+ 'STEP_COMPLETE:step-1\n',
659
+ 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: <one sentence>\nREVIEW_DECISION: REJECT\nREVIEW_REASON: insufficient evidence\n',
660
+ ];
661
+ const run = await runner.execute(makeConfig(), 'default');
662
+ expect(run.status).toBe('failed');
663
+ expect(run.error).toContain('review rejected');
664
+ expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
665
+ });
666
+
667
+ it('should record review completion in trajectory with decision and reason', async () => {
668
+ const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-review-traj-'));
669
+ runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir });
670
+
671
+ try {
672
+ mockSpawnOutputs = [
673
+ 'STEP_COMPLETE:step-1\n',
674
+ 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: durable review record\n',
675
+ ];
676
+
677
+ const run = await runner.execute(makeConfig({ trajectories: {} }), 'default');
678
+ expect(run.status).toBe('completed');
679
+
680
+ const trajectory = readCompletedTrajectoryFile(tmpDir);
681
+ const events = trajectory.chapters.flatMap((chapter: any) => chapter.events);
682
+ const reviewEvent = events.find((event: any) => event.type === 'review-completed');
683
+
684
+ expect(reviewEvent).toBeTruthy();
685
+ expect(reviewEvent.raw).toMatchObject({
686
+ stepName: 'step-1',
687
+ reviewer: 'agent-b',
688
+ decision: 'approved',
689
+ reason: 'durable review record',
690
+ });
691
+ } finally {
692
+ rmSync(tmpDir, { recursive: true, force: true });
693
+ }
694
+ });
695
+
696
+ it('should not double release the worker when the owner fails after worker completion', async () => {
697
+ const workerRelease = vi.fn().mockResolvedValue(undefined);
698
+ const ownerRelease = vi.fn().mockResolvedValue(undefined);
699
+
700
+ mockRelayInstance.spawnPty.mockImplementation(async ({
701
+ name,
702
+ task,
703
+ }: {
704
+ name: string;
705
+ task?: string;
706
+ }) => {
707
+ const isOwner = name.includes('-owner-');
708
+ const output = isOwner ? 'owner checking\n' : 'worker finished\n';
709
+
710
+ queueMicrotask(() => {
711
+ if (typeof mockRelayInstance.onWorkerOutput === 'function') {
712
+ mockRelayInstance.onWorkerOutput({ name, chunk: output });
713
+ }
714
+ });
715
+
716
+ if (isOwner) {
717
+ return {
718
+ name,
719
+ waitForExit: vi.fn().mockImplementation(async () => {
720
+ await Promise.resolve();
721
+ return 'timeout';
722
+ }),
723
+ waitForIdle: vi.fn().mockResolvedValue('timeout'),
724
+ release: ownerRelease,
725
+ };
726
+ }
727
+
728
+ return {
729
+ name,
730
+ waitForExit: vi.fn().mockImplementation(async () => {
731
+ await workerRelease();
732
+ return 'released';
733
+ }),
734
+ waitForIdle: vi.fn().mockImplementation(() => never()),
735
+ release: workerRelease,
736
+ };
737
+ });
738
+
739
+ const run = await runner.execute(makeSupervisedConfig(), 'default');
740
+
741
+ expect(run.status).toBe('failed');
742
+ expect(run.error).toContain('owner timed out');
743
+ expect(workerRelease).toHaveBeenCalledTimes(1);
744
+ expect(ownerRelease).toHaveBeenCalledTimes(1);
745
+ });
746
+
747
+ it('should emit owner-timeout when owner times out', async () => {
748
+ const events: Array<{ type: string; stepName?: string }> = [];
749
+ runner.on((event) => {
750
+ if (event.type === 'step:owner-timeout') {
751
+ events.push({
752
+ type: event.type,
753
+ stepName: event.stepName,
754
+ });
755
+ }
756
+ });
757
+
758
+ waitForExitFn = vi.fn().mockResolvedValue('timeout');
759
+ waitForIdleFn = vi.fn().mockResolvedValue('timeout');
760
+
761
+ const run = await runner.execute(makeConfig(), 'default');
762
+ expect(run.status).toBe('failed');
763
+ expect(run.error).toContain('timed out');
764
+ expect(events).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' });
765
+ });
766
+
767
+ it('should emit owner-timeout for a dedicated supervisor when the worker is stuck', async () => {
768
+ const events: Array<{ type: string; stepName?: string }> = [];
769
+ runner.on((event) => {
770
+ if (event.type === 'step:owner-timeout') {
771
+ events.push({ type: event.type, stepName: event.stepName });
772
+ }
773
+ });
774
+
775
+ waitForExitFn = vi.fn().mockResolvedValue('timeout');
776
+ waitForIdleFn = vi.fn().mockResolvedValue('timeout');
777
+
778
+ const run = await runner.execute(makeSupervisedConfig(), 'default');
779
+ expect(run.status).toBe('failed');
780
+ expect(run.error).toContain('owner timed out');
781
+ expect(events).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' });
782
+ });
783
+
784
+ it('should preserve self-completion when no dedicated owner is available', async () => {
785
+ mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n'];
786
+
787
+ const config = makeConfig({
788
+ agents: [
789
+ { name: 'specialist', cli: 'claude', role: 'engineer' },
790
+ { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
791
+ ],
792
+ workflows: [
793
+ {
794
+ name: 'default',
795
+ steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
796
+ },
797
+ ],
798
+ });
799
+
800
+ const run = await runner.execute(config, 'default');
801
+
802
+ expect(run.status).toBe('completed');
803
+ const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls;
804
+ expect(spawnCalls[0][0].name).toContain('step-1-');
805
+ expect(spawnCalls[0][0].name).not.toContain('worker');
806
+ expect(spawnCalls[0][0].task).toContain('STEP OWNER CONTRACT');
807
+ expect(spawnCalls[0][0].task).toContain('STEP_COMPLETE:step-1');
808
+ });
809
+
810
+ it('should use the full remaining timeout as the review safety backstop', async () => {
811
+ const config = makeConfig({
812
+ workflows: [
813
+ {
814
+ name: 'default',
815
+ steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1', timeoutMs: 90_000 }],
816
+ },
817
+ ],
818
+ });
819
+ const run = await runner.execute(config, 'default');
820
+
351
821
  expect(run.status).toBe('completed');
822
+ const waitCalls = (waitForExitFn as any).mock?.calls ?? [];
823
+ expect(waitCalls.length).toBeGreaterThanOrEqual(2);
824
+ // first call: owner timeout; second call: review timeout
825
+ expect(waitCalls[1][0]).toBeGreaterThan(60_000);
826
+ expect(waitCalls[1][0]).toBeLessThanOrEqual(90_000);
352
827
  });
353
828
  });
354
829
 
@@ -640,6 +640,17 @@ The runner emits two new events for idle nudging:
640
640
  | `step:nudged` | Fired when a nudge message is sent to an idle agent |
641
641
  | `step:force-released` | Fired when an agent is force-released after exhausting nudges |
642
642
 
643
+ ## Automatic Step Owner and Review
644
+
645
+ For interactive agent steps, the runner now hardens handoffs automatically:
646
+
647
+ 1. Elects a step owner (prefers lead/coordinator-style agents, falls back to the step agent)
648
+ 2. Requires the owner to provide an explicit completion signal (`STEP_COMPLETE:<step-name>`)
649
+ 3. Runs a review pass before marking the step complete (prefers reviewer-style agents when present)
650
+ 4. Stores primary output plus review output in the step artifact
651
+
652
+ Deterministic and worktree steps are unchanged and do not require owner/review delegation.
653
+
643
654
  ## Schema Validation
644
655
 
645
656
  A JSON Schema is available at `packages/sdk/src/workflows/schema.json` for editor autocompletion and validation of `relay.yaml` files.
@@ -51,8 +51,14 @@ function formatEvent(event: WorkflowEvent): string {
51
51
  return `[run] cancelled`;
52
52
  case 'step:started':
53
53
  return `[step] ${event.stepName} started`;
54
+ case 'step:owner-assigned':
55
+ return `[step] ${event.stepName} owner=${event.ownerName} specialist=${event.specialistName}`;
54
56
  case 'step:completed':
55
57
  return `[step] ${event.stepName} completed`;
58
+ case 'step:review-completed':
59
+ return `[step] ${event.stepName} review ${event.decision} by ${event.reviewerName}`;
60
+ case 'step:owner-timeout':
61
+ return `[step] ${event.stepName} owner ${event.ownerName} timed out`;
56
62
  case 'step:failed':
57
63
  return `[step] ${event.stepName} failed: ${event.error}`;
58
64
  case 'step:skipped':
@@ -63,6 +69,10 @@ function formatEvent(event: WorkflowEvent): string {
63
69
  return `[step] ${event.stepName} nudged (nudge #${event.nudgeCount})`;
64
70
  case 'step:force-released':
65
71
  return `[step] ${event.stepName} force-released`;
72
+ default: {
73
+ const _exhaustive: never = event;
74
+ return `[unknown event] ${(_exhaustive as WorkflowEvent).type}`;
75
+ }
66
76
  }
67
77
  }
68
78