agent-relay 3.1.16 → 3.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-relay-broker-linux-arm64 +0 -0
- package/dist/index.cjs +565 -32
- package/package.json +8 -8
- package/packages/acp-bridge/package.json +2 -2
- package/packages/config/package.json +1 -1
- package/packages/hooks/package.json +4 -4
- package/packages/memory/package.json +2 -2
- package/packages/openclaw/package.json +2 -2
- package/packages/policy/package.json +2 -2
- package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts +16 -0
- package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts.map +1 -0
- package/packages/sdk/dist/__tests__/e2e-owner-review.test.js +640 -0
- package/packages/sdk/dist/__tests__/e2e-owner-review.test.js.map +1 -0
- package/packages/sdk/dist/workflows/cli.js +10 -0
- package/packages/sdk/dist/workflows/cli.js.map +1 -1
- package/packages/sdk/dist/workflows/runner.d.ts +31 -0
- package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
- package/packages/sdk/dist/workflows/runner.js +534 -31
- package/packages/sdk/dist/workflows/runner.js.map +1 -1
- package/packages/sdk/dist/workflows/trajectory.d.ts +22 -1
- package/packages/sdk/dist/workflows/trajectory.d.ts.map +1 -1
- package/packages/sdk/dist/workflows/trajectory.js +55 -8
- package/packages/sdk/dist/workflows/trajectory.js.map +1 -1
- package/packages/sdk/dist/workflows/validator.d.ts.map +1 -1
- package/packages/sdk/dist/workflows/validator.js +29 -0
- package/packages/sdk/dist/workflows/validator.js.map +1 -1
- package/packages/sdk/package.json +2 -2
- package/packages/sdk/src/__tests__/e2e-owner-review.test.ts +778 -0
- package/packages/sdk/src/__tests__/workflow-runner.test.ts +484 -9
- package/packages/sdk/src/workflows/README.md +11 -0
- package/packages/sdk/src/workflows/cli.ts +10 -0
- package/packages/sdk/src/workflows/runner.ts +706 -33
- package/packages/sdk/src/workflows/trajectory.ts +89 -8
- package/packages/sdk/src/workflows/validator.ts +29 -0
- package/packages/sdk-py/pyproject.toml +1 -1
- package/packages/telemetry/package.json +1 -1
- package/packages/trajectory/package.json +2 -2
- package/packages/user-directory/package.json +2 -2
- package/packages/utils/package.json +2 -2
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
9
|
+
import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from 'node:fs';
|
|
10
|
+
import os from 'node:os';
|
|
11
|
+
import path from 'node:path';
|
|
9
12
|
import type { WorkflowDb } from '../workflows/runner.js';
|
|
10
13
|
import type { RelayYamlConfig, WorkflowRunRow, WorkflowStepRow } from '../workflows/types.js';
|
|
11
14
|
|
|
@@ -56,6 +59,7 @@ vi.mock('@relaycast/sdk', () => ({
|
|
|
56
59
|
|
|
57
60
|
let waitForExitFn: (ms?: number) => Promise<'exited' | 'timeout' | 'released'>;
|
|
58
61
|
let waitForIdleFn: (ms?: number) => Promise<'idle' | 'timeout' | 'exited'>;
|
|
62
|
+
let mockSpawnOutputs: string[] = [];
|
|
59
63
|
|
|
60
64
|
const mockAgent = {
|
|
61
65
|
name: 'test-agent-abc',
|
|
@@ -73,15 +77,48 @@ const mockHuman = {
|
|
|
73
77
|
sendMessage: vi.fn().mockResolvedValue(undefined),
|
|
74
78
|
};
|
|
75
79
|
|
|
80
|
+
/**
 * Default fake for `spawnPty`.
 *
 * Picks the text the fake agent will "print", schedules it to be delivered to
 * `mockRelayInstance.onWorkerOutput` on the microtask queue, and resolves with
 * a copy of `mockAgent` carrying the requested name.
 *
 * Output selection, in priority order:
 *   1. the next entry queued in `mockSpawnOutputs` (one entry is consumed per spawn),
 *   2. an approving review when the task contains the review instruction text,
 *   3. the `STEP_COMPLETE:<marker>` sentinel found in the task,
 *   4. `STEP_COMPLETE:unknown` when the task carries no sentinel.
 */
const defaultSpawnPtyImplementation = async (spawnOptions: { name: string; task?: string }) => {
  const { name, task } = spawnOptions;

  // Always shift, even if the fallback ends up being used, so the queue
  // position stays aligned with the number of spawns.
  const scripted = mockSpawnOutputs.shift();

  let chunk: string;
  if (scripted != null) {
    chunk = scripted;
  } else if (task?.includes('REVIEW_DECISION: APPROVE or REJECT')) {
    chunk = 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n';
  } else {
    const marker = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim();
    chunk = marker ? `STEP_COMPLETE:${marker}\n` : 'STEP_COMPLETE:unknown\n';
  }

  // Deliver asynchronously; the listener is looked up at delivery time, not at spawn time.
  queueMicrotask(() => {
    if (typeof mockRelayInstance.onWorkerOutput !== 'function') return;
    mockRelayInstance.onWorkerOutput({ name, chunk });
  });

  return { ...mockAgent, name };
};
|
|
106
|
+
|
|
107
|
+
/**
 * Shared relay test double. The mocked `AgentRelay` constructor in this file
 * returns this single object, so tests can inspect `spawnPty` calls and
 * read or reset the listener slots directly.
 */
const mockRelayInstance = {
  // Listener slots, all initially empty. `onWorkerOutput` is the one the
  // spawn fakes in this file invoke to deliver agent output.
  onWorkerOutput: null as ((frame: { name: string; chunk: string }) => void) | null,
  onMessageReceived: null as any,
  onAgentSpawned: null as any,
  onAgentExited: null as any,
  onAgentIdle: null as any,

  // Callable surface.
  spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation),
  human: vi.fn().mockReturnValue(mockHuman),
  listAgentsRaw: vi.fn().mockResolvedValue([]),
  // Returns a no-op function.
  onBrokerStderr: vi.fn().mockReturnValue(() => {}),
  shutdown: vi.fn().mockResolvedValue(undefined),
};
|
|
119
|
+
|
|
76
120
|
// Replace the relay module: constructing `AgentRelay` yields the shared
// `mockRelayInstance`. The instance is only dereferenced when the mocked
// constructor is actually invoked.
vi.mock('../relay.js', () => {
  const AgentRelay = vi.fn().mockImplementation(() => mockRelayInstance);
  return { AgentRelay };
});
|
|
86
123
|
|
|
87
124
|
// Import after mocking
|
|
@@ -145,6 +182,41 @@ function never<T>(): Promise<T> {
|
|
|
145
182
|
return new Promise(() => {});
|
|
146
183
|
}
|
|
147
184
|
|
|
185
|
+
/** Any subset of the fields of a single workflow step. */
type WorkflowStepOverride = Partial<NonNullable<RelayYamlConfig['workflows']>[number]['steps'][number]>;

/**
 * Builds a config with three agents (an engineer, a lead coordinator and a
 * reviewer) and one workflow, `default`, containing a single step `step-1`
 * assigned to the engineer.
 *
 * @param stepOverrides fields spread over the base step; they win over the
 *   base `name`, `agent` and `task` values.
 */
function makeSupervisedConfig(stepOverrides: WorkflowStepOverride = {}): RelayYamlConfig {
  const supervisedStep = {
    name: 'step-1',
    agent: 'specialist',
    task: 'Implement the requested change',
    ...stepOverrides,
  };

  return makeConfig({
    agents: [
      { name: 'specialist', cli: 'claude', role: 'engineer' },
      { name: 'team-lead', cli: 'claude', role: 'lead coordinator' },
      { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
    ],
    workflows: [{ name: 'default', steps: [supervisedStep] }],
  });
}
|
|
209
|
+
|
|
210
|
+
/**
 * Loads a completed trajectory written under `<dir>/.trajectories/completed`.
 *
 * When several `.json` files are present, the one whose name sorts first is
 * used. Directory listing order is not guaranteed, so sorting keeps the
 * choice stable across platforms and runs.
 *
 * @param dir directory that contains the `.trajectories` folder
 * @returns the parsed JSON document, or `null` when the `completed` folder is
 *   missing or holds no `.json` file
 * @throws SyntaxError when the selected file is not valid JSON
 */
function readCompletedTrajectoryFile(dir: string): any {
  const completedDir = path.join(dir, '.trajectories', 'completed');
  if (!existsSync(completedDir)) return null;

  const jsonFiles = readdirSync(completedDir)
    .filter((file) => file.endsWith('.json'))
    .sort();
  if (jsonFiles.length === 0) return null;

  return JSON.parse(readFileSync(path.join(completedDir, jsonFiles[0]), 'utf-8'));
}
|
|
219
|
+
|
|
148
220
|
// ── Tests ────────────────────────────────────────────────────────────────────
|
|
149
221
|
|
|
150
222
|
describe('WorkflowRunner', () => {
|
|
@@ -155,6 +227,10 @@ describe('WorkflowRunner', () => {
|
|
|
155
227
|
vi.clearAllMocks();
|
|
156
228
|
waitForExitFn = vi.fn().mockResolvedValue('exited');
|
|
157
229
|
waitForIdleFn = vi.fn().mockImplementation(() => never());
|
|
230
|
+
mockSpawnOutputs = [];
|
|
231
|
+
mockAgent.release.mockResolvedValue(undefined);
|
|
232
|
+
mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation);
|
|
233
|
+
mockRelayInstance.onWorkerOutput = null;
|
|
158
234
|
db = makeDb();
|
|
159
235
|
runner = new WorkflowRunner({ db, workspaceId: 'ws-test' });
|
|
160
236
|
});
|
|
@@ -304,7 +380,7 @@ agents:
|
|
|
304
380
|
|
|
305
381
|
expect(db.insertRun).toHaveBeenCalledTimes(1);
|
|
306
382
|
expect(db.insertStep).toHaveBeenCalledTimes(2);
|
|
307
|
-
expect(run.status).toBe('completed');
|
|
383
|
+
expect(run.status, run.error).toBe('completed');
|
|
308
384
|
});
|
|
309
385
|
|
|
310
386
|
it('should throw when workflow not found', async () => {
|
|
@@ -344,11 +420,410 @@ agents:
|
|
|
344
420
|
expect(startedSteps).toHaveLength(2);
|
|
345
421
|
});
|
|
346
422
|
|
|
423
|
+
it('should emit owner assignment and review completion events for interactive steps', async () => {
  // Record every event's type, plus its step name when the event carries one.
  const seen: Array<{ type: string; stepName?: string }> = [];
  runner.on((event) => {
    const stepName = 'stepName' in event ? event.stepName : undefined;
    seen.push({ type: event.type, stepName });
  });

  await runner.execute(makeConfig(), 'default');

  const ofType = (type: string) => seen.filter((entry) => entry.type === type);
  expect(ofType('step:owner-assigned')).toHaveLength(2);
  expect(ofType('step:review-completed')).toHaveLength(2);
});
|
|
436
|
+
|
|
437
|
+
it('should prioritize lead owner when multiple hub-role candidates exist', async () => {
  const electedOwners: string[] = [];
  runner.on((event) => {
    if (event.type !== 'step:owner-assigned') return;
    electedOwners.push(event.ownerName);
  });

  // Both a coordinator and a lead are present; the lead is expected to be elected.
  const run = await runner.execute(
    makeConfig({
      agents: [
        { name: 'specialist', cli: 'claude', role: 'engineer' },
        { name: 'coord-1', cli: 'claude', role: 'coordinator' },
        { name: 'lead-1', cli: 'claude', role: 'lead' },
        { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
      ],
      workflows: [
        {
          name: 'default',
          steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
        },
      ],
    }),
    'default'
  );

  expect(run.status).toBe('completed');
  expect(electedOwners).toEqual(['lead-1']);
}, 15000);
|
|
462
|
+
|
|
463
|
+
it('should not treat github role text as hub owner signal', async () => {
  const electedOwners: string[] = [];
  runner.on((event) => {
    if (event.type !== 'step:owner-assigned') return;
    electedOwners.push(event.ownerName);
  });

  // The second agent's role contains "hub" only as part of "github".
  const run = await runner.execute(
    makeConfig({
      agents: [
        { name: 'specialist', cli: 'claude', role: 'engineer' },
        { name: 'github-agent', cli: 'claude', role: 'github actions agent' },
        { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
      ],
      workflows: [
        {
          name: 'default',
          steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
        },
      ],
    }),
    'default'
  );

  expect(run.status).toBe('completed');
  expect(electedOwners).toEqual(['specialist']);
});
|
|
487
|
+
|
|
488
|
+
it('should not elect github-role agent as owner (hub word-boundary)', async () => {
  const ownerAssignments: Array<{ owner: string; specialist: string }> = [];
  runner.on((event) => {
    if (event.type === 'step:owner-assigned') {
      ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName });
    }
  });

  const config = makeConfig({
    agents: [
      { name: 'specialist', cli: 'claude', role: 'engineer' },
      { name: 'github-bot', cli: 'claude', role: 'github integration' },
      { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
    ],
    workflows: [
      {
        name: 'default',
        steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
      },
    ],
  });

  const run = await runner.execute(config, 'default');
  expect(run.status).toBe('completed');

  // Fail with a readable assertion, not a TypeError, if no owner-assigned
  // event was captured at all.
  expect(ownerAssignments.length).toBeGreaterThan(0);
  const firstOwner = ownerAssignments[0]?.owner;

  // github-bot should NOT be elected as owner (role contains "hub" substring but not word)
  expect(firstOwner).not.toBe('github-bot');
  // specialist should be its own owner since no hub-role agent exists
  expect(firstOwner).toBe('specialist');
}, 15000);
|
|
517
|
+
|
|
518
|
+
it('should parse REJECT from PTY-echoed review output', async () => {
  const reviewEvents: Array<{ type: string; decision?: string }> = [];
  runner.on((event) => {
    if (event.type !== 'step:review-completed') return;
    reviewEvents.push({ type: event.type, decision: event.decision });
  });

  // The reviewer's output starts with an echo of the prompt (which mentions
  // APPROVE) and only then contains the real verdict.
  const echoedPrompt =
    'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: <one sentence>\n';
  const actualResponse = 'REVIEW_DECISION: REJECT\nREVIEW_REASON: code has bugs\n';
  mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', `${echoedPrompt}${actualResponse}`];

  const run = await runner.execute(makeConfig(), 'default');

  expect(run.status).toBe('failed');
  expect(run.error).toContain('review rejected');
  // The verdict must come from the actual response, not the echoed instruction.
  expect(reviewEvents).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
}, 15000);
|
|
538
|
+
|
|
347
539
|
it('should resolve variables during execution', async () => {
  // Put a {{feature}} placeholder into the first step's task and supply its value at execute time.
  const templatedConfig = makeConfig();
  templatedConfig.workflows![0].steps[0].task = 'Build {{feature}}';

  const run = await runner.execute(templatedConfig, 'default', { feature: 'auth' });

  // run.error is passed as the failure message to make a failed run easier to diagnose.
  expect(run.status, run.error).toBe('completed');
});
|
|
545
|
+
|
|
546
|
+
it('should fail when owner response does not include completion marker', async () => {
  // The only scripted output lacks the STEP_COMPLETE sentinel.
  mockSpawnOutputs = ['Owner completed work but forgot sentinel\n'];

  const { status, error } = await runner.execute(makeConfig(), 'default');

  expect(status).toBe('failed');
  expect(error).toContain('owner completion marker');
});
|
|
552
|
+
|
|
553
|
+
it('should run specialist work in a separate process and mirror worker output to the channel', async () => {
  // Scripted outputs are consumed in spawn order: worker, owner, reviewer.
  mockSpawnOutputs = [
    'worker progress update\nworker finished\n',
    'Observed worker progress on the channel\nSTEP_COMPLETE:step-1\n',
    'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n',
  ];

  const run = await runner.execute(makeSupervisedConfig(), 'default');
  expect(run.status).toBe('completed');

  // First argument of the n-th spawnPty call, looked up lazily per assertion.
  const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls;
  const spawnArgs = (index: number) => spawnCalls[index][0];

  expect(spawnArgs(0).name).toContain('step-1-worker');
  expect(spawnArgs(1).name).toContain('step-1-owner');
  expect(spawnArgs(0).task).not.toContain('STEP_COMPLETE:step-1');
  expect(spawnArgs(1).task).toContain('You are the step owner/supervisor for step "step-1".');
  expect(spawnArgs(1).task).toContain('runtime: step-1-worker');

  // The second argument of each `send` call is the message text.
  const sentTexts = (mockRelaycastAgent.send as any).mock.calls.map(
    (call: [string, string]) => call[1]
  );
  const wasSent = (needle: string) => sentTexts.some((text: string) => text.includes(needle));

  expect(wasSent('Worker `step-1-worker')).toBe(true);
  expect(wasSent('worker finished')).toBe(true);
});
|
|
576
|
+
|
|
577
|
+
it('should let the owner complete after checking file-based artifacts', async () => {
  const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-owner-file-'));

  try {
    // Everything that can throw runs inside the try so the temp directory is
    // removed even when the fixture write or the runner construction fails.
    const artifact = path.join(tmpDir, 'artifact.txt');
    writeFileSync(artifact, 'done\n', 'utf-8');
    runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir });

    // Scripted outputs are consumed in spawn order: worker, owner, reviewer.
    mockSpawnOutputs = [
      'worker wrote artifact\n',
      'Bash(git diff --stat)\nSTEP_COMPLETE:step-1\n',
      'REVIEW_DECISION: APPROVE\nREVIEW_REASON: artifact verified\n',
    ];

    const run = await runner.execute(
      makeSupervisedConfig({ verification: { type: 'file_exists', value: 'artifact.txt' } }),
      'default'
    );

    // run.error is passed as the failure message to make a failed run easier to diagnose.
    expect(run.status, run.error).toBe('completed');
    const ownerTask = (mockRelayInstance.spawnPty as any).mock.calls[1][0].task as string;
    expect(ownerTask).toContain('Verification gate: confirm the file exists at "artifact.txt"');
  } finally {
    rmSync(tmpDir, { recursive: true, force: true });
  }
});
|
|
602
|
+
|
|
603
|
+
it('should keep specialist output for chaining even when the owner signals later', async () => {
  const specialistText = 'specialist deliverable';
  const ownerText = 'Worker already exited; artifacts look correct';

  // Scripted outputs are consumed in spawn order: worker, owner, reviewer.
  mockSpawnOutputs = [
    `${specialistText}\n`,
    `${ownerText}\nSTEP_COMPLETE:step-1\n`,
    'REVIEW_DECISION: APPROVE\nREVIEW_REASON: handoff is safe\n',
  ];

  const run = await runner.execute(makeSupervisedConfig(), 'default');
  expect(run.status).toBe('completed');

  // The persisted step output must hold the specialist's text, not the owner's.
  const stepRows = await db.getStepsByRunId(run.id);
  expect(stepRows[0].output).toContain(specialistText);
  expect(stepRows[0].output).not.toContain(ownerText);
});
|
|
617
|
+
|
|
618
|
+
it('should fail closed when review response is malformed', async () => {
  // The review output carries a reason but no REVIEW_DECISION line.
  const ownerOutput = 'STEP_COMPLETE:step-1\n';
  const reviewOutput = 'REVIEW_REASON: looks fine\n';
  mockSpawnOutputs = [ownerOutput, reviewOutput];

  const { status, error } = await runner.execute(makeConfig(), 'default');

  expect(status).toBe('failed');
  expect(error).toContain('review response malformed');
});
|
|
624
|
+
|
|
625
|
+
it('should fail when review explicitly rejects step output', async () => {
  const reviewEvents: Array<{ type: string; decision?: string }> = [];
  runner.on((event) => {
    if (event.type !== 'step:review-completed') return;
    const { type, decision } = event;
    reviewEvents.push({ type, decision });
  });

  // Owner completes, then the reviewer rejects.
  mockSpawnOutputs = [
    'STEP_COMPLETE:step-1\n',
    'REVIEW_DECISION: REJECT\nREVIEW_REASON: missing checks\n',
  ];

  const run = await runner.execute(makeConfig(), 'default');

  expect(run.status).toBe('failed');
  expect(run.error).toContain('review rejected');
  expect(reviewEvents).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
});
|
|
645
|
+
|
|
646
|
+
it('should parse final review decision when PTY output echoes review instructions', async () => {
  const reviewEvents: Array<{ type: string; decision?: string }> = [];
  runner.on((event) => {
    if (event.type !== 'step:review-completed') return;
    const { type, decision } = event;
    reviewEvents.push({ type, decision });
  });

  // Reviewer output: the echoed instruction block first, the real verdict last.
  const reviewerOutput =
    'Return exactly:\n' +
    'REVIEW_DECISION: APPROVE or REJECT\n' +
    'REVIEW_REASON: <one sentence>\n' +
    'REVIEW_DECISION: REJECT\n' +
    'REVIEW_REASON: insufficient evidence\n';
  mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', reviewerOutput];

  const run = await runner.execute(makeConfig(), 'default');

  expect(run.status).toBe('failed');
  expect(run.error).toContain('review rejected');
  expect(reviewEvents).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
});
|
|
666
|
+
|
|
667
|
+
it('should record review completion in trajectory with decision and reason', async () => {
  const tmpDir = mkdtempSync(path.join(os.tmpdir(), 'relay-review-traj-'));

  try {
    // Construct the runner inside the try so the temp directory is removed
    // even if construction throws.
    runner = new WorkflowRunner({ db, workspaceId: 'ws-test', cwd: tmpDir });

    mockSpawnOutputs = [
      'STEP_COMPLETE:step-1\n',
      'REVIEW_DECISION: APPROVE\nREVIEW_REASON: durable review record\n',
    ];

    const run = await runner.execute(makeConfig({ trajectories: {} }), 'default');
    // run.error is passed as the failure message to make a failed run easier to diagnose.
    expect(run.status, run.error).toBe('completed');

    // The reader returns null when no completed trajectory was written; assert
    // that explicitly rather than crashing on `null.chapters` below.
    const trajectory = readCompletedTrajectoryFile(tmpDir);
    expect(trajectory).not.toBeNull();

    const events = trajectory.chapters.flatMap((chapter: any) => chapter.events);
    const reviewEvent = events.find((event: any) => event.type === 'review-completed');

    expect(reviewEvent).toBeTruthy();
    expect(reviewEvent.raw).toMatchObject({
      stepName: 'step-1',
      reviewer: 'agent-b',
      decision: 'approved',
      reason: 'durable review record',
    });
  } finally {
    rmSync(tmpDir, { recursive: true, force: true });
  }
});
|
|
695
|
+
|
|
696
|
+
it('should not double release the worker when the owner fails after worker completion', async () => {
  const workerRelease = vi.fn().mockResolvedValue(undefined);
  const ownerRelease = vi.fn().mockResolvedValue(undefined);

  // Owner double: emits output without a completion marker and reports 'timeout'.
  const makeOwnerAgent = (name: string) => ({
    name,
    waitForExit: vi.fn().mockImplementation(async () => {
      await Promise.resolve();
      return 'timeout';
    }),
    waitForIdle: vi.fn().mockResolvedValue('timeout'),
    release: ownerRelease,
  });

  // Worker double: its waitForExit invokes the release spy once itself and
  // then reports 'released'.
  const makeWorkerAgent = (name: string) => ({
    name,
    waitForExit: vi.fn().mockImplementation(async () => {
      await workerRelease();
      return 'released';
    }),
    waitForIdle: vi.fn().mockImplementation(() => never()),
    release: workerRelease,
  });

  mockRelayInstance.spawnPty.mockImplementation(async (spawnOptions: { name: string; task?: string }) => {
    const { name } = spawnOptions;
    const isOwner = name.includes('-owner-');
    const output = isOwner ? 'owner checking\n' : 'worker finished\n';

    queueMicrotask(() => {
      if (typeof mockRelayInstance.onWorkerOutput !== 'function') return;
      mockRelayInstance.onWorkerOutput({ name, chunk: output });
    });

    return isOwner ? makeOwnerAgent(name) : makeWorkerAgent(name);
  });

  const run = await runner.execute(makeSupervisedConfig(), 'default');

  expect(run.status).toBe('failed');
  expect(run.error).toContain('owner timed out');
  expect(workerRelease).toHaveBeenCalledTimes(1);
  expect(ownerRelease).toHaveBeenCalledTimes(1);
});
|
|
746
|
+
|
|
747
|
+
it('should emit owner-timeout when owner times out', async () => {
  const timeoutEvents: Array<{ type: string; stepName?: string }> = [];
  runner.on((event) => {
    if (event.type !== 'step:owner-timeout') return;
    const { type, stepName } = event;
    timeoutEvents.push({ type, stepName });
  });

  // Both wait primitives report 'timeout'.
  waitForExitFn = vi.fn().mockResolvedValue('timeout');
  waitForIdleFn = vi.fn().mockResolvedValue('timeout');

  const run = await runner.execute(makeConfig(), 'default');

  expect(run.status).toBe('failed');
  expect(run.error).toContain('timed out');
  expect(timeoutEvents).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' });
});
|
|
766
|
+
|
|
767
|
+
it('should emit owner-timeout for a dedicated supervisor when the worker is stuck', async () => {
  const timeoutEvents: Array<{ type: string; stepName?: string }> = [];
  runner.on((event) => {
    if (event.type !== 'step:owner-timeout') return;
    const { type, stepName } = event;
    timeoutEvents.push({ type, stepName });
  });

  // Both wait primitives report 'timeout'.
  waitForExitFn = vi.fn().mockResolvedValue('timeout');
  waitForIdleFn = vi.fn().mockResolvedValue('timeout');

  // The supervised config includes a lead coordinator agent alongside the specialist.
  const run = await runner.execute(makeSupervisedConfig(), 'default');

  expect(run.status).toBe('failed');
  expect(run.error).toContain('owner timed out');
  expect(timeoutEvents).toContainEqual({ type: 'step:owner-timeout', stepName: 'step-1' });
});
|
|
783
|
+
|
|
784
|
+
it('should preserve self-completion when no dedicated owner is available', async () => {
  mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n'];

  // Only an engineer and a reviewer are configured: no lead or coordinator role.
  const run = await runner.execute(
    makeConfig({
      agents: [
        { name: 'specialist', cli: 'claude', role: 'engineer' },
        { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
      ],
      workflows: [
        {
          name: 'default',
          steps: [{ name: 'step-1', agent: 'specialist', task: 'Do step 1' }],
        },
      ],
    }),
    'default'
  );

  expect(run.status).toBe('completed');

  // Arguments of the first spawn, looked up lazily per assertion.
  const spawnCalls = (mockRelayInstance.spawnPty as any).mock.calls;
  const firstSpawn = () => spawnCalls[0][0];

  expect(firstSpawn().name).toContain('step-1-');
  expect(firstSpawn().name).not.toContain('worker');
  expect(firstSpawn().task).toContain('STEP OWNER CONTRACT');
  expect(firstSpawn().task).toContain('STEP_COMPLETE:step-1');
});
|
|
809
|
+
|
|
810
|
+
it('should use the full remaining timeout as the review safety backstop', async () => {
  // Single step with a 90 second budget.
  const run = await runner.execute(
    makeConfig({
      workflows: [
        {
          name: 'default',
          steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1', timeoutMs: 90_000 }],
        },
      ],
    }),
    'default'
  );

  expect(run.status).toBe('completed');

  const waitCalls = (waitForExitFn as any).mock?.calls ?? [];
  expect(waitCalls.length).toBeGreaterThanOrEqual(2);

  // first call: owner timeout; second call: review timeout
  const reviewTimeoutMs = waitCalls[1][0];
  expect(reviewTimeoutMs).toBeGreaterThan(60_000);
  expect(reviewTimeoutMs).toBeLessThanOrEqual(90_000);
});
|
|
353
828
|
});
|
|
354
829
|
|
|
@@ -640,6 +640,17 @@ The runner emits two new events for idle nudging:
|
|
|
640
640
|
| `step:nudged` | Fired when a nudge message is sent to an idle agent |
|
|
641
641
|
| `step:force-released` | Fired when an agent is force-released after exhausting nudges |
|
|
642
642
|
|
|
643
|
+
## Automatic Step Owner and Review
|
|
644
|
+
|
|
645
|
+
For interactive agent steps, the runner automatically hardens the handoff between agents by applying the following sequence to each step:
|
|
646
|
+
|
|
647
|
+
1. Elects a step owner (prefers lead/coordinator-style agents, falls back to the step agent)
|
|
648
|
+
2. Requires the owner to provide an explicit completion signal (`STEP_COMPLETE:<step-name>`)
|
|
649
|
+
3. Runs a review pass before marking the step complete (prefers reviewer-style agents when present)
|
|
650
|
+
4. Stores primary output plus review output in the step artifact
|
|
651
|
+
|
|
652
|
+
Deterministic and worktree steps are unchanged: they run without owner election or a review pass.
|
|
653
|
+
|
|
643
654
|
## Schema Validation
|
|
644
655
|
|
|
645
656
|
A JSON Schema is available at `packages/sdk/src/workflows/schema.json` for editor autocompletion and validation of `relay.yaml` files.
|
|
@@ -51,8 +51,14 @@ function formatEvent(event: WorkflowEvent): string {
|
|
|
51
51
|
return `[run] cancelled`;
|
|
52
52
|
case 'step:started':
|
|
53
53
|
return `[step] ${event.stepName} started`;
|
|
54
|
+
case 'step:owner-assigned':
|
|
55
|
+
return `[step] ${event.stepName} owner=${event.ownerName} specialist=${event.specialistName}`;
|
|
54
56
|
case 'step:completed':
|
|
55
57
|
return `[step] ${event.stepName} completed`;
|
|
58
|
+
case 'step:review-completed':
|
|
59
|
+
return `[step] ${event.stepName} review ${event.decision} by ${event.reviewerName}`;
|
|
60
|
+
case 'step:owner-timeout':
|
|
61
|
+
return `[step] ${event.stepName} owner ${event.ownerName} timed out`;
|
|
56
62
|
case 'step:failed':
|
|
57
63
|
return `[step] ${event.stepName} failed: ${event.error}`;
|
|
58
64
|
case 'step:skipped':
|
|
@@ -63,6 +69,10 @@ function formatEvent(event: WorkflowEvent): string {
|
|
|
63
69
|
return `[step] ${event.stepName} nudged (nudge #${event.nudgeCount})`;
|
|
64
70
|
case 'step:force-released':
|
|
65
71
|
return `[step] ${event.stepName} force-released`;
|
|
72
|
+
default: {
|
|
73
|
+
const _exhaustive: never = event;
|
|
74
|
+
return `[unknown event] ${(_exhaustive as WorkflowEvent).type}`;
|
|
75
|
+
}
|
|
66
76
|
}
|
|
67
77
|
}
|
|
68
78
|
|