@agent-relay/sdk 3.2.2 → 3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-relay-broker-darwin-arm64 +0 -0
- package/bin/agent-relay-broker-darwin-x64 +0 -0
- package/bin/agent-relay-broker-linux-arm64 +0 -0
- package/bin/agent-relay-broker-linux-x64 +0 -0
- package/dist/client.d.ts +66 -0
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +230 -0
- package/dist/client.js.map +1 -1
- package/dist/communicate/a2a-bridge.d.ts +25 -0
- package/dist/communicate/a2a-bridge.d.ts.map +1 -0
- package/dist/communicate/a2a-bridge.js +89 -0
- package/dist/communicate/a2a-bridge.js.map +1 -0
- package/dist/communicate/a2a-server.d.ts +31 -0
- package/dist/communicate/a2a-server.d.ts.map +1 -0
- package/dist/communicate/a2a-server.js +220 -0
- package/dist/communicate/a2a-server.js.map +1 -0
- package/dist/communicate/a2a-transport.d.ts +48 -0
- package/dist/communicate/a2a-transport.d.ts.map +1 -0
- package/dist/communicate/a2a-transport.js +302 -0
- package/dist/communicate/a2a-transport.js.map +1 -0
- package/dist/communicate/a2a-types.d.ts +107 -0
- package/dist/communicate/a2a-types.d.ts.map +1 -0
- package/dist/communicate/a2a-types.js +209 -0
- package/dist/communicate/a2a-types.js.map +1 -0
- package/dist/communicate/adapters/claude-sdk.d.ts +28 -0
- package/dist/communicate/adapters/claude-sdk.d.ts.map +1 -0
- package/dist/communicate/adapters/claude-sdk.js +47 -0
- package/dist/communicate/adapters/claude-sdk.js.map +1 -0
- package/dist/communicate/adapters/crewai.d.ts +42 -0
- package/dist/communicate/adapters/crewai.d.ts.map +1 -0
- package/dist/communicate/adapters/crewai.js +95 -0
- package/dist/communicate/adapters/crewai.js.map +1 -0
- package/dist/communicate/adapters/google-adk.d.ts +53 -0
- package/dist/communicate/adapters/google-adk.d.ts.map +1 -0
- package/dist/communicate/adapters/google-adk.js +77 -0
- package/dist/communicate/adapters/google-adk.js.map +1 -0
- package/dist/communicate/adapters/index.d.ts +7 -0
- package/dist/communicate/adapters/index.d.ts.map +1 -0
- package/dist/communicate/adapters/index.js +7 -0
- package/dist/communicate/adapters/index.js.map +1 -0
- package/dist/communicate/adapters/langgraph.d.ts +40 -0
- package/dist/communicate/adapters/langgraph.d.ts.map +1 -0
- package/dist/communicate/adapters/langgraph.js +77 -0
- package/dist/communicate/adapters/langgraph.js.map +1 -0
- package/dist/communicate/adapters/openai-agents.d.ts +25 -0
- package/dist/communicate/adapters/openai-agents.d.ts.map +1 -0
- package/dist/communicate/adapters/openai-agents.js +70 -0
- package/dist/communicate/adapters/openai-agents.js.map +1 -0
- package/dist/communicate/adapters/pi.d.ts +45 -0
- package/dist/communicate/adapters/pi.d.ts.map +1 -0
- package/dist/communicate/adapters/pi.js +59 -0
- package/dist/communicate/adapters/pi.js.map +1 -0
- package/dist/communicate/core.d.ts +58 -0
- package/dist/communicate/core.d.ts.map +1 -0
- package/dist/communicate/core.js +128 -0
- package/dist/communicate/core.js.map +1 -0
- package/dist/communicate/index.d.ts +4 -0
- package/dist/communicate/index.d.ts.map +1 -0
- package/dist/communicate/index.js +4 -0
- package/dist/communicate/index.js.map +1 -0
- package/dist/communicate/transport.d.ts +36 -0
- package/dist/communicate/transport.d.ts.map +1 -0
- package/dist/communicate/transport.js +371 -0
- package/dist/communicate/transport.js.map +1 -0
- package/dist/communicate/types.d.ts +58 -0
- package/dist/communicate/types.d.ts.map +1 -0
- package/dist/communicate/types.js +66 -0
- package/dist/communicate/types.js.map +1 -0
- package/dist/workflows/builder.d.ts +35 -5
- package/dist/workflows/builder.d.ts.map +1 -1
- package/dist/workflows/builder.js +81 -7
- package/dist/workflows/builder.js.map +1 -1
- package/dist/workflows/cli.js +14 -1
- package/dist/workflows/cli.js.map +1 -1
- package/dist/workflows/runner.d.ts +10 -2
- package/dist/workflows/runner.d.ts.map +1 -1
- package/dist/workflows/runner.js +95 -1
- package/dist/workflows/runner.js.map +1 -1
- package/dist/workflows/types.d.ts +11 -0
- package/dist/workflows/types.d.ts.map +1 -1
- package/package.json +48 -2
- package/dist/__tests__/completion-pipeline.test.d.ts +0 -14
- package/dist/__tests__/completion-pipeline.test.d.ts.map +0 -1
- package/dist/__tests__/completion-pipeline.test.js +0 -1476
- package/dist/__tests__/completion-pipeline.test.js.map +0 -1
- package/dist/__tests__/contract-fixtures.test.d.ts +0 -2
- package/dist/__tests__/contract-fixtures.test.d.ts.map +0 -1
- package/dist/__tests__/contract-fixtures.test.js +0 -152
- package/dist/__tests__/contract-fixtures.test.js.map +0 -1
- package/dist/__tests__/e2e-owner-review.test.d.ts +0 -16
- package/dist/__tests__/e2e-owner-review.test.d.ts.map +0 -1
- package/dist/__tests__/e2e-owner-review.test.js +0 -640
- package/dist/__tests__/e2e-owner-review.test.js.map +0 -1
- package/dist/__tests__/facade.test.d.ts +0 -2
- package/dist/__tests__/facade.test.d.ts.map +0 -1
- package/dist/__tests__/facade.test.js +0 -305
- package/dist/__tests__/facade.test.js.map +0 -1
- package/dist/__tests__/integration.test.d.ts +0 -2
- package/dist/__tests__/integration.test.d.ts.map +0 -1
- package/dist/__tests__/integration.test.js +0 -205
- package/dist/__tests__/integration.test.js.map +0 -1
- package/dist/__tests__/pty.test.d.ts +0 -2
- package/dist/__tests__/pty.test.d.ts.map +0 -1
- package/dist/__tests__/pty.test.js +0 -20
- package/dist/__tests__/pty.test.js.map +0 -1
- package/dist/__tests__/quickstart.test.d.ts +0 -2
- package/dist/__tests__/quickstart.test.d.ts.map +0 -1
- package/dist/__tests__/quickstart.test.js +0 -176
- package/dist/__tests__/quickstart.test.js.map +0 -1
- package/dist/__tests__/spawn-from-env.test.d.ts +0 -2
- package/dist/__tests__/spawn-from-env.test.d.ts.map +0 -1
- package/dist/__tests__/spawn-from-env.test.js +0 -222
- package/dist/__tests__/spawn-from-env.test.js.map +0 -1
- package/dist/__tests__/unit.test.d.ts +0 -2
- package/dist/__tests__/unit.test.d.ts.map +0 -1
- package/dist/__tests__/unit.test.js +0 -357
- package/dist/__tests__/unit.test.js.map +0 -1
|
@@ -1,640 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* E2E test harness for PR #511: auto step owner + per-step review gating.
|
|
3
|
-
*
|
|
4
|
-
* Validates:
|
|
5
|
-
* 1. Hub-role agent auto-assigned as owner (lead matches)
|
|
6
|
-
* 2. "github-integration" agent NOT matched as hub (word-boundary)
|
|
7
|
-
* 3. Review gating — approval flow
|
|
8
|
-
* 4. Review gating — rejection flow (PTY echo handling)
|
|
9
|
-
* 5. Review timeout budgeting
|
|
10
|
-
* 6. Owner timeout emission
|
|
11
|
-
* 7. Lead + workers team with owner assignment
|
|
12
|
-
* 8. YAML workflow parsing of e2e-owner-review.yaml
|
|
13
|
-
* 9. Owner completion marker validation
|
|
14
|
-
*/
|
|
15
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
16
|
-
import { readFileSync } from 'node:fs';
|
|
17
|
-
import { resolve } from 'node:path';
|
|
18
|
-
// ── Mock fetch ──────────────────────────────────────────────────────────────
|
|
19
|
-
// Canned successful HTTP response returned by every call to the mocked fetch:
// `json()` resolves to a fake API key / workspace id payload and `text()`
// resolves to an empty string.
const mockFetch = vi.fn().mockResolvedValue({
    ok: true,
    json: () => Promise.resolve({ data: { api_key: 'rk_live_test', workspace_id: 'ws-test' } }),
    text: () => Promise.resolve(''),
});
// Install the mock as the global `fetch` for this test file.
vi.stubGlobal('fetch', mockFetch);
|
|
25
|
-
// ── Mock RelayCast SDK ──────────────────────────────────────────────────────
|
|
26
|
-
// Agent-scoped RelayCast stub: messaging, heartbeat and channel operations
// all resolve to `undefined`. Tests inspect `send.mock.calls` to see what was
// posted.
const mockRelaycastAgent = {
    send: vi.fn().mockResolvedValue(undefined),
    heartbeat: vi.fn().mockResolvedValue(undefined),
    channels: {
        create: vi.fn().mockResolvedValue(undefined),
        join: vi.fn().mockResolvedValue(undefined),
        invite: vi.fn().mockResolvedValue(undefined),
    },
};
// Workspace-level RelayCast stub: registering an agent yields a fixed token,
// and `as()` always hands back the shared agent stub above.
const mockRelaycast = {
    agents: { register: vi.fn().mockResolvedValue({ token: 'token-1' }) },
    as: vi.fn().mockReturnValue(mockRelaycastAgent),
};
|
|
39
|
-
/**
 * Test double exported as `RelayError` from the mocked '@relaycast/sdk'
 * module. Carries an error code and an HTTP-style status alongside the
 * regular `Error` message.
 */
class MockRelayError extends Error {
    /** Error code supplied by the caller. */
    code;
    /** Status value supplied by the caller; defaults to 400. */
    status;
    /**
     * @param {*} code - Error code stored on the instance.
     * @param {string} message - Human-readable message passed to `Error`.
     * @param {number} [status=400] - Status stored on the instance.
     */
    constructor(code, message, status = 400) {
        super(message);
        this.code = code;
        // Report the same name as the class this stub stands in for.
        this.name = 'RelayError';
        this.status = status;
    }
}
|
|
48
|
-
// Swap the '@relaycast/sdk' module for stubs: constructing `RelayCast` yields
// the shared `mockRelaycast` object and `RelayError` is the local
// `MockRelayError` class.
vi.mock('@relaycast/sdk', () => ({
    RelayCast: vi.fn().mockImplementation(() => mockRelaycast),
    RelayError: MockRelayError,
}));
|
|
52
|
-
// ── Mock AgentRelay ─────────────────────────────────────────────────────────
|
|
53
|
-
// Per-test behaviour hooks: tests reassign these to control what the mock
// agent's `waitForExit` / `waitForIdle` return.
let waitForExitFn;
let waitForIdleFn;
// Queue of canned PTY outputs; the default spawn implementation consumes one
// entry per spawned agent, in order.
let mockSpawnOutputs = [];
// Shared agent stub. The wait functions are exposed through getters so that
// the function currently assigned to the hooks above is the one that is read.
const mockAgent = {
    name: 'test-agent-abc',
    get waitForExit() {
        return waitForExitFn;
    },
    get waitForIdle() {
        return waitForIdleFn;
    },
    release: vi.fn().mockResolvedValue(undefined),
};
// Stub returned by `mockRelayInstance.human()`.
const mockHuman = {
    name: 'WorkflowRunner',
    sendMessage: vi.fn().mockResolvedValue(undefined),
};
|
|
70
|
-
/**
 * Default behaviour for the mocked `spawnPty`.
 *
 * Picks the output the "spawned" agent will emit, delivers it to
 * `mockRelayInstance.onWorkerOutput` on a microtask, and resolves to a copy of
 * `mockAgent` carrying the requested name.
 *
 * Output selection, in priority order:
 *   1. the next entry of `mockSpawnOutputs`, when one is queued;
 *   2. an approving review response, when the task contains the review prompt;
 *   3. the `STEP_COMPLETE:<marker>` line echoed from the task, when present;
 *   4. `STEP_COMPLETE:unknown` otherwise.
 *
 * @param {{ name: string, task?: string }} options - Spawn request.
 * @returns {Promise<object>} Agent stub named after `options.name`.
 */
const defaultSpawnPtyImplementation = async ({ name, task }) => {
    const stepMarker = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim();
    const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT');

    let fallbackOutput = 'STEP_COMPLETE:unknown\n';
    if (isReview) {
        fallbackOutput = 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n';
    } else if (stepMarker) {
        fallbackOutput = `STEP_COMPLETE:${stepMarker}\n`;
    }
    // Only a missing queue entry falls back; a queued empty string is used as-is.
    const output = mockSpawnOutputs.shift() ?? fallbackOutput;

    // Deliver the chunk on a microtask, and only if an output listener is
    // attached at that moment.
    queueMicrotask(() => {
        if (typeof mockRelayInstance.onWorkerOutput === 'function') {
            mockRelayInstance.onWorkerOutput({ name, chunk: output });
        }
    });
    return { ...mockAgent, name };
};
|
|
87
|
-
// Object returned by the mocked `AgentRelay` constructor. The `on*` listener
// slots start as null; the spawn stubs in this file call `onWorkerOutput`
// only when a function has been assigned to it.
const mockRelayInstance = {
    spawnPty: vi.fn().mockImplementation(defaultSpawnPtyImplementation),
    human: vi.fn().mockReturnValue(mockHuman),
    shutdown: vi.fn().mockResolvedValue(undefined),
    // Returns a no-op function — presumably the unsubscribe handle; confirm
    // against the real AgentRelay API.
    onBrokerStderr: vi.fn().mockReturnValue(() => { }),
    onWorkerOutput: null,
    onMessageReceived: null,
    onAgentSpawned: null,
    onAgentExited: null,
    onAgentIdle: null,
    listAgentsRaw: vi.fn().mockResolvedValue([]),
};
|
|
99
|
-
// Swap '../relay.js' so that constructing `AgentRelay` yields the shared
// `mockRelayInstance` stub.
vi.mock('../relay.js', () => ({
    AgentRelay: vi.fn().mockImplementation(() => mockRelayInstance),
}));
|
|
102
|
-
// Import after mocking
|
|
103
|
-
const { WorkflowRunner } = await import('../workflows/runner.js');
|
|
104
|
-
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
105
|
-
/**
 * Builds an in-memory stand-in for the database object handed to
 * `WorkflowRunner`.
 *
 * Runs and steps are kept in separate Maps keyed by `id`. Records are copied
 * on the way in and on the way out, so callers never share object identity
 * with the store. Every method is wrapped in `vi.fn` so tests can inspect
 * calls.
 *
 * @returns {{
 *   insertRun: Function, updateRun: Function, getRun: Function,
 *   insertStep: Function, updateStep: Function, getStepsByRunId: Function
 * }} Async CRUD-style mock methods.
 */
function makeDb() {
    const runs = new Map();
    const steps = new Map();

    // Shallow-merge `patch` into an existing record; unknown ids are ignored.
    const patchRecord = (store, id, patch) => {
        const current = store.get(id);
        if (current) {
            store.set(id, { ...current, ...patch });
        }
    };

    return {
        insertRun: vi.fn(async (run) => {
            runs.set(run.id, { ...run });
        }),
        updateRun: vi.fn(async (id, patch) => {
            patchRecord(runs, id, patch);
        }),
        // Resolves to a copy of the run, or null when the id is unknown.
        getRun: vi.fn(async (id) => {
            const run = runs.get(id);
            return run ? { ...run } : null;
        }),
        insertStep: vi.fn(async (step) => {
            steps.set(step.id, { ...step });
        }),
        updateStep: vi.fn(async (id, patch) => {
            patchRecord(steps, id, patch);
        }),
        // Resolves to copies of every step of the run (matching `getRun`), so
        // mutating a returned step cannot alter the stored record.
        getStepsByRunId: vi.fn(async (runId) => {
            const matches = [];
            for (const step of steps.values()) {
                if (step.runId === runId) {
                    matches.push({ ...step });
                }
            }
            return matches;
        }),
    };
}
|
|
134
|
-
/**
 * Builds the baseline workflow config used by these tests: a DAG swarm with
 * two claude agents and a `default` workflow in which `step-2` depends on
 * `step-1`.
 *
 * @param {object} [overrides={}] - Top-level keys that replace the defaults
 *   wholesale (shallow merge; nested values are not merged).
 * @returns {object} A freshly built config object.
 */
function makeConfig(overrides = {}) {
    const agents = [
        { name: 'agent-a', cli: 'claude' },
        { name: 'agent-b', cli: 'claude' },
    ];
    const steps = [
        { name: 'step-1', agent: 'agent-a', task: 'Do step 1' },
        { name: 'step-2', agent: 'agent-b', task: 'Do step 2', dependsOn: ['step-1'] },
    ];
    return {
        version: '1',
        name: 'e2e-owner-review-test',
        swarm: { pattern: 'dag' },
        agents,
        workflows: [{ name: 'default', steps }],
        trajectories: false,
        // Spread last so caller-supplied keys win over the defaults.
        ...overrides,
    };
}
|
|
156
|
-
/**
 * Returns a promise that never settles. The tests use it for `waitForIdle`
 * mocks that should stay pending.
 *
 * @returns {Promise<never>} A permanently pending promise.
 */
function never() {
    return new Promise(() => { });
}
|
|
159
|
-
/**
 * Builds a config with a specialist, a lead-role agent and a reviewer, and a
 * single `step-1` assigned to the specialist.
 *
 * @param {object} [stepOverrides={}] - Extra or replacement fields merged
 *   into the single step (e.g. `verification`).
 * @returns {object} Config produced by `makeConfig` with the agents and
 *   workflows replaced.
 */
function makeSupervisedConfig(stepOverrides = {}) {
    const supervisedStep = {
        name: 'step-1',
        agent: 'specialist',
        task: 'Implement the requested change',
        // Spread last so the caller can override any of the fields above.
        ...stepOverrides,
    };
    return makeConfig({
        agents: [
            { name: 'specialist', cli: 'claude', role: 'engineer' },
            { name: 'team-lead', cli: 'claude', role: 'Lead coordinator for the workflow' },
            { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
        ],
        workflows: [{ name: 'default', steps: [supervisedStep] }],
    });
}
|
|
176
|
-
// ── E2E Scenarios ───────────────────────────────────────────────────────────
|
|
177
|
-
describe('PR #511 E2E: Auto Step Owner + Review Gating', () => {
|
|
178
|
-
let db;
|
|
179
|
-
let runner;
|
|
180
|
-
beforeEach(() => {
|
|
181
|
-
vi.clearAllMocks();
|
|
182
|
-
waitForExitFn = vi.fn().mockResolvedValue('exited');
|
|
183
|
-
waitForIdleFn = vi.fn().mockImplementation(() => never());
|
|
184
|
-
mockSpawnOutputs = [];
|
|
185
|
-
mockAgent.release.mockResolvedValue(undefined);
|
|
186
|
-
mockRelayInstance.spawnPty.mockImplementation(defaultSpawnPtyImplementation);
|
|
187
|
-
mockRelayInstance.onWorkerOutput = null;
|
|
188
|
-
db = makeDb();
|
|
189
|
-
runner = new WorkflowRunner({ db, workspaceId: 'ws-test' });
|
|
190
|
-
});
|
|
191
|
-
// ── Scenario 1: Hub-role agent auto-assigned as owner ───────────────────
|
|
192
|
-
describe('Scenario 1: Hub-role auto-ownership', () => {
|
|
193
|
-
it('should auto-assign lead agent as owner for specialist steps', async () => {
|
|
194
|
-
const ownerAssignments = [];
|
|
195
|
-
runner.on((event) => {
|
|
196
|
-
if (event.type === 'step:owner-assigned') {
|
|
197
|
-
ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName });
|
|
198
|
-
}
|
|
199
|
-
});
|
|
200
|
-
const config = makeConfig({
|
|
201
|
-
agents: [
|
|
202
|
-
{ name: 'impl-worker', cli: 'claude', role: 'implementer' },
|
|
203
|
-
{ name: 'team-lead', cli: 'claude', role: 'Lead coordinator for the workflow' },
|
|
204
|
-
{ name: 'quality-reviewer', cli: 'claude', role: 'reviewer' },
|
|
205
|
-
],
|
|
206
|
-
workflows: [
|
|
207
|
-
{
|
|
208
|
-
name: 'default',
|
|
209
|
-
steps: [{ name: 'hub-owner-test', agent: 'impl-worker', task: 'List 3 benefits' }],
|
|
210
|
-
},
|
|
211
|
-
],
|
|
212
|
-
});
|
|
213
|
-
const run = await runner.execute(config, 'default');
|
|
214
|
-
expect(run.status).toBe('completed');
|
|
215
|
-
expect(ownerAssignments).toHaveLength(1);
|
|
216
|
-
expect(ownerAssignments[0].owner).toBe('team-lead');
|
|
217
|
-
expect(ownerAssignments[0].specialist).toBe('impl-worker');
|
|
218
|
-
}, 15000);
|
|
219
|
-
it('should prioritize lead over coordinator in owner resolution', async () => {
|
|
220
|
-
const ownerAssignments = [];
|
|
221
|
-
runner.on((event) => {
|
|
222
|
-
if (event.type === 'step:owner-assigned')
|
|
223
|
-
ownerAssignments.push(event.ownerName);
|
|
224
|
-
});
|
|
225
|
-
const config = makeConfig({
|
|
226
|
-
agents: [
|
|
227
|
-
{ name: 'specialist', cli: 'claude', role: 'engineer' },
|
|
228
|
-
{ name: 'coord-bot', cli: 'claude', role: 'coordinator' },
|
|
229
|
-
{ name: 'lead-bot', cli: 'claude', role: 'lead' },
|
|
230
|
-
{ name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
|
|
231
|
-
],
|
|
232
|
-
workflows: [
|
|
233
|
-
{
|
|
234
|
-
name: 'default',
|
|
235
|
-
steps: [{ name: 'step-1', agent: 'specialist', task: 'Do work' }],
|
|
236
|
-
},
|
|
237
|
-
],
|
|
238
|
-
});
|
|
239
|
-
const run = await runner.execute(config, 'default');
|
|
240
|
-
expect(run.status).toBe('completed');
|
|
241
|
-
expect(ownerAssignments[0]).toBe('lead-bot');
|
|
242
|
-
}, 15000);
|
|
243
|
-
it('should spawn a separate worker and supervisor for dedicated owner steps', async () => {
|
|
244
|
-
mockSpawnOutputs = [
|
|
245
|
-
'worker finished\n',
|
|
246
|
-
'Observed progress on channel\nSTEP_COMPLETE:step-1\n',
|
|
247
|
-
'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n',
|
|
248
|
-
];
|
|
249
|
-
const run = await runner.execute(makeSupervisedConfig(), 'default');
|
|
250
|
-
expect(run.status).toBe('completed');
|
|
251
|
-
const spawnCalls = mockRelayInstance.spawnPty.mock.calls;
|
|
252
|
-
expect(spawnCalls[0][0].name).toContain('step-1-worker');
|
|
253
|
-
expect(spawnCalls[1][0].name).toContain('step-1-owner');
|
|
254
|
-
expect(spawnCalls[0][0].task).not.toContain('STEP_COMPLETE:step-1');
|
|
255
|
-
expect(spawnCalls[1][0].task).toContain('You are the step owner/supervisor for step "step-1".');
|
|
256
|
-
}, 15000);
|
|
257
|
-
});
|
|
258
|
-
// ── Scenario 2: github-integration NOT matched as hub ───────────────────
|
|
259
|
-
describe('Scenario 2: Hub word-boundary matching', () => {
|
|
260
|
-
it('should NOT match "github-integration" as hub-role agent', async () => {
|
|
261
|
-
const ownerAssignments = [];
|
|
262
|
-
runner.on((event) => {
|
|
263
|
-
if (event.type === 'step:owner-assigned') {
|
|
264
|
-
ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName });
|
|
265
|
-
}
|
|
266
|
-
});
|
|
267
|
-
const config = makeConfig({
|
|
268
|
-
agents: [
|
|
269
|
-
{ name: 'specialist', cli: 'claude', role: 'engineer' },
|
|
270
|
-
{ name: 'github-integration', cli: 'claude', role: 'GitHub integration agent' },
|
|
271
|
-
{ name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
|
|
272
|
-
],
|
|
273
|
-
workflows: [
|
|
274
|
-
{
|
|
275
|
-
name: 'default',
|
|
276
|
-
steps: [{ name: 'github-no-hub', agent: 'specialist', task: 'Test word boundary' }],
|
|
277
|
-
},
|
|
278
|
-
],
|
|
279
|
-
});
|
|
280
|
-
const run = await runner.execute(config, 'default');
|
|
281
|
-
expect(run.status).toBe('completed');
|
|
282
|
-
expect(ownerAssignments[0].owner).not.toBe('github-integration');
|
|
283
|
-
expect(ownerAssignments[0].owner).toBe('specialist');
|
|
284
|
-
}, 15000);
|
|
285
|
-
it('should NOT match "github-bot" with role "github integration" as hub', async () => {
|
|
286
|
-
const ownerAssignments = [];
|
|
287
|
-
runner.on((event) => {
|
|
288
|
-
if (event.type === 'step:owner-assigned') {
|
|
289
|
-
ownerAssignments.push({ owner: event.ownerName, specialist: event.specialistName });
|
|
290
|
-
}
|
|
291
|
-
});
|
|
292
|
-
const config = makeConfig({
|
|
293
|
-
agents: [
|
|
294
|
-
{ name: 'specialist', cli: 'claude', role: 'engineer' },
|
|
295
|
-
{ name: 'github-bot', cli: 'claude', role: 'github integration' },
|
|
296
|
-
{ name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
|
|
297
|
-
],
|
|
298
|
-
workflows: [
|
|
299
|
-
{
|
|
300
|
-
name: 'default',
|
|
301
|
-
steps: [{ name: 'step-1', agent: 'specialist', task: 'Do work' }],
|
|
302
|
-
},
|
|
303
|
-
],
|
|
304
|
-
});
|
|
305
|
-
const run = await runner.execute(config, 'default');
|
|
306
|
-
expect(run.status).toBe('completed');
|
|
307
|
-
expect(ownerAssignments[0].owner).not.toBe('github-bot');
|
|
308
|
-
expect(ownerAssignments[0].owner).toBe('specialist');
|
|
309
|
-
}, 15000);
|
|
310
|
-
});
|
|
311
|
-
// ── Scenario 3: Review gating — approval flow ──────────────────────────
|
|
312
|
-
describe('Scenario 3: Review gating approval', () => {
|
|
313
|
-
it('should emit step:review-completed with approved decision', async () => {
|
|
314
|
-
const reviewEvents = [];
|
|
315
|
-
runner.on((event) => {
|
|
316
|
-
if (event.type === 'step:review-completed') {
|
|
317
|
-
reviewEvents.push({ decision: event.decision, reviewerName: event.reviewerName });
|
|
318
|
-
}
|
|
319
|
-
});
|
|
320
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
321
|
-
expect(run.status).toBe('completed');
|
|
322
|
-
expect(reviewEvents.length).toBeGreaterThanOrEqual(1);
|
|
323
|
-
expect(reviewEvents[0].decision).toBe('approved');
|
|
324
|
-
}, 15000);
|
|
325
|
-
it('should gate step completion on review approval', async () => {
|
|
326
|
-
const stepEvents = [];
|
|
327
|
-
runner.on((event) => {
|
|
328
|
-
if (event.type === 'step:completed' || event.type === 'step:review-completed') {
|
|
329
|
-
stepEvents.push(event.type);
|
|
330
|
-
}
|
|
331
|
-
});
|
|
332
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
333
|
-
expect(run.status).toBe('completed');
|
|
334
|
-
const reviewIdx = stepEvents.indexOf('step:review-completed');
|
|
335
|
-
const completedIdx = stepEvents.indexOf('step:completed');
|
|
336
|
-
expect(reviewIdx).toBeLessThan(completedIdx);
|
|
337
|
-
}, 15000);
|
|
338
|
-
it('should complete review from streamed REVIEW_DECISION before normal exit', async () => {
|
|
339
|
-
mockRelayInstance.spawnPty.mockImplementation(async ({ name, task, }) => {
|
|
340
|
-
const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT');
|
|
341
|
-
const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim();
|
|
342
|
-
const output = isReview
|
|
343
|
-
? 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: streamed completion\n'
|
|
344
|
-
: stepComplete
|
|
345
|
-
? `STEP_COMPLETE:${stepComplete}\n`
|
|
346
|
-
: 'STEP_COMPLETE:unknown\n';
|
|
347
|
-
queueMicrotask(() => {
|
|
348
|
-
if (typeof mockRelayInstance.onWorkerOutput === 'function') {
|
|
349
|
-
mockRelayInstance.onWorkerOutput({ name, chunk: output });
|
|
350
|
-
}
|
|
351
|
-
});
|
|
352
|
-
if (!isReview) {
|
|
353
|
-
return { ...mockAgent, name };
|
|
354
|
-
}
|
|
355
|
-
let released = false;
|
|
356
|
-
let resolveExit;
|
|
357
|
-
const waitForExit = vi.fn().mockImplementation(() => {
|
|
358
|
-
if (released) {
|
|
359
|
-
return Promise.resolve('released');
|
|
360
|
-
}
|
|
361
|
-
return new Promise((resolve) => {
|
|
362
|
-
resolveExit = resolve;
|
|
363
|
-
});
|
|
364
|
-
});
|
|
365
|
-
const release = vi.fn().mockImplementation(async () => {
|
|
366
|
-
released = true;
|
|
367
|
-
resolveExit?.('released');
|
|
368
|
-
});
|
|
369
|
-
return {
|
|
370
|
-
name,
|
|
371
|
-
waitForExit,
|
|
372
|
-
waitForIdle: vi.fn().mockImplementation(() => never()),
|
|
373
|
-
release,
|
|
374
|
-
};
|
|
375
|
-
});
|
|
376
|
-
const run = await runner.execute(makeConfig({
|
|
377
|
-
workflows: [
|
|
378
|
-
{
|
|
379
|
-
name: 'default',
|
|
380
|
-
steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1' }],
|
|
381
|
-
},
|
|
382
|
-
],
|
|
383
|
-
}), 'default');
|
|
384
|
-
expect(run.status).toBe('completed');
|
|
385
|
-
const reviewAgent = await mockRelayInstance.spawnPty.mock.results[1].value;
|
|
386
|
-
expect(reviewAgent.name).toContain('step-1-review');
|
|
387
|
-
expect(reviewAgent.release).toHaveBeenCalledTimes(1);
|
|
388
|
-
}, 15000);
|
|
389
|
-
it('should mirror worker output to the channel for owner observation', async () => {
|
|
390
|
-
mockSpawnOutputs = [
|
|
391
|
-
'worker progress update\n',
|
|
392
|
-
'STEP_COMPLETE:step-1\n',
|
|
393
|
-
'REVIEW_DECISION: APPROVE\nREVIEW_REASON: looks good\n',
|
|
394
|
-
];
|
|
395
|
-
const run = await runner.execute(makeSupervisedConfig({ verification: { type: 'output_contains', value: 'worker progress update' } }), 'default');
|
|
396
|
-
expect(run.status).toBe('completed');
|
|
397
|
-
const channelMessages = mockRelaycastAgent.send.mock.calls.map(([, text]) => text);
|
|
398
|
-
expect(channelMessages.some((text) => text.includes('worker progress update'))).toBe(true);
|
|
399
|
-
expect(channelMessages.some((text) => text.includes('Verification gate observed'))).toBe(true);
|
|
400
|
-
}, 15000);
|
|
401
|
-
});
|
|
402
|
-
// ── Scenario 4: Review gating — rejection flow ─────────────────────────
|
|
403
|
-
describe('Scenario 4: Review gating rejection', () => {
|
|
404
|
-
it('should fail the step when reviewer rejects', async () => {
|
|
405
|
-
const events = [];
|
|
406
|
-
runner.on((event) => {
|
|
407
|
-
if (event.type === 'step:review-completed') {
|
|
408
|
-
events.push({ type: event.type, decision: event.decision });
|
|
409
|
-
}
|
|
410
|
-
});
|
|
411
|
-
mockSpawnOutputs = [
|
|
412
|
-
'STEP_COMPLETE:step-1\n',
|
|
413
|
-
'REVIEW_DECISION: REJECT\nREVIEW_REASON: output is incomplete\n',
|
|
414
|
-
];
|
|
415
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
416
|
-
expect(run.status).toBe('failed');
|
|
417
|
-
expect(run.error).toContain('review rejected');
|
|
418
|
-
expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
|
|
419
|
-
}, 15000);
|
|
420
|
-
it('should fail closed when review output is malformed (no REVIEW_DECISION)', async () => {
|
|
421
|
-
mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', 'REVIEW_REASON: this is missing the decision line\n'];
|
|
422
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
423
|
-
expect(run.status).toBe('failed');
|
|
424
|
-
expect(run.error).toContain('review response malformed');
|
|
425
|
-
}, 15000);
|
|
426
|
-
it('should use last REVIEW_DECISION match when PTY echoes prompt (reject)', async () => {
|
|
427
|
-
const events = [];
|
|
428
|
-
runner.on((event) => {
|
|
429
|
-
if (event.type === 'step:review-completed') {
|
|
430
|
-
events.push({ type: event.type, decision: event.decision });
|
|
431
|
-
}
|
|
432
|
-
});
|
|
433
|
-
const echoedPrompt = 'Return exactly:\nREVIEW_DECISION: APPROVE or REJECT\nREVIEW_REASON: <one sentence>\n';
|
|
434
|
-
const actualResponse = 'REVIEW_DECISION: REJECT\nREVIEW_REASON: code has critical bugs\n';
|
|
435
|
-
mockSpawnOutputs = ['STEP_COMPLETE:step-1\n', echoedPrompt + actualResponse];
|
|
436
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
437
|
-
expect(run.status).toBe('failed');
|
|
438
|
-
expect(events).toContainEqual({ type: 'step:review-completed', decision: 'rejected' });
|
|
439
|
-
}, 15000);
|
|
440
|
-
});
|
|
441
|
-
// ── Scenario 5: Review timeout budgeting ───────────────────────────────
|
|
442
|
-
describe('Scenario 5: Review timeout budgeting', () => {
|
|
443
|
-
it('should use the full remaining step timeout as the review safety backstop', async () => {
|
|
444
|
-
const config = makeConfig({
|
|
445
|
-
workflows: [
|
|
446
|
-
{
|
|
447
|
-
name: 'default',
|
|
448
|
-
steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1', timeoutMs: 90_000 }],
|
|
449
|
-
},
|
|
450
|
-
],
|
|
451
|
-
});
|
|
452
|
-
mockRelayInstance.spawnPty.mockImplementation(async ({ name, task, }) => {
|
|
453
|
-
const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT');
|
|
454
|
-
const stepComplete = task?.match(/STEP_COMPLETE:([^\n]+)/)?.[1]?.trim();
|
|
455
|
-
const output = isReview ? '' : stepComplete ? `STEP_COMPLETE:${stepComplete}\n` : 'STEP_COMPLETE:unknown\n';
|
|
456
|
-
if (output) {
|
|
457
|
-
queueMicrotask(() => {
|
|
458
|
-
if (typeof mockRelayInstance.onWorkerOutput === 'function') {
|
|
459
|
-
mockRelayInstance.onWorkerOutput({ name, chunk: output });
|
|
460
|
-
}
|
|
461
|
-
});
|
|
462
|
-
}
|
|
463
|
-
return {
|
|
464
|
-
name,
|
|
465
|
-
waitForExit: vi.fn().mockResolvedValue(isReview ? 'timeout' : 'exited'),
|
|
466
|
-
waitForIdle: vi.fn().mockImplementation(() => never()),
|
|
467
|
-
release: vi.fn().mockResolvedValue(undefined),
|
|
468
|
-
};
|
|
469
|
-
});
|
|
470
|
-
const run = await runner.execute(config, 'default');
|
|
471
|
-
expect(run.status).toBe('failed');
|
|
472
|
-
expect(run.error).toContain('review safety backstop timed out');
|
|
473
|
-
const reviewAgent = await mockRelayInstance.spawnPty.mock.results[1].value;
|
|
474
|
-
const reviewTimeout = reviewAgent.waitForExit.mock.calls[0][0];
|
|
475
|
-
expect(reviewTimeout).toBeGreaterThan(60_000);
|
|
476
|
-
expect(reviewTimeout).toBeLessThanOrEqual(90_000);
|
|
477
|
-
}, 15000);
|
|
478
|
-
it('should default the review safety backstop to 10 minutes when no step timeout is set', async () => {
|
|
479
|
-
const config = makeConfig({
|
|
480
|
-
workflows: [
|
|
481
|
-
{
|
|
482
|
-
name: 'default',
|
|
483
|
-
steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1' }],
|
|
484
|
-
},
|
|
485
|
-
],
|
|
486
|
-
});
|
|
487
|
-
mockRelayInstance.spawnPty.mockImplementation(async ({ name, task, }) => {
|
|
488
|
-
const isReview = task?.includes('REVIEW_DECISION: APPROVE or REJECT');
|
|
489
|
-
const output = isReview ? '' : 'STEP_COMPLETE:step-1\n';
|
|
490
|
-
if (output) {
|
|
491
|
-
queueMicrotask(() => {
|
|
492
|
-
if (typeof mockRelayInstance.onWorkerOutput === 'function') {
|
|
493
|
-
mockRelayInstance.onWorkerOutput({ name, chunk: output });
|
|
494
|
-
}
|
|
495
|
-
});
|
|
496
|
-
}
|
|
497
|
-
return {
|
|
498
|
-
name,
|
|
499
|
-
waitForExit: vi.fn().mockResolvedValue(isReview ? 'timeout' : 'exited'),
|
|
500
|
-
waitForIdle: vi.fn().mockImplementation(() => never()),
|
|
501
|
-
release: vi.fn().mockResolvedValue(undefined),
|
|
502
|
-
};
|
|
503
|
-
});
|
|
504
|
-
const run = await runner.execute(config, 'default');
|
|
505
|
-
expect(run.status).toBe('failed');
|
|
506
|
-
expect(run.error).toContain('review safety backstop timed out after 600000ms');
|
|
507
|
-
const reviewAgent = await mockRelayInstance.spawnPty.mock.results[1].value;
|
|
508
|
-
expect(reviewAgent.waitForExit).toHaveBeenCalledWith(600_000);
|
|
509
|
-
}, 15000);
|
|
510
|
-
});
|
|
511
|
-
// ── Scenario 6: Owner timeout emission ─────────────────────────────────
|
|
512
|
-
describe('Scenario 6: Owner timeout events', () => {
|
|
513
|
-
it('should emit step:owner-timeout when owner exceeds time limit', async () => {
|
|
514
|
-
const events = [];
|
|
515
|
-
runner.on((event) => {
|
|
516
|
-
if (event.type === 'step:owner-timeout') {
|
|
517
|
-
events.push({ type: event.type, stepName: event.stepName, ownerName: event.ownerName });
|
|
518
|
-
}
|
|
519
|
-
});
|
|
520
|
-
waitForExitFn = vi.fn().mockResolvedValue('timeout');
|
|
521
|
-
waitForIdleFn = vi.fn().mockResolvedValue('timeout');
|
|
522
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
523
|
-
expect(run.status).toBe('failed');
|
|
524
|
-
expect(run.error).toContain('timed out');
|
|
525
|
-
expect(events.length).toBeGreaterThanOrEqual(1);
|
|
526
|
-
expect(events[0].type).toBe('step:owner-timeout');
|
|
527
|
-
expect(events[0].stepName).toBe('step-1');
|
|
528
|
-
}, 15000);
|
|
529
|
-
it('should NOT emit step:owner-timeout for successful reviews', async () => {
|
|
530
|
-
const ownerTimeouts = [];
|
|
531
|
-
runner.on((event) => {
|
|
532
|
-
if (event.type === 'step:owner-timeout')
|
|
533
|
-
ownerTimeouts.push(event.stepName);
|
|
534
|
-
});
|
|
535
|
-
const run = await runner.execute(makeConfig(), 'default');
|
|
536
|
-
expect(run.status).toBe('completed');
|
|
537
|
-
expect(ownerTimeouts).toHaveLength(0);
|
|
538
|
-
}, 15000);
|
|
539
|
-
});
|
|
540
|
-
// ── Scenario 7: Multi-agent team with owner assignment ─────────────────
|
|
541
|
-
describe('Scenario 7: Lead + workers team pattern', () => {
  it('should assign lead as owner for worker steps in a team', async () => {
    // Collect every owner assignment the runner announces during the run.
    const assignments = [];
    runner.on((event) => {
      if (event.type !== 'step:owner-assigned') {
        return;
      }
      assignments.push({
        owner: event.ownerName,
        specialist: event.specialistName,
        step: event.stepName,
      });
    });

    // One lead, two implementers and a reviewer.
    const agents = [
      { name: 'team-lead', cli: 'claude', role: 'Lead coordinator' },
      { name: 'worker-1', cli: 'claude', role: 'implementer' },
      { name: 'worker-2', cli: 'claude', role: 'implementer' },
      { name: 'reviewer-1', cli: 'claude', role: 'reviewer' },
    ];

    // Two worker steps, then a lead step that depends on both of them.
    const steps = [
      { name: 'work-1', agent: 'worker-1', task: 'Do task A' },
      { name: 'work-2', agent: 'worker-2', task: 'Do task B' },
      {
        name: 'lead-coord',
        agent: 'team-lead',
        task: 'Coordinate workers',
        dependsOn: ['work-1', 'work-2'],
      },
    ];

    const teamConfig = makeConfig({
      agents,
      workflows: [{ name: 'default', steps }],
    });

    const result = await runner.execute(teamConfig, 'default');

    expect(result.status).toBe('completed');
    expect(assignments.length).toBeGreaterThanOrEqual(3);

    // Every step, including the lead's own, is expected to be owned by the lead.
    for (const stepName of ['work-1', 'work-2', 'lead-coord']) {
      const assignment = assignments.find((entry) => entry.step === stepName);
      expect(assignment?.owner).toBe('team-lead');
    }
  }, 30000);
});
|
|
587
|
-
// ── Scenario 8: YAML workflow parsing ──────────────────────────────────
|
|
588
|
-
describe('Scenario 8: E2E workflow YAML validation', () => {
  /**
   * Reads the e2e-owner-review workflow fixture from disk and parses it.
   * parseYamlString is an instance method, so it is called on the runner.
   *
   * @returns the config object produced by `runner.parseYamlString`.
   */
  const loadWorkflowConfig = () => {
    const yamlPath = resolve(__dirname, '../../../../tests/workflows/e2e-owner-review.yaml');
    const yamlContent = readFileSync(yamlPath, 'utf-8');
    return runner.parseYamlString(yamlContent);
  };

  /** Finds an agent entry in the parsed config by its name. */
  const agentNamed = (config, name) => config.agents.find((agent) => agent.name === name);

  it('should parse the e2e-owner-review.yaml without errors', () => {
    const config = loadWorkflowConfig();

    // Top-level shape of the parsed workflow file.
    expect(config.name).toBe('e2e-owner-review');
    expect(config.agents).toHaveLength(5);
    expect(config.workflows).toHaveLength(1);

    // All expected agents are declared.
    const agentNames = config.agents.map((agent) => agent.name);
    const expectedAgents = [
      'team-lead',
      'github-integration',
      'impl-worker',
      'quality-reviewer',
      'coordinator-bot',
    ];
    for (const expectedAgent of expectedAgents) {
      expect(agentNames).toContain(expectedAgent);
    }

    // All expected steps are present in the single workflow.
    const [workflow] = config.workflows;
    const stepNames = workflow.steps.map((step) => step.name);
    const expectedSteps = [
      'hub-owner-test',
      'github-no-hub-match',
      'review-approval-gate',
      'deliberate-bad-output',
      'tight-timeout-step',
      'team-lead-coord',
      'merge-results',
    ];
    for (const expectedStep of expectedSteps) {
      expect(stepNames).toContain(expectedStep);
    }
  });

  it('should detect hub-role agents correctly from YAML', () => {
    const config = loadWorkflowConfig();

    const teamLead = agentNamed(config, 'team-lead');
    expect(teamLead?.role).toMatch(/\blead\b/i);

    // "github" contains "hub" as a substring, but the word-boundary pattern
    // must not match it in either the role or the name.
    const githubAgent = agentNamed(config, 'github-integration');
    expect(githubAgent?.role).not.toMatch(/\bhub\b/i);
    expect(githubAgent?.name).not.toMatch(/\bhub\b/i);

    const coordBot = agentNamed(config, 'coordinator-bot');
    expect(coordBot?.role).toMatch(/\bcoordinator\b/i);
  });
});
|
|
626
|
-
// ── Scenario 9: Owner completion marker validation ─────────────────────
|
|
627
|
-
describe('Scenario 9: Owner completion marker', () => {
  // Per-test timeout, in milliseconds, passed as the last argument to `it`.
  const TEST_TIMEOUT_MS = 15000;

  it('should fail when owner does not provide a marker, decision, or evidence', async () => {
    // Replace the mocked spawn output with text that carries no completion marker.
    // NOTE(review): mockSpawnOutputs is assumed to be declared by the enclosing
    // suite and read by its mocked spawn — confirm.
    mockSpawnOutputs = ['The work is done but I forgot the sentinel.\n'];

    const result = await runner.execute(makeConfig(), 'default');

    expect(result.status).toBe('failed');
    expect(result.error).toContain('owner completion decision missing');
  }, TEST_TIMEOUT_MS);

  it('should succeed when owner produces correct STEP_COMPLETE:step-name', async () => {
    // No output override here; the run uses whatever the suite set up.
    const result = await runner.execute(makeConfig(), 'default');

    expect(result.status).toBe('completed');
  }, TEST_TIMEOUT_MS);
});
|
|
639
|
-
});
|
|
640
|
-
//# sourceMappingURL=e2e-owner-review.test.js.map
|