opencode-swarm-plugin 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.beads/.local_version +1 -0
- package/.beads/README.md +81 -0
- package/.beads/config.yaml +62 -0
- package/.beads/issues.jsonl +549 -0
- package/.beads/metadata.json +4 -0
- package/.gitattributes +3 -0
- package/Dockerfile +30 -0
- package/README.md +312 -0
- package/bun.lock +212 -0
- package/dist/index.js +14627 -0
- package/dist/plugin.js +14562 -0
- package/docker/agent-mail/Dockerfile +23 -0
- package/docker/agent-mail/__pycache__/server.cpython-314.pyc +0 -0
- package/docker/agent-mail/requirements.txt +3 -0
- package/docker/agent-mail/server.py +879 -0
- package/docker-compose.yml +45 -0
- package/package.json +52 -0
- package/scripts/docker-entrypoint.sh +54 -0
- package/src/agent-mail.integration.test.ts +1321 -0
- package/src/agent-mail.ts +665 -0
- package/src/anti-patterns.ts +430 -0
- package/src/beads.integration.test.ts +688 -0
- package/src/beads.ts +603 -0
- package/src/index.ts +267 -0
- package/src/learning.integration.test.ts +1104 -0
- package/src/learning.ts +438 -0
- package/src/pattern-maturity.ts +487 -0
- package/src/plugin.ts +11 -0
- package/src/schemas/bead.ts +152 -0
- package/src/schemas/evaluation.ts +133 -0
- package/src/schemas/index.test.ts +199 -0
- package/src/schemas/index.ts +77 -0
- package/src/schemas/task.ts +129 -0
- package/src/structured.ts +708 -0
- package/src/swarm.integration.test.ts +763 -0
- package/src/swarm.ts +1411 -0
- package/tsconfig.json +28 -0
- package/vitest.integration.config.ts +13 -0
|
@@ -0,0 +1,763 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Swarm Integration Tests
|
|
3
|
+
*
|
|
4
|
+
* These tests require:
|
|
5
|
+
* - beads CLI installed and configured
|
|
6
|
+
* - Agent Mail server running at AGENT_MAIL_URL (default: http://agent-mail:8765 in Docker)
|
|
7
|
+
*
|
|
8
|
+
* Run with: pnpm test:integration (or docker:test for full Docker environment)
|
|
9
|
+
*/
|
|
10
|
+
import { describe, it, expect, beforeAll } from "vitest";
|
|
11
|
+
import {
|
|
12
|
+
swarm_decompose,
|
|
13
|
+
swarm_validate_decomposition,
|
|
14
|
+
swarm_status,
|
|
15
|
+
swarm_progress,
|
|
16
|
+
swarm_complete,
|
|
17
|
+
swarm_subtask_prompt,
|
|
18
|
+
swarm_evaluation_prompt,
|
|
19
|
+
} from "./swarm";
|
|
20
|
+
import { mcpCall, setState, clearState, AGENT_MAIL_URL } from "./agent-mail";
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// Test Configuration
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
26
|
+
// Timestamp-suffixed identifiers so separate runs do not share a session or project path.
const TEST_SESSION_ID = "test-swarm-" + Date.now();
const TEST_PROJECT_PATH = "/tmp/test-swarm-" + Date.now();

/**
 * Stand-in for the tool context handed to every `execute` call.
 * Outside of tests the OpenCode runtime supplies this object.
 */
const mockContext = {
  sessionID: TEST_SESSION_ID,
  messageID: "test-message-" + Date.now(),
  agent: "test-agent",
  // Signal of a controller that is never aborted.
  abort: new AbortController().signal,
};
|
|
39
|
+
|
|
40
|
+
/**
 * Probes the Agent Mail liveness endpoint.
 *
 * The base URL comes from the `AGENT_MAIL_URL` environment variable when it is
 * set (and non-empty), otherwise from the constant exported by `./agent-mail`.
 *
 * @returns `true` when the endpoint answers with an OK status; `false` when it
 *   answers with a non-OK status or the request throws.
 */
async function isAgentMailAvailable(): Promise<boolean> {
  try {
    const baseUrl = process.env.AGENT_MAIL_URL || AGENT_MAIL_URL;
    const { ok } = await fetch(`${baseUrl}/health/liveness`);
    return ok;
  } catch {
    // Any failure while probing is treated as "not available".
    return false;
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Reports whether the beads CLI (`bd`) can be executed.
 *
 * Runs `bd --version` through the Bun shell with output suppressed and
 * non-zero exits tolerated, then inspects the exit code.
 *
 * @returns `true` only when the command exits with code 0; `false` for any
 *   other exit code or when running the command throws.
 */
async function isBeadsAvailable(): Promise<boolean> {
  try {
    const { exitCode } = await Bun.$`bd --version`.quiet().nothrow();
    return exitCode === 0;
  } catch {
    // NOTE(review): this also swallows a missing `Bun` global, so a runtime
    // without Bun would presumably always report `false` — confirm the runner.
    return false;
  }
}
|
|
64
|
+
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// Prompt Generation Tests (No external dependencies)
|
|
67
|
+
// ============================================================================
|
|
68
|
+
|
|
69
|
+
describe("swarm_decompose", () => {
  /** Runs the tool with the mock context and decodes its JSON string result. */
  const decompose = async (
    args: Parameters<typeof swarm_decompose.execute>[0],
  ) => JSON.parse(await swarm_decompose.execute(args, mockContext));

  it("generates valid decomposition prompt", async () => {
    const output = await decompose({
      task: "Add user authentication with OAuth",
      max_subtasks: 3,
    });

    // Envelope shape first, then the prompt contents.
    expect(output).toHaveProperty("prompt");
    expect(output).toHaveProperty("expected_schema", "BeadTree");
    expect(output).toHaveProperty("schema_hint");
    expect(output.prompt).toContain("Add user authentication with OAuth");
    expect(output.prompt).toContain("2-3 independent subtasks");
  });

  it("includes context in prompt when provided", async () => {
    const output = await decompose({
      task: "Refactor the API routes",
      max_subtasks: 5,
      context: "Using Next.js App Router with RSC",
    });

    expect(output.prompt).toContain("Using Next.js App Router with RSC");
    expect(output.prompt).toContain("Additional Context");
  });

  it("uses default max_subtasks when not provided", async () => {
    // The schema requires max_subtasks, so the default (5) is passed explicitly.
    const output = await decompose({
      task: "Simple task",
      max_subtasks: 5,
    });

    expect(output.prompt).toContain("2-5 independent subtasks");
  });
});
|
|
119
|
+
|
|
120
|
+
describe("swarm_validate_decomposition", () => {
  /** Feeds a raw response string to the validator and decodes its JSON result. */
  const validate = async (response: string) =>
    JSON.parse(
      await swarm_validate_decomposition.execute({ response }, mockContext),
    );

  it("validates correct BeadTree", async () => {
    const oauthTree = {
      epic: {
        title: "Add OAuth",
        description: "Implement OAuth authentication",
      },
      subtasks: [
        {
          title: "Add OAuth provider config",
          description: "Set up Google OAuth",
          files: ["src/auth/google.ts", "src/auth/config.ts"],
          dependencies: [],
          estimated_complexity: 2,
        },
        {
          title: "Add login UI",
          description: "Create login button component",
          files: ["src/components/LoginButton.tsx"],
          dependencies: [0],
          estimated_complexity: 1,
        },
      ],
    };

    const verdict = await validate(JSON.stringify(oauthTree));

    expect(verdict.valid).toBe(true);
    expect(verdict.bead_tree).toBeDefined();
    // Two subtasks, three files in total, complexities 2 + 1.
    expect(verdict.stats).toEqual({
      subtask_count: 2,
      total_files: 3,
      total_complexity: 3,
    });
  });

  it("rejects file conflicts", async () => {
    // Both subtasks claim the same file.
    const sharedFile = ["src/shared.ts"];
    const overlappingTree = {
      epic: {
        title: "Conflicting files",
      },
      subtasks: [
        {
          title: "Task A",
          files: sharedFile,
          dependencies: [],
          estimated_complexity: 1,
        },
        {
          title: "Task B",
          files: sharedFile,
          dependencies: [],
          estimated_complexity: 1,
        },
      ],
    };

    const verdict = await validate(JSON.stringify(overlappingTree));

    expect(verdict.valid).toBe(false);
    expect(verdict.error).toContain("File conflicts detected");
    expect(verdict.error).toContain("src/shared.ts");
  });

  it("rejects invalid dependencies (forward reference)", async () => {
    const forwardReferenceTree = {
      epic: {
        title: "Invalid deps",
      },
      subtasks: [
        {
          title: "Task A",
          files: ["src/a.ts"],
          // Index 1 is a later subtask, which is not allowed.
          dependencies: [1],
          estimated_complexity: 1,
        },
        {
          title: "Task B",
          files: ["src/b.ts"],
          dependencies: [],
          estimated_complexity: 1,
        },
      ],
    };

    const verdict = await validate(JSON.stringify(forwardReferenceTree));

    expect(verdict.valid).toBe(false);
    expect(verdict.error).toContain("Invalid dependency");
    expect(verdict.hint).toContain("Reorder subtasks");
  });

  it("rejects invalid JSON", async () => {
    const verdict = await validate("not valid json {");

    expect(verdict.valid).toBe(false);
    expect(verdict.error).toContain("Invalid JSON");
  });

  it("rejects missing required fields", async () => {
    // The subtasks array is deliberately left out.
    const epicOnly = { epic: { title: "Missing subtasks" } };

    const verdict = await validate(JSON.stringify(epicOnly));

    expect(verdict.valid).toBe(false);
    expect(verdict.error).toContain("Schema validation failed");
  });
});
|
|
256
|
+
|
|
257
|
+
describe("swarm_subtask_prompt", () => {
  it("generates complete subtask prompt", async () => {
    const prompt = await swarm_subtask_prompt.execute(
      {
        agent_name: "BlueLake",
        bead_id: "bd-abc123.1",
        epic_id: "bd-abc123",
        subtask_title: "Add OAuth provider",
        subtask_description: "Configure Google OAuth in the auth config",
        files: ["src/auth/google.ts", "src/auth/config.ts"],
        shared_context: "We are using NextAuth.js v5",
      },
      mockContext,
    );

    // The tool returns the prompt text itself rather than a JSON envelope.
    const expectedFragments = [
      "BlueLake",
      "bd-abc123.1",
      "bd-abc123",
      "Add OAuth provider",
      "Configure Google OAuth",
      "src/auth/google.ts",
      "NextAuth.js v5",
      "swarm:progress",
      "swarm:complete",
    ];
    for (const fragment of expectedFragments) {
      expect(prompt).toContain(fragment);
    }
  });

  it("handles missing optional fields", async () => {
    const prompt = await swarm_subtask_prompt.execute(
      {
        agent_name: "RedStone",
        bead_id: "bd-xyz789.2",
        epic_id: "bd-xyz789",
        subtask_title: "Simple task",
        files: [],
      },
      mockContext,
    );

    const expectedFragments = [
      "RedStone",
      "bd-xyz789.2",
      "Simple task",
      "(none)", // placeholder for the missing description/context
      "(no files assigned)", // placeholder for the empty files list
    ];
    for (const fragment of expectedFragments) {
      expect(prompt).toContain(fragment);
    }
  });
});
|
|
303
|
+
|
|
304
|
+
describe("swarm_evaluation_prompt", () => {
  /** Runs the tool with the mock context and decodes its JSON string result. */
  const buildEvaluationPrompt = async (
    args: Parameters<typeof swarm_evaluation_prompt.execute>[0],
  ) => JSON.parse(await swarm_evaluation_prompt.execute(args, mockContext));

  it("generates evaluation prompt with schema hint", async () => {
    const output = await buildEvaluationPrompt({
      bead_id: "bd-abc123.1",
      subtask_title: "Add OAuth provider",
      files_touched: ["src/auth/google.ts", "src/auth/config.ts"],
    });

    expect(output).toHaveProperty("prompt");
    expect(output).toHaveProperty("expected_schema", "Evaluation");
    expect(output).toHaveProperty("schema_hint");

    // Inputs are echoed into the prompt, followed by the four criteria names.
    const expectedFragments = [
      "bd-abc123.1",
      "Add OAuth provider",
      "src/auth/google.ts",
      "type_safe",
      "no_bugs",
      "patterns",
      "readable",
    ];
    for (const fragment of expectedFragments) {
      expect(output.prompt).toContain(fragment);
    }
  });

  it("handles empty files list", async () => {
    const output = await buildEvaluationPrompt({
      bead_id: "bd-xyz789.1",
      subtask_title: "Documentation only",
      files_touched: [],
    });

    expect(output.prompt).toContain("(no files recorded)");
  });
});
|
|
345
|
+
|
|
346
|
+
// ============================================================================
|
|
347
|
+
// Integration Tests (Require Agent Mail + beads)
|
|
348
|
+
// ============================================================================
|
|
349
|
+
|
|
350
|
+
describe("swarm_status (integration)", () => {
  let beadsAvailable = false;

  beforeAll(async () => {
    beadsAvailable = await isBeadsAvailable();
  });

  // The availability flag is only assigned in beforeAll, so the skip decision
  // is made inside the test body. `it.skipIf(!beadsAvailable)` would read the
  // flag while the suite is still being collected, when it is always `false`,
  // and therefore skip this test unconditionally.
  it("returns status for non-existent epic", async ({ skip }) => {
    if (!beadsAvailable) skip();

    // No such epic exists, so the call should fail gracefully.
    try {
      await swarm_status.execute(
        {
          epic_id: "bd-nonexistent",
          project_key: TEST_PROJECT_PATH,
        },
        mockContext,
      );
      // Not throwing is acceptable too - it might return an empty status.
    } catch (error) {
      expect(error).toBeInstanceOf(Error);
      // When the error carries an `operation` property (as SwarmError is
      // expected to), it must name the failing step.
      if (error instanceof Error && "operation" in error) {
        expect((error as { operation: string }).operation).toBe(
          "query_subtasks",
        );
      }
    }
  });
});
|
|
382
|
+
|
|
383
|
+
describe("swarm_progress (integration)", () => {
  let agentMailAvailable = false;

  beforeAll(async () => {
    agentMailAvailable = await isAgentMailAvailable();
  });

  // The availability flag is only assigned in beforeAll, so the skip decision
  // is made inside the test body. `it.skipIf(!agentMailAvailable)` would read
  // the flag while the suite is still being collected, when it is always
  // `false`, and therefore skip this test unconditionally.
  it("reports progress to Agent Mail", async ({ skip }) => {
    if (!agentMailAvailable) skip();

    const uniqueProjectKey = `${TEST_PROJECT_PATH}-progress-${Date.now()}`;
    const sessionID = `progress-session-${Date.now()}`;

    // Initialize Agent Mail state for this session; always cleared in `finally`.
    try {
      // Ensure project exists
      await mcpCall("ensure_project", { human_key: uniqueProjectKey });

      // Register agent
      const agent = await mcpCall<{ name: string }>("register_agent", {
        project_key: uniqueProjectKey,
        program: "opencode-test",
        model: "test",
        task_description: "Integration test",
      });

      // Set state for the session
      setState(sessionID, {
        projectKey: uniqueProjectKey,
        agentName: agent.name,
        reservations: [],
        startedAt: new Date().toISOString(),
      });

      // Same mock context, but bound to the session that owns the state above.
      const ctx = {
        ...mockContext,
        sessionID,
      };

      const result = await swarm_progress.execute(
        {
          project_key: uniqueProjectKey,
          agent_name: agent.name,
          bead_id: "bd-test123.1",
          status: "in_progress",
          message: "Working on the feature",
          progress_percent: 50,
          files_touched: ["src/test.ts"],
        },
        ctx,
      );

      expect(result).toContain("Progress reported");
      expect(result).toContain("in_progress");
      expect(result).toContain("50%");
    } finally {
      clearState(sessionID);
    }
  });
});
|
|
441
|
+
|
|
442
|
+
describe("swarm_complete (integration)", () => {
  let agentMailAvailable = false;
  let beadsAvailable = false;

  beforeAll(async () => {
    agentMailAvailable = await isAgentMailAvailable();
    beadsAvailable = await isBeadsAvailable();
  });

  // The availability flags are only assigned in beforeAll, so each test decides
  // whether to skip inside its own body. `it.skipIf(...)` would read the flags
  // while the suite is still being collected, when both are always `false`,
  // and therefore skip these tests unconditionally.

  it("completes subtask with passing evaluation", async ({ skip }) => {
    if (!agentMailAvailable || !beadsAvailable) skip();

    const uniqueProjectKey = `${TEST_PROJECT_PATH}-complete-${Date.now()}`;
    const sessionID = `complete-session-${Date.now()}`;

    try {
      // Set up Agent Mail
      await mcpCall("ensure_project", { human_key: uniqueProjectKey });
      const agent = await mcpCall<{ name: string }>("register_agent", {
        project_key: uniqueProjectKey,
        program: "opencode-test",
        model: "test",
        task_description: "Integration test",
      });

      setState(sessionID, {
        projectKey: uniqueProjectKey,
        agentName: agent.name,
        reservations: [],
        startedAt: new Date().toISOString(),
      });

      // Same mock context, but bound to the session that owns the state above.
      const ctx = {
        ...mockContext,
        sessionID,
      };

      // Create a test bead first
      const createResult =
        await Bun.$`bd create "Test subtask" -t task --json`
          .quiet()
          .nothrow();

      // Best effort: without a bead there is nothing to complete, so the test
      // ends early with a warning instead of failing.
      if (createResult.exitCode !== 0) {
        console.warn(
          "Could not create test bead:",
          createResult.stderr.toString(),
        );
        return;
      }

      const bead = JSON.parse(createResult.stdout.toString());

      const passingEvaluation = JSON.stringify({
        passed: true,
        criteria: {
          type_safe: { passed: true, feedback: "All types correct" },
          no_bugs: { passed: true, feedback: "No issues found" },
          patterns: { passed: true, feedback: "Follows conventions" },
          readable: { passed: true, feedback: "Clear code" },
        },
        overall_feedback: "Great work!",
        retry_suggestion: null,
      });

      const result = await swarm_complete.execute(
        {
          project_key: uniqueProjectKey,
          agent_name: agent.name,
          bead_id: bead.id,
          summary: "Completed the test subtask",
          evaluation: passingEvaluation,
        },
        ctx,
      );

      const parsed = JSON.parse(result);

      expect(parsed.success).toBe(true);
      expect(parsed.bead_id).toBe(bead.id);
      expect(parsed.closed).toBe(true);
      expect(parsed.reservations_released).toBe(true);
      expect(parsed.message_sent).toBe(true);
    } finally {
      clearState(sessionID);
    }
  });

  it("rejects completion with failing evaluation", async ({ skip }) => {
    // Only Agent Mail is required here; no bead is created through the CLI.
    if (!agentMailAvailable) skip();

    const uniqueProjectKey = `${TEST_PROJECT_PATH}-fail-${Date.now()}`;
    const sessionID = `fail-session-${Date.now()}`;

    try {
      // Set up Agent Mail
      await mcpCall("ensure_project", { human_key: uniqueProjectKey });
      const agent = await mcpCall<{ name: string }>("register_agent", {
        project_key: uniqueProjectKey,
        program: "opencode-test",
        model: "test",
        task_description: "Integration test",
      });

      setState(sessionID, {
        projectKey: uniqueProjectKey,
        agentName: agent.name,
        reservations: [],
        startedAt: new Date().toISOString(),
      });

      const ctx = {
        ...mockContext,
        sessionID,
      };

      const failingEvaluation = JSON.stringify({
        passed: false,
        criteria: {
          type_safe: { passed: false, feedback: "Missing types on line 42" },
        },
        overall_feedback: "Needs work",
        retry_suggestion: "Add explicit types to the handler function",
      });

      const result = await swarm_complete.execute(
        {
          project_key: uniqueProjectKey,
          agent_name: agent.name,
          bead_id: "bd-test-fail.1",
          summary: "Attempted completion",
          evaluation: failingEvaluation,
        },
        ctx,
      );

      const parsed = JSON.parse(result);

      // The retry suggestion from the evaluation must be passed back verbatim.
      expect(parsed.success).toBe(false);
      expect(parsed.error).toContain("Self-evaluation failed");
      expect(parsed.retry_suggestion).toBe(
        "Add explicit types to the handler function",
      );
    } finally {
      clearState(sessionID);
    }
  });
});
|
|
592
|
+
|
|
593
|
+
// ============================================================================
|
|
594
|
+
// Full Swarm Flow (End-to-End)
|
|
595
|
+
// ============================================================================
|
|
596
|
+
|
|
597
|
+
describe("full swarm flow (integration)", () => {
  let agentMailAvailable = false;
  let beadsAvailable = false;

  beforeAll(async () => {
    agentMailAvailable = await isAgentMailAvailable();
    beadsAvailable = await isBeadsAvailable();
  });

  // The availability flags are only assigned in beforeAll, so the skip decision
  // is made inside the test body. `it.skipIf(...)` would read the flags while
  // the suite is still being collected, when both are always `false`, and
  // therefore skip this end-to-end test unconditionally.
  it("creates epic, reports progress, completes subtask", async ({ skip }) => {
    if (!agentMailAvailable || !beadsAvailable) skip();

    const uniqueProjectKey = `${TEST_PROJECT_PATH}-flow-${Date.now()}`;
    const sessionID = `flow-session-${Date.now()}`;

    try {
      // 1. Set up Agent Mail session
      await mcpCall("ensure_project", { human_key: uniqueProjectKey });
      const agent = await mcpCall<{ name: string }>("register_agent", {
        project_key: uniqueProjectKey,
        program: "opencode-test",
        model: "test",
        task_description: "E2E swarm test",
      });

      setState(sessionID, {
        projectKey: uniqueProjectKey,
        agentName: agent.name,
        reservations: [],
        startedAt: new Date().toISOString(),
      });

      // Same mock context, but bound to the session that owns the state above.
      const ctx = {
        ...mockContext,
        sessionID,
      };

      // 2. Generate decomposition prompt
      const decomposeResult = await swarm_decompose.execute(
        {
          task: "Add unit tests for auth module",
          max_subtasks: 2,
        },
        ctx,
      );

      const decomposition = JSON.parse(decomposeResult);
      expect(decomposition.prompt).toContain("Add unit tests");

      // 3. Create an epic with bd CLI
      const epicResult =
        await Bun.$`bd create "Add unit tests for auth module" -t epic --json`
          .quiet()
          .nothrow();

      // Best effort: without an epic the rest of the flow cannot run, so the
      // test ends early with a warning instead of failing.
      if (epicResult.exitCode !== 0) {
        console.warn("Could not create epic:", epicResult.stderr.toString());
        return;
      }

      const epic = JSON.parse(epicResult.stdout.toString());
      expect(epic.id).toMatch(/^[a-z0-9-]+-[a-z0-9]+$/);

      // 4. Create a subtask
      const subtaskResult =
        await Bun.$`bd create "Test login flow" -t task --json`
          .quiet()
          .nothrow();

      if (subtaskResult.exitCode !== 0) {
        console.warn(
          "Could not create subtask:",
          subtaskResult.stderr.toString(),
        );
        return;
      }

      const subtask = JSON.parse(subtaskResult.stdout.toString());

      // 5. Generate subtask prompt
      const subtaskPrompt = await swarm_subtask_prompt.execute(
        {
          agent_name: agent.name,
          bead_id: subtask.id,
          epic_id: epic.id,
          subtask_title: "Test login flow",
          files: ["src/auth/__tests__/login.test.ts"],
        },
        ctx,
      );

      expect(subtaskPrompt).toContain(agent.name);
      expect(subtaskPrompt).toContain(subtask.id);

      // 6. Report progress
      const progressResult = await swarm_progress.execute(
        {
          project_key: uniqueProjectKey,
          agent_name: agent.name,
          bead_id: subtask.id,
          status: "in_progress",
          progress_percent: 50,
          message: "Writing test cases",
        },
        ctx,
      );

      expect(progressResult).toContain("Progress reported");

      // 7. Generate evaluation prompt
      const evalPromptResult = await swarm_evaluation_prompt.execute(
        {
          bead_id: subtask.id,
          subtask_title: "Test login flow",
          files_touched: ["src/auth/__tests__/login.test.ts"],
        },
        ctx,
      );

      const evalPrompt = JSON.parse(evalPromptResult);
      expect(evalPrompt.expected_schema).toBe("Evaluation");

      // 8. Complete the subtask
      const completeResult = await swarm_complete.execute(
        {
          project_key: uniqueProjectKey,
          agent_name: agent.name,
          bead_id: subtask.id,
          summary: "Added comprehensive login tests",
          evaluation: JSON.stringify({
            passed: true,
            criteria: {
              type_safe: { passed: true, feedback: "TypeScript compiles" },
              no_bugs: { passed: true, feedback: "Tests pass" },
              patterns: { passed: true, feedback: "Follows test patterns" },
              readable: { passed: true, feedback: "Clear test names" },
            },
            overall_feedback: "Good test coverage",
            retry_suggestion: null,
          }),
        },
        ctx,
      );

      const completion = JSON.parse(completeResult);
      expect(completion.success).toBe(true);
      expect(completion.closed).toBe(true);
      expect(completion.message_sent).toBe(true);

      // 9. Check swarm status
      const statusResult = await swarm_status.execute(
        {
          epic_id: epic.id,
          project_key: uniqueProjectKey,
        },
        ctx,
      );

      const status = JSON.parse(statusResult);
      expect(status.epic_id).toBe(epic.id);
      // Status may show completed subtasks now; only the epic id is asserted.
    } finally {
      clearState(sessionID);
    }
  });
});
|