@clawswarm/core 0.1.0-alpha → 0.2.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -1
- package/src/__tests__/integration.test.ts +0 -686
- package/src/agent.ts +0 -163
- package/src/chief.ts +0 -264
- package/src/clawswarm.ts +0 -257
- package/src/goal.ts +0 -183
- package/src/index.ts +0 -62
- package/src/swarm.ts +0 -225
- package/src/task.ts +0 -204
- package/src/types.ts +0 -240
- package/tsconfig.json +0 -11
- package/tsconfig.tsbuildinfo +0 -1
|
@@ -1,686 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Integration test — Full ClawSwarm task lifecycle
|
|
3
|
-
*
|
|
4
|
-
* Validates the complete workflow:
|
|
5
|
-
* 1. Swarm creation with ResearchClaw + CodeClaw agents
|
|
6
|
-
* 2. Goal creation and task decomposition
|
|
7
|
-
* 3. Task assignment to agents
|
|
8
|
-
* 4. Chief review scoring (auto-approve ≥8, human review 5-7, auto-reject <5)
|
|
9
|
-
* 5. Full task lifecycle: pending → assigned → in_progress → review → approved → completed (or rework)
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { describe, it, expect, beforeEach } from 'vitest';
|
|
13
|
-
import { ClawSwarm } from '../clawswarm.js';
|
|
14
|
-
import { Agent } from '../agent.js';
|
|
15
|
-
import { TaskManager } from '../task.js';
|
|
16
|
-
import { GoalManager } from '../goal.js';
|
|
17
|
-
import { ChiefReviewer } from '../chief.js';
|
|
18
|
-
import type {
|
|
19
|
-
Task,
|
|
20
|
-
Deliverable,
|
|
21
|
-
AgentConfig,
|
|
22
|
-
ReviewResult,
|
|
23
|
-
TaskStatus,
|
|
24
|
-
GoalStatus,
|
|
25
|
-
AgentType,
|
|
26
|
-
} from '../types.js';
|
|
27
|
-
|
|
28
|
-
// ─── Mock Agents ──────────────────────────────────────────────────────────────
|
|
29
|
-
|
|
30
|
-
/**
 * Test double for `Agent` whose behaviour is steered through public fields.
 *
 * - `deliverables`: returned as-is (same array instance) by every `execute()` call.
 * - `shouldThrow`: when `true`, `execute()` rejects instead of returning.
 * - `executeCallCount`: incremented on every `execute()` call, including failing ones.
 */
class MockAgent extends Agent {
  public executeCallCount = 0;
  public shouldThrow = false;
  public deliverables: Deliverable[] = [
    {
      type: 'text',
      label: 'Mock Output',
      content: 'This is a mock deliverable with enough content to pass review. '.repeat(5),
    },
  ];

  constructor(config: AgentConfig) {
    super(config);
  }

  /**
   * Records the call, then either fails or hands back the configured deliverables.
   *
   * @param _task - Ignored; the mock's output does not depend on the task.
   * @returns The current `deliverables` array.
   * @throws Error when `shouldThrow` is set.
   */
  async execute(_task: Task): Promise<Deliverable[]> {
    this.executeCallCount += 1;
    if (!this.shouldThrow) {
      return this.deliverables;
    }
    throw new Error('MockAgent: simulated execute failure');
  }
}
|
|
55
|
-
|
|
56
|
-
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
57
|
-
|
|
58
|
-
/**
 * Builds a MockAgent configured as the research agent.
 *
 * @param overrides - Config fields that replace the defaults below.
 * @returns A new MockAgent instance.
 */
function makeResearchAgent(overrides: Partial<AgentConfig> = {}): MockAgent {
  const researchConfig: AgentConfig = {
    type: 'research',
    model: 'claude-sonnet-4',
    name: 'ResearchClaw',
    ...overrides,
  };
  return new MockAgent(researchConfig);
}
|
|
66
|
-
|
|
67
|
-
/**
 * Builds a MockAgent configured as the code agent.
 *
 * @param overrides - Config fields that replace the defaults below.
 * @returns A new MockAgent instance.
 */
function makeCodeAgent(overrides: Partial<AgentConfig> = {}): MockAgent {
  const codeConfig: AgentConfig = {
    type: 'code',
    model: 'gpt-4o',
    name: 'CodeClaw',
    ...overrides,
  };
  return new MockAgent(codeConfig);
}
|
|
75
|
-
|
|
76
|
-
// ─── TestableSwarm: inject mock agents ───────────────────────────────────────
|
|
77
|
-
|
|
78
|
-
/**
 * ClawSwarm subclass for tests: after the real constructor has run, the
 * swarm's internal `agents` map is replaced with the supplied MockAgents.
 *
 * This keeps the real orchestration logic under test without having to mock
 * the ClawSwarm constructor itself.
 */
class TestableSwarm extends ClawSwarm {
  /**
   * @param config - Forwarded unchanged to the ClawSwarm constructor.
   * @param mockAgents - Agents to install, keyed by their `type`; if two mocks
   *   share a type, the later one replaces the earlier one.
   */
  constructor(
    // `typeof ClawSwarm` is a constructor type (construct signature, no call
    // signature), so `ConstructorParameters` is the matching utility type;
    // `Parameters` only accepts callable function types.
    config: ConstructorParameters<typeof ClawSwarm>[0],
    mockAgents: MockAgent[]
  ) {
    super(config);

    const agentsMap = new Map<AgentType, Agent>();
    for (const agent of mockAgents) {
      agentsMap.set(agent.type, agent);
    }

    // NOTE(review): `agents` is presumably a non-public field on ClawSwarm,
    // hence the structural cast to overwrite it — confirm against clawswarm.ts.
    (this as unknown as { agents: Map<AgentType, Agent> }).agents = agentsMap;
  }
}
|
|
97
|
-
|
|
98
|
-
/**
 * Creates a TestableSwarm whose research and code agents are the given mocks.
 *
 * The swarm is still constructed with real research/code agent configs; the
 * mocks are passed separately and installed by TestableSwarm.
 *
 * @param researchAgent - Mock to serve as the research agent.
 * @param codeAgent - Mock to serve as the code agent.
 * @param reviewConfig - Chief review thresholds (defaults: approve at 8, human review at 5).
 */
function buildSwarm(
  researchAgent: MockAgent,
  codeAgent: MockAgent,
  reviewConfig = { autoApproveThreshold: 8, humanReviewThreshold: 5 }
): TestableSwarm {
  const swarmConfig = {
    agents: [
      Agent.research({ model: 'claude-sonnet-4' }),
      Agent.code({ model: 'gpt-4o' }),
    ],
    chiefReview: reviewConfig,
  };
  const mocks = [researchAgent, codeAgent];

  return new TestableSwarm(swarmConfig, mocks);
}
|
|
114
|
-
|
|
115
|
-
// ─── Unit Tests: ChiefReviewer scoring thresholds ─────────────────────────────
|
|
116
|
-
|
|
117
|
-
// Verifies how ChiefReviewer maps a numeric score to a decision for the
// thresholds approve=8 / human review=5, and that invalid thresholds are rejected.
describe('ChiefReviewer — score thresholds', () => {
  let reviewer: ChiefReviewer;

  beforeEach(() => {
    reviewer = new ChiefReviewer({ autoApproveThreshold: 8, humanReviewThreshold: 5 });
  });

  it('auto-approves scores ≥ 8', () => {
    for (const score of [8, 9, 10]) {
      expect(reviewer.scoreToDecision(score)).toBe('approved');
    }
  });

  it('routes scores 5–7 to human review', () => {
    for (const score of [5, 6, 7]) {
      expect(reviewer.scoreToDecision(score)).toBe('human_review');
    }
  });

  it('auto-rejects scores < 5', () => {
    for (const score of [4, 1, 0]) {
      expect(reviewer.scoreToDecision(score)).toBe('rejected');
    }
  });

  it('exposes its config', () => {
    const exposed = reviewer.config;
    expect(exposed.autoApproveThreshold).toBe(8);
    expect(exposed.humanReviewThreshold).toBe(5);
  });

  it('throws if autoApproveThreshold < humanReviewThreshold', () => {
    // Approve threshold below the human-review threshold is an invalid config.
    const constructInverted = () =>
      new ChiefReviewer({ autoApproveThreshold: 3, humanReviewThreshold: 7 });

    expect(constructInverted).toThrow();
  });
});
|
|
156
|
-
|
|
157
|
-
// ─── Unit Tests: TaskManager lifecycle ────────────────────────────────────────
|
|
158
|
-
|
|
159
|
-
// Exercises TaskManager state transitions, the rework limit, goal completion
// detection and dependency-aware readiness.
describe('TaskManager — task lifecycle', () => {
  const GOAL_ID = 'goal-test-001';
  let tm: TaskManager;

  /** Creates a task under GOAL_ID with the given fields. */
  const createTask = (
    title: string,
    description: string,
    dependsOn: Array<Task['id']> = []
  ) => tm.create({ goalId: GOAL_ID, title, description, dependsOn });

  /** Drives a pending task through assign → start → review → approve → complete. */
  const finishTask = (taskId: Task['id']): void => {
    tm.assign(taskId, 'code');
    tm.start(taskId);
    tm.submitForReview(taskId, [{ type: 'text', label: 'x', content: 'x' }]);
    tm.approve(taskId);
    tm.complete(taskId);
  };

  beforeEach(() => {
    tm = new TaskManager();
  });

  it('creates a task in pending state', () => {
    const created = createTask('Research AI trends', 'Find top 5 AI trends in 2026');

    expect(created.status).toBe('pending');
    expect(created.id).toBeTruthy();
    expect(created.goalId).toBe(GOAL_ID);
    expect(created.deliverables).toHaveLength(0);
    expect(created.reworkCount).toBe(0);
  });

  it('transitions: pending → assigned → in_progress → review → approved → completed', () => {
    const { id } = createTask('Build feature', 'Implement the feature');
    // Always re-read the task from the manager so stale snapshots are not checked.
    const current = () => tm.get(id)!;

    tm.assign(id, 'code');
    expect(current().status).toBe('assigned');
    expect(current().assignedTo).toBe('code');

    tm.start(id);
    expect(current().status).toBe('in_progress');

    const output: Deliverable[] = [
      { type: 'code', label: 'Implementation', content: 'function add(a, b) { return a + b; }' },
    ];
    tm.submitForReview(id, output);
    expect(current().status).toBe('review');

    tm.approve(id);
    expect(current().status).toBe('approved');

    tm.complete(id);
    expect(current().status).toBe('completed');
  });

  it('handles rework cycle', () => {
    const { id } = createTask('Rework task', 'Will need rework');

    tm.assign(id, 'research');
    tm.start(id);
    tm.submitForReview(id, []);
    tm.rework(id, 'Output was incomplete.');

    const afterRework = tm.get(id)!;
    const hasFeedback = afterRework.deliverables.some(d => d.label.startsWith('Rework Feedback'));

    expect(afterRework.status).toBe('rework');
    expect(afterRework.reworkCount).toBe(1);
    expect(hasFeedback).toBe(true);
  });

  it('throws after max rework cycles exceeded', () => {
    const { id } = createTask('Exceed rework', 'Will exceed max rework');

    tm.assign(id, 'code');
    tm.start(id);
    tm.submitForReview(id, []);

    // Use up all 3 rework cycles, resubmitting after each one.
    for (const feedback of ['Round 1', 'Round 2', 'Round 3']) {
      tm.rework(id, feedback);
      tm.submitForReview(id, []);
    }

    const fourthRework = () => tm.rework(id, 'Round 4');
    expect(fourthRework).toThrow(/max rework cycles/i);
  });

  it('isGoalDone returns false when tasks are pending', () => {
    createTask('T1', 'd');

    expect(tm.isGoalDone(GOAL_ID)).toBe(false);
  });

  it('isGoalDone returns true when all tasks are completed', () => {
    const only = createTask('T1', 'd');
    finishTask(only.id);

    expect(tm.isGoalDone(GOAL_ID)).toBe(true);
  });

  it('getReady only returns tasks whose dependencies are completed', () => {
    const first = createTask('T1', 'd');
    const second = createTask('T2', 'd', [first.id]);
    const readyIds = () => tm.getReady(GOAL_ID).map(t => t.id);

    // Before anything runs, only the dependency-free task is ready.
    const initiallyReady = readyIds();
    expect(initiallyReady).toContain(first.id);
    expect(initiallyReady).not.toContain(second.id);

    // Once T1 is completed, its dependant becomes ready.
    finishTask(first.id);
    expect(readyIds()).toContain(second.id);
  });
});
|
|
303
|
-
|
|
304
|
-
// ─── Unit Tests: GoalManager ──────────────────────────────────────────────────
|
|
305
|
-
|
|
306
|
-
// Covers goal creation defaults, status transitions, completion timestamping
// and the error raised for an unknown goal id.
describe('GoalManager — goal lifecycle', () => {
  let gm: GoalManager;

  beforeEach(() => {
    gm = new GoalManager();
  });

  it('creates a goal in created status', () => {
    const fresh = gm.create({ title: 'Test Goal', description: 'Do the thing' });

    expect(fresh.id).toBeTruthy();
    expect(fresh.status).toBe('created');
    expect(fresh.tasks).toHaveLength(0);
    expect(fresh.deliverables).toHaveLength(0);
    expect(fresh.cost.totalTokens).toBe(0);
  });

  it('transitions through goal statuses', () => {
    const { id } = gm.create({ title: 'Goal', description: 'Description' });
    const sequence: GoalStatus[] = ['planning', 'in_progress', 'completed'];

    sequence.forEach(next => {
      expect(gm.setStatus(id, next).status).toBe(next);
    });
  });

  it('sets completedAt when goal is completed', () => {
    const { id } = gm.create({ title: 'G', description: 'D' });

    const finished = gm.setStatus(id, 'completed');

    expect(finished.completedAt).toBeTruthy();
  });

  it('throws for unknown goal ID', () => {
    const updateMissing = () => gm.setStatus('nonexistent-id', 'planning');

    expect(updateMissing).toThrow(/not found/i);
  });
});
|
|
343
|
-
|
|
344
|
-
// ─── Unit Tests: Agent factory methods ───────────────────────────────────────
|
|
345
|
-
|
|
346
|
-
// Checks the configs produced by the Agent.research/code/ops factories, the
// base execute() contract, and the initial state of a freshly built agent.
describe('Agent — factory methods', () => {
  it('creates ResearchClaw config', () => {
    const researchConfig = Agent.research({ model: 'claude-sonnet-4' });

    expect(researchConfig.type).toBe('research');
    expect(researchConfig.name).toBe('ResearchClaw');
    expect(researchConfig.tools).toContain('web_search');
  });

  it('creates CodeClaw config', () => {
    const codeConfig = Agent.code({ model: 'gpt-4o' });

    expect(codeConfig.type).toBe('code');
    expect(codeConfig.name).toBe('CodeClaw');
    expect(codeConfig.tools).toContain('write_file');
  });

  it('creates OpsClaw config', () => {
    const opsConfig = Agent.ops({ model: 'gemini-pro' });

    expect(opsConfig.type).toBe('ops');
    expect(opsConfig.name).toBe('OpsClaw');
  });

  it('base Agent.execute() throws (must be overridden)', async () => {
    const bare = new Agent({ type: 'custom', model: 'gpt-4o' });
    // The task content is irrelevant here; the base implementation is expected to reject.
    const placeholderTask = {} as Task;

    await expect(bare.execute(placeholderTask)).rejects.toThrow(/must be implemented/i);
  });

  it('agent starts in idle status', () => {
    const fresh = makeResearchAgent();

    expect(fresh.status).toBe('idle');
    expect(fresh.type).toBe('research');
    expect(fresh.name).toBe('ResearchClaw');
  });
});
|
|
379
|
-
|
|
380
|
-
// ─── Integration: Full swarm with mocked agents ───────────────────────────────
|
|
381
|
-
|
|
382
|
-
// End-to-end runs of the real ClawSwarm orchestration with MockAgents
// standing in for the research and code agents.
describe('ClawSwarm — full task lifecycle integration', () => {
  /** Fresh default mocks plus a swarm wired to them. */
  const setup = () => {
    const research = makeResearchAgent();
    const code = makeCodeAgent();
    const swarm = buildSwarm(research, code);
    return { research, code, swarm };
  };

  it('creates a swarm with two agents accessible via listAgents()', () => {
    const { swarm } = setup();

    const agentTypes = swarm.listAgents().map(a => a.type);

    expect(agentTypes).toHaveLength(2);
    expect(agentTypes).toContain('research');
    expect(agentTypes).toContain('code');
  });

  it('getAgent returns the correct mock agent by type', () => {
    const { research, code, swarm } = setup();

    expect(swarm.getAgent('research')).toBe(research);
    expect(swarm.getAgent('code')).toBe(code);
    expect(swarm.getAgent('ops')).toBeUndefined();
  });

  it('emits goal:created when a goal is created', () => {
    const { swarm } = setup();
    const seen: string[] = [];
    swarm.on('goal:created', () => seen.push('goal:created'));

    swarm.createGoal({ title: 'Test', description: 'Test goal' });

    expect(seen).toContain('goal:created');
  });

  it('executes goal: created → planning → in_progress → completed', async () => {
    const { swarm } = setup();

    // Record the goal's status at the moment each goal event fires.
    const observedStatuses: string[] = [];
    swarm.on('goal:created', g => observedStatuses.push(g.status));
    swarm.on('goal:planning', g => observedStatuses.push(g.status));
    swarm.on('goal:completed', g => observedStatuses.push(g.status));

    const goal = swarm.createGoal({
      title: 'Build a hello world API',
      description: 'Create a simple REST API with GET /hello returning { message: "world" }',
    });
    const outcome = await swarm.execute(goal);

    expect(outcome.goal.status).toBe('completed');
    expect(outcome.durationMs).toBeGreaterThanOrEqual(0);
    expect(observedStatuses).toContain('created');
    expect(observedStatuses).toContain('completed');
  }, 15_000);

  it('emits task lifecycle events in correct order', async () => {
    const { swarm } = setup();

    const timeline: string[] = [];
    const record = (eventName: string) => () => timeline.push(eventName);
    swarm.on('task:assigned', record('task:assigned'));
    swarm.on('task:started', record('task:started'));
    swarm.on('task:review', record('task:review'));
    swarm.on('task:completed', record('task:completed'));

    const goal = swarm.createGoal({
      title: 'Research project',
      description: 'Analyze competitive landscape in AI tooling',
    });
    await swarm.execute(goal);

    expect(timeline).toContain('task:assigned');
    expect(timeline).toContain('task:started');
    expect(timeline).toContain('task:review');
    expect(timeline).toContain('task:completed');

    // The first assignment must precede the first start.
    const firstAssigned = timeline.indexOf('task:assigned');
    const firstStarted = timeline.indexOf('task:started');
    expect(firstAssigned).toBeLessThan(firstStarted);
  }, 15_000);

  it('routes to human review when stub scores 5–7', async () => {
    // Per the original author's note, the default mock deliverable (~300 chars
    // of text) is scored 7 by the stub scorer, which falls in the human-review band.
    const { swarm } = setup();

    const reviewRequests: string[] = [];
    swarm.on('human:review_required', () => reviewRequests.push('human:review_required'));

    const goal = swarm.createGoal({
      title: 'Medium quality research',
      description: 'Gather some info',
    });
    await swarm.execute(goal);

    expect(reviewRequests.length).toBeGreaterThan(0);
  }, 15_000);

  it('auto-approves when deliverables are high quality (score ≥8)', async () => {
    const research = makeResearchAgent();
    const code = makeCodeAgent();

    // Long code content; per the original author's note the stub scores this 8.
    const richCode =
      'function fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n'.repeat(
        15
      );
    const richDeliverables = (): Deliverable[] => [
      { type: 'code', label: 'Code', content: richCode },
    ];
    research.deliverables = richDeliverables();
    code.deliverables = richDeliverables();

    const swarm = buildSwarm(research, code);
    const autoApproved: ReviewResult[] = [];
    const sentToHuman: ReviewResult[] = [];

    swarm.on('task:review', (_task, review) => {
      if (review.decision === 'approved') autoApproved.push(review);
    });
    swarm.on('human:review_required', (_task, review) => {
      sentToHuman.push(review);
    });

    const goal = swarm.createGoal({
      title: 'Implement Fibonacci',
      description: 'Write a recursive fibonacci function with tests',
    });
    await swarm.execute(goal);

    // NOTE(review): this only proves that some review outcome was observed; it
    // would also pass if every task went to human review. Consider asserting on
    // `autoApproved` alone once the stub's score for this input is confirmed.
    expect(autoApproved.length + sentToHuman.length).toBeGreaterThan(0);
  }, 15_000);

  it('collects deliverables from all completed tasks', async () => {
    const research = makeResearchAgent();
    const code = makeCodeAgent();
    research.deliverables = [
      { type: 'text', label: 'Research Report', content: 'Extensive research findings. '.repeat(10) },
    ];
    code.deliverables = [
      { type: 'code', label: 'Implementation', content: 'const hello = () => "world";\n'.repeat(20) },
    ];

    const swarm = buildSwarm(research, code);
    const goal = swarm.createGoal({
      title: 'Full stack feature',
      description: 'Research and implement a new feature',
    });
    const outcome = await swarm.execute(goal);

    expect(outcome.deliverables.length).toBeGreaterThan(0);
    expect(outcome.goal.status).toBe('completed');
  }, 15_000);
});
|
|
541
|
-
|
|
542
|
-
// ─── Integration: Manual task lifecycle without full swarm ────────────────────
|
|
543
|
-
|
|
544
|
-
// Drives TaskManager and ChiefReviewer by hand (no swarm) to show each step
// of a task's life, including the reject/rework and failure paths.
describe('Manual task lifecycle — step-by-step inbox → done', () => {
  /** Reviewer with the thresholds used throughout this file (approve 8, human review 5). */
  const makeReviewer = () =>
    new ChiefReviewer({ autoApproveThreshold: 8, humanReviewThreshold: 5 });

  it('walks a task through the full lifecycle', async () => {
    const tm = new TaskManager();
    const reviewer = makeReviewer();
    const GOAL_ID = 'manual-goal-001';

    // Step 1: a new task lands in the inbox (status "pending").
    const task = tm.create({
      goalId: GOAL_ID,
      title: 'Write unit tests for auth module',
      description: 'Add comprehensive tests for login, logout, and token refresh flows',
      dependsOn: [],
    });
    const current = () => tm.get(task.id)!;
    expect(task.status).toBe('pending');

    // Step 2: hand it to the code agent.
    tm.assign(task.id, 'code');
    expect(current().status).toBe('assigned');
    expect(current().assignedTo).toBe('code');

    // Step 3: work begins.
    tm.start(task.id);
    expect(current().status).toBe('in_progress');

    // Step 4: submit the deliverable for chief review. The content is a test
    // suite rendered as text: one leading and one trailing newline around the
    // lines below, repeated three times.
    const suiteLines = [
      '',
      "describe('Auth', () => {",
      "it('logs in successfully', async () => {",
      "const result = await auth.login('user', 'pass');",
      'expect(result.token).toBeTruthy();',
      '});',
      "it('rejects invalid credentials', async () => {",
      "await expect(auth.login('user', 'wrong')).rejects.toThrow();",
      '});',
      "it('refreshes tokens', async () => {",
      'const refreshed = await auth.refresh(validToken);',
      'expect(refreshed.token).not.toBe(validToken);',
      '});',
      '});',
      '',
    ];
    const testCode = suiteLines.join('\n').repeat(3);

    const deliverables: Deliverable[] = [
      { type: 'code', label: 'Test Suite', content: testCode },
    ];
    tm.submitForReview(task.id, deliverables);
    expect(current().status).toBe('review');

    // Step 5: chief review of the submitted task.
    const review = await reviewer.review(current());

    expect(review.taskId).toBe(task.id);
    expect(review.score).toBeGreaterThanOrEqual(0);
    expect(review.score).toBeLessThanOrEqual(10);
    expect(['approved', 'human_review', 'rejected']).toContain(review.decision);
    expect(review.reviewedAt).toBeTruthy();

    // Step 6: act on the decision. A rejection sends the task to rework;
    // an approval (automatic, or by a simulated human for "human_review")
    // moves it to completed.
    if (review.decision === 'rejected') {
      tm.rework(task.id, review.feedback);
      expect(current().status).toBe('rework');
      expect(current().reworkCount).toBe(1);
    } else {
      tm.approve(task.id);
      tm.complete(task.id);
      expect(current().status).toBe('completed');
    }
  });

  it('demonstrates auto-reject path with empty deliverables', async () => {
    const tm = new TaskManager();
    const reviewer = makeReviewer();

    const { id } = tm.create({
      goalId: 'rework-goal',
      title: 'Minimal task',
      description: 'Empty output to trigger rejection',
      dependsOn: [],
    });

    tm.assign(id, 'code');
    tm.start(id);
    // No deliverables at all: expected to score 0 and be rejected.
    tm.submitForReview(id, []);

    const review = await reviewer.review(tm.get(id)!);
    expect(review.score).toBe(0);
    expect(review.decision).toBe('rejected');
    // The explanation may be reported as a suggestion or as an issue.
    expect(review.suggestions.length + review.issues.length).toBeGreaterThan(0);

    // The rejection feeds a rework cycle.
    tm.rework(id, review.feedback);
    const afterRework = tm.get(id)!;
    expect(afterRework.status).toBe('rework');
    expect(afterRework.reworkCount).toBe(1);
  });

  it('demonstrates the human_review path (score 5–7)', () => {
    const reviewer = makeReviewer();

    // Every score in the 5–7 band is routed to a human.
    [5, 6, 7].forEach(score => {
      expect(reviewer.scoreToDecision(score)).toBe('human_review');
    });
  });

  it('demonstrates task failure when agent throws', () => {
    const tm = new TaskManager();
    const { id } = tm.create({
      goalId: 'fail-goal',
      title: 'Failing task',
      description: 'Will fail',
      dependsOn: [],
    });

    tm.assign(id, 'code');
    tm.start(id);
    tm.fail(id, new Error('Agent crashed'));

    const failed = tm.get(id)!;
    // The error message is expected to be attached as an "Error" deliverable.
    const hasErrorDeliverable = failed.deliverables.some(
      d => d.label === 'Error' && d.content === 'Agent crashed'
    );

    expect(failed.status).toBe('failed');
    expect(hasErrorDeliverable).toBe(true);
  });
});
|