@clawswarm/core 0.1.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,686 @@
1
+ /**
2
+ * Integration test — Full ClawSwarm task lifecycle
3
+ *
4
+ * Validates the complete workflow:
5
+ * 1. Swarm creation with ResearchClaw + CodeClaw agents
6
+ * 2. Goal creation and task decomposition
7
+ * 3. Task assignment to agents
8
+ * 4. Chief review scoring (auto-approve ≥8, human review 5-7, auto-reject <5)
9
+ * 5. Full task lifecycle: pending → assigned → in_progress → review → done/rework
10
+ */
11
+
12
+ import { describe, it, expect, beforeEach } from 'vitest';
13
+ import { ClawSwarm } from '../clawswarm.js';
14
+ import { Agent } from '../agent.js';
15
+ import { TaskManager } from '../task.js';
16
+ import { GoalManager } from '../goal.js';
17
+ import { ChiefReviewer } from '../chief.js';
18
+ import type {
19
+ Task,
20
+ Deliverable,
21
+ AgentConfig,
22
+ ReviewResult,
23
+ TaskStatus,
24
+ GoalStatus,
25
+ AgentType,
26
+ } from '../types.js';
27
+
28
+ // ─── Mock Agents ──────────────────────────────────────────────────────────────
29
+
30
/**
 * Test double for `Agent` whose behaviour is steered through public fields.
 *
 * - `deliverables`     — what `execute()` resolves with.
 * - `shouldThrow`      — when true, `execute()` rejects instead of resolving.
 * - `executeCallCount` — how many times `execute()` has been invoked.
 */
class MockAgent extends Agent {
  public executeCallCount = 0;
  public shouldThrow = false;
  public deliverables: Deliverable[] = [
    {
      type: 'text',
      label: 'Mock Output',
      content: 'This is a mock deliverable with enough content to pass review. '.repeat(5),
    },
  ];

  constructor(config: AgentConfig) {
    super(config);
  }

  /**
   * Counts the invocation, then either fails or returns the configured
   * deliverables. The task argument is ignored.
   *
   * @throws Error when `shouldThrow` is set.
   */
  async execute(_task: Task): Promise<Deliverable[]> {
    this.executeCallCount += 1;
    if (this.shouldThrow) {
      throw new Error('MockAgent: simulated execute failure');
    }
    return this.deliverables;
  }
}
55
+
56
+ // ─── Helpers ──────────────────────────────────────────────────────────────────
57
+
58
/**
 * Builds a MockAgent configured as the research agent.
 * Any field present in `overrides` replaces the corresponding default.
 */
function makeResearchAgent(overrides: Partial<AgentConfig> = {}): MockAgent {
  const config: AgentConfig = {
    type: 'research',
    model: 'claude-sonnet-4',
    name: 'ResearchClaw',
    ...overrides,
  };
  return new MockAgent(config);
}
66
+
67
/**
 * Builds a MockAgent configured as the code agent.
 * Any field present in `overrides` replaces the corresponding default.
 */
function makeCodeAgent(overrides: Partial<AgentConfig> = {}): MockAgent {
  const config: AgentConfig = {
    type: 'code',
    model: 'gpt-4o',
    name: 'CodeClaw',
    ...overrides,
  };
  return new MockAgent(config);
}
75
+
76
+ // ─── TestableSwarm: inject mock agents ───────────────────────────────────────
77
+
78
/**
 * ClawSwarm subclass that lets tests inject MockAgents.
 *
 * The real constructor runs first with the supplied config; afterwards the
 * instance's `agents` map is overwritten with one built from `mockAgents`.
 * This avoids mocking the constructor while still exercising the real
 * orchestration logic.
 */
class TestableSwarm extends ClawSwarm {
  /**
   * @param config     Passed unchanged to the ClawSwarm constructor. Typed via
   *                   `ConstructorParameters` because ClawSwarm is a class;
   *                   `Parameters` only accepts plain function types.
   * @param mockAgents Agents to install, keyed by their `type`. If two mocks
   *                   share a type, the later one wins.
   */
  constructor(
    config: ConstructorParameters<typeof ClawSwarm>[0],
    mockAgents: MockAgent[]
  ) {
    super(config);

    const agentsMap = new Map<AgentType, Agent>();
    for (const agent of mockAgents) {
      agentsMap.set(agent.type, agent);
    }

    // The cast lets the test assign `agents` directly; presumably the field is
    // not publicly writable on ClawSwarm — confirm against clawswarm.ts.
    (this as unknown as { agents: Map<AgentType, Agent> }).agents = agentsMap;
  }
}
97
+
98
/**
 * Creates a TestableSwarm whose research and code slots are served by the
 * given mocks.
 *
 * @param researchAgent Mock installed for the research agent type.
 * @param codeAgent     Mock installed for the code agent type.
 * @param reviewConfig  Chief-review thresholds; defaults to approve at 8, human review at 5.
 */
function buildSwarm(
  researchAgent: MockAgent,
  codeAgent: MockAgent,
  reviewConfig = { autoApproveThreshold: 8, humanReviewThreshold: 5 }
): TestableSwarm {
  // Real factory configs go through the ClawSwarm constructor; the mocks are
  // swapped in afterwards by TestableSwarm.
  const swarmConfig = {
    agents: [
      Agent.research({ model: 'claude-sonnet-4' }),
      Agent.code({ model: 'gpt-4o' }),
    ],
    chiefReview: reviewConfig,
  };
  const mocks = [researchAgent, codeAgent];

  return new TestableSwarm(swarmConfig, mocks);
}
114
+
115
+ // ─── Unit Tests: ChiefReviewer scoring thresholds ─────────────────────────────
116
+
117
// Checks how ChiefReviewer maps a numeric score onto a review decision when
// configured with autoApproveThreshold = 8 and humanReviewThreshold = 5.
describe('ChiefReviewer — score thresholds', () => {
  let reviewer: ChiefReviewer;

  beforeEach(() => {
    reviewer = new ChiefReviewer({ autoApproveThreshold: 8, humanReviewThreshold: 5 });
  });

  it('auto-approves scores ≥ 8', () => {
    for (const score of [8, 9, 10]) {
      expect(reviewer.scoreToDecision(score)).toBe('approved');
    }
  });

  it('routes scores 5–7 to human review', () => {
    for (const score of [5, 6, 7]) {
      expect(reviewer.scoreToDecision(score)).toBe('human_review');
    }
  });

  it('auto-rejects scores < 5', () => {
    for (const score of [4, 1, 0]) {
      expect(reviewer.scoreToDecision(score)).toBe('rejected');
    }
  });

  it('exposes its config', () => {
    const { autoApproveThreshold, humanReviewThreshold } = reviewer.config;
    expect(autoApproveThreshold).toBe(8);
    expect(humanReviewThreshold).toBe(5);
  });

  it('throws if autoApproveThreshold < humanReviewThreshold', () => {
    // Thresholds deliberately inverted: approve bar below the human-review bar.
    const construct = () =>
      new ChiefReviewer({ autoApproveThreshold: 3, humanReviewThreshold: 7 });
    expect(construct).toThrow();
  });
});
156
+
157
+ // ─── Unit Tests: TaskManager lifecycle ────────────────────────────────────────
158
+
159
// Exercises TaskManager state transitions, the rework limit, goal completion
// detection and dependency-aware readiness. Each test gets a fresh manager.
describe('TaskManager — task lifecycle', () => {
  const GOAL_ID = 'goal-test-001';
  let tm: TaskManager;

  type TaskId = Parameters<TaskManager['assign']>[0];

  /** Drives a pending task through assign → start → review → approve → complete. */
  const driveToCompleted = (taskId: TaskId): void => {
    tm.assign(taskId, 'code');
    tm.start(taskId);
    tm.submitForReview(taskId, [{ type: 'text', label: 'x', content: 'x' }]);
    tm.approve(taskId);
    tm.complete(taskId);
  };

  beforeEach(() => {
    tm = new TaskManager();
  });

  it('creates a task in pending state', () => {
    const created = tm.create({
      goalId: GOAL_ID,
      title: 'Research AI trends',
      description: 'Find top 5 AI trends in 2026',
      dependsOn: [],
    });

    expect(created.status).toBe('pending');
    expect(created.id).toBeTruthy();
    expect(created.goalId).toBe(GOAL_ID);
    expect(created.deliverables).toHaveLength(0);
    expect(created.reworkCount).toBe(0);
  });

  it('transitions: pending → assigned → in_progress → review → approved → completed', () => {
    const { id } = tm.create({
      goalId: GOAL_ID,
      title: 'Build feature',
      description: 'Implement the feature',
      dependsOn: [],
    });

    // Re-reads the task from the manager every time so we assert stored state.
    const expectStatus = (expected: TaskStatus) => {
      expect(tm.get(id)!.status).toBe(expected);
    };

    tm.assign(id, 'code');
    expectStatus('assigned');
    expect(tm.get(id)!.assignedTo).toBe('code');

    tm.start(id);
    expectStatus('in_progress');

    const output: Deliverable[] = [
      { type: 'code', label: 'Implementation', content: 'function add(a, b) { return a + b; }' },
    ];
    tm.submitForReview(id, output);
    expectStatus('review');

    tm.approve(id);
    expectStatus('approved');

    tm.complete(id);
    expectStatus('completed');
  });

  it('handles rework cycle', () => {
    const { id } = tm.create({
      goalId: GOAL_ID,
      title: 'Rework task',
      description: 'Will need rework',
      dependsOn: [],
    });

    tm.assign(id, 'research');
    tm.start(id);
    tm.submitForReview(id, []);
    tm.rework(id, 'Output was incomplete.');

    const afterRework = tm.get(id)!;
    expect(afterRework.status).toBe('rework');
    expect(afterRework.reworkCount).toBe(1);

    // The feedback is expected to show up as a deliverable on the task.
    const hasFeedback = afterRework.deliverables.some(d =>
      d.label.startsWith('Rework Feedback')
    );
    expect(hasFeedback).toBe(true);
  });

  it('throws after max rework cycles exceeded', () => {
    const { id } = tm.create({
      goalId: GOAL_ID,
      title: 'Exceed rework',
      description: 'Will exceed max rework',
      dependsOn: [],
    });

    tm.assign(id, 'code');
    tm.start(id);
    tm.submitForReview(id, []);

    // Use up three rework rounds (the test assumes the limit is three),
    // resubmitting after each one.
    for (const feedback of ['Round 1', 'Round 2', 'Round 3']) {
      tm.rework(id, feedback);
      tm.submitForReview(id, []);
    }

    expect(() => tm.rework(id, 'Round 4')).toThrow(/max rework cycles/i);
  });

  it('isGoalDone returns false when tasks are pending', () => {
    tm.create({ goalId: GOAL_ID, title: 'T1', description: 'd', dependsOn: [] });

    expect(tm.isGoalDone(GOAL_ID)).toBe(false);
  });

  it('isGoalDone returns true when all tasks are completed', () => {
    const only = tm.create({ goalId: GOAL_ID, title: 'T1', description: 'd', dependsOn: [] });
    driveToCompleted(only.id);

    expect(tm.isGoalDone(GOAL_ID)).toBe(true);
  });

  it('getReady only returns tasks whose dependencies are completed', () => {
    const first = tm.create({
      goalId: GOAL_ID,
      title: 'T1',
      description: 'd',
      dependsOn: [],
    });
    const second = tm.create({
      goalId: GOAL_ID,
      title: 'T2',
      description: 'd',
      dependsOn: [first.id],
    });

    // Before anything runs, only the dependency-free task is ready.
    const readyBefore = tm.getReady(GOAL_ID).map(t => t.id);
    expect(readyBefore).toContain(first.id);
    expect(readyBefore).not.toContain(second.id);

    // Once T1 is completed, T2 becomes ready.
    driveToCompleted(first.id);

    const readyAfter = tm.getReady(GOAL_ID).map(t => t.id);
    expect(readyAfter).toContain(second.id);
  });
});
303
+
304
+ // ─── Unit Tests: GoalManager ──────────────────────────────────────────────────
305
+
306
// Covers goal creation defaults, status updates via setStatus, the
// completedAt stamp, and the error raised for an unknown goal id.
describe('GoalManager — goal lifecycle', () => {
  let gm: GoalManager;

  beforeEach(() => {
    gm = new GoalManager();
  });

  it('creates a goal in created status', () => {
    const fresh = gm.create({ title: 'Test Goal', description: 'Do the thing' });

    expect(fresh.id).toBeTruthy();
    expect(fresh.status).toBe('created');
    expect(fresh.tasks).toHaveLength(0);
    expect(fresh.deliverables).toHaveLength(0);
    expect(fresh.cost.totalTokens).toBe(0);
  });

  it('transitions through goal statuses', () => {
    const { id } = gm.create({ title: 'Goal', description: 'Description' });
    const sequence: GoalStatus[] = ['planning', 'in_progress', 'completed'];

    // setStatus returns the updated goal; check it reflects each new status.
    sequence.forEach(next => {
      expect(gm.setStatus(id, next).status).toBe(next);
    });
  });

  it('sets completedAt when goal is completed', () => {
    const { id } = gm.create({ title: 'G', description: 'D' });

    const finished = gm.setStatus(id, 'completed');

    expect(finished.completedAt).toBeTruthy();
  });

  it('throws for unknown goal ID', () => {
    const setOnMissing = () => gm.setStatus('nonexistent-id', 'planning');
    expect(setOnMissing).toThrow(/not found/i);
  });
});
343
+
344
+ // ─── Unit Tests: Agent factory methods ───────────────────────────────────────
345
+
346
// Verifies the static Agent config factories, that the base execute() must be
// overridden, and the initial state of a freshly constructed agent.
describe('Agent — factory methods', () => {
  it('creates ResearchClaw config', () => {
    const { type, name, tools } = Agent.research({ model: 'claude-sonnet-4' });

    expect(type).toBe('research');
    expect(name).toBe('ResearchClaw');
    expect(tools).toContain('web_search');
  });

  it('creates CodeClaw config', () => {
    const { type, name, tools } = Agent.code({ model: 'gpt-4o' });

    expect(type).toBe('code');
    expect(name).toBe('CodeClaw');
    expect(tools).toContain('write_file');
  });

  it('creates OpsClaw config', () => {
    const { type, name } = Agent.ops({ model: 'gemini-pro' });

    expect(type).toBe('ops');
    expect(name).toBe('OpsClaw');
  });

  it('base Agent.execute() throws (must be overridden)', async () => {
    const bare = new Agent({ type: 'custom', model: 'gpt-4o' });

    // The task content is irrelevant here, so an empty object is cast to Task.
    await expect(bare.execute({} as Task)).rejects.toThrow(/must be implemented/i);
  });

  it('agent starts in idle status', () => {
    const { status, type, name } = makeResearchAgent();

    expect(status).toBe('idle');
    expect(type).toBe('research');
    expect(name).toBe('ResearchClaw');
  });
});
379
+
380
+ // ─── Integration: Full swarm with mocked agents ───────────────────────────────
381
+
382
// End-to-end checks of ClawSwarm orchestration, with MockAgents standing in
// for the real research and code agents.
describe('ClawSwarm — full task lifecycle integration', () => {
  // Per-test timeout (ms) for the cases that await swarm.execute().
  const EXECUTE_TIMEOUT_MS = 15_000;

  it('creates a swarm with two agents accessible via listAgents()', () => {
    const swarm = buildSwarm(makeResearchAgent(), makeCodeAgent());

    const agents = swarm.listAgents();
    expect(agents).toHaveLength(2);

    const agentTypes = agents.map(a => a.type);
    expect(agentTypes).toContain('research');
    expect(agentTypes).toContain('code');
  });

  it('getAgent returns the correct mock agent by type', () => {
    const researchMock = makeResearchAgent();
    const codeMock = makeCodeAgent();
    const swarm = buildSwarm(researchMock, codeMock);

    // Identity check: the swarm must hand back the very instances we injected.
    expect(swarm.getAgent('research')).toBe(researchMock);
    expect(swarm.getAgent('code')).toBe(codeMock);
    expect(swarm.getAgent('ops')).toBeUndefined();
  });

  it('emits goal:created when a goal is created', () => {
    const swarm = buildSwarm(makeResearchAgent(), makeCodeAgent());
    const seen: string[] = [];
    swarm.on('goal:created', () => seen.push('goal:created'));

    swarm.createGoal({ title: 'Test', description: 'Test goal' });

    expect(seen).toContain('goal:created');
  });

  it('executes goal: created → planning → in_progress → completed', async () => {
    const swarm = buildSwarm(makeResearchAgent(), makeCodeAgent());

    // Record the goal's status as seen by each goal-level event.
    const observedStatuses: string[] = [];
    swarm.on('goal:created', g => observedStatuses.push(g.status));
    swarm.on('goal:planning', g => observedStatuses.push(g.status));
    swarm.on('goal:completed', g => observedStatuses.push(g.status));

    const goal = swarm.createGoal({
      title: 'Build a hello world API',
      description: 'Create a simple REST API with GET /hello returning { message: "world" }',
    });

    const outcome = await swarm.execute(goal);

    expect(outcome.goal.status).toBe('completed');
    expect(outcome.durationMs).toBeGreaterThanOrEqual(0);
    expect(observedStatuses).toContain('created');
    expect(observedStatuses).toContain('completed');
  }, EXECUTE_TIMEOUT_MS);

  it('emits task lifecycle events in correct order', async () => {
    const swarm = buildSwarm(makeResearchAgent(), makeCodeAgent());

    const timeline: string[] = [];
    swarm.on('task:assigned', () => timeline.push('task:assigned'));
    swarm.on('task:started', () => timeline.push('task:started'));
    swarm.on('task:review', () => timeline.push('task:review'));
    swarm.on('task:completed', () => timeline.push('task:completed'));

    const goal = swarm.createGoal({
      title: 'Research project',
      description: 'Analyze competitive landscape in AI tooling',
    });

    await swarm.execute(goal);

    expect(timeline).toContain('task:assigned');
    expect(timeline).toContain('task:started');
    expect(timeline).toContain('task:review');
    expect(timeline).toContain('task:completed');

    // Only the first assigned/started pair is ordered-checked here.
    const firstAssigned = timeline.indexOf('task:assigned');
    const firstStarted = timeline.indexOf('task:started');
    expect(firstAssigned).toBeLessThan(firstStarted);
  }, EXECUTE_TIMEOUT_MS);

  it('routes to human review when stub scores 5–7', async () => {
    // With the default mock deliverables the stub scorer is expected to land
    // in the human-review band (the original author notes a score of 7).
    const swarm = buildSwarm(makeResearchAgent(), makeCodeAgent());

    const humanReviewRequests: string[] = [];
    swarm.on('human:review_required', () =>
      humanReviewRequests.push('human:review_required')
    );

    const goal = swarm.createGoal({
      title: 'Medium quality research',
      description: 'Gather some info',
    });

    await swarm.execute(goal);

    expect(humanReviewRequests.length).toBeGreaterThan(0);
  }, EXECUTE_TIMEOUT_MS);

  it('auto-approves when deliverables are high quality (score ≥8)', async () => {
    const researchMock = makeResearchAgent();
    const codeMock = makeCodeAgent();

    // Long code content, intended to push the stub scorer up to 8.
    const richCode =
      'function fibonacci(n) {\n if (n <= 1) return n;\n return fibonacci(n - 1) + fibonacci(n - 2);\n}\n'.repeat(
        15
      );
    researchMock.deliverables = [{ type: 'code', label: 'Code', content: richCode }];
    codeMock.deliverables = [{ type: 'code', label: 'Code', content: richCode }];

    const swarm = buildSwarm(researchMock, codeMock);
    const autoApproved: ReviewResult[] = [];
    const sentToHuman: ReviewResult[] = [];

    swarm.on('task:review', (_task, review) => {
      if (review.decision === 'approved') {
        autoApproved.push(review);
      }
    });
    swarm.on('human:review_required', (_task, review) => {
      sentToHuman.push(review);
    });

    const goal = swarm.createGoal({
      title: 'Implement Fibonacci',
      description: 'Write a recursive fibonacci function with tests',
    });

    await swarm.execute(goal);

    // NOTE(review): this passes when every review goes to a human as well, so
    // it does not by itself prove that any task was auto-approved.
    expect(autoApproved.length + sentToHuman.length).toBeGreaterThan(0);
  }, EXECUTE_TIMEOUT_MS);

  it('collects deliverables from all completed tasks', async () => {
    const researchMock = makeResearchAgent();
    const codeMock = makeCodeAgent();
    researchMock.deliverables = [
      { type: 'text', label: 'Research Report', content: 'Extensive research findings. '.repeat(10) },
    ];
    codeMock.deliverables = [
      { type: 'code', label: 'Implementation', content: 'const hello = () => "world";\n'.repeat(20) },
    ];

    const swarm = buildSwarm(researchMock, codeMock);
    const goal = swarm.createGoal({
      title: 'Full stack feature',
      description: 'Research and implement a new feature',
    });

    const outcome = await swarm.execute(goal);

    expect(outcome.deliverables.length).toBeGreaterThan(0);
    expect(outcome.goal.status).toBe('completed');
  }, EXECUTE_TIMEOUT_MS);
});
541
+
542
+ // ─── Integration: Manual task lifecycle without full swarm ────────────────────
543
+
544
// Walks tasks through TaskManager and ChiefReviewer by hand, without a swarm,
// to show each step between creation ("inbox") and completion ("done").
describe('Manual task lifecycle — step-by-step inbox → done', () => {
  /** Reviewer with the standard thresholds used throughout this file. */
  const makeReviewer = () =>
    new ChiefReviewer({ autoApproveThreshold: 8, humanReviewThreshold: 5 });

  it('walks a task through the full lifecycle', async () => {
    const tm = new TaskManager();
    const reviewer = makeReviewer();
    const GOAL_ID = 'manual-goal-001';

    // 1. Create task (inbox / pending)
    const task = tm.create({
      goalId: GOAL_ID,
      title: 'Write unit tests for auth module',
      description: 'Add comprehensive tests for login, logout, and token refresh flows',
      dependsOn: [],
    });
    expect(task.status).toBe('pending');

    // Always re-read from the manager so assertions see the stored state.
    const current = () => tm.get(task.id)!;

    // 2. Assign to the code agent
    tm.assign(task.id, 'code');
    expect(current().status).toBe('assigned');
    expect(current().assignedTo).toBe('code');

    // 3. Start work
    tm.start(task.id);
    expect(current().status).toBe('in_progress');

    // 4. Submit a code deliverable for chief review
    const testCode = `
describe('Auth', () => {
it('logs in successfully', async () => {
const result = await auth.login('user', 'pass');
expect(result.token).toBeTruthy();
});
it('rejects invalid credentials', async () => {
await expect(auth.login('user', 'wrong')).rejects.toThrow();
});
it('refreshes tokens', async () => {
const refreshed = await auth.refresh(validToken);
expect(refreshed.token).not.toBe(validToken);
});
});
`.repeat(3);

    const submitted: Deliverable[] = [
      { type: 'code', label: 'Test Suite', content: testCode },
    ];
    tm.submitForReview(task.id, submitted);
    expect(current().status).toBe('review');

    // 5. Run the chief review on the stored task
    const review = await reviewer.review(current());

    expect(review.taskId).toBe(task.id);
    expect(review.score).toBeGreaterThanOrEqual(0);
    expect(review.score).toBeLessThanOrEqual(10);
    expect(['approved', 'human_review', 'rejected']).toContain(review.decision);
    expect(review.reviewedAt).toBeTruthy();

    // 6. Act on the decision. 'approved' and 'human_review' (simulating a
    //    human sign-off) both end in completed; anything else goes to rework.
    const accepted =
      review.decision === 'approved' || review.decision === 'human_review';
    if (accepted) {
      tm.approve(task.id);
      tm.complete(task.id);
      expect(current().status).toBe('completed');
    } else {
      tm.rework(task.id, review.feedback);
      expect(current().status).toBe('rework');
      expect(current().reworkCount).toBe(1);
    }
  });

  it('demonstrates auto-reject path with empty deliverables', async () => {
    const tm = new TaskManager();
    const reviewer = makeReviewer();

    const { id } = tm.create({
      goalId: 'rework-goal',
      title: 'Minimal task',
      description: 'Empty output to trigger rejection',
      dependsOn: [],
    });

    tm.assign(id, 'code');
    tm.start(id);
    // Nothing delivered: the review below is expected to score 0 and reject.
    tm.submitForReview(id, []);

    const review = await reviewer.review(tm.get(id)!);
    expect(review.score).toBe(0);
    expect(review.decision).toBe('rejected');
    // The explanation may land in either list, so only the combined count is checked.
    expect(review.suggestions.length + review.issues.length).toBeGreaterThan(0);

    // Send the task back for rework using the reviewer's feedback.
    tm.rework(id, review.feedback);
    const afterRework = tm.get(id)!;
    expect(afterRework.status).toBe('rework');
    expect(afterRework.reworkCount).toBe(1);
  });

  it('demonstrates the human_review path (score 5–7)', () => {
    const reviewer = makeReviewer();

    [5, 6, 7].forEach(score => {
      expect(reviewer.scoreToDecision(score)).toBe('human_review');
    });
  });

  it('demonstrates task failure when agent throws', () => {
    const tm = new TaskManager();
    const { id } = tm.create({
      goalId: 'fail-goal',
      title: 'Failing task',
      description: 'Will fail',
      dependsOn: [],
    });

    tm.assign(id, 'code');
    tm.start(id);
    // No agent is involved; the failure is reported to the manager directly.
    tm.fail(id, new Error('Agent crashed'));

    const failedTask = tm.get(id)!;
    expect(failedTask.status).toBe('failed');

    // The error message is expected to be recorded as an 'Error' deliverable.
    const hasErrorDeliverable = failedTask.deliverables.some(
      d => d.label === 'Error' && d.content === 'Agent crashed'
    );
    expect(hasErrorDeliverable).toBe(true);
  });
});