opencode-swarm-plugin 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/.hive/issues.jsonl +16 -4
  2. package/.hive/memories.jsonl +274 -1
  3. package/.turbo/turbo-build.log +4 -4
  4. package/.turbo/turbo-test.log +318 -318
  5. package/CHANGELOG.md +113 -0
  6. package/bin/swarm.test.ts +106 -0
  7. package/bin/swarm.ts +413 -179
  8. package/dist/compaction-hook.d.ts +54 -4
  9. package/dist/compaction-hook.d.ts.map +1 -1
  10. package/dist/eval-capture.d.ts +122 -17
  11. package/dist/eval-capture.d.ts.map +1 -1
  12. package/dist/index.d.ts +1 -7
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +1278 -619
  15. package/dist/planning-guardrails.d.ts +121 -0
  16. package/dist/planning-guardrails.d.ts.map +1 -1
  17. package/dist/plugin.d.ts +9 -9
  18. package/dist/plugin.d.ts.map +1 -1
  19. package/dist/plugin.js +1283 -329
  20. package/dist/schemas/task.d.ts +0 -1
  21. package/dist/schemas/task.d.ts.map +1 -1
  22. package/dist/swarm-decompose.d.ts +0 -8
  23. package/dist/swarm-decompose.d.ts.map +1 -1
  24. package/dist/swarm-orchestrate.d.ts.map +1 -1
  25. package/dist/swarm-prompts.d.ts +0 -4
  26. package/dist/swarm-prompts.d.ts.map +1 -1
  27. package/dist/swarm-review.d.ts.map +1 -1
  28. package/dist/swarm.d.ts +0 -6
  29. package/dist/swarm.d.ts.map +1 -1
  30. package/evals/README.md +38 -0
  31. package/evals/coordinator-session.eval.ts +154 -0
  32. package/evals/fixtures/coordinator-sessions.ts +328 -0
  33. package/evals/lib/data-loader.ts +69 -0
  34. package/evals/scorers/coordinator-discipline.evalite-test.ts +536 -0
  35. package/evals/scorers/coordinator-discipline.ts +315 -0
  36. package/evals/scorers/index.ts +12 -0
  37. package/examples/plugin-wrapper-template.ts +303 -4
  38. package/package.json +2 -2
  39. package/src/compaction-hook.test.ts +8 -1
  40. package/src/compaction-hook.ts +31 -21
  41. package/src/eval-capture.test.ts +390 -0
  42. package/src/eval-capture.ts +163 -4
  43. package/src/hive.integration.test.ts +148 -0
  44. package/src/hive.ts +89 -0
  45. package/src/index.ts +68 -1
  46. package/src/planning-guardrails.test.ts +387 -2
  47. package/src/planning-guardrails.ts +289 -0
  48. package/src/plugin.ts +10 -10
  49. package/src/swarm-decompose.test.ts +195 -0
  50. package/src/swarm-decompose.ts +72 -1
  51. package/src/swarm-orchestrate.ts +44 -0
  52. package/src/swarm-prompts.ts +20 -0
  53. package/src/swarm-review.integration.test.ts +24 -29
  54. package/src/swarm-review.ts +41 -0
@@ -0,0 +1,536 @@
1
+ /**
2
+ * Tests for coordinator-discipline scorers
3
+ */
4
+ import { describe, expect, it } from "bun:test";
5
+ import type { CoordinatorSession } from "../../src/eval-capture.js";
6
+ import {
7
+ overallDiscipline,
8
+ reviewThoroughness,
9
+ spawnEfficiency,
10
+ timeToFirstSpawn,
11
+ violationCount,
12
+ } from "./coordinator-discipline.js";
13
+
14
/**
 * Tests for the `violationCount` scorer.
 *
 * The expectations pin a penalty of 0.2 per VIOLATION event and a score that
 * bottoms out at 0 once enough violations accumulate.
 */
describe("violationCount", () => {
  /** Runs the scorer against the JSON-serialized session. */
  const scoreSession = (session: CoordinatorSession) =>
    violationCount.scorer({
      output: JSON.stringify(session),
      expected: {},
    });

  it("scores 1.0 for zero violations", () => {
    const session: CoordinatorSession = {
      session_id: "test-session",
      epic_id: "test-epic",
      start_time: "2025-01-01T00:00:00Z",
      end_time: "2025-01-01T01:00:00Z",
      events: [
        {
          session_id: "test-session",
          epic_id: "test-epic",
          timestamp: "2025-01-01T00:00:00Z",
          event_type: "DECISION",
          decision_type: "strategy_selected",
          payload: { strategy: "file-based" },
        },
      ],
    };

    const result = scoreSession(session);

    expect(result.score).toBe(1.0);
    // A plain toContain("0 violations") is also satisfied by "10 violations",
    // so require that the zero is not the tail of a larger number.
    expect(result.message).toMatch(/(?:^|\D)0 violations/);
  });

  it("decreases score by 0.2 per violation", () => {
    const session: CoordinatorSession = {
      session_id: "test-session",
      epic_id: "test-epic",
      start_time: "2025-01-01T00:00:00Z",
      end_time: "2025-01-01T01:00:00Z",
      events: [
        {
          session_id: "test-session",
          epic_id: "test-epic",
          timestamp: "2025-01-01T00:00:10Z",
          event_type: "VIOLATION",
          violation_type: "coordinator_edited_file",
          payload: { file: "test.ts" },
        },
        {
          session_id: "test-session",
          epic_id: "test-epic",
          timestamp: "2025-01-01T00:00:20Z",
          event_type: "VIOLATION",
          violation_type: "coordinator_ran_tests",
          payload: { command: "bun test" },
        },
      ],
    };

    const result = scoreSession(session);

    // 1.0 - 0.2 * 2. Compared with a tolerance because the expected value is
    // the result of floating-point arithmetic, where exact equality is fragile.
    expect(result.score).toBeCloseTo(0.6, 10);
    expect(result.message).toContain("2 violations");
  });

  it("floors score at 0.0 for many violations", () => {
    // Ten violations: at 0.2 each this would go negative without a floor.
    const session: CoordinatorSession = {
      session_id: "test-session",
      epic_id: "test-epic",
      start_time: "2025-01-01T00:00:00Z",
      events: Array.from({ length: 10 }, (_, i) => ({
        session_id: "test-session",
        epic_id: "test-epic",
        timestamp: `2025-01-01T00:00:${String(i).padStart(2, "0")}Z`,
        event_type: "VIOLATION" as const,
        violation_type: "coordinator_edited_file" as const,
        payload: { file: `test${i}.ts` },
      })),
    };

    const result = scoreSession(session);

    expect(result.score).toBe(0.0);
    expect(result.message).toContain("10 violations");
  });
});
101
+
102
/**
 * Tests for the `spawnEfficiency` scorer.
 *
 * Each case pairs an optional `decomposition_complete` event (carrying a
 * `subtask_count`) with some number of `worker_spawned` events and checks the
 * resulting score and the "spawned/planned" fragment of the message.
 */
describe("spawnEfficiency", () => {
  const SESSION_ID = "test-session";
  const EPIC_ID = "test-epic";

  /** Formats a timestamp `seconds` past midnight on 2025-01-01 (UTC). */
  const stamp = (seconds: number) =>
    `2025-01-01T00:00:${String(seconds).padStart(2, "0")}Z`;

  /** DECISION event announcing that decomposition produced `subtaskCount` subtasks. */
  const decomposed = (subtaskCount: number) => ({
    session_id: SESSION_ID,
    epic_id: EPIC_ID,
    timestamp: stamp(0),
    event_type: "DECISION" as const,
    decision_type: "decomposition_complete" as const,
    payload: { subtask_count: subtaskCount },
  });

  /** DECISION event recording a worker spawned for `beadId`. */
  const spawned = (seconds: number, beadId: string) => ({
    session_id: SESSION_ID,
    epic_id: EPIC_ID,
    timestamp: stamp(seconds),
    event_type: "DECISION" as const,
    decision_type: "worker_spawned" as const,
    payload: { bead_id: beadId },
  });

  /** Wraps the events in a session and feeds its JSON form to the scorer. */
  const evaluate = (events: CoordinatorSession["events"]) => {
    const session: CoordinatorSession = {
      session_id: SESSION_ID,
      epic_id: EPIC_ID,
      start_time: stamp(0),
      events,
    };
    return spawnEfficiency.scorer({
      output: JSON.stringify(session),
      expected: {},
    });
  };

  it("scores 1.0 when all subtasks have workers spawned", () => {
    const outcome = evaluate([
      decomposed(3),
      spawned(10, "bd-1"),
      spawned(20, "bd-2"),
      spawned(30, "bd-3"),
    ]);

    expect(outcome.score).toBe(1.0);
    expect(outcome.message).toContain("3/3");
  });

  it("scores less than 1.0 when some workers not spawned", () => {
    const outcome = evaluate([
      decomposed(4),
      spawned(10, "bd-1"),
      spawned(20, "bd-2"),
    ]);

    // Two of the four planned subtasks got a worker.
    expect(outcome.score).toBe(0.5);
    expect(outcome.message).toContain("2/4");
  });

  it("returns 0 when no decomposition event found", () => {
    // A spawn with no preceding decomposition event.
    const outcome = evaluate([spawned(10, "bd-1")]);

    expect(outcome.score).toBe(0);
    expect(outcome.message).toContain("No decomposition");
  });
});
221
+
222
/**
 * Tests for the `reviewThoroughness` scorer.
 *
 * Sessions combine `subtask_success` outcomes with `review_completed`
 * decisions; the assertions check the score and the "reviewed/finished"
 * fragment of the message.
 */
describe("reviewThoroughness", () => {
  // Fields shared by every event in these fixtures.
  const ids = { session_id: "test-session", epic_id: "test-epic" };
  const START = "2025-01-01T00:00:00Z";

  /** OUTCOME event: the subtask for `bead_id` succeeded at `timestamp`. */
  function succeeded(timestamp: string, bead_id: string) {
    return {
      ...ids,
      timestamp,
      event_type: "OUTCOME" as const,
      outcome_type: "subtask_success" as const,
      payload: { bead_id },
    };
  }

  /** DECISION event: the review for `bead_id` completed at `timestamp`. */
  function reviewed(timestamp: string, bead_id: string) {
    return {
      ...ids,
      timestamp,
      event_type: "DECISION" as const,
      decision_type: "review_completed" as const,
      payload: { bead_id },
    };
  }

  /** Scores a session that starts at START and contains the given events. */
  function scoreEvents(events: CoordinatorSession["events"]) {
    const session: CoordinatorSession = {
      ...ids,
      start_time: START,
      events,
    };
    return reviewThoroughness.scorer({
      output: JSON.stringify(session),
      expected: {},
    });
  }

  it("scores 1.0 when all workers have reviews", () => {
    const { score, message } = scoreEvents([
      succeeded("2025-01-01T00:00:00Z", "bd-1"),
      succeeded("2025-01-01T00:00:10Z", "bd-2"),
      reviewed("2025-01-01T00:00:20Z", "bd-1"),
      reviewed("2025-01-01T00:00:30Z", "bd-2"),
    ]);

    expect(score).toBe(1.0);
    expect(message).toContain("2/2");
  });

  it("scores less than 1.0 when some workers missing reviews", () => {
    // bd-2 succeeded but has no review_completed event.
    const { score, message } = scoreEvents([
      succeeded("2025-01-01T00:00:00Z", "bd-1"),
      succeeded("2025-01-01T00:00:10Z", "bd-2"),
      reviewed("2025-01-01T00:00:20Z", "bd-1"),
    ]);

    expect(score).toBe(0.5);
    expect(message).toContain("1/2");
  });

  it("returns 1.0 when no workers finished", () => {
    // Only a strategy decision: no outcomes and no reviews.
    const { score, message } = scoreEvents([
      {
        ...ids,
        timestamp: START,
        event_type: "DECISION",
        decision_type: "strategy_selected",
        payload: { strategy: "file-based" },
      },
    ]);

    expect(score).toBe(1.0);
    expect(message).toContain("No finished workers");
  });
});
341
+
342
/**
 * Tests for the `timeToFirstSpawn` scorer.
 *
 * The fixtures vary the presence of the `decomposition_complete` event and the
 * first `worker_spawned` event, and the gap between them.
 */
describe("timeToFirstSpawn", () => {
  const base = { session_id: "test-session", epic_id: "test-epic" };
  const T0 = "2025-01-01T00:00:00Z";

  // Decomposition finishes at T0 with three subtasks.
  const decompositionAtStart = {
    ...base,
    timestamp: T0,
    event_type: "DECISION" as const,
    decision_type: "decomposition_complete" as const,
    payload: { subtask_count: 3 },
  };

  /** A `worker_spawned` decision for bd-1 at the given timestamp. */
  const firstSpawnAt = (timestamp: string) => ({
    ...base,
    timestamp,
    event_type: "DECISION" as const,
    decision_type: "worker_spawned" as const,
    payload: { bead_id: "bd-1" },
  });

  /** Builds a session from the events and returns the scorer's verdict. */
  const measure = (events: CoordinatorSession["events"]) => {
    const session: CoordinatorSession = { ...base, start_time: T0, events };
    return timeToFirstSpawn.scorer({
      output: JSON.stringify(session),
      expected: {},
    });
  };

  it("normalizes time to 0-1 range (faster is better)", () => {
    // First spawn happens 30 seconds after decomposition.
    const verdict = measure([
      decompositionAtStart,
      firstSpawnAt("2025-01-01T00:00:30Z"),
    ]);

    // A 30-second gap is expected to score above 0.9.
    expect(verdict.score).toBeGreaterThan(0.9);
    expect(verdict.message).toContain("30000ms");
  });

  it("returns 0 when no worker spawned", () => {
    const verdict = measure([decompositionAtStart]);

    expect(verdict.score).toBe(0);
    expect(verdict.message).toContain("No worker spawned");
  });

  it("returns 0 when no decomposition event", () => {
    const verdict = measure([firstSpawnAt("2025-01-01T00:00:10Z")]);

    expect(verdict.score).toBe(0);
    expect(verdict.message).toContain("No decomposition");
  });
});
431
+
432
/**
 * Tests for the `overallDiscipline` composite scorer.
 *
 * One case feeds a session with no violations, a worker per subtask and a
 * review per finished worker; the other checks that the message carries a
 * labelled breakdown.
 */
describe("overallDiscipline", () => {
  const common = { session_id: "test-session", epic_id: "test-epic" };

  /** Timestamp on 2025-01-01 (UTC) for an "HH:MM:SS" clock value. */
  const at = (clock: string) => `2025-01-01T${clock}Z`;

  /** DECISION event of the given type at `clock`. */
  const decision = (
    clock: string,
    decision_type:
      | "decomposition_complete"
      | "worker_spawned"
      | "review_completed"
      | "strategy_selected",
    payload: Record<string, unknown>,
  ) => ({
    ...common,
    timestamp: at(clock),
    event_type: "DECISION" as const,
    decision_type,
    payload,
  });

  /** OUTCOME event marking the subtask for `bead_id` as successful at `clock`. */
  const success = (clock: string, bead_id: string) => ({
    ...common,
    timestamp: at(clock),
    event_type: "OUTCOME" as const,
    outcome_type: "subtask_success" as const,
    payload: { bead_id },
  });

  /** Runs the composite scorer over a session made of the given events. */
  const grade = (events: CoordinatorSession["events"]) => {
    const session: CoordinatorSession = {
      ...common,
      start_time: at("00:00:00"),
      events,
    };
    return overallDiscipline.scorer({
      output: JSON.stringify(session),
      expected: {},
    });
  };

  it("computes weighted composite score", () => {
    // Two subtasks, both spawned within 20 seconds, both succeed, both reviewed.
    const graded = grade([
      decision("00:00:00", "decomposition_complete", { subtask_count: 2 }),
      decision("00:00:10", "worker_spawned", { bead_id: "bd-1" }),
      decision("00:00:20", "worker_spawned", { bead_id: "bd-2" }),
      success("00:10:00", "bd-1"),
      success("00:10:10", "bd-2"),
      decision("00:10:20", "review_completed", { bead_id: "bd-1" }),
      decision("00:10:30", "review_completed", { bead_id: "bd-2" }),
    ]);

    // Such a session is expected to score close to 1.0.
    expect(graded.score).toBeGreaterThan(0.95);
    expect(graded.message).toContain("Overall");
  });

  it("includes breakdown in message", () => {
    const graded = grade([
      decision("00:00:00", "strategy_selected", { strategy: "file-based" }),
    ]);

    // Every component label must appear in the message.
    for (const label of ["Violations:", "Spawn:", "Review:", "Speed:"]) {
      expect(graded.message).toContain(label);
    }
  });
});