opencode-swarm-plugin 0.39.1 → 0.40.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/.hive/issues.jsonl +16 -0
  2. package/CHANGELOG.md +52 -0
  3. package/bin/swarm.test.ts +406 -0
  4. package/bin/swarm.ts +303 -0
  5. package/dist/compaction-hook.d.ts +8 -1
  6. package/dist/compaction-hook.d.ts.map +1 -1
  7. package/dist/compaction-observability.d.ts +173 -0
  8. package/dist/compaction-observability.d.ts.map +1 -0
  9. package/dist/eval-capture.d.ts +93 -0
  10. package/dist/eval-capture.d.ts.map +1 -1
  11. package/dist/hive.d.ts.map +1 -1
  12. package/dist/index.d.ts +36 -1
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +15670 -580
  15. package/dist/plugin.js +15623 -557
  16. package/dist/schemas/task.d.ts +3 -3
  17. package/evals/README.md +113 -0
  18. package/evals/scorers/coordinator-discipline.evalite-test.ts +163 -0
  19. package/evals/scorers/coordinator-discipline.ts +335 -2
  20. package/evals/scorers/index.test.ts +146 -0
  21. package/evals/scorers/index.ts +104 -0
  22. package/evals/swarm-decomposition.eval.ts +9 -2
  23. package/examples/commands/swarm.md +291 -21
  24. package/package.json +1 -1
  25. package/src/compaction-hook.ts +258 -110
  26. package/src/compaction-observability.integration.test.ts +139 -0
  27. package/src/compaction-observability.test.ts +187 -0
  28. package/src/compaction-observability.ts +324 -0
  29. package/src/eval-capture.test.ts +204 -1
  30. package/src/eval-capture.ts +194 -2
  31. package/src/eval-runner.test.ts +96 -0
  32. package/src/eval-runner.ts +356 -0
  33. package/src/hive.ts +34 -0
  34. package/src/index.ts +54 -1
  35. package/src/memory.test.ts +110 -0
  36. package/src/memory.ts +34 -0
  37. package/dist/beads.d.ts +0 -386
  38. package/dist/beads.d.ts.map +0 -1
  39. package/dist/schemas/bead-events.d.ts +0 -698
  40. package/dist/schemas/bead-events.d.ts.map +0 -1
  41. package/dist/schemas/bead.d.ts +0 -255
  42. package/dist/schemas/bead.d.ts.map +0 -1
@@ -13,7 +13,7 @@ import {
13
13
  captureCoordinatorEvent,
14
14
  captureCompactionEvent,
15
15
  saveSession,
16
- } from "./eval-capture.js";
16
+ } from "./eval-capture.ts";
17
17
 
18
18
  describe("CoordinatorEvent schemas", () => {
19
19
  describe("DECISION events", () => {
@@ -82,6 +82,110 @@ describe("CoordinatorEvent schemas", () => {
82
82
 
83
83
  expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
84
84
  });
85
+
// Schema check: a DECISION event with decision_type "researcher_spawned"
// must be accepted by CoordinatorEventSchema.
test("validates researcher_spawned event", () => {
  const payload = {
    researcher_id: "BlueLake",
    research_topic: "Next.js Cache Components",
    tools_used: ["pdf-brain", "context7"],
  };
  const researcherEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "researcher_spawned",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(researcherEvent);
  expect(parseEvent).not.toThrow();
});
102
+
// Schema check: a DECISION event with decision_type "skill_loaded"
// must be accepted by CoordinatorEventSchema.
test("validates skill_loaded event", () => {
  const payload = {
    skill_name: "testing-patterns",
    context: "Adding tests to legacy code",
  };
  const skillEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "skill_loaded",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(skillEvent);
  expect(parseEvent).not.toThrow();
});
118
+
// Schema check: a DECISION event with decision_type "inbox_checked"
// must be accepted by CoordinatorEventSchema.
test("validates inbox_checked event", () => {
  const payload = {
    message_count: 3,
    urgent_count: 1,
  };
  const inboxEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "inbox_checked",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(inboxEvent);
  expect(parseEvent).not.toThrow();
});
134
+
// Schema check: a DECISION event with decision_type "blocker_resolved"
// must be accepted by CoordinatorEventSchema.
test("validates blocker_resolved event", () => {
  const payload = {
    worker_id: "GreenStorm",
    subtask_id: "bd-123.2",
    blocker_type: "dependency",
    resolution: "Unblocked via coordinator action",
  };
  const resolvedEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "blocker_resolved",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(resolvedEvent);
  expect(parseEvent).not.toThrow();
});
152
+
// Schema check: a DECISION event with decision_type "scope_change_approved"
// must be accepted by CoordinatorEventSchema.
test("validates scope_change_approved event", () => {
  const payload = {
    worker_id: "BlueLake",
    subtask_id: "bd-123.1",
    original_scope: "Add auth service",
    new_scope: "Add auth service + email validation",
    estimated_time_add: 900000, // 15 min in ms
  };
  const approvedEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "scope_change_approved",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(approvedEvent);
  expect(parseEvent).not.toThrow();
});
171
+
// Schema check: a DECISION event with decision_type "scope_change_rejected"
// must be accepted by CoordinatorEventSchema.
test("validates scope_change_rejected event", () => {
  const payload = {
    worker_id: "BlueLake",
    subtask_id: "bd-123.1",
    requested_scope: "Add auth service + OAuth + SSO",
    rejection_reason: "Too large for single subtask",
  };
  const rejectedEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "scope_change_rejected",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(rejectedEvent);
  expect(parseEvent).not.toThrow();
});
85
189
  });
86
190
 
87
191
  describe("VIOLATION events", () => {
@@ -215,6 +319,25 @@ describe("CoordinatorEvent schemas", () => {
215
319
 
216
320
  expect(() => CoordinatorEventSchema.parse(event)).not.toThrow();
217
321
  });
322
+
// Schema check: an OUTCOME event with outcome_type "blocker_detected"
// must be accepted by CoordinatorEventSchema.
test("validates blocker_detected event", () => {
  // Event timestamp is read first, then the payload's reported_at.
  const timestamp = new Date().toISOString();
  const payload = {
    worker_id: "GreenStorm",
    subtask_id: "bd-123.2",
    blocker_type: "dependency",
    blocker_description: "Waiting for database schema from bd-123.1",
    reported_at: new Date().toISOString(),
  };
  const blockedEvent: CoordinatorEvent = {
    session_id: "test-session",
    epic_id: "bd-123",
    timestamp,
    event_type: "OUTCOME",
    outcome_type: "blocker_detected",
    payload,
  };

  const parseEvent = () => CoordinatorEventSchema.parse(blockedEvent);
  expect(parseEvent).not.toThrow();
});
218
341
  });
219
342
  });
220
343
 
@@ -810,3 +933,83 @@ describe("captureCompactionEvent", () => {
810
933
  expect(capturedEvents[4].compaction_type).toBe("tool_call_tracked");
811
934
  });
812
935
  });
936
+
/**
 * Verifies that a "decomposition_complete" DECISION event passed to
 * captureCoordinatorEvent ends up as a single JSONL line in the session file
 * under ~/.config/swarm-tools/sessions/.
 *
 * hive_create_epic itself is not invoked (that would need the full plugin
 * infrastructure); the test reproduces the event it is expected to emit.
 */
describe("hive_create_epic integration - decomposition_complete event", () => {
  let sessionDir: string;
  let sessionId: string;

  // Single source of truth for the session file location, used by both the
  // assertions and the cleanup hook.
  const sessionFilePath = (): string =>
    path.join(sessionDir, `${sessionId}.jsonl`);

  beforeEach(() => {
    sessionDir = path.join(os.homedir(), ".config", "swarm-tools", "sessions");
    sessionId = `test-epic-${Date.now()}`;
  });

  afterEach(() => {
    // Clean up test session file
    const sessionPath = sessionFilePath();
    if (fs.existsSync(sessionPath)) {
      fs.unlinkSync(sessionPath);
    }
  });

  test("captures decomposition_complete event after hive_create_epic succeeds", () => {
    // GIVEN: We simulate what hive_create_epic does after epic creation
    const epicId = `test-epic-${Date.now()}`;
    const subtasks = [
      { title: "Subtask 1", files: ["src/a.ts"] },
      { title: "Subtask 2", files: ["src/b.ts", "src/c.ts"] },
      { title: "Subtask 3", files: ["src/d.ts"] },
    ];

    // Build files_per_subtask map (same logic as hive.ts): subtask index ->
    // file list, skipping subtasks that have no files.
    const filesPerSubtask: Record<number, string[]> = {};
    subtasks.forEach((subtask, index) => {
      if (subtask.files && subtask.files.length > 0) {
        filesPerSubtask[index] = subtask.files;
      }
    });

    // WHEN: decomposition_complete event is captured
    captureCoordinatorEvent({
      session_id: sessionId,
      epic_id: epicId,
      timestamp: new Date().toISOString(),
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: {
        subtask_count: subtasks.length,
        strategy_used: "file-based",
        files_per_subtask: filesPerSubtask,
        epic_title: "Test Epic for Event Capture",
        task: "Original task description",
      },
    });

    // THEN: Event should be written to session file
    const sessionPath = sessionFilePath();
    expect(fs.existsSync(sessionPath)).toBe(true);

    const content = fs.readFileSync(sessionPath, "utf-8");
    const lines = content.trim().split("\n").filter(Boolean);
    expect(lines.length).toBe(1);

    // Verify event structure
    const event = JSON.parse(lines[0]);
    expect(event.session_id).toBe(sessionId);
    expect(event.epic_id).toBe(epicId);
    expect(event.event_type).toBe("DECISION");
    expect(event.decision_type).toBe("decomposition_complete");
    expect(event.payload.subtask_count).toBe(3);
    expect(event.payload.strategy_used).toBe("file-based");
    expect(event.payload.files_per_subtask).toEqual({
      0: ["src/a.ts"],
      1: ["src/b.ts", "src/c.ts"],
      2: ["src/d.ts"],
    });
    expect(event.payload.epic_title).toBe("Test Epic for Event Capture");
    expect(event.payload.task).toBe("Original task description");
  });
});
@@ -13,9 +13,9 @@
13
13
  * 6. Session capture: full coordinator session to ~/.config/swarm-tools/sessions/
14
14
  *
15
15
  * Event types:
16
- * - DECISION: strategy_selected, worker_spawned, review_completed, decomposition_complete
16
+ * - DECISION: strategy_selected, worker_spawned, review_completed, decomposition_complete, researcher_spawned, skill_loaded, inbox_checked, blocker_resolved, scope_change_approved, scope_change_rejected
17
17
  * - VIOLATION: coordinator_edited_file, coordinator_ran_tests, coordinator_reserved_files, no_worker_spawned
18
- * - OUTCOME: subtask_success, subtask_retry, subtask_failed, epic_complete
18
+ * - OUTCOME: subtask_success, subtask_retry, subtask_failed, epic_complete, blocker_detected
19
19
  * - COMPACTION: detection_complete, prompt_generated, context_injected, resumption_started, tool_call_tracked
20
20
  *
21
21
  * @module eval-capture
@@ -143,6 +143,12 @@ export const CoordinatorEventSchema = z.discriminatedUnion("event_type", [
143
143
  "worker_spawned",
144
144
  "review_completed",
145
145
  "decomposition_complete",
146
+ "researcher_spawned",
147
+ "skill_loaded",
148
+ "inbox_checked",
149
+ "blocker_resolved",
150
+ "scope_change_approved",
151
+ "scope_change_rejected",
146
152
  ]),
147
153
  payload: z.any(),
148
154
  }),
@@ -171,6 +177,7 @@ export const CoordinatorEventSchema = z.discriminatedUnion("event_type", [
171
177
  "subtask_retry",
172
178
  "subtask_failed",
173
179
  "epic_complete",
180
+ "blocker_detected",
174
181
  ]),
175
182
  payload: z.any(),
176
183
  }),
@@ -687,6 +694,191 @@ export function captureCompactionEvent(params: {
687
694
  captureCoordinatorEvent(event);
688
695
  }
689
696
 
/**
 * Capture a researcher spawned event
 *
 * Called when coordinator spawns a swarm-researcher to handle unfamiliar
 * technology or gather documentation before decomposition. Builds a DECISION
 * event with decision_type "researcher_spawned", stamped with the current
 * time, and forwards it to captureCoordinatorEvent.
 *
 * @param params - Event details.
 *   - `session_id`: session the event is recorded under.
 *   - `epic_id`: epic the researcher was spawned for.
 *   - `researcher_id`: identifier of the spawned researcher.
 *   - `research_topic`: what the researcher was asked to look into.
 *   - `tools_used`: optional list of tool names; recorded as `[]` when omitted.
 */
export function captureResearcherSpawned(params: {
  session_id: string;
  epic_id: string;
  researcher_id: string;
  research_topic: string;
  tools_used?: string[];
}): void {
  const event: CoordinatorEvent = {
    session_id: params.session_id,
    epic_id: params.epic_id,
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "researcher_spawned",
    payload: {
      researcher_id: params.researcher_id,
      research_topic: params.research_topic,
      // Default only when the caller left the field out (null/undefined).
      tools_used: params.tools_used ?? [],
    },
  };

  captureCoordinatorEvent(event);
}
725
+
/**
 * Capture a skill loaded event
 *
 * Called when coordinator loads domain knowledge via skills_use(). Emits a
 * DECISION event with decision_type "skill_loaded" through
 * captureCoordinatorEvent.
 *
 * @param params - Session/epic identifiers, the skill name, and an optional
 *   free-form context string (passed through as-is, possibly undefined).
 */
export function captureSkillLoaded(params: {
  session_id: string;
  epic_id: string;
  skill_name: string;
  context?: string;
}): void {
  const { session_id, epic_id, skill_name, context } = params;

  const skillEvent: CoordinatorEvent = {
    session_id,
    epic_id,
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "skill_loaded",
    payload: { skill_name, context },
  };

  captureCoordinatorEvent(skillEvent);
}
751
+
/**
 * Capture an inbox checked event
 *
 * Called when coordinator checks swarmmail inbox for worker messages; the
 * recorded events track monitoring frequency and responsiveness. Emits a
 * DECISION event with decision_type "inbox_checked" through
 * captureCoordinatorEvent.
 *
 * @param params - Session/epic identifiers plus the total and urgent message
 *   counts seen during this check.
 */
export function captureInboxChecked(params: {
  session_id: string;
  epic_id: string;
  message_count: number;
  urgent_count: number;
}): void {
  const { session_id, epic_id, message_count, urgent_count } = params;

  const inboxEvent: CoordinatorEvent = {
    session_id,
    epic_id,
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "inbox_checked",
    payload: { message_count, urgent_count },
  };

  captureCoordinatorEvent(inboxEvent);
}
778
+
/**
 * Capture a blocker resolved event
 *
 * Called when coordinator successfully unblocks a worker. Emits a DECISION
 * event with decision_type "blocker_resolved" through captureCoordinatorEvent.
 *
 * @param params - Session/epic identifiers, the worker and subtask that were
 *   blocked, the blocker category, and a description of the resolution.
 */
export function captureBlockerResolved(params: {
  session_id: string;
  epic_id: string;
  worker_id: string;
  subtask_id: string;
  blocker_type: string;
  resolution: string;
}): void {
  const { session_id, epic_id, worker_id, subtask_id, blocker_type, resolution } =
    params;

  const resolvedEvent: CoordinatorEvent = {
    session_id,
    epic_id,
    timestamp: new Date().toISOString(),
    event_type: "DECISION",
    decision_type: "blocker_resolved",
    payload: { worker_id, subtask_id, blocker_type, resolution },
  };

  captureCoordinatorEvent(resolvedEvent);
}
808
+
/**
 * Capture a scope change decision event
 *
 * Called when coordinator approves or rejects a worker's scope expansion
 * request. The `approved` flag selects both the decision_type
 * ("scope_change_approved" / "scope_change_rejected") and which optional
 * fields are copied into the payload:
 *   - approved: original_scope, new_scope, estimated_time_add
 *   - rejected: requested_scope, rejection_reason
 * Fields belonging to the other branch are ignored.
 *
 * @param params - Session/epic identifiers, worker and subtask, the decision
 *   flag, and the optional branch-specific details listed above.
 */
export function captureScopeChangeDecision(params: {
  session_id: string;
  epic_id: string;
  worker_id: string;
  subtask_id: string;
  approved: boolean;
  original_scope?: string;
  new_scope?: string;
  requested_scope?: string;
  rejection_reason?: string;
  estimated_time_add?: number;
}): void {
  const { session_id, epic_id, worker_id, subtask_id } = params;
  const timestamp = new Date().toISOString();

  if (params.approved) {
    const approvedEvent: CoordinatorEvent = {
      session_id,
      epic_id,
      timestamp,
      event_type: "DECISION",
      decision_type: "scope_change_approved",
      payload: {
        worker_id,
        subtask_id,
        original_scope: params.original_scope,
        new_scope: params.new_scope,
        estimated_time_add: params.estimated_time_add,
      },
    };
    captureCoordinatorEvent(approvedEvent);
    return;
  }

  const rejectedEvent: CoordinatorEvent = {
    session_id,
    epic_id,
    timestamp,
    event_type: "DECISION",
    decision_type: "scope_change_rejected",
    payload: {
      worker_id,
      subtask_id,
      requested_scope: params.requested_scope,
      rejection_reason: params.rejection_reason,
    },
  };
  captureCoordinatorEvent(rejectedEvent);
}
850
+
/**
 * Capture a blocker detected event
 *
 * Called when a worker reports being blocked (OUTCOME event, not DECISION).
 * Emits an OUTCOME event with outcome_type "blocker_detected" through
 * captureCoordinatorEvent.
 *
 * The event `timestamp` and the payload's `reported_at` are taken from a
 * single clock read so the two fields can never disagree.
 *
 * @param params - Session/epic identifiers, the blocked worker and subtask,
 *   the blocker category, and a human-readable description of the blocker.
 */
export function captureBlockerDetected(params: {
  session_id: string;
  epic_id: string;
  worker_id: string;
  subtask_id: string;
  blocker_type: string;
  blocker_description: string;
}): void {
  // One clock read shared by both time fields.
  const now = new Date().toISOString();

  const event: CoordinatorEvent = {
    session_id: params.session_id,
    epic_id: params.epic_id,
    timestamp: now,
    event_type: "OUTCOME",
    outcome_type: "blocker_detected",
    payload: {
      worker_id: params.worker_id,
      subtask_id: params.subtask_id,
      blocker_type: params.blocker_type,
      blocker_description: params.blocker_description,
      reported_at: now,
    },
  };

  captureCoordinatorEvent(event);
}
881
+
690
882
  /**
691
883
  * Read all events from a session file
692
884
  */
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Tests for eval-runner - Programmatic evalite execution
3
+ *
4
+ * TDD: These tests MUST fail initially, then pass after implementation.
5
+ */
6
+
7
+ import { describe, test, expect, beforeAll } from "bun:test";
8
+ import { runEvals } from "./eval-runner";
9
+ import path from "node:path";
10
+
11
+ // Use project root for all tests
12
+ const PROJECT_ROOT = path.resolve(import.meta.dir, "..");
13
+
/**
 * Behavioural tests for runEvals: the shape of the returned summary, suite
 * filtering, score thresholds, and failure handling for a bad working
 * directory or a filter that matches nothing.
 */
describe("runEvals", () => {
  // Per-test timeouts in milliseconds.
  const FULL_RUN_TIMEOUT = 60000; // 60s timeout for full eval run
  const SINGLE_SUITE_TIMEOUT = 30000;
  const QUICK_TIMEOUT = 10000;

  // Runs only suites matching "example" from the project root, optionally
  // with extra options layered on top.
  const runExampleSuite = (extra: { scoreThreshold?: number } = {}) =>
    runEvals({ cwd: PROJECT_ROOT, suiteFilter: "example", ...extra });

  test(
    "runs all evals when no suite filter provided",
    async () => {
      const summary = await runEvals({ cwd: PROJECT_ROOT });

      // Even if some evals fail, we should get results
      expect(typeof summary.success).toBe("boolean");
      for (const field of ["totalSuites", "totalEvals", "averageScore"] as const) {
        expect(typeof summary[field]).toBe("number");
      }
      expect(Array.isArray(summary.suites)).toBe(true);

      // Should have at least the example.eval.ts suite
      expect(summary.totalSuites).toBeGreaterThan(0);
      expect(summary.suites.length).toBeGreaterThan(0);
    },
    FULL_RUN_TIMEOUT,
  );

  test(
    "filters evals by suite name",
    async () => {
      const summary = await runExampleSuite();

      expect(summary.success).toBe(true);
      // All suite filepaths should contain "example"
      summary.suites.forEach(({ filepath }) => {
        expect(filepath.toLowerCase()).toContain("example");
      });
    },
    SINGLE_SUITE_TIMEOUT,
  );

  test(
    "respects score threshold",
    async () => {
      // "example" is a known good eval; a threshold of 0 is very low and
      // should pass.
      const summary = await runExampleSuite({ scoreThreshold: 0 });

      expect(summary.success).toBe(true);
      expect(summary.averageScore).toBeGreaterThanOrEqual(0);
    },
    SINGLE_SUITE_TIMEOUT,
  );

  test(
    "returns structured suite results with scores",
    async () => {
      const { suites } = await runExampleSuite();

      expect(suites.length).toBeGreaterThan(0);

      const [firstSuite] = suites;
      expect(firstSuite).toMatchObject({
        name: expect.any(String),
        filepath: expect.any(String),
        status: expect.stringMatching(/^(success|fail|running)$/),
        duration: expect.any(Number),
        averageScore: expect.any(Number),
        evalCount: expect.any(Number),
      });
    },
    SINGLE_SUITE_TIMEOUT,
  );

  test(
    "handles errors gracefully",
    async () => {
      const summary = await runEvals({ cwd: "/nonexistent/path" });

      expect(summary.success).toBe(false);
      expect(summary.error).toBeDefined();
      expect(summary.suites).toEqual([]);
    },
    QUICK_TIMEOUT,
  );

  test(
    "returns empty results when no evals match filter",
    async () => {
      const summary = await runEvals({
        cwd: PROJECT_ROOT,
        suiteFilter: "nonexistent-eval-name-xyz",
      });

      // Should succeed but with no suites
      expect(summary.success).toBe(true);
      expect(summary.totalSuites).toBe(0);
      expect(summary.suites).toEqual([]);
    },
    QUICK_TIMEOUT,
  );
});