opencode-swarm-plugin 0.35.0 → 0.36.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/.hive/issues.jsonl +4 -4
  2. package/.hive/memories.jsonl +274 -1
  3. package/.turbo/turbo-build.log +4 -4
  4. package/.turbo/turbo-test.log +307 -307
  5. package/CHANGELOG.md +133 -0
  6. package/bin/swarm.ts +234 -179
  7. package/dist/compaction-hook.d.ts +54 -4
  8. package/dist/compaction-hook.d.ts.map +1 -1
  9. package/dist/eval-capture.d.ts +122 -17
  10. package/dist/eval-capture.d.ts.map +1 -1
  11. package/dist/index.d.ts +1 -7
  12. package/dist/index.d.ts.map +1 -1
  13. package/dist/index.js +1278 -619
  14. package/dist/planning-guardrails.d.ts +121 -0
  15. package/dist/planning-guardrails.d.ts.map +1 -1
  16. package/dist/plugin.d.ts +9 -9
  17. package/dist/plugin.d.ts.map +1 -1
  18. package/dist/plugin.js +1283 -329
  19. package/dist/schemas/task.d.ts +0 -1
  20. package/dist/schemas/task.d.ts.map +1 -1
  21. package/dist/swarm-decompose.d.ts +0 -8
  22. package/dist/swarm-decompose.d.ts.map +1 -1
  23. package/dist/swarm-orchestrate.d.ts.map +1 -1
  24. package/dist/swarm-prompts.d.ts +0 -4
  25. package/dist/swarm-prompts.d.ts.map +1 -1
  26. package/dist/swarm-review.d.ts.map +1 -1
  27. package/dist/swarm.d.ts +0 -6
  28. package/dist/swarm.d.ts.map +1 -1
  29. package/evals/README.md +38 -0
  30. package/evals/coordinator-session.eval.ts +154 -0
  31. package/evals/fixtures/coordinator-sessions.ts +328 -0
  32. package/evals/lib/data-loader.ts +69 -0
  33. package/evals/scorers/coordinator-discipline.evalite-test.ts +536 -0
  34. package/evals/scorers/coordinator-discipline.ts +315 -0
  35. package/evals/scorers/index.ts +12 -0
  36. package/examples/plugin-wrapper-template.ts +747 -34
  37. package/package.json +2 -2
  38. package/src/compaction-hook.test.ts +234 -281
  39. package/src/compaction-hook.ts +221 -63
  40. package/src/eval-capture.test.ts +390 -0
  41. package/src/eval-capture.ts +168 -10
  42. package/src/index.ts +89 -2
  43. package/src/learning.integration.test.ts +0 -2
  44. package/src/planning-guardrails.test.ts +387 -2
  45. package/src/planning-guardrails.ts +289 -0
  46. package/src/plugin.ts +10 -10
  47. package/src/schemas/task.ts +0 -1
  48. package/src/swarm-decompose.ts +21 -8
  49. package/src/swarm-orchestrate.ts +44 -0
  50. package/src/swarm-prompts.ts +20 -0
  51. package/src/swarm-review.ts +41 -0
  52. package/src/swarm.integration.test.ts +0 -40
@@ -0,0 +1,328 @@
1
+ /**
2
+ * Coordinator Session Test Fixtures
3
+ *
4
+ * Synthetic coordinator sessions for testing coordinator-discipline scorers.
5
+ * Each fixture demonstrates good or bad coordinator behavior.
6
+ */
7
+
8
+ import type { CoordinatorSession } from "../../src/eval-capture.js";
9
+
// Identifiers shared by the session record and every event of the "perfect" fixture.
const perfectIds = {
  session_id: "test-session-perfect",
  epic_id: "test-epic-perfect",
};

/**
 * PERFECT COORDINATOR
 *
 * Synthetic session in which the coordinator only delegates and reviews:
 * - contains no VIOLATION events
 * - 3 `worker_spawned` decisions for a 3-subtask decomposition
 * - each `subtask_success` outcome is followed by a `review_completed` decision
 * - first spawn is timestamped 30s after `decomposition_complete`
 */
export const perfectCoordinator: CoordinatorSession = {
  ...perfectIds,
  start_time: "2025-01-01T10:00:00.000Z",
  end_time: "2025-01-01T10:30:00.000Z",
  events: [
    // Step 1: decomposition finished with three subtasks
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:00:00.000Z",
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: { subtask_count: 3 },
    },
    // Step 2: first worker spawned, 30s after decomposition
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:00:30.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "BlueLake", bead_id: "test-epic-perfect.1" },
    },
    // Step 3: second worker spawned
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:01:00.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "GreenMountain", bead_id: "test-epic-perfect.2" },
    },
    // Step 4: third worker spawned
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:01:30.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "RedForest", bead_id: "test-epic-perfect.3" },
    },
    // Step 5: first subtask succeeds
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:10:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-perfect.1", worker: "BlueLake" },
    },
    // Step 6: first subtask reviewed and approved
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:11:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-perfect.1", approved: true, issues: [] },
    },
    // Step 7: second subtask succeeds
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:15:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-perfect.2", worker: "GreenMountain" },
    },
    // Step 8: second subtask reviewed and approved
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:16:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-perfect.2", approved: true, issues: [] },
    },
    // Step 9: third subtask succeeds
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:20:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-perfect.3", worker: "RedForest" },
    },
    // Step 10: third subtask reviewed and approved
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:21:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-perfect.3", approved: true, issues: [] },
    },
    // Step 11: epic closed out
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:30:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "epic_complete",
      payload: { epic_id: "test-epic-perfect", total_subtasks: 3 },
    },
  ],
};
137
+
// Identifiers shared by the session record and every event of the "bad" fixture.
const badIds = {
  session_id: "test-session-bad",
  epic_id: "test-epic-bad",
};

/**
 * BAD COORDINATOR - Multiple Violations
 *
 * Synthetic session in which the coordinator does most of the work itself:
 * - 5 VIOLATION events: edited a file, ran tests, reserved files, and two
 *   `no_worker_spawned` entries (subtasks 2 and 3)
 * - only 1 `worker_spawned` decision for a 3-subtask decomposition
 * - no `review_completed` decisions at all
 * - first spawn is timestamped 10 minutes after `decomposition_complete`
 */
export const badCoordinator: CoordinatorSession = {
  ...badIds,
  start_time: "2025-01-01T10:00:00.000Z",
  end_time: "2025-01-01T11:00:00.000Z",
  events: [
    // Step 1: decomposition finished with three subtasks
    {
      ...badIds,
      timestamp: "2025-01-01T10:00:00.000Z",
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: { subtask_count: 3 },
    },
    // Step 2: VIOLATION - coordinator edited a file itself
    {
      ...badIds,
      timestamp: "2025-01-01T10:01:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_edited_file",
      payload: { file: "src/auth.ts", reason: "should spawn worker instead" },
    },
    // Step 3: VIOLATION - coordinator ran the test suite
    {
      ...badIds,
      timestamp: "2025-01-01T10:02:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_ran_tests",
      payload: { command: "bun test", reason: "workers do verification" },
    },
    // Step 4: VIOLATION - coordinator reserved files
    {
      ...badIds,
      timestamp: "2025-01-01T10:03:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_reserved_files",
      payload: { paths: ["src/**"], reason: "only workers reserve" },
    },
    // Step 5: the only spawn, 10 minutes after decomposition
    {
      ...badIds,
      timestamp: "2025-01-01T10:10:00.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "BlueLake", bead_id: "test-epic-bad.1" },
    },
    // Step 6: worker finishes; deliberately no review event follows
    {
      ...badIds,
      timestamp: "2025-01-01T10:20:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-bad.1", worker: "BlueLake" },
    },
    // Step 7: VIOLATION - subtask 2 never got a worker
    {
      ...badIds,
      timestamp: "2025-01-01T10:30:00.000Z",
      event_type: "VIOLATION",
      violation_type: "no_worker_spawned",
      payload: {
        bead_id: "test-epic-bad.2",
        reason: "coordinator did work directly",
      },
    },
    // Step 8: VIOLATION - subtask 3 never got a worker
    {
      ...badIds,
      timestamp: "2025-01-01T10:40:00.000Z",
      event_type: "VIOLATION",
      violation_type: "no_worker_spawned",
      payload: {
        bead_id: "test-epic-bad.3",
        reason: "coordinator did work directly",
      },
    },
  ],
};
226
+
// Identifiers shared by the session record and every event of the "decent" fixture.
const decentIds = {
  session_id: "test-session-decent",
  epic_id: "test-epic-decent",
};

/**
 * DECENT COORDINATOR - Some Issues
 *
 * Synthetic session that is mostly well-behaved:
 * - 1 VIOLATION event (`coordinator_ran_tests`)
 * - 2 `worker_spawned` decisions for a 2-subtask decomposition
 * - 1 `review_completed` decision for 2 `subtask_success` outcomes
 * - first spawn is timestamped 45s after `decomposition_complete`
 */
export const decentCoordinator: CoordinatorSession = {
  ...decentIds,
  start_time: "2025-01-01T10:00:00.000Z",
  end_time: "2025-01-01T10:25:00.000Z",
  events: [
    // Step 1: decomposition finished with two subtasks
    {
      ...decentIds,
      timestamp: "2025-01-01T10:00:00.000Z",
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: { subtask_count: 2 },
    },
    // Step 2: first worker spawned, 45s after decomposition
    {
      ...decentIds,
      timestamp: "2025-01-01T10:00:45.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "BlueLake", bead_id: "test-epic-decent.1" },
    },
    // Step 3: second worker spawned
    {
      ...decentIds,
      timestamp: "2025-01-01T10:01:00.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "GreenMountain", bead_id: "test-epic-decent.2" },
    },
    // Step 4: first subtask succeeds
    {
      ...decentIds,
      timestamp: "2025-01-01T10:10:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-decent.1", worker: "BlueLake" },
    },
    // Step 5: first subtask reviewed and approved
    {
      ...decentIds,
      timestamp: "2025-01-01T10:11:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-decent.1", approved: true, issues: [] },
    },
    // Step 6: VIOLATION - the single slip-up, coordinator ran tests
    {
      ...decentIds,
      timestamp: "2025-01-01T10:15:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_ran_tests",
      payload: { command: "bun test", reason: "should let worker verify" },
    },
    // Step 7: second subtask succeeds; deliberately no review event follows
    {
      ...decentIds,
      timestamp: "2025-01-01T10:20:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-decent.2", worker: "GreenMountain" },
    },
    // Step 8: epic closed out
    {
      ...decentIds,
      timestamp: "2025-01-01T10:25:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "epic_complete",
      payload: { epic_id: "test-epic-decent", total_subtasks: 2 },
    },
  ],
};
320
+
/**
 * Every fixture session defined in this module, in the order
 * perfect, bad, decent.
 */
export const coordinatorSessionFixtures = [perfectCoordinator, badCoordinator, decentCoordinator];
@@ -4,6 +4,7 @@
4
4
  * Loads real decomposition outcomes from the eval_records table
5
5
  * for use in Evalite evals.
6
6
  */
7
+ import * as fs from "node:fs";
7
8
  import {
8
9
  getEvalRecords,
9
10
  getEvalStats,
@@ -109,3 +110,71 @@ export async function getEvalDataSummary(
109
110
  hasEnoughData: stats.totalRecords >= 5,
110
111
  };
111
112
  }
113
+
/**
 * Load captured coordinator sessions from the session directory.
 *
 * Lists the `.jsonl` files in the directory returned by `getSessionDir()`,
 * reads each session's events with `readSessionEvents()`, takes the epic id
 * from the first event, and builds the session object via `saveSession()`.
 * NOTE(review): `saveSession` presumably also persists the session it
 * returns — confirm that side effect is intended in a loader.
 *
 * Session ids are visited in sorted order so that `limit` selects the same
 * sessions on every platform. Sessions that have no events, have no
 * `epic_id` on their first event, or throw while loading are skipped
 * (a warning is logged for the latter).
 *
 * @param options - Filter options
 * @param options.sessionIds - When provided, only sessions whose id is in
 *   this list are loaded (an empty list loads nothing)
 * @param options.limit - Maximum number of sessions to return; omitted, `0`,
 *   or any non-positive value means no limit
 * @returns Array of `{ session }` wrappers; empty when the session directory
 *   does not exist
 */
export async function loadCapturedSessions(options?: {
  sessionIds?: string[];
  limit?: number;
}): Promise<
  Array<{ session: import("../../src/eval-capture.js").CoordinatorSession }>
> {
  const { getSessionDir, readSessionEvents, saveSession } = await import(
    "../../src/eval-capture.js"
  );
  const sessionDir = getSessionDir();

  // If session dir doesn't exist, return empty
  if (!fs.existsSync(sessionDir)) {
    return [];
  }

  const extension = ".jsonl";
  const wantedIds = options?.sessionIds
    ? new Set(options.sessionIds)
    : undefined;

  // Strip only the trailing extension: String.replace would remove the FIRST
  // ".jsonl" occurrence and corrupt ids that contain it mid-name.
  const sessionIds = fs
    .readdirSync(sessionDir)
    .filter((f) => f.endsWith(extension))
    .map((f) => f.slice(0, -extension.length))
    .filter((id) => wantedIds === undefined || wantedIds.has(id))
    .sort();

  // Explicit comparison instead of truthiness so the "no limit" cases are visible.
  const requestedLimit = options?.limit;
  const maxSessions =
    requestedLimit !== undefined && requestedLimit > 0
      ? requestedLimit
      : Number.POSITIVE_INFINITY;

  const sessions: Array<{
    session: import("../../src/eval-capture.js").CoordinatorSession;
  }> = [];

  for (const sessionId of sessionIds) {
    try {
      const events = readSessionEvents(sessionId);
      if (events.length === 0) continue;

      // Find epic_id from first event
      const epicId = events[0]?.epic_id;
      if (!epicId) continue;

      const session = saveSession({ session_id: sessionId, epic_id: epicId });
      if (session) {
        sessions.push({ session });
      }
    } catch (error) {
      // Skip invalid sessions, but leave a trace of what was dropped
      console.warn(`Failed to load session ${sessionId}:`, error);
    }

    // Stop as soon as the requested number of sessions has been collected
    if (sessions.length >= maxSessions) {
      break;
    }
  }

  return sessions;
}