opencode-swarm-plugin 0.37.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/.env +2 -0
  2. package/.hive/eval-results.json +26 -0
  3. package/.hive/issues.jsonl +20 -5
  4. package/.hive/memories.jsonl +35 -1
  5. package/.opencode/eval-history.jsonl +12 -0
  6. package/.turbo/turbo-build.log +4 -4
  7. package/.turbo/turbo-test.log +319 -319
  8. package/CHANGELOG.md +258 -0
  9. package/README.md +50 -0
  10. package/bin/swarm.test.ts +475 -0
  11. package/bin/swarm.ts +385 -208
  12. package/dist/compaction-hook.d.ts +1 -1
  13. package/dist/compaction-hook.d.ts.map +1 -1
  14. package/dist/compaction-prompt-scoring.d.ts +124 -0
  15. package/dist/compaction-prompt-scoring.d.ts.map +1 -0
  16. package/dist/eval-capture.d.ts +81 -1
  17. package/dist/eval-capture.d.ts.map +1 -1
  18. package/dist/eval-gates.d.ts +84 -0
  19. package/dist/eval-gates.d.ts.map +1 -0
  20. package/dist/eval-history.d.ts +117 -0
  21. package/dist/eval-history.d.ts.map +1 -0
  22. package/dist/eval-learning.d.ts +216 -0
  23. package/dist/eval-learning.d.ts.map +1 -0
  24. package/dist/hive.d.ts +59 -0
  25. package/dist/hive.d.ts.map +1 -1
  26. package/dist/index.d.ts +87 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +823 -131
  29. package/dist/plugin.js +655 -131
  30. package/dist/post-compaction-tracker.d.ts +133 -0
  31. package/dist/post-compaction-tracker.d.ts.map +1 -0
  32. package/dist/swarm-decompose.d.ts +30 -0
  33. package/dist/swarm-decompose.d.ts.map +1 -1
  34. package/dist/swarm-orchestrate.d.ts +23 -0
  35. package/dist/swarm-orchestrate.d.ts.map +1 -1
  36. package/dist/swarm-prompts.d.ts +25 -1
  37. package/dist/swarm-prompts.d.ts.map +1 -1
  38. package/dist/swarm.d.ts +19 -0
  39. package/dist/swarm.d.ts.map +1 -1
  40. package/evals/README.md +595 -94
  41. package/evals/compaction-prompt.eval.ts +149 -0
  42. package/evals/coordinator-behavior.eval.ts +8 -8
  43. package/evals/fixtures/compaction-prompt-cases.ts +305 -0
  44. package/evals/lib/compaction-loader.test.ts +248 -0
  45. package/evals/lib/compaction-loader.ts +320 -0
  46. package/evals/lib/data-loader.test.ts +345 -0
  47. package/evals/lib/data-loader.ts +107 -6
  48. package/evals/scorers/compaction-prompt-scorers.ts +145 -0
  49. package/evals/scorers/compaction-scorers.ts +13 -13
  50. package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
  51. package/evals/scorers/coordinator-discipline.ts +13 -13
  52. package/examples/plugin-wrapper-template.ts +177 -8
  53. package/package.json +7 -2
  54. package/scripts/migrate-unknown-sessions.ts +349 -0
  55. package/src/compaction-capture.integration.test.ts +257 -0
  56. package/src/compaction-hook.test.ts +139 -2
  57. package/src/compaction-hook.ts +113 -2
  58. package/src/compaction-prompt-scorers.test.ts +299 -0
  59. package/src/compaction-prompt-scoring.ts +298 -0
  60. package/src/eval-capture.test.ts +422 -0
  61. package/src/eval-capture.ts +94 -2
  62. package/src/eval-gates.test.ts +306 -0
  63. package/src/eval-gates.ts +218 -0
  64. package/src/eval-history.test.ts +508 -0
  65. package/src/eval-history.ts +214 -0
  66. package/src/eval-learning.test.ts +378 -0
  67. package/src/eval-learning.ts +360 -0
  68. package/src/index.ts +61 -1
  69. package/src/post-compaction-tracker.test.ts +251 -0
  70. package/src/post-compaction-tracker.ts +237 -0
  71. package/src/swarm-decompose.test.ts +40 -47
  72. package/src/swarm-decompose.ts +2 -2
  73. package/src/swarm-orchestrate.test.ts +270 -7
  74. package/src/swarm-orchestrate.ts +100 -13
  75. package/src/swarm-prompts.test.ts +121 -0
  76. package/src/swarm-prompts.ts +297 -4
  77. package/src/swarm-research.integration.test.ts +157 -0
  78. package/src/swarm-review.ts +3 -3
  79. /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/evals/scorers/coordinator-discipline.evalite-test.ts CHANGED
@@ -12,7 +12,7 @@ import {
12
12
  } from "./coordinator-discipline.js";
13
13
 
14
14
  describe("violationCount", () => {
15
- it("scores 1.0 for zero violations", () => {
15
+ it("scores 1.0 for zero violations", async () => {
16
16
  const session: CoordinatorSession = {
17
17
  session_id: "test-session",
18
18
  epic_id: "test-epic",
@@ -30,9 +30,10 @@ describe("violationCount", () => {
30
30
  ],
31
31
  };
32
32
 
33
- const result = violationCount.scorer({
33
+ const result = await violationCount({
34
34
  output: JSON.stringify(session),
35
35
  expected: {},
36
+ input: undefined,
36
37
  });
37
38
 
38
39
  expect(result.score).toBe(1.0);
package/evals/scorers/coordinator-discipline.ts CHANGED
@@ -270,14 +270,14 @@ export const timeToFirstSpawn = createScorer({
270
270
  export const overallDiscipline = createScorer({
271
271
  name: "Overall Coordinator Discipline",
272
272
  description: "Composite score for coordinator protocol adherence",
273
- scorer: ({ output, expected }) => {
273
+ scorer: async ({ output, expected, input }) => {
274
274
  try {
275
275
  // Run all scorers
276
276
  const scores = {
277
- violations: violationCount.scorer({ output, expected }),
278
- spawn: spawnEfficiency.scorer({ output, expected }),
279
- review: reviewThoroughness.scorer({ output, expected }),
280
- speed: timeToFirstSpawn.scorer({ output, expected }),
277
+ violations: await violationCount({ output, expected, input }),
278
+ spawn: await spawnEfficiency({ output, expected, input }),
279
+ review: await reviewThoroughness({ output, expected, input }),
280
+ speed: await timeToFirstSpawn({ output, expected, input }),
281
281
  };
282
282
 
283
283
  // Weighted average
@@ -289,16 +289,16 @@ export const overallDiscipline = createScorer({
289
289
  };
290
290
 
291
291
  const totalScore =
292
- scores.violations.score * weights.violations +
293
- scores.spawn.score * weights.spawn +
294
- scores.review.score * weights.review +
295
- scores.speed.score * weights.speed;
292
+ (scores.violations.score ?? 0) * weights.violations +
293
+ (scores.spawn.score ?? 0) * weights.spawn +
294
+ (scores.review.score ?? 0) * weights.review +
295
+ (scores.speed.score ?? 0) * weights.speed;
296
296
 
297
297
  const details = [
298
- `Violations: ${(scores.violations.score * 100).toFixed(0)}%`,
299
- `Spawn: ${(scores.spawn.score * 100).toFixed(0)}%`,
300
- `Review: ${(scores.review.score * 100).toFixed(0)}%`,
301
- `Speed: ${(scores.speed.score * 100).toFixed(0)}%`,
298
+ `Violations: ${((scores.violations.score ?? 0) * 100).toFixed(0)}%`,
299
+ `Spawn: ${((scores.spawn.score ?? 0) * 100).toFixed(0)}%`,
300
+ `Review: ${((scores.review.score ?? 0) * 100).toFixed(0)}%`,
301
+ `Speed: ${((scores.speed.score ?? 0) * 100).toFixed(0)}%`,
302
302
  ].join(", ");
303
303
 
304
304
  return {
package/examples/plugin-wrapper-template.ts CHANGED
@@ -65,6 +65,42 @@ function logCompaction(
65
65
  }
66
66
  }
67
67
 
68
+ /**
69
+ * Capture compaction event for evals (non-fatal dynamic import)
70
+ *
71
+ * Uses dynamic import to avoid circular dependencies and keep the plugin wrapper
72
+ * self-contained. Captures COMPACTION events to session JSONL for eval analysis.
73
+ *
74
+ * @param sessionID - Session ID
75
+ * @param epicID - Epic ID (or "unknown" if not detected)
76
+ * @param compactionType - Event type (detection_complete, prompt_generated, context_injected)
77
+ * @param payload - Event-specific data (full prompts, detection results, etc.)
78
+ */
79
+ async function captureCompaction(
80
+ sessionID: string,
81
+ epicID: string,
82
+ compactionType: "detection_complete" | "prompt_generated" | "context_injected",
83
+ payload: any,
84
+ ): Promise<void> {
85
+ try {
86
+ // Dynamic import to avoid circular deps (plugin wrapper → src → plugin wrapper)
87
+ const { captureCompactionEvent } = await import("../src/eval-capture");
88
+ captureCompactionEvent({
89
+ session_id: sessionID,
90
+ epic_id: epicID,
91
+ compaction_type: compactionType,
92
+ payload,
93
+ });
94
+ } catch (err) {
95
+ // Non-fatal - capture failures shouldn't break compaction
96
+ logCompaction("warn", "compaction_capture_failed", {
97
+ session_id: sessionID,
98
+ compaction_type: compactionType,
99
+ error: err instanceof Error ? err.message : String(err),
100
+ });
101
+ }
102
+ }
103
+
68
104
  // Module-level project directory - set during plugin initialization
69
105
  // This is CRITICAL: without it, the CLI uses process.cwd() which may be wrong
70
106
  let projectDirectory: string = process.cwd();
@@ -305,6 +341,34 @@ const hive_sync = tool({
305
341
  execute: (args, ctx) => execTool("hive_sync", args, ctx),
306
342
  });
307
343
 
344
+ const hive_cells = tool({
345
+ description: `Query cells from the hive database with flexible filtering.
346
+
347
+ USE THIS TOOL TO:
348
+ - List all open cells: hive_cells()
349
+ - Find cells by status: hive_cells({ status: "in_progress" })
350
+ - Find cells by type: hive_cells({ type: "bug" })
351
+ - Get a specific cell by partial ID: hive_cells({ id: "mjkmd" })
352
+ - Get the next ready (unblocked) cell: hive_cells({ ready: true })
353
+ - Combine filters: hive_cells({ status: "open", type: "task" })
354
+
355
+ RETURNS: Array of cells with id, title, status, priority, type, parent_id, created_at, updated_at
356
+
357
+ PREFER THIS OVER hive_query when you need to:
358
+ - See what work is available
359
+ - Check status of multiple cells
360
+ - Find cells matching criteria
361
+ - Look up a cell by partial ID`,
362
+ args: {
363
+ id: tool.schema.string().optional().describe("Partial or full cell ID to look up"),
364
+ status: tool.schema.enum(["open", "in_progress", "blocked", "closed"]).optional().describe("Filter by status"),
365
+ type: tool.schema.enum(["task", "bug", "feature", "epic", "chore"]).optional().describe("Filter by type"),
366
+ ready: tool.schema.boolean().optional().describe("If true, return only the next unblocked cell"),
367
+ limit: tool.schema.number().optional().describe("Max cells to return (default 20)"),
368
+ },
369
+ execute: (args, ctx) => execTool("hive_cells", args, ctx),
370
+ });
371
+
308
372
  const beads_link_thread = tool({
309
373
  description: "Add metadata linking bead to Agent Mail thread",
310
374
  args: {
@@ -1202,9 +1266,18 @@ ${JSON.stringify(snapshot, null, 2)}
1202
1266
 
1203
1267
  Generate a prompt following this structure:
1204
1268
 
1269
+ ┌─────────────────────────────────────────────────────────────┐
1270
+ │ │
1271
+ │ 🐝 YOU ARE THE COORDINATOR 🐝 │
1272
+ │ │
1273
+ │ NOT A WORKER. NOT AN IMPLEMENTER. │
1274
+ │ YOU ORCHESTRATE. │
1275
+ │ │
1276
+ └─────────────────────────────────────────────────────────────┘
1277
+
1205
1278
  # 🐝 Swarm Continuation - [Epic Title or "Unknown"]
1206
1279
 
1207
- You are resuming coordination of an active swarm that was interrupted by context compaction.
1280
+ **NON-NEGOTIABLE: YOU ARE THE COORDINATOR.** You resumed after context compaction.
1208
1281
 
1209
1282
  ## Epic State
1210
1283
 
@@ -1231,15 +1304,29 @@ You are resuming coordination of an active swarm that was interrupted by context
1231
1304
 
1232
1305
  [List 3-5 concrete actions with actual commands, using real IDs from the state]
1233
1306
 
1234
- ## Coordinator Reminders
1307
+ ## 🎯 COORDINATOR MANDATES (NON-NEGOTIABLE)
1308
+
1309
+ **YOU ARE THE COORDINATOR. NOT A WORKER.**
1310
+
1311
+ ### ⛔ FORBIDDEN - NEVER do these:
1312
+ - ❌ NEVER use \`edit\`, \`write\`, or \`bash\` for implementation - SPAWN A WORKER
1313
+ - ❌ NEVER fetch directly with \`repo-crawl_*\`, \`repo-autopsy_*\`, \`webfetch\`, \`fetch_fetch\` - SPAWN A RESEARCHER
1314
+ - ❌ NEVER use \`context7_*\` or \`pdf-brain_*\` directly - SPAWN A RESEARCHER
1315
+ - ❌ NEVER reserve files - Workers reserve files
1316
+
1317
+ ### ✅ ALWAYS do these:
1318
+ - ✅ ALWAYS check \`swarm_status\` and \`swarmmail_inbox\` first
1319
+ - ✅ ALWAYS use \`swarm_spawn_subtask\` for implementation work
1320
+ - ✅ ALWAYS use \`swarm_spawn_researcher\` for external data fetching
1321
+ - ✅ ALWAYS review worker output with \`swarm_review\` → \`swarm_review_feedback\`
1322
+ - ✅ ALWAYS monitor actively - Check messages every ~10 minutes
1323
+ - ✅ ALWAYS unblock aggressively - Resolve dependencies immediately
1235
1324
 
1236
- - **You are the coordinator** - Don't wait for instructions, orchestrate
1237
- - **Monitor actively** - Check messages every ~10 minutes
1238
- - **Unblock aggressively** - Resolve dependencies immediately
1239
- - **Review thoroughly** - 3-strike rule enforced
1240
- - **Ship it** - When all subtasks done, close the epic
1325
+ **If you need external data:** Use \`swarm_spawn_researcher\` with a clear research task. The researcher will fetch, summarize, and return findings.
1241
1326
 
1242
- Keep the prompt concise but actionable. Use actual data from the snapshot, not placeholders.`;
1327
+ **3-strike rule enforced:** Workers get 3 review attempts. After 3 rejections, escalate to human.
1328
+
1329
+ Keep the prompt concise but actionable. Use actual data from the snapshot, not placeholders. Include the ASCII header and ALL coordinator mandates.`;
1243
1330
 
1244
1331
  logCompaction("debug", "generate_compaction_prompt_calling_llm", {
1245
1332
  session_id: snapshot.sessionID,
@@ -1896,6 +1983,7 @@ const SwarmPlugin: Plugin = async (
1896
1983
  hive_close,
1897
1984
  hive_start,
1898
1985
  hive_ready,
1986
+ hive_cells,
1899
1987
  hive_sync,
1900
1988
  beads_link_thread,
1901
1989
  // Swarm Mail (Embedded)
@@ -2100,6 +2188,25 @@ const SwarmPlugin: Plugin = async (
2100
2188
  full_snapshot: snapshot, // Log the entire snapshot
2101
2189
  });
2102
2190
 
2191
+ // =======================================================================
2192
+ // CAPTURE POINT 1: Detection complete - record confidence and reasons
2193
+ // =======================================================================
2194
+ await captureCompaction(
2195
+ input.sessionID,
2196
+ snapshot.epic?.id || "unknown",
2197
+ "detection_complete",
2198
+ {
2199
+ confidence: snapshot.detection.confidence,
2200
+ detected: detection.detected,
2201
+ reasons: snapshot.detection.reasons,
2202
+ session_scan_contributed: sessionScan.swarmDetected,
2203
+ session_scan_reasons: sessionScan.reasons,
2204
+ epic_id: snapshot.epic?.id,
2205
+ epic_title: snapshot.epic?.title,
2206
+ subtask_count: snapshot.epic?.subtasks?.length ?? 0,
2207
+ },
2208
+ );
2209
+
2103
2210
  // Level 2: Generate prompt with LLM
2104
2211
  const llmStart = Date.now();
2105
2212
  const llmPrompt = await generateCompactionPrompt(snapshot);
@@ -2113,6 +2220,23 @@ const SwarmPlugin: Plugin = async (
2113
2220
  prompt_preview: llmPrompt?.substring(0, 500),
2114
2221
  });
2115
2222
 
2223
+ // =======================================================================
2224
+ // CAPTURE POINT 2: Prompt generated - record FULL prompt content
2225
+ // =======================================================================
2226
+ if (llmPrompt) {
2227
+ await captureCompaction(
2228
+ input.sessionID,
2229
+ snapshot.epic?.id || "unknown",
2230
+ "prompt_generated",
2231
+ {
2232
+ prompt_length: llmPrompt.length,
2233
+ full_prompt: llmPrompt, // FULL content, not truncated
2234
+ context_type: "llm_generated",
2235
+ duration_ms: llmDuration,
2236
+ },
2237
+ );
2238
+ }
2239
+
2116
2240
  if (llmPrompt) {
2117
2241
  // SUCCESS: Use LLM-generated prompt
2118
2242
  const header = `[Swarm compaction: LLM-generated, ${detection.reasons.join(", ")}]\n\n`;
@@ -2136,6 +2260,21 @@ const SwarmPlugin: Plugin = async (
2136
2260
  });
2137
2261
  }
2138
2262
 
2263
+ // =======================================================================
2264
+ // CAPTURE POINT 3a: Context injected (LLM path) - record FULL content
2265
+ // =======================================================================
2266
+ await captureCompaction(
2267
+ input.sessionID,
2268
+ snapshot.epic?.id || "unknown",
2269
+ "context_injected",
2270
+ {
2271
+ full_content: fullContent, // FULL content, not truncated
2272
+ content_length: fullContent.length,
2273
+ injection_method: "prompt" in output ? "output.prompt" : "output.context.push",
2274
+ context_type: "llm_generated",
2275
+ },
2276
+ );
2277
+
2139
2278
  const totalDuration = Date.now() - startTime;
2140
2279
  logCompaction("info", "compaction_complete_llm_success", {
2141
2280
  session_id: input.sessionID,
@@ -2171,6 +2310,21 @@ const SwarmPlugin: Plugin = async (
2171
2310
  const staticContent = header + SWARM_COMPACTION_CONTEXT;
2172
2311
  output.context.push(staticContent);
2173
2312
 
2313
+ // =======================================================================
2314
+ // CAPTURE POINT 3b: Context injected (static fallback) - record FULL content
2315
+ // =======================================================================
2316
+ await captureCompaction(
2317
+ input.sessionID,
2318
+ "unknown", // No snapshot available in this path
2319
+ "context_injected",
2320
+ {
2321
+ full_content: staticContent,
2322
+ content_length: staticContent.length,
2323
+ injection_method: "output.context.push",
2324
+ context_type: "static_swarm_context",
2325
+ },
2326
+ );
2327
+
2174
2328
  const totalDuration = Date.now() - startTime;
2175
2329
  logCompaction("info", "compaction_complete_static_fallback", {
2176
2330
  session_id: input.sessionID,
@@ -2186,6 +2340,21 @@ const SwarmPlugin: Plugin = async (
2186
2340
  const fallbackContent = header + SWARM_DETECTION_FALLBACK;
2187
2341
  output.context.push(fallbackContent);
2188
2342
 
2343
+ // =======================================================================
2344
+ // CAPTURE POINT 3c: Context injected (detection fallback) - record FULL content
2345
+ // =======================================================================
2346
+ await captureCompaction(
2347
+ input.sessionID,
2348
+ "unknown", // No snapshot for low confidence
2349
+ "context_injected",
2350
+ {
2351
+ full_content: fallbackContent,
2352
+ content_length: fallbackContent.length,
2353
+ injection_method: "output.context.push",
2354
+ context_type: "detection_fallback",
2355
+ },
2356
+ );
2357
+
2189
2358
  const totalDuration = Date.now() - startTime;
2190
2359
  logCompaction("info", "compaction_complete_detection_fallback", {
2191
2360
  session_id: input.sessionID,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-swarm-plugin",
3
- "version": "0.37.0",
3
+ "version": "0.39.1",
4
4
  "description": "Multi-agent swarm coordination for OpenCode with learning capabilities, beads integration, and Agent Mail",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -30,6 +30,11 @@
30
30
  "test:all": "bun test --timeout 60000 src/",
31
31
  "test:watch": "bun test --watch src/",
32
32
  "typecheck": "tsc --noEmit",
33
+ "eval:run": "bun --env-file=.env run bunx evalite run evals/",
34
+ "eval:decomposition": "bun --env-file=.env run bunx evalite run evals/swarm-decomposition.eval.ts",
35
+ "eval:coordinator": "bun --env-file=.env run bunx evalite run evals/coordinator-session.eval.ts",
36
+ "eval:compaction": "bun --env-file=.env run bunx evalite run evals/compaction-prompt.eval.ts",
37
+ "migrate:sessions": "bun run scripts/migrate-unknown-sessions.ts",
33
38
  "postinstall": "node -e \"console.log('\\n\\x1b[33m Run \\x1b[36mswarm setup\\x1b[33m to configure OpenCode integration\\x1b[0m\\n')\""
34
39
  },
35
40
  "dependencies": {
@@ -41,7 +46,7 @@
41
46
  "minimatch": "^10.1.1",
42
47
  "pino": "^9.6.0",
43
48
  "pino-roll": "^1.3.0",
44
- "swarm-mail": "1.5.1",
49
+ "swarm-mail": "1.5.2",
45
50
  "yaml": "^2.8.2",
46
51
  "zod": "4.1.8"
47
52
  },