npm - opencode-swarm-plugin - Versions diffs - 0.42.0 → 0.42.1 - Mend

opencode-swarm-plugin 0.42.0 → 0.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/.hive/issues.jsonl +7 -5
package/.turbo/turbo-build.log +2 -2
package/CHANGELOG.md +10 -0
package/bin/swarm.serve.test.ts +46 -0
package/bin/swarm.ts +61 -0
package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
package/evals/scorers/coordinator-discipline.ts +0 -70
package/package.json +1 -1
package/src/compaction-prompt-scorers.test.ts +175 -0

package/.hive/issues.jsonl CHANGED

@@ -44,10 +44,6 @@
 {"id":"opencode-swarm-plugin--ys7z8-mjlk7jspacf","title":"Audit session data quality and filtering","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:50:08.761Z","updated_at":"2025-12-25T14:59:53.284Z","closed_at":"2025-12-25T14:59:53.284Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlk7js9bt1","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjlk7jsrvls","title":"Analyze scorer implementations and scoring patterns","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:50:08.763Z","updated_at":"2025-12-25T14:59:54.612Z","closed_at":"2025-12-25T14:59:54.612Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlk7js9bt1","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjlk7jstvch","title":"Synthesize findings and propose improvements","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T14:50:08.765Z","updated_at":"2025-12-25T15:04:46.898Z","closed_at":"2025-12-25T15:04:46.898Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlk7js9bt1","dependencies":[],"labels":[],"comments":[]}
-{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","title":"P0 Eval Fixes","description":"Immediate fixes from eval audit. Target: restore eval health (0%→100%, 53%→70%), remove 250 LOC dead code.","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T15:42:19.671Z","updated_at":"2025-12-25T15:42:19.671Z","dependencies":[],"labels":[],"comments":[]}
-{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmlu3m","title":"Fix example.eval.ts data/task mismatch","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.677Z","updated_at":"2025-12-25T15:42:19.677Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
-{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmont1","title":"Fix compaction-prompt case-sensitive regex and missing tools","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.680Z","updated_at":"2025-12-25T15:42:19.680Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
-{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmt1kq","title":"Remove 4 unused coordinator scorers","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.685Z","updated_at":"2025-12-25T15:42:19.685Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","title":"Eval System Improvements: Tool + Event Capture + Scorers","description":"Improve eval system with:\n1. Plugin tool for running evals (eval_run)\n2. Capture decomposition_complete events\n3. Capture VIOLATION events\n4. Improve compaction prompt structure\n5. Add review efficiency scorer\n6. Enforce knowledge gathering validation\n\nTarget: 70% → 85% overall eval score","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T05:28:16.999Z","updated_at":"2025-12-25T16:06:41.043Z","closed_at":"2025-12-25T16:06:41.043Z","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjl04znlxzw","title":"Improve compaction prompt structure","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T05:28:17.025Z","updated_at":"2025-12-25T16:06:31.435Z","closed_at":"2025-12-25T16:06:31.435Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjlnn93f5t1","title":"Eval-Driven Improvement Flywheel","description":"Wire up the complete eval improvement feedback loop:\n1. Connect eval-runner to eval-history (recordEvalRun)\n2. Add checkGate + learnFromEvalFailure integration\n3. Add eval:gate script for CI\n4. Inject failures into coordinator prompts\n5. Add GitHub Actions workflow\n\nGoal: Evals drive real improvement over time via automated regression detection and learning.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T16:26:20.235Z","updated_at":"2025-12-25T16:49:21.513Z","closed_at":"2025-12-25T16:49:21.513Z","dependencies":[],"labels":[],"comments":[]}
@@ -58,7 +54,6 @@
 {"id":"opencode-swarm-plugin--ys7z8-mjlnn9412oc","title":"Add GitHub Actions eval workflow","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T16:26:20.257Z","updated_at":"2025-12-25T16:49:12.694Z","closed_at":"2025-12-25T16:49:12.694Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlnn93f5t1","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjljadmw66u","title":"Research: Swarm Coordination (decomposition, orchestration, review, worktree)","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.128Z","updated_at":"2025-12-25T16:50:12.242Z","closed_at":"2025-12-25T16:50:12.242Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjljadn7knk","title":"Research: Mandates, Guardrails & Structured Output","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.139Z","updated_at":"2025-12-25T16:50:13.275Z","closed_at":"2025-12-25T16:50:13.275Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
-{"id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","title":"Swarm O11y & Eval Insights Pipeline","description":"Comprehensive observability and eval insights for swarm coordination. Fixes data capture gaps, adds CLI commands for visibility, injects insights into prompts, and creates a real-time dashboard.\n\nGoals:\n1. All swarm events captured (decomposition, outcomes, reviews, failures)\n2. CLI commands: `swarm stats`, `swarm history` for human visibility\n3. Prompt injection: surface insights to coordinators/workers\n4. TanStack Start dashboard with real-time streaming\n\nDatabase: ~/.config/swarm-tools/swarm.db (libSQL)\nSessions: ~/.config/swarm-tools/sessions/*.jsonl\nExisting analytics: swarm-mail/src/analytics.ts","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T19:20:20.054Z","updated_at":"2025-12-25T19:20:20.054Z","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjltv0j8x4n","title":"Audit & fix data capture gaps","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T19:20:20.084Z","updated_at":"2025-12-25T19:30:57.891Z","closed_at":"2025-12-25T19:30:57.891Z","parent_id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjltv0jcpjx","title":"Add swarm stats CLI command","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T19:20:20.088Z","updated_at":"2025-12-25T19:30:59.853Z","closed_at":"2025-12-25T19:30:59.853Z","parent_id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjltv0jhsd6","title":"Add swarm history CLI command","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T19:20:20.093Z","updated_at":"2025-12-25T19:31:01.913Z","closed_at":"2025-12-25T19:31:01.913Z","parent_id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","dependencies":[],"labels":[],"comments":[]}
@@ -102,3 +97,10 @@
 {"id":"opencode-swarm-plugin--ys7z8-mjlv8hzdnf2","title":"Events pane with live tail","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T19:58:48.841Z","updated_at":"2025-12-25T20:18:33.574Z","closed_at":"2025-12-25T20:18:33.574Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlv8hy1tzf","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjlv8hzi0bv","title":"Cells pane with tree view","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T19:58:48.846Z","updated_at":"2025-12-25T20:18:34.562Z","closed_at":"2025-12-25T20:18:34.562Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlv8hy1tzf","dependencies":[],"labels":[],"comments":[]}
 {"id":"opencode-swarm-plugin--ys7z8-mjlv8hzxyee","title":"Main layout with keyboard navigation","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-25T19:58:48.861Z","updated_at":"2025-12-25T20:24:43.227Z","closed_at":"2025-12-25T20:24:43.227Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlv8hy1tzf","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","title":"P0 Eval Fixes","description":"Immediate fixes from eval audit. Target: restore eval health (0%→100%, 53%→70%), remove 250 LOC dead code.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T15:42:19.671Z","updated_at":"2025-12-25T20:40:14.672Z","closed_at":"2025-12-25T20:40:14.672Z","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmlu3m","title":"Fix example.eval.ts data/task mismatch","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.677Z","updated_at":"2025-12-25T20:40:00.437Z","closed_at":"2025-12-25T20:40:00.437Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmont1","title":"Fix compaction-prompt case-sensitive regex and missing tools","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.680Z","updated_at":"2025-12-25T20:40:02.213Z","closed_at":"2025-12-25T20:40:02.213Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjlm2nmt1kq","title":"Remove 4 unused coordinator scorers","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.685Z","updated_at":"2025-12-25T20:40:03.974Z","closed_at":"2025-12-25T20:40:03.974Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","title":"Swarm O11y & Eval Insights Pipeline","description":"Comprehensive observability and eval insights for swarm coordination.\n\nCOMPLETED:\n1. ✅ `swarm serve` command - starts SSE server on configurable port\n2. ✅ Dashboard panes wired to real data (CellsPane, AgentsPane)\n3. ✅ Vite + React dashboard with SSE hooks\n\nREMAINING:\n- GET /cells endpoint on server (dashboard blocked on this)\n- Fix dashboard test fixtures for mock server\n- CLI commands: `swarm stats`, `swarm history`\n- Prompt injection for insights","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T19:20:20.054Z","updated_at":"2025-12-25T20:40:19.433Z","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjlwcoh8ut2","title":"Add `swarm serve` command to start SSE server","description":"Add a `serve` subcommand to bin/swarm.ts that starts the DurableStreamServer on a configurable port (default 3001).\n\nFiles: bin/swarm.ts\n\nImplementation:\n1. Add `serve` case to the switch statement\n2. Import createDurableStreamServer from swarm-mail\n3. Start server with adapter from getSwarmMailLibSQL()\n4. Print URL to console\n5. Keep process alive\n\nExample usage: `swarm serve --port 3001`\n\nThe dashboard at localhost:5173 will connect to this SSE endpoint.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T20:30:03.500Z","updated_at":"2025-12-25T20:40:05.728Z","closed_at":"2025-12-25T20:40:05.728Z","dependencies":[],"labels":[],"comments":[]}
+{"id":"opencode-swarm-plugin--ys7z8-mjlwcslohuv","title":"Wire dashboard panes to real swarm-mail data","description":"Connect CellsPane and AgentsPane to real data from the SSE server.\n\nFiles: \n- packages/swarm-dashboard/src/lib/api.ts\n- packages/swarm-dashboard/src/components/CellsPane.tsx\n- packages/swarm-dashboard/src/components/AgentsPane.tsx\n- packages/swarm-dashboard/src/hooks/useSwarmEvents.ts\n\nImplementation:\n1. Update api.ts to fetch cells from hive (can use REST endpoint or derive from events)\n2. Update CellsPane to use real cell data instead of mock\n3. Update AgentsPane to derive agent list from SSE events (agent_registered events)\n4. Ensure useSwarmEvents properly accumulates agent state\n\nThe SSE server runs at localhost:3001 (from `swarm serve` command).","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T20:30:08.844Z","updated_at":"2025-12-25T20:40:07.548Z","closed_at":"2025-12-25T20:40:07.548Z","dependencies":[],"labels":[],"comments":[]}

package/.turbo/turbo-build.log CHANGED

@@ -1,9 +1,9 @@
 $ bun build ./src/index.ts --outdir ./dist --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && bun build ./src/plugin.ts --outfile ./dist/plugin.js --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && tsc
-Bundled 1348 modules in 205ms
+Bundled 1348 modules in 198ms
   index.js  4.33 MB  (entry point)
-Bundled 1349 modules in 196ms
+Bundled 1349 modules in 190ms
   plugin.js  4.30 MB  (entry point)

package/CHANGELOG.md CHANGED

@@ -1,5 +1,15 @@
 # opencode-swarm-plugin
+## 0.42.1
+### Patch Changes
+- [`f6707d5`](https://github.com/joelhooks/swarm-tools/commit/f6707d53eb92021b6976212e903994c98c798483) Thanks [@joelhooks](https://github.com/joelhooks)! - ## 🐦 @swarmtoolsai Now Tweets Releases
+  Automated release announcements are live! When packages publish to npm, Claude summarizes the changelog into a tweet and posts from @swarmtoolsai.
+  No more manual "hey we shipped" posts - the bees handle it now.
 ## 0.42.0
 ### Minor Changes

package/bin/swarm.serve.test.ts ADDED

@@ -0,0 +1,46 @@
+/**
+ * Tests for `swarm serve` command
+ */
+import { describe, test, expect } from "bun:test";
+import { spawn } from "bun";
+describe("swarm serve command", () => {
+  test("serve command accepts custom port via --port flag", () => {
+    // Verify that CLI parsing works for custom port
+    const args = ["serve", "--port", "8080"];
+    const port = args.includes("--port")
+      ? Number.parseInt(args[args.indexOf("--port") + 1])
+      : 3001;
+    expect(port).toBe(8080);
+  });
+  test("serve command defaults to port 3001", () => {
+    const args = ["serve"];
+    const port = args.includes("--port")
+      ? Number.parseInt(args[args.indexOf("--port") + 1])
+      : 3001;
+    expect(port).toBe(3001);
+  });
+  test("serve command uses project path from CWD", () => {
+    const projectPath = process.cwd();
+    expect(projectPath).toBeDefined();
+    expect(typeof projectPath).toBe("string");
+  });
+  test("serve command appears in help text", async () => {
+    const proc = spawn(["bun", "run", "bin/swarm.ts", "help"], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    const output = await new Response(proc.stdout).text();
+    expect(output).toContain("swarm serve");
+    expect(output).toContain("Start SSE server");
+    expect(output).toContain("--port");
+  });
+});

package/bin/swarm.ts CHANGED

@@ -2518,6 +2518,8 @@ ${cyan("Commands:")}
   swarm config    Show paths to generated config files
   swarm agents    Update AGENTS.md with skill awareness
   swarm migrate   Migrate PGlite database to libSQL
+  swarm serve     Start SSE server for real-time event streaming
+    --port <n>          Port to listen on (default: 3001)
   swarm cells     List or get cells from database (replaces 'swarm tool hive_query')
   swarm log       View swarm logs with filtering
   swarm stats     Show swarm health metrics and success rates
@@ -4488,6 +4490,62 @@ async function evalRun() {
   }
 }
+// ============================================================================
+// Serve Command - Start SSE Server
+// ============================================================================
+async function serve() {
+  p.intro("swarm serve v" + VERSION);
+  // Parse --port flag (default 3001)
+  const portFlagIndex = process.argv.indexOf("--port");
+  const port = portFlagIndex !== -1
+    ? Number.parseInt(process.argv[portFlagIndex + 1]) || 3001
+    : 3001;
+  const projectPath = process.cwd();
+  p.log.step("Starting DurableStreamServer...");
+  p.log.message(dim(`  Project: ${projectPath}`));
+  p.log.message(dim(`  Port: ${port}`));
+  try {
+    // Import dependencies
+    const { getSwarmMailLibSQL } = await import("swarm-mail");
+    const { createDurableStreamAdapter, createDurableStreamServer } = await import("swarm-mail");
+    // Get swarm-mail adapter
+    const swarmMail = await getSwarmMailLibSQL(projectPath);
+    // Create stream adapter
+    const streamAdapter = createDurableStreamAdapter(swarmMail, projectPath);
+    // Create and start server
+    const server = createDurableStreamServer({
+      adapter: streamAdapter,
+      port,
+      projectKey: projectPath,
+    });
+    await server.start();
+    p.log.success("Server started!");
+    p.log.message("");
+    p.log.message(cyan("  Dashboard: http://localhost:5173"));
+    p.log.message(cyan(`  SSE Endpoint: ${server.url}/streams/${encodeURIComponent(projectPath)}`));
+    p.log.message("");
+    p.log.message(dim("  Press Ctrl+C to stop"));
+    // Keep process alive
+    await new Promise(() => {});
+  } catch (error) {
+    p.log.error("Failed to start server");
+    p.log.message(error instanceof Error ? error.message : String(error));
+    p.outro("Aborted");
+    process.exit(1);
+  }
+}
 // ============================================================================
 // Main
 // ============================================================================
@@ -4510,6 +4568,9 @@ switch (command) {
   case "config":
     config();
     break;
+  case "serve":
+    await serve();
+    break;
   case "update":
     await update();
     break;

package/evals/scorers/coordinator-discipline.evalite-test.ts CHANGED

@@ -5,7 +5,7 @@ import { describe, expect, it } from "bun:test";
 import type { CoordinatorSession } from "../../src/eval-capture.js";
 import {
 	overallDiscipline,
-	reviewEfficiency,
 	reviewThoroughness,
 	spawnEfficiency,
 	timeToFirstSpawn,
@@ -537,164 +537,3 @@ describe("overallDiscipline", () => {
 	});
 });
-describe("reviewEfficiency", () => {
-	it("scores 1.0 for ideal 1:1 ratio (one review per spawn)", async () => {
-		const session: CoordinatorSession = {
-			session_id: "test-session",
-			epic_id: "test-epic",
-			start_time: "2025-01-01T00:00:00Z",
-			events: [
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:10Z",
-					event_type: "DECISION",
-					decision_type: "worker_spawned",
-					payload: { bead_id: "bd-1" },
-				},
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:20Z",
-					event_type: "DECISION",
-					decision_type: "worker_spawned",
-					payload: { bead_id: "bd-2" },
-				},
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:10:00Z",
-					event_type: "DECISION",
-					decision_type: "review_completed",
-					payload: { bead_id: "bd-1" },
-				},
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:10:10Z",
-					event_type: "DECISION",
-					decision_type: "review_completed",
-					payload: { bead_id: "bd-2" },
-				},
-			],
-		};
-		const result = await reviewEfficiency({
-			output: JSON.stringify(session),
-			expected: {},
-			input: undefined,
-		});
-		expect(result.score).toBe(1.0);
-		expect(result.message).toContain("2 reviews / 2 spawns");
-	});
-	it("penalizes over-reviewing (>2:1 ratio)", async () => {
-		// 6 reviews for 2 spawns = 3:1 ratio (over-reviewing)
-		const session: CoordinatorSession = {
-			session_id: "test-session",
-			epic_id: "test-epic",
-			start_time: "2025-01-01T00:00:00Z",
-			events: [
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:10Z",
-					event_type: "DECISION",
-					decision_type: "worker_spawned",
-					payload: { bead_id: "bd-1" },
-				},
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:20Z",
-					event_type: "DECISION",
-					decision_type: "worker_spawned",
-					payload: { bead_id: "bd-2" },
-				},
-				...Array.from({ length: 6 }, (_, i) => ({
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: `2025-01-01T00:10:${String(i * 10).padStart(2, "0")}Z`,
-					event_type: "DECISION" as const,
-					decision_type: "review_completed" as const,
-					payload: { bead_id: `bd-${(i % 2) + 1}` },
-				})),
-			],
-		};
-		const result = await reviewEfficiency({
-			output: JSON.stringify(session),
-			expected: {},
-			input: undefined,
-		});
-		// 3:1 ratio should be penalized (score < 0.5)
-		expect(result.score).toBeLessThan(0.5);
-		expect(result.message).toContain("6 reviews / 2 spawns");
-	});
-	it("handles no spawns gracefully", async () => {
-		const session: CoordinatorSession = {
-			session_id: "test-session",
-			epic_id: "test-epic",
-			start_time: "2025-01-01T00:00:00Z",
-			events: [
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:00Z",
-					event_type: "DECISION",
-					decision_type: "strategy_selected",
-					payload: { strategy: "file-based" },
-				},
-			],
-		};
-		const result = await reviewEfficiency({
-			output: JSON.stringify(session),
-			expected: {},
-			input: undefined,
-		});
-		expect(result.score).toBe(1.0);
-		expect(result.message).toContain("No workers spawned");
-	});
-	it("handles no reviews gracefully (0:N ratio)", async () => {
-		const session: CoordinatorSession = {
-			session_id: "test-session",
-			epic_id: "test-epic",
-			start_time: "2025-01-01T00:00:00Z",
-			events: [
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:10Z",
-					event_type: "DECISION",
-					decision_type: "worker_spawned",
-					payload: { bead_id: "bd-1" },
-				},
-				{
-					session_id: "test-session",
-					epic_id: "test-epic",
-					timestamp: "2025-01-01T00:00:20Z",
-					event_type: "DECISION",
-					decision_type: "worker_spawned",
-					payload: { bead_id: "bd-2" },
-				},
-			],
-		};
-		const result = await reviewEfficiency({
-			output: JSON.stringify(session),
-			expected: {},
-			input: undefined,
-		});
-		// No reviews is bad (should use reviewThoroughness for this)
-		// But this scorer focuses on over-reviewing, so no reviews = 1.0 (not over-reviewing)
-		expect(result.score).toBe(1.0);
-		expect(result.message).toContain("0 reviews / 2 spawns");
-	});
-});

package/evals/scorers/coordinator-discipline.ts CHANGED

@@ -132,76 +132,6 @@ export const spawnEfficiency = createScorer({
   },
 });
-/**
- * Review Efficiency Scorer
- *
- * Measures review-to-spawn ratio to detect over-reviewing.
- * Ideal ratio is 1:1 (one review per spawned worker).
- * Penalizes >2:1 ratio (over-reviewing wastes context).
- *
- * Scoring:
- * - 0:N or 1:1 ratio = 1.0 (perfect)
- * - 2:1 ratio = 0.5 (threshold)
- * - >2:1 ratio = linear penalty toward 0.0
- *
- * Score: normalized to 0-1 (lower ratio is better)
- */
-export const reviewEfficiency = createScorer({
-  name: "Review Efficiency",
-  description: "Review-to-spawn ratio (penalize over-reviewing >2:1)",
-  scorer: ({ output }) => {
-    try {
-      const session = JSON.parse(String(output)) as CoordinatorSession;
-      // Count worker_spawned events
-      const spawned = session.events.filter(
-        (e) =>
-          e.event_type === "DECISION" && e.decision_type === "worker_spawned"
-      ).length;
-      if (spawned === 0) {
-        return {
-          score: 1.0,
-          message: "No workers spawned",
-        };
-      }
-      // Count review_completed events
-      const reviewed = session.events.filter(
-        (e) =>
-          e.event_type === "DECISION" && e.decision_type === "review_completed"
-      ).length;
-      const ratio = reviewed / spawned;
-      // Scoring:
-      // - ratio <= 1.0: perfect (1.0)
-      // - ratio <= 2.0: linear decay from 1.0 to 0.5
-      // - ratio > 2.0: linear penalty from 0.5 toward 0.0
-      let score: number;
-      if (ratio <= 1.0) {
-        score = 1.0;
-      } else if (ratio <= 2.0) {
-        // Linear decay: 1.0 at ratio=1.0, 0.5 at ratio=2.0
-        score = 1.0 - (ratio - 1.0) * 0.5;
-      } else {
-        // Penalty for extreme over-reviewing: 0.5 at ratio=2.0, 0.0 at ratio=4.0
-        score = Math.max(0, 0.5 - (ratio - 2.0) * 0.25);
-      }
-      return {
-        score,
-        message: `${reviewed} reviews / ${spawned} spawns (${ratio.toFixed(1)}:1 ratio)`,
-      };
-    } catch (error) {
-      return {
-        score: 0,
-        message: `Failed to parse CoordinatorSession: ${error}`,
-      };
-    }
-  },
-});
 /**
  * Review Thoroughness Scorer
  *

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "opencode-swarm-plugin",
-  "version": "0.42.0",
+  "version": "0.42.1",
   "description": "Multi-agent swarm coordination for OpenCode with learning capabilities, beads integration, and Agent Mail",
   "type": "module",
   "main": "./dist/index.js",

package/src/compaction-prompt-scorers.test.ts CHANGED

@@ -3,6 +3,15 @@
  *
  * TDD approach - tests written FIRST to define scorer behavior
  * Tests the PURE scoring functions (not evalite wrappers)
+ *
+ * **Case-Sensitivity Verification**:
+ * All tool name regexes MUST be case-insensitive (/i flag) because:
+ * - LLMs generate inconsistent casing (Edit vs edit, Read vs read)
+ * - Fixtures contain mixed case examples
+ * - Scoring must be robust to case variations
+ *
+ * Fixed in commit adding /i flags to Edit, Write, bash patterns.
+ * Tests added to prevent regression.
  */
 import { describe, expect, test } from "bun:test";
@@ -15,6 +24,109 @@ import {
 	scorePostCompactionDiscipline,
 } from "./compaction-prompt-scoring.js";
+describe("Case-Insensitive Tool Detection (Regression Prevention)", () => {
+	test("all scorers handle mixed-case tool names correctly", () => {
+		// Real-world example with mixed casing from LLM output
+		const prompt: CompactionPrompt = {
+			content: `┌─────────────────────────────────────────┐
+│     YOU ARE THE COORDINATOR             │
+└─────────────────────────────────────────┘
+You are coordinating epic mjkw81rkq4c.
+## IMMEDIATE ACTIONS
+1. swarm_status(epic_id='mjkw81rkq4c', project_key='/path')
+2. swarmmail_inbox()
+## FORBIDDEN TOOLS
+NEVER use these tools - delegate to workers:
+- edit (file modifications)
+- write (file creation)
+- BASH (shell commands for file mods)
+- swarmmail_reserve (only workers)
+- git commit (workers handle)
+ALWAYS spawn workers for code changes.`,
+		};
+		// Epic ID detection should work
+		const epicResult = scoreEpicIdSpecificity(prompt);
+		expect(epicResult.score).toBe(1.0);
+		// Actionability should detect swarm_status
+		const actionResult = scoreActionability(prompt);
+		expect(actionResult.score).toBe(1.0);
+		// Coordinator identity should detect ASCII + NEVER/ALWAYS
+		const identityResult = scoreCoordinatorIdentity(prompt);
+		expect(identityResult.score).toBe(1.0);
+		// Forbidden tools should detect all 5 despite mixed case
+		const forbiddenResult = scoreForbiddenToolsPresent(prompt);
+		expect(forbiddenResult.score).toBe(1.0);
+		expect(forbiddenResult.message).toContain("All 5");
+		// Post-compaction discipline should detect swarm_status as first tool
+		const disciplineResult = scorePostCompactionDiscipline(prompt);
+		expect(disciplineResult.score).toBe(1.0);
+	});
+	test("forbidden tools scorer detects lowercase tool names", () => {
+		// Previously failed before /i flags were added
+		const prompt: CompactionPrompt = {
+			content: `Don't use: edit, write, bash, swarmmail_reserve, git commit`,
+		};
+		const result = scoreForbiddenToolsPresent(prompt);
+		// Should detect all 5 tools regardless of case
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("All 5");
+	});
+	test("forbidden tools scorer detects UPPERCASE tool names", () => {
+		const prompt: CompactionPrompt = {
+			content: `Forbidden: EDIT, WRITE, BASH, swarmmail_reserve, git commit`,
+		};
+		const result = scoreForbiddenToolsPresent(prompt);
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("All 5");
+	});
+	test("post-compaction discipline detects mixed-case first tools", () => {
+		const testCases = [
+			{ tool: "EDIT", shouldPass: false },
+			{ tool: "edit", shouldPass: false },
+			{ tool: "Edit", shouldPass: false },
+			{ tool: "WRITE", shouldPass: false },
+			{ tool: "write", shouldPass: false },
+			{ tool: "READ", shouldPass: false },
+			{ tool: "read", shouldPass: false },
+			{ tool: "swarm_status", shouldPass: true },
+			{ tool: "SWARM_STATUS", shouldPass: true },
+			{ tool: "swarmmail_inbox", shouldPass: true },
+		];
+		for (const { tool, shouldPass } of testCases) {
+			const prompt: CompactionPrompt = {
+				content: `1. ${tool}()`,
+			};
+			const result = scorePostCompactionDiscipline(prompt);
+			if (shouldPass) {
+				expect(result.score).toBe(1.0);
+			} else {
+				expect(result.score).toBe(0.0);
+			}
+		}
+	});
+});
 describe("epicIdSpecificity scorer", () => {
 	test("scores 1.0 for real epic IDs", () => {
 		const prompt: CompactionPrompt = {
@@ -218,6 +330,33 @@ describe("forbiddenToolsPresent scorer", () => {
 		expect(result.score).toBe(0.0);
 		expect(result.message).toContain("0/5");
 	});
+	test("scores 1.0 with lowercase forbidden tools (case-insensitive)", () => {
+		const prompt: CompactionPrompt = {
+			content: `🚫 FORBIDDEN TOOLS - NEVER call these:
+- edit (use swarm_spawn_subtask)
+- write (use swarm_spawn_subtask)
+- swarmmail_reserve (only workers reserve)
+- git commit (workers commit)
+- bash (for file modifications)`,
+		};
+		const result = scoreForbiddenToolsPresent(prompt);
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("All 5 forbidden tools");
+	});
+	test("scores correctly with mixed case forbidden tools", () => {
+		const prompt: CompactionPrompt = {
+			content: `Avoid: edit, Write, BASH`,
+		};
+		const result = scoreForbiddenToolsPresent(prompt);
+		expect(result.score).toBe(0.6);
+		expect(result.message).toContain("3/5");
+	});
 });
 describe("postCompactionDiscipline scorer", () => {
@@ -297,4 +436,40 @@ describe("postCompactionDiscipline scorer", () => {
 		expect(result.score).toBe(0.0);
 		expect(result.message).toContain("No tool");
 	});
+	test("scores 0.0 when first tool is lowercase 'read' (case-insensitive)", () => {
+		const prompt: CompactionPrompt = {
+			content: `1. read(file='src/index.ts')
+2. swarm_status()`,
+		};
+		const result = scorePostCompactionDiscipline(prompt);
+		expect(result.score).toBe(0.0);
+		expect(result.message).toContain("read");
+	});
+	test("scores 0.0 when first tool is lowercase 'edit'", () => {
+		const prompt: CompactionPrompt = {
+			content: `1. edit(file='src/auth.ts', ...)
+2. swarm_status()`,
+		};
+		const result = scorePostCompactionDiscipline(prompt);
+		expect(result.score).toBe(0.0);
+		expect(result.message).toContain("edit");
+	});
+	test("scores 0.0 when first tool is lowercase 'write'", () => {
+		const prompt: CompactionPrompt = {
+			content: `1. write(file='README.md', content='...')
+2. swarm_status()`,
+		};
+		const result = scorePostCompactionDiscipline(prompt);
+		expect(result.score).toBe(0.0);
+		expect(result.message).toContain("write");
+	});
 });