opencode-swarm-plugin 0.42.0 → 0.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,10 +44,6 @@
44
44
  {"id":"opencode-swarm-plugin--ys7z8-mjlk7jspacf","title":"Audit session data quality and filtering","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:50:08.761Z","updated_at":"2025-12-25T14:59:53.284Z","closed_at":"2025-12-25T14:59:53.284Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlk7js9bt1","dependencies":[],"labels":[],"comments":[]}
45
45
  {"id":"opencode-swarm-plugin--ys7z8-mjlk7jsrvls","title":"Analyze scorer implementations and scoring patterns","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:50:08.763Z","updated_at":"2025-12-25T14:59:54.612Z","closed_at":"2025-12-25T14:59:54.612Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlk7js9bt1","dependencies":[],"labels":[],"comments":[]}
46
46
  {"id":"opencode-swarm-plugin--ys7z8-mjlk7jstvch","title":"Synthesize findings and propose improvements","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T14:50:08.765Z","updated_at":"2025-12-25T15:04:46.898Z","closed_at":"2025-12-25T15:04:46.898Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlk7js9bt1","dependencies":[],"labels":[],"comments":[]}
47
- {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","title":"P0 Eval Fixes","description":"Immediate fixes from eval audit. Target: restore eval health (0%→100%, 53%→70%), remove 250 LOC dead code.","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T15:42:19.671Z","updated_at":"2025-12-25T15:42:19.671Z","dependencies":[],"labels":[],"comments":[]}
48
- {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmlu3m","title":"Fix example.eval.ts data/task mismatch","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.677Z","updated_at":"2025-12-25T15:42:19.677Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
49
- {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmont1","title":"Fix compaction-prompt case-sensitive regex and missing tools","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.680Z","updated_at":"2025-12-25T15:42:19.680Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
50
- {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmt1kq","title":"Remove 4 unused coordinator scorers","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.685Z","updated_at":"2025-12-25T15:42:19.685Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
51
47
  {"id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","title":"Eval System Improvements: Tool + Event Capture + Scorers","description":"Improve eval system with:\n1. Plugin tool for running evals (eval_run)\n2. Capture decomposition_complete events\n3. Capture VIOLATION events\n4. Improve compaction prompt structure\n5. Add review efficiency scorer\n6. Enforce knowledge gathering validation\n\nTarget: 70% → 85% overall eval score","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T05:28:16.999Z","updated_at":"2025-12-25T16:06:41.043Z","closed_at":"2025-12-25T16:06:41.043Z","dependencies":[],"labels":[],"comments":[]}
52
48
  {"id":"opencode-swarm-plugin--ys7z8-mjl04znlxzw","title":"Improve compaction prompt structure","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T05:28:17.025Z","updated_at":"2025-12-25T16:06:31.435Z","closed_at":"2025-12-25T16:06:31.435Z","parent_id":"opencode-swarm-plugin--ys7z8-mjl04zmvv7c","dependencies":[],"labels":[],"comments":[]}
53
49
  {"id":"opencode-swarm-plugin--ys7z8-mjlnn93f5t1","title":"Eval-Driven Improvement Flywheel","description":"Wire up the complete eval improvement feedback loop:\n1. Connect eval-runner to eval-history (recordEvalRun)\n2. Add checkGate + learnFromEvalFailure integration\n3. Add eval:gate script for CI\n4. Inject failures into coordinator prompts\n5. Add GitHub Actions workflow\n\nGoal: Evals drive real improvement over time via automated regression detection and learning.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T16:26:20.235Z","updated_at":"2025-12-25T16:49:21.513Z","closed_at":"2025-12-25T16:49:21.513Z","dependencies":[],"labels":[],"comments":[]}
@@ -58,7 +54,6 @@
58
54
  {"id":"opencode-swarm-plugin--ys7z8-mjlnn9412oc","title":"Add GitHub Actions eval workflow","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T16:26:20.257Z","updated_at":"2025-12-25T16:49:12.694Z","closed_at":"2025-12-25T16:49:12.694Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlnn93f5t1","dependencies":[],"labels":[],"comments":[]}
59
55
  {"id":"opencode-swarm-plugin--ys7z8-mjljadmw66u","title":"Research: Swarm Coordination (decomposition, orchestration, review, worktree)","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.128Z","updated_at":"2025-12-25T16:50:12.242Z","closed_at":"2025-12-25T16:50:12.242Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
60
56
  {"id":"opencode-swarm-plugin--ys7z8-mjljadn7knk","title":"Research: Mandates, Guardrails & Structured Output","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T14:24:21.139Z","updated_at":"2025-12-25T16:50:13.275Z","closed_at":"2025-12-25T16:50:13.275Z","parent_id":"opencode-swarm-plugin--ys7z8-mjljadmo9mg","dependencies":[],"labels":[],"comments":[]}
61
- {"id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","title":"Swarm O11y & Eval Insights Pipeline","description":"Comprehensive observability and eval insights for swarm coordination. Fixes data capture gaps, adds CLI commands for visibility, injects insights into prompts, and creates a real-time dashboard.\n\nGoals:\n1. All swarm events captured (decomposition, outcomes, reviews, failures)\n2. CLI commands: `swarm stats`, `swarm history` for human visibility\n3. Prompt injection: surface insights to coordinators/workers\n4. TanStack Start dashboard with real-time streaming\n\nDatabase: ~/.config/swarm-tools/swarm.db (libSQL)\nSessions: ~/.config/swarm-tools/sessions/*.jsonl\nExisting analytics: swarm-mail/src/analytics.ts","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T19:20:20.054Z","updated_at":"2025-12-25T19:20:20.054Z","dependencies":[],"labels":[],"comments":[]}
62
57
  {"id":"opencode-swarm-plugin--ys7z8-mjltv0j8x4n","title":"Audit & fix data capture gaps","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T19:20:20.084Z","updated_at":"2025-12-25T19:30:57.891Z","closed_at":"2025-12-25T19:30:57.891Z","parent_id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","dependencies":[],"labels":[],"comments":[]}
63
58
  {"id":"opencode-swarm-plugin--ys7z8-mjltv0jcpjx","title":"Add swarm stats CLI command","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T19:20:20.088Z","updated_at":"2025-12-25T19:30:59.853Z","closed_at":"2025-12-25T19:30:59.853Z","parent_id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","dependencies":[],"labels":[],"comments":[]}
64
59
  {"id":"opencode-swarm-plugin--ys7z8-mjltv0jhsd6","title":"Add swarm history CLI command","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-25T19:20:20.093Z","updated_at":"2025-12-25T19:31:01.913Z","closed_at":"2025-12-25T19:31:01.913Z","parent_id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","dependencies":[],"labels":[],"comments":[]}
@@ -102,3 +97,10 @@
102
97
  {"id":"opencode-swarm-plugin--ys7z8-mjlv8hzdnf2","title":"Events pane with live tail","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T19:58:48.841Z","updated_at":"2025-12-25T20:18:33.574Z","closed_at":"2025-12-25T20:18:33.574Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlv8hy1tzf","dependencies":[],"labels":[],"comments":[]}
103
98
  {"id":"opencode-swarm-plugin--ys7z8-mjlv8hzi0bv","title":"Cells pane with tree view","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T19:58:48.846Z","updated_at":"2025-12-25T20:18:34.562Z","closed_at":"2025-12-25T20:18:34.562Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlv8hy1tzf","dependencies":[],"labels":[],"comments":[]}
104
99
  {"id":"opencode-swarm-plugin--ys7z8-mjlv8hzxyee","title":"Main layout with keyboard navigation","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-25T19:58:48.861Z","updated_at":"2025-12-25T20:24:43.227Z","closed_at":"2025-12-25T20:24:43.227Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlv8hy1tzf","dependencies":[],"labels":[],"comments":[]}
100
+ {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","title":"P0 Eval Fixes","description":"Immediate fixes from eval audit. Target: restore eval health (0%→100%, 53%→70%), remove 250 LOC dead code.","status":"closed","priority":1,"issue_type":"epic","created_at":"2025-12-25T15:42:19.671Z","updated_at":"2025-12-25T20:40:14.672Z","closed_at":"2025-12-25T20:40:14.672Z","dependencies":[],"labels":[],"comments":[]}
101
+ {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmlu3m","title":"Fix example.eval.ts data/task mismatch","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.677Z","updated_at":"2025-12-25T20:40:00.437Z","closed_at":"2025-12-25T20:40:00.437Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
102
+ {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmont1","title":"Fix compaction-prompt case-sensitive regex and missing tools","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.680Z","updated_at":"2025-12-25T20:40:02.213Z","closed_at":"2025-12-25T20:40:02.213Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
103
+ {"id":"opencode-swarm-plugin--ys7z8-mjlm2nmt1kq","title":"Remove 4 unused coordinator scorers","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-25T15:42:19.685Z","updated_at":"2025-12-25T20:40:03.974Z","closed_at":"2025-12-25T20:40:03.974Z","parent_id":"opencode-swarm-plugin--ys7z8-mjlm2nmf2hw","dependencies":[],"labels":[],"comments":[]}
104
+ {"id":"opencode-swarm-plugin--ys7z8-mjltv0ievr0","title":"Swarm O11y & Eval Insights Pipeline","description":"Comprehensive observability and eval insights for swarm coordination.\n\nCOMPLETED:\n1. ✅ `swarm serve` command - starts SSE server on configurable port\n2. ✅ Dashboard panes wired to real data (CellsPane, AgentsPane)\n3. ✅ Vite + React dashboard with SSE hooks\n\nREMAINING:\n- GET /cells endpoint on server (dashboard blocked on this)\n- Fix dashboard test fixtures for mock server\n- CLI commands: `swarm stats`, `swarm history`\n- Prompt injection for insights","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-25T19:20:20.054Z","updated_at":"2025-12-25T20:40:19.433Z","dependencies":[],"labels":[],"comments":[]}
105
+ {"id":"opencode-swarm-plugin--ys7z8-mjlwcoh8ut2","title":"Add `swarm serve` command to start SSE server","description":"Add a `serve` subcommand to bin/swarm.ts that starts the DurableStreamServer on a configurable port (default 3001).\n\nFiles: bin/swarm.ts\n\nImplementation:\n1. Add `serve` case to the switch statement\n2. Import createDurableStreamServer from swarm-mail\n3. Start server with adapter from getSwarmMailLibSQL()\n4. Print URL to console\n5. Keep process alive\n\nExample usage: `swarm serve --port 3001`\n\nThe dashboard at localhost:5173 will connect to this SSE endpoint.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T20:30:03.500Z","updated_at":"2025-12-25T20:40:05.728Z","closed_at":"2025-12-25T20:40:05.728Z","dependencies":[],"labels":[],"comments":[]}
106
+ {"id":"opencode-swarm-plugin--ys7z8-mjlwcslohuv","title":"Wire dashboard panes to real swarm-mail data","description":"Connect CellsPane and AgentsPane to real data from the SSE server.\n\nFiles: \n- packages/swarm-dashboard/src/lib/api.ts\n- packages/swarm-dashboard/src/components/CellsPane.tsx\n- packages/swarm-dashboard/src/components/AgentsPane.tsx\n- packages/swarm-dashboard/src/hooks/useSwarmEvents.ts\n\nImplementation:\n1. Update api.ts to fetch cells from hive (can use REST endpoint or derive from events)\n2. Update CellsPane to use real cell data instead of mock\n3. Update AgentsPane to derive agent list from SSE events (agent_registered events)\n4. Ensure useSwarmEvents properly accumulates agent state\n\nThe SSE server runs at localhost:3001 (from `swarm serve` command).","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-25T20:30:08.844Z","updated_at":"2025-12-25T20:40:07.548Z","closed_at":"2025-12-25T20:40:07.548Z","dependencies":[],"labels":[],"comments":[]}
@@ -1,9 +1,9 @@
1
1
  $ bun build ./src/index.ts --outdir ./dist --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && bun build ./src/plugin.ts --outfile ./dist/plugin.js --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && tsc
2
- Bundled 1348 modules in 205ms
2
+ Bundled 1348 modules in 198ms
3
3
 
4
4
  index.js 4.33 MB (entry point)
5
5
 
6
- Bundled 1349 modules in 196ms
6
+ Bundled 1349 modules in 190ms
7
7
 
8
8
  plugin.js 4.30 MB (entry point)
9
9
 
package/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # opencode-swarm-plugin
2
2
 
3
+ ## 0.42.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [`f6707d5`](https://github.com/joelhooks/swarm-tools/commit/f6707d53eb92021b6976212e903994c98c798483) Thanks [@joelhooks](https://github.com/joelhooks)! - ## 🐦 @swarmtoolsai Now Tweets Releases
8
+
9
+ Automated release announcements are live! When packages publish to npm, Claude summarizes the changelog into a tweet and posts from @swarmtoolsai.
10
+
11
+ No more manual "hey we shipped" posts - the bees handle it now.
12
+
3
13
  ## 0.42.0
4
14
 
5
15
  ### Minor Changes
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Tests for `swarm serve` command
3
+ */
4
+
5
+ import { describe, test, expect } from "bun:test";
6
+ import { spawn } from "bun";
7
+
8
+ describe("swarm serve command", () => {
9
+ test("serve command accepts custom port via --port flag", () => {
10
+ // Verify that CLI parsing works for custom port
11
+ const args = ["serve", "--port", "8080"];
12
+ const port = args.includes("--port")
13
+ ? Number.parseInt(args[args.indexOf("--port") + 1])
14
+ : 3001;
15
+
16
+ expect(port).toBe(8080);
17
+ });
18
+
19
+ test("serve command defaults to port 3001", () => {
20
+ const args = ["serve"];
21
+ const port = args.includes("--port")
22
+ ? Number.parseInt(args[args.indexOf("--port") + 1])
23
+ : 3001;
24
+
25
+ expect(port).toBe(3001);
26
+ });
27
+
28
+ test("serve command uses project path from CWD", () => {
29
+ const projectPath = process.cwd();
30
+ expect(projectPath).toBeDefined();
31
+ expect(typeof projectPath).toBe("string");
32
+ });
33
+
34
+ test("serve command appears in help text", async () => {
35
+ const proc = spawn(["bun", "run", "bin/swarm.ts", "help"], {
36
+ stdout: "pipe",
37
+ stderr: "pipe",
38
+ });
39
+
40
+ const output = await new Response(proc.stdout).text();
41
+
42
+ expect(output).toContain("swarm serve");
43
+ expect(output).toContain("Start SSE server");
44
+ expect(output).toContain("--port");
45
+ });
46
+ });
package/bin/swarm.ts CHANGED
@@ -2518,6 +2518,8 @@ ${cyan("Commands:")}
2518
2518
  swarm config Show paths to generated config files
2519
2519
  swarm agents Update AGENTS.md with skill awareness
2520
2520
  swarm migrate Migrate PGlite database to libSQL
2521
+ swarm serve Start SSE server for real-time event streaming
2522
+ --port <n> Port to listen on (default: 3001)
2521
2523
  swarm cells List or get cells from database (replaces 'swarm tool hive_query')
2522
2524
  swarm log View swarm logs with filtering
2523
2525
  swarm stats Show swarm health metrics and success rates
@@ -4488,6 +4490,62 @@ async function evalRun() {
4488
4490
  }
4489
4491
  }
4490
4492
 
4493
+ // ============================================================================
4494
+ // Serve Command - Start SSE Server
4495
+ // ============================================================================
4496
+
4497
+ async function serve() {
4498
+ p.intro("swarm serve v" + VERSION);
4499
+
4500
+ // Parse --port flag (default 3001)
4501
+ const portFlagIndex = process.argv.indexOf("--port");
4502
+ const port = portFlagIndex !== -1
4503
+ ? Number.parseInt(process.argv[portFlagIndex + 1]) || 3001
4504
+ : 3001;
4505
+
4506
+ const projectPath = process.cwd();
4507
+
4508
+ p.log.step("Starting DurableStreamServer...");
4509
+ p.log.message(dim(` Project: ${projectPath}`));
4510
+ p.log.message(dim(` Port: ${port}`));
4511
+
4512
+ try {
4513
+ // Import dependencies
4514
+ const { getSwarmMailLibSQL } = await import("swarm-mail");
4515
+ const { createDurableStreamAdapter, createDurableStreamServer } = await import("swarm-mail");
4516
+
4517
+ // Get swarm-mail adapter
4518
+ const swarmMail = await getSwarmMailLibSQL(projectPath);
4519
+
4520
+ // Create stream adapter
4521
+ const streamAdapter = createDurableStreamAdapter(swarmMail, projectPath);
4522
+
4523
+ // Create and start server
4524
+ const server = createDurableStreamServer({
4525
+ adapter: streamAdapter,
4526
+ port,
4527
+ projectKey: projectPath,
4528
+ });
4529
+
4530
+ await server.start();
4531
+
4532
+ p.log.success("Server started!");
4533
+ p.log.message("");
4534
+ p.log.message(cyan(" Dashboard: http://localhost:5173"));
4535
+ p.log.message(cyan(` SSE Endpoint: ${server.url}/streams/${encodeURIComponent(projectPath)}`));
4536
+ p.log.message("");
4537
+ p.log.message(dim(" Press Ctrl+C to stop"));
4538
+
4539
+ // Keep process alive
4540
+ await new Promise(() => {});
4541
+ } catch (error) {
4542
+ p.log.error("Failed to start server");
4543
+ p.log.message(error instanceof Error ? error.message : String(error));
4544
+ p.outro("Aborted");
4545
+ process.exit(1);
4546
+ }
4547
+ }
4548
+
4491
4549
  // ============================================================================
4492
4550
  // Main
4493
4551
  // ============================================================================
@@ -4510,6 +4568,9 @@ switch (command) {
4510
4568
  case "config":
4511
4569
  config();
4512
4570
  break;
4571
+ case "serve":
4572
+ await serve();
4573
+ break;
4513
4574
  case "update":
4514
4575
  await update();
4515
4576
  break;
@@ -5,7 +5,7 @@ import { describe, expect, it } from "bun:test";
5
5
  import type { CoordinatorSession } from "../../src/eval-capture.js";
6
6
  import {
7
7
  overallDiscipline,
8
- reviewEfficiency,
8
+
9
9
  reviewThoroughness,
10
10
  spawnEfficiency,
11
11
  timeToFirstSpawn,
@@ -537,164 +537,3 @@ describe("overallDiscipline", () => {
537
537
  });
538
538
  });
539
539
 
540
- describe("reviewEfficiency", () => {
541
- it("scores 1.0 for ideal 1:1 ratio (one review per spawn)", async () => {
542
- const session: CoordinatorSession = {
543
- session_id: "test-session",
544
- epic_id: "test-epic",
545
- start_time: "2025-01-01T00:00:00Z",
546
- events: [
547
- {
548
- session_id: "test-session",
549
- epic_id: "test-epic",
550
- timestamp: "2025-01-01T00:00:10Z",
551
- event_type: "DECISION",
552
- decision_type: "worker_spawned",
553
- payload: { bead_id: "bd-1" },
554
- },
555
- {
556
- session_id: "test-session",
557
- epic_id: "test-epic",
558
- timestamp: "2025-01-01T00:00:20Z",
559
- event_type: "DECISION",
560
- decision_type: "worker_spawned",
561
- payload: { bead_id: "bd-2" },
562
- },
563
- {
564
- session_id: "test-session",
565
- epic_id: "test-epic",
566
- timestamp: "2025-01-01T00:10:00Z",
567
- event_type: "DECISION",
568
- decision_type: "review_completed",
569
- payload: { bead_id: "bd-1" },
570
- },
571
- {
572
- session_id: "test-session",
573
- epic_id: "test-epic",
574
- timestamp: "2025-01-01T00:10:10Z",
575
- event_type: "DECISION",
576
- decision_type: "review_completed",
577
- payload: { bead_id: "bd-2" },
578
- },
579
- ],
580
- };
581
-
582
- const result = await reviewEfficiency({
583
- output: JSON.stringify(session),
584
- expected: {},
585
- input: undefined,
586
- });
587
-
588
- expect(result.score).toBe(1.0);
589
- expect(result.message).toContain("2 reviews / 2 spawns");
590
- });
591
-
592
- it("penalizes over-reviewing (>2:1 ratio)", async () => {
593
- // 6 reviews for 2 spawns = 3:1 ratio (over-reviewing)
594
- const session: CoordinatorSession = {
595
- session_id: "test-session",
596
- epic_id: "test-epic",
597
- start_time: "2025-01-01T00:00:00Z",
598
- events: [
599
- {
600
- session_id: "test-session",
601
- epic_id: "test-epic",
602
- timestamp: "2025-01-01T00:00:10Z",
603
- event_type: "DECISION",
604
- decision_type: "worker_spawned",
605
- payload: { bead_id: "bd-1" },
606
- },
607
- {
608
- session_id: "test-session",
609
- epic_id: "test-epic",
610
- timestamp: "2025-01-01T00:00:20Z",
611
- event_type: "DECISION",
612
- decision_type: "worker_spawned",
613
- payload: { bead_id: "bd-2" },
614
- },
615
- ...Array.from({ length: 6 }, (_, i) => ({
616
- session_id: "test-session",
617
- epic_id: "test-epic",
618
- timestamp: `2025-01-01T00:10:${String(i * 10).padStart(2, "0")}Z`,
619
- event_type: "DECISION" as const,
620
- decision_type: "review_completed" as const,
621
- payload: { bead_id: `bd-${(i % 2) + 1}` },
622
- })),
623
- ],
624
- };
625
-
626
- const result = await reviewEfficiency({
627
- output: JSON.stringify(session),
628
- expected: {},
629
- input: undefined,
630
- });
631
-
632
- // 3:1 ratio should be penalized (score < 0.5)
633
- expect(result.score).toBeLessThan(0.5);
634
- expect(result.message).toContain("6 reviews / 2 spawns");
635
- });
636
-
637
- it("handles no spawns gracefully", async () => {
638
- const session: CoordinatorSession = {
639
- session_id: "test-session",
640
- epic_id: "test-epic",
641
- start_time: "2025-01-01T00:00:00Z",
642
- events: [
643
- {
644
- session_id: "test-session",
645
- epic_id: "test-epic",
646
- timestamp: "2025-01-01T00:00:00Z",
647
- event_type: "DECISION",
648
- decision_type: "strategy_selected",
649
- payload: { strategy: "file-based" },
650
- },
651
- ],
652
- };
653
-
654
- const result = await reviewEfficiency({
655
- output: JSON.stringify(session),
656
- expected: {},
657
- input: undefined,
658
- });
659
-
660
- expect(result.score).toBe(1.0);
661
- expect(result.message).toContain("No workers spawned");
662
- });
663
-
664
- it("handles no reviews gracefully (0:N ratio)", async () => {
665
- const session: CoordinatorSession = {
666
- session_id: "test-session",
667
- epic_id: "test-epic",
668
- start_time: "2025-01-01T00:00:00Z",
669
- events: [
670
- {
671
- session_id: "test-session",
672
- epic_id: "test-epic",
673
- timestamp: "2025-01-01T00:00:10Z",
674
- event_type: "DECISION",
675
- decision_type: "worker_spawned",
676
- payload: { bead_id: "bd-1" },
677
- },
678
- {
679
- session_id: "test-session",
680
- epic_id: "test-epic",
681
- timestamp: "2025-01-01T00:00:20Z",
682
- event_type: "DECISION",
683
- decision_type: "worker_spawned",
684
- payload: { bead_id: "bd-2" },
685
- },
686
- ],
687
- };
688
-
689
- const result = await reviewEfficiency({
690
- output: JSON.stringify(session),
691
- expected: {},
692
- input: undefined,
693
- });
694
-
695
- // No reviews is bad (should use reviewThoroughness for this)
696
- // But this scorer focuses on over-reviewing, so no reviews = 1.0 (not over-reviewing)
697
- expect(result.score).toBe(1.0);
698
- expect(result.message).toContain("0 reviews / 2 spawns");
699
- });
700
- });
@@ -132,76 +132,6 @@ export const spawnEfficiency = createScorer({
132
132
  },
133
133
  });
134
134
 
135
- /**
136
- * Review Efficiency Scorer
137
- *
138
- * Measures review-to-spawn ratio to detect over-reviewing.
139
- * Ideal ratio is 1:1 (one review per spawned worker).
140
- * Penalizes >2:1 ratio (over-reviewing wastes context).
141
- *
142
- * Scoring:
143
- * - 0:N or 1:1 ratio = 1.0 (perfect)
144
- * - 2:1 ratio = 0.5 (threshold)
145
- * - >2:1 ratio = linear penalty toward 0.0
146
- *
147
- * Score: normalized to 0-1 (lower ratio is better)
148
- */
149
- export const reviewEfficiency = createScorer({
150
- name: "Review Efficiency",
151
- description: "Review-to-spawn ratio (penalize over-reviewing >2:1)",
152
- scorer: ({ output }) => {
153
- try {
154
- const session = JSON.parse(String(output)) as CoordinatorSession;
155
-
156
- // Count worker_spawned events
157
- const spawned = session.events.filter(
158
- (e) =>
159
- e.event_type === "DECISION" && e.decision_type === "worker_spawned"
160
- ).length;
161
-
162
- if (spawned === 0) {
163
- return {
164
- score: 1.0,
165
- message: "No workers spawned",
166
- };
167
- }
168
-
169
- // Count review_completed events
170
- const reviewed = session.events.filter(
171
- (e) =>
172
- e.event_type === "DECISION" && e.decision_type === "review_completed"
173
- ).length;
174
-
175
- const ratio = reviewed / spawned;
176
-
177
- // Scoring:
178
- // - ratio <= 1.0: perfect (1.0)
179
- // - ratio <= 2.0: linear decay from 1.0 to 0.5
180
- // - ratio > 2.0: linear penalty from 0.5 toward 0.0
181
- let score: number;
182
- if (ratio <= 1.0) {
183
- score = 1.0;
184
- } else if (ratio <= 2.0) {
185
- // Linear decay: 1.0 at ratio=1.0, 0.5 at ratio=2.0
186
- score = 1.0 - (ratio - 1.0) * 0.5;
187
- } else {
188
- // Penalty for extreme over-reviewing: 0.5 at ratio=2.0, 0.0 at ratio=4.0
189
- score = Math.max(0, 0.5 - (ratio - 2.0) * 0.25);
190
- }
191
-
192
- return {
193
- score,
194
- message: `${reviewed} reviews / ${spawned} spawns (${ratio.toFixed(1)}:1 ratio)`,
195
- };
196
- } catch (error) {
197
- return {
198
- score: 0,
199
- message: `Failed to parse CoordinatorSession: ${error}`,
200
- };
201
- }
202
- },
203
- });
204
-
205
135
  /**
206
136
  * Review Thoroughness Scorer
207
137
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-swarm-plugin",
3
- "version": "0.42.0",
3
+ "version": "0.42.1",
4
4
  "description": "Multi-agent swarm coordination for OpenCode with learning capabilities, beads integration, and Agent Mail",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -3,6 +3,15 @@
3
3
  *
4
4
  * TDD approach - tests written FIRST to define scorer behavior
5
5
  * Tests the PURE scoring functions (not evalite wrappers)
6
+ *
7
+ * **Case-Sensitivity Verification**:
8
+ * All tool name regexes MUST be case-insensitive (/i flag) because:
9
+ * - LLMs generate inconsistent casing (Edit vs edit, Read vs read)
10
+ * - Fixtures contain mixed case examples
11
+ * - Scoring must be robust to case variations
12
+ *
13
+ * Fixed in commit adding /i flags to Edit, Write, bash patterns.
14
+ * Tests added to prevent regression.
6
15
  */
7
16
 
8
17
  import { describe, expect, test } from "bun:test";
@@ -15,6 +24,109 @@ import {
15
24
  scorePostCompactionDiscipline,
16
25
  } from "./compaction-prompt-scoring.js";
17
26
 
27
+ describe("Case-Insensitive Tool Detection (Regression Prevention)", () => {
28
+ test("all scorers handle mixed-case tool names correctly", () => {
29
+ // Real-world example with mixed casing from LLM output
30
+ const prompt: CompactionPrompt = {
31
+ content: `┌─────────────────────────────────────────┐
32
+ │ YOU ARE THE COORDINATOR │
33
+ └─────────────────────────────────────────┘
34
+
35
+ You are coordinating epic mjkw81rkq4c.
36
+
37
+ ## IMMEDIATE ACTIONS
38
+
39
+ 1. swarm_status(epic_id='mjkw81rkq4c', project_key='/path')
40
+ 2. swarmmail_inbox()
41
+
42
+ ## FORBIDDEN TOOLS
43
+
44
+ NEVER use these tools - delegate to workers:
45
+ - edit (file modifications)
46
+ - write (file creation)
47
+ - BASH (shell commands for file mods)
48
+ - swarmmail_reserve (only workers)
49
+ - git commit (workers handle)
50
+
51
+ ALWAYS spawn workers for code changes.`,
52
+ };
53
+
54
+ // Epic ID detection should work
55
+ const epicResult = scoreEpicIdSpecificity(prompt);
56
+ expect(epicResult.score).toBe(1.0);
57
+
58
+ // Actionability should detect swarm_status
59
+ const actionResult = scoreActionability(prompt);
60
+ expect(actionResult.score).toBe(1.0);
61
+
62
+ // Coordinator identity should detect ASCII + NEVER/ALWAYS
63
+ const identityResult = scoreCoordinatorIdentity(prompt);
64
+ expect(identityResult.score).toBe(1.0);
65
+
66
+ // Forbidden tools should detect all 5 despite mixed case
67
+ const forbiddenResult = scoreForbiddenToolsPresent(prompt);
68
+ expect(forbiddenResult.score).toBe(1.0);
69
+ expect(forbiddenResult.message).toContain("All 5");
70
+
71
+ // Post-compaction discipline should detect swarm_status as first tool
72
+ const disciplineResult = scorePostCompactionDiscipline(prompt);
73
+ expect(disciplineResult.score).toBe(1.0);
74
+ });
75
+
76
+ test("forbidden tools scorer detects lowercase tool names", () => {
77
+ // Previously failed before /i flags were added
78
+ const prompt: CompactionPrompt = {
79
+ content: `Don't use: edit, write, bash, swarmmail_reserve, git commit`,
80
+ };
81
+
82
+ const result = scoreForbiddenToolsPresent(prompt);
83
+
84
+ // Should detect all 5 tools regardless of case
85
+ expect(result.score).toBe(1.0);
86
+ expect(result.message).toContain("All 5");
87
+ });
88
+
89
+ test("forbidden tools scorer detects UPPERCASE tool names", () => {
90
+ const prompt: CompactionPrompt = {
91
+ content: `Forbidden: EDIT, WRITE, BASH, swarmmail_reserve, git commit`,
92
+ };
93
+
94
+ const result = scoreForbiddenToolsPresent(prompt);
95
+
96
+ expect(result.score).toBe(1.0);
97
+ expect(result.message).toContain("All 5");
98
+ });
99
+
100
+ test("post-compaction discipline detects mixed-case first tools", () => {
101
+ const testCases = [
102
+ { tool: "EDIT", shouldPass: false },
103
+ { tool: "edit", shouldPass: false },
104
+ { tool: "Edit", shouldPass: false },
105
+ { tool: "WRITE", shouldPass: false },
106
+ { tool: "write", shouldPass: false },
107
+ { tool: "READ", shouldPass: false },
108
+ { tool: "read", shouldPass: false },
109
+ { tool: "swarm_status", shouldPass: true },
110
+ { tool: "SWARM_STATUS", shouldPass: true },
111
+ { tool: "swarmmail_inbox", shouldPass: true },
112
+ ];
113
+
114
+ for (const { tool, shouldPass } of testCases) {
115
+ const prompt: CompactionPrompt = {
116
+ content: `1. ${tool}()`,
117
+ };
118
+
119
+ const result = scorePostCompactionDiscipline(prompt);
120
+
121
+ if (shouldPass) {
122
+ expect(result.score).toBe(1.0);
123
+ } else {
124
+ expect(result.score).toBe(0.0);
125
+ }
126
+ }
127
+ });
128
+ });
129
+
18
130
  describe("epicIdSpecificity scorer", () => {
19
131
  test("scores 1.0 for real epic IDs", () => {
20
132
  const prompt: CompactionPrompt = {
@@ -218,6 +330,33 @@ describe("forbiddenToolsPresent scorer", () => {
218
330
  expect(result.score).toBe(0.0);
219
331
  expect(result.message).toContain("0/5");
220
332
  });
333
+
334
+ test("scores 1.0 with lowercase forbidden tools (case-insensitive)", () => {
335
+ const prompt: CompactionPrompt = {
336
+ content: `🚫 FORBIDDEN TOOLS - NEVER call these:
337
+ - edit (use swarm_spawn_subtask)
338
+ - write (use swarm_spawn_subtask)
339
+ - swarmmail_reserve (only workers reserve)
340
+ - git commit (workers commit)
341
+ - bash (for file modifications)`,
342
+ };
343
+
344
+ const result = scoreForbiddenToolsPresent(prompt);
345
+
346
+ expect(result.score).toBe(1.0);
347
+ expect(result.message).toContain("All 5 forbidden tools");
348
+ });
349
+
350
+ test("scores correctly with mixed case forbidden tools", () => {
351
+ const prompt: CompactionPrompt = {
352
+ content: `Avoid: edit, Write, BASH`,
353
+ };
354
+
355
+ const result = scoreForbiddenToolsPresent(prompt);
356
+
357
+ expect(result.score).toBe(0.6);
358
+ expect(result.message).toContain("3/5");
359
+ });
221
360
  });
222
361
 
223
362
  describe("postCompactionDiscipline scorer", () => {
@@ -297,4 +436,40 @@ describe("postCompactionDiscipline scorer", () => {
297
436
  expect(result.score).toBe(0.0);
298
437
  expect(result.message).toContain("No tool");
299
438
  });
439
+
440
+ test("scores 0.0 when first tool is lowercase 'read' (case-insensitive)", () => {
441
+ const prompt: CompactionPrompt = {
442
+ content: `1. read(file='src/index.ts')
443
+ 2. swarm_status()`,
444
+ };
445
+
446
+ const result = scorePostCompactionDiscipline(prompt);
447
+
448
+ expect(result.score).toBe(0.0);
449
+ expect(result.message).toContain("read");
450
+ });
451
+
452
+ test("scores 0.0 when first tool is lowercase 'edit'", () => {
453
+ const prompt: CompactionPrompt = {
454
+ content: `1. edit(file='src/auth.ts', ...)
455
+ 2. swarm_status()`,
456
+ };
457
+
458
+ const result = scorePostCompactionDiscipline(prompt);
459
+
460
+ expect(result.score).toBe(0.0);
461
+ expect(result.message).toContain("edit");
462
+ });
463
+
464
+ test("scores 0.0 when first tool is lowercase 'write'", () => {
465
+ const prompt: CompactionPrompt = {
466
+ content: `1. write(file='README.md', content='...')
467
+ 2. swarm_status()`,
468
+ };
469
+
470
+ const result = scorePostCompactionDiscipline(prompt);
471
+
472
+ expect(result.score).toBe(0.0);
473
+ expect(result.message).toContain("write");
474
+ });
300
475
  });