npm - opencode-swarm-plugin - Versions diffs - 0.39.1 → 0.40.0 - Mend

opencode-swarm-plugin 0.39.1 → 0.40.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/.hive/issues.jsonl +16 -0
package/CHANGELOG.md +52 -0
package/bin/swarm.test.ts +406 -0
package/bin/swarm.ts +303 -0
package/dist/compaction-hook.d.ts +8 -1
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/compaction-observability.d.ts +173 -0
package/dist/compaction-observability.d.ts.map +1 -0
package/dist/eval-capture.d.ts +93 -0
package/dist/eval-capture.d.ts.map +1 -1
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +36 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +15670 -580
package/dist/plugin.js +15623 -557
package/dist/schemas/task.d.ts +3 -3
package/evals/README.md +113 -0
package/evals/scorers/coordinator-discipline.evalite-test.ts +163 -0
package/evals/scorers/coordinator-discipline.ts +335 -2
package/evals/scorers/index.test.ts +146 -0
package/evals/scorers/index.ts +104 -0
package/evals/swarm-decomposition.eval.ts +9 -2
package/examples/commands/swarm.md +291 -21
package/package.json +1 -1
package/src/compaction-hook.ts +258 -110
package/src/compaction-observability.integration.test.ts +139 -0
package/src/compaction-observability.test.ts +187 -0
package/src/compaction-observability.ts +324 -0
package/src/eval-capture.test.ts +204 -1
package/src/eval-capture.ts +194 -2
package/src/eval-runner.test.ts +96 -0
package/src/eval-runner.ts +356 -0
package/src/hive.ts +34 -0
package/src/index.ts +54 -1
package/src/memory.test.ts +110 -0
package/src/memory.ts +34 -0
package/dist/beads.d.ts +0 -386
package/dist/beads.d.ts.map +0 -1
package/dist/schemas/bead-events.d.ts +0 -698
package/dist/schemas/bead-events.d.ts.map +0 -1
package/dist/schemas/bead.d.ts +0 -255
package/dist/schemas/bead.d.ts.map +0 -1

package/dist/schemas/task.d.ts CHANGED Viewed

@@ -16,8 +16,8 @@ import { z } from "zod";
  */
 export declare const EffortLevelSchema: z.ZodEnum<{
     small: "small";
-    trivial: "trivial";
     medium: "medium";
+    trivial: "trivial";
     large: "large";
 }>;
 export type EffortLevel = z.infer<typeof EffortLevelSchema>;
@@ -39,8 +39,8 @@ export declare const DecomposedSubtaskSchema: z.ZodObject<{
     files: z.ZodArray<z.ZodString>;
     estimated_effort: z.ZodEnum<{
         small: "small";
-        trivial: "trivial";
         medium: "medium";
+        trivial: "trivial";
         large: "large";
     }>;
     risks: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
@@ -74,8 +74,8 @@ export declare const TaskDecompositionSchema: z.ZodObject<{
         files: z.ZodArray<z.ZodString>;
         estimated_effort: z.ZodEnum<{
             small: "small";
-            trivial: "trivial";
             medium: "medium";
+            trivial: "trivial";
             large: "large";
         }>;
         risks: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;

package/evals/README.md CHANGED Viewed

@@ -167,6 +167,119 @@ coordinator-behavior
    → overallDiscipline: 0.89 ✅ PASS (bootstrap phase, collecting data)
 ```
+#### Coordinator Session Capture (Deep Dive)
+**How it works:** Session capture is fully automatic when coordinator tools are used. No manual instrumentation needed.
+**Capture flow:**
+```
+┌─────────────────────────────────────────────────────────────┐
+│                  SESSION CAPTURE FLOW                       │
+│                                                             │
+│  1. Coordinator tool call detected                          │
+│     ├─ swarm_decompose, hive_create_epic, etc.              │
+│     └─ Tool name + args inspected in real-time              │
+│                                                             │
+│  2. Violation detection (planning-guardrails.ts)            │
+│     ├─ detectCoordinatorViolation() checks patterns         │
+│     ├─ Edit/Write tools → coordinator_edited_file           │
+│     ├─ bash with test patterns → coordinator_ran_tests      │
+│     └─ swarmmail_reserve → coordinator_reserved_files       │
+│                                                             │
+│  3. Event emission (eval-capture.ts)                        │
+│     ├─ captureCoordinatorEvent() validates via Zod          │
+│     ├─ Appends JSONL line to session file                   │
+│     └─ ~/.config/swarm-tools/sessions/{session_id}.jsonl    │
+│                                                             │
+│  4. Eval consumption (coordinator-session.eval.ts)          │
+│     ├─ loadCapturedSessions() reads all *.jsonl files       │
+│     ├─ Parses events, reconstructs sessions                 │
+│     └─ Scorers analyze event sequences                      │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+```
+**Event types:**
+| Event Type     | Subtypes                                                              | When Captured                        |
+| -------------- | --------------------------------------------------------------------- | ------------------------------------ |
+| `DECISION`     | strategy_selected, worker_spawned, review_completed, decomposition_complete | Coordinator makes decision           |
+| `VIOLATION`    | coordinator_edited_file, coordinator_ran_tests, coordinator_reserved_files, no_worker_spawned | Protocol violation detected          |
+| `OUTCOME`      | subtask_success, subtask_retry, subtask_failed, epic_complete        | Worker completes or epic finishes    |
+| `COMPACTION`   | detection_complete, prompt_generated, context_injected, resumption_started, tool_call_tracked | Compaction lifecycle events          |
+**Violation detection patterns** (from `planning-guardrails.ts`):
+```typescript
+// File modification detection
+VIOLATION_PATTERNS.FILE_MODIFICATION_TOOLS = ["edit", "write"];
+// Test execution detection (regex patterns in bash commands)
+VIOLATION_PATTERNS.TEST_EXECUTION_PATTERNS = [
+  /\bbun\s+test\b/i,
+  /\bnpm\s+(run\s+)?test/i,
+  /\bjest\b/i,
+  /\bvitest\b/i,
+  // ... and 6 more patterns
+];
+// File reservation detection
+VIOLATION_PATTERNS.RESERVATION_TOOLS = ["swarmmail_reserve", "agentmail_reserve"];
+```
+**Example session file** (`~/.config/swarm-tools/sessions/session-abc123.jsonl`):
+```jsonl
+{"session_id":"session-abc123","epic_id":"mjkw81rkq4c","timestamp":"2025-01-01T12:00:00Z","event_type":"DECISION","decision_type":"strategy_selected","payload":{"strategy":"feature-based"}}
+{"session_id":"session-abc123","epic_id":"mjkw81rkq4c","timestamp":"2025-01-01T12:01:00Z","event_type":"DECISION","decision_type":"decomposition_complete","payload":{"subtask_count":3}}
+{"session_id":"session-abc123","epic_id":"mjkw81rkq4c","timestamp":"2025-01-01T12:02:00Z","event_type":"DECISION","decision_type":"worker_spawned","payload":{"worker_id":"SwiftFire","bead_id":"mjkw81rkq4c.1"}}
+{"session_id":"session-abc123","epic_id":"mjkw81rkq4c","timestamp":"2025-01-01T12:05:00Z","event_type":"VIOLATION","violation_type":"coordinator_edited_file","payload":{"tool":"edit","file":"src/auth.ts"}}
+{"session_id":"session-abc123","epic_id":"mjkw81rkq4c","timestamp":"2025-01-01T12:10:00Z","event_type":"OUTCOME","outcome_type":"subtask_success","payload":{"bead_id":"mjkw81rkq4c.1","duration_ms":480000}}
+```
+**Viewing sessions:**
+```bash
+# List all captured sessions (coming soon)
+swarm log sessions
+# View specific session events
+cat ~/.config/swarm-tools/sessions/session-abc123.jsonl | jq .
+# Filter to violations only
+cat ~/.config/swarm-tools/sessions/*.jsonl | jq 'select(.event_type == "VIOLATION")'
+# Count violations by type
+cat ~/.config/swarm-tools/sessions/*.jsonl | jq -r 'select(.event_type == "VIOLATION") | .violation_type' | sort | uniq -c
+```
+**Why JSONL format?**
+- **Append-only**: No file locking, safe for concurrent writes
+- **Streamable**: Process events one-by-one without loading full file
+- **Line-oriented**: Easy to `grep`, `jq`, `tail -f` for live monitoring
+- **Fault-tolerant**: Corrupted line doesn't break entire file
+**Integration points:**
+| Where                      | What Gets Captured                        | File                    |
+| -------------------------- | ----------------------------------------- | ----------------------- |
+| `swarm_decompose`          | DECISION: strategy_selected, decomposition_complete | sessions/*.jsonl        |
+| `swarm_spawn_subtask`      | DECISION: worker_spawned                  | sessions/*.jsonl        |
+| `swarm_review`             | DECISION: review_completed                | sessions/*.jsonl        |
+| `swarm_complete`           | OUTCOME: subtask_success/failed           | sessions/*.jsonl        |
+| Tool call inspection       | VIOLATION: (real-time pattern matching)   | sessions/*.jsonl        |
+| Compaction hook            | COMPACTION: (all lifecycle stages)        | sessions/*.jsonl        |
+**Source files:**
+- **Schema**: `src/eval-capture.ts` - CoordinatorEventSchema (Zod discriminated union)
+- **Violation detection**: `src/planning-guardrails.ts` - detectCoordinatorViolation()
+- **Capture**: `src/eval-capture.ts` - captureCoordinatorEvent()
+- **Scorers**: `evals/scorers/coordinator-discipline.ts` - violationCount, spawnEfficiency, etc.
+- **Eval**: `evals/coordinator-session.eval.ts` - Real sessions + fixtures
 ### Compaction Prompt (`compaction-prompt.eval.ts`)
 **What it measures:** Quality of continuation prompts after context compaction

package/evals/scorers/coordinator-discipline.evalite-test.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { describe, expect, it } from "bun:test";
 import type { CoordinatorSession } from "../../src/eval-capture.js";
 import {
 	overallDiscipline,
+	reviewEfficiency,
 	reviewThoroughness,
 	spawnEfficiency,
 	timeToFirstSpawn,
@@ -535,3 +536,165 @@ describe("overallDiscipline", () => {
 		expect(result.message).toContain("Speed:");
 	});
 });
+describe("reviewEfficiency", () => {
+	it("scores 1.0 for ideal 1:1 ratio (one review per spawn)", async () => {
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-2" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:10:00Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:10:10Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-2" },
+				},
+			],
+		};
+		const result = await reviewEfficiency({
+			output: JSON.stringify(session),
+			expected: {},
+			input: undefined,
+		});
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("2 reviews / 2 spawns");
+	});
+	it("penalizes over-reviewing (>2:1 ratio)", async () => {
+		// 6 reviews for 2 spawns = 3:1 ratio (over-reviewing)
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-2" },
+				},
+				...Array.from({ length: 6 }, (_, i) => ({
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: `2025-01-01T00:10:${String(i * 10).padStart(2, "0")}Z`,
+					event_type: "DECISION" as const,
+					decision_type: "review_completed" as const,
+					payload: { bead_id: `bd-${(i % 2) + 1}` },
+				})),
+			],
+		};
+		const result = await reviewEfficiency({
+			output: JSON.stringify(session),
+			expected: {},
+			input: undefined,
+		});
+		// 3:1 ratio should be penalized (score < 0.5)
+		expect(result.score).toBeLessThan(0.5);
+		expect(result.message).toContain("6 reviews / 2 spawns");
+	});
+	it("handles no spawns gracefully", async () => {
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "strategy_selected",
+					payload: { strategy: "file-based" },
+				},
+			],
+		};
+		const result = await reviewEfficiency({
+			output: JSON.stringify(session),
+			expected: {},
+			input: undefined,
+		});
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("No workers spawned");
+	});
+	it("handles no reviews gracefully (0:N ratio)", async () => {
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-2" },
+				},
+			],
+		};
+		const result = await reviewEfficiency({
+			output: JSON.stringify(session),
+			expected: {},
+			input: undefined,
+		});
+		// Zero reviews is a problem, but detecting it is reviewThoroughness's job.
+		// This scorer only penalizes over-reviewing, so 0 reviews = 1.0 (not over-reviewing).
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("0 reviews / 2 spawns");
+	});
+});

package/evals/scorers/coordinator-discipline.ts CHANGED Viewed

@@ -70,6 +70,9 @@ export const violationCount = createScorer({
  * Coordinators should delegate work, not do it themselves.
  *
  * Score: workers_spawned / subtasks_planned
+ *
+ * If no decomposition_complete event exists, falls back to counting spawns
+ * and returns 1.0 if any workers were spawned (better than nothing).
  */
 export const spawnEfficiency = createScorer({
   name: "Spawn Efficiency",
@@ -85,7 +88,20 @@ export const spawnEfficiency = createScorer({
           e.decision_type === "decomposition_complete"
       );
+      // Count worker_spawned events
+      const spawned = session.events.filter(
+        (e) =>
+          e.event_type === "DECISION" && e.decision_type === "worker_spawned"
+      ).length;
       if (!decomp) {
+        // Fallback: if workers were spawned but no decomp event, assume they're doing work
+        if (spawned > 0) {
+          return {
+            score: 1.0,
+            message: `${spawned} workers spawned (no decomposition event)`,
+          };
+        }
         return {
           score: 0,
           message: "No decomposition event found",
@@ -101,17 +117,81 @@ export const spawnEfficiency = createScorer({
         };
       }
+      const score = spawned / subtaskCount;
+      return {
+        score,
+        message: `${spawned}/${subtaskCount} workers spawned (${(score * 100).toFixed(0)}%)`,
+      };
+    } catch (error) {
+      return {
+        score: 0,
+        message: `Failed to parse CoordinatorSession: ${error}`,
+      };
+    }
+  },
+});
+/**
+ * Review Efficiency Scorer
+ *
+ * Measures review-to-spawn ratio to detect over-reviewing.
+ * Ideal ratio is 1:1 (one review per spawned worker).
+ * Penalizes >2:1 ratio (over-reviewing wastes context).
+ *
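+ * Scoring (ratio = reviews / spawns):
+ * - 0:N up to 1:1 ratio = 1.0 (perfect)
+ * - between 1:1 and 2:1 = linear decay from 1.0 to 0.5
+ * - 2:1 ratio = 0.5 (threshold)
+ * - >2:1 ratio = linear penalty toward 0.0 (reached at 4:1)
+ * - no workers spawned = 1.0 (nothing to review)
+ *
+ * Score: normalized to 0-1 (lower ratio is better)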
+ */
+export const reviewEfficiency = createScorer({
+  name: "Review Efficiency",
+  description: "Review-to-spawn ratio (penalize over-reviewing >2:1)",
+  scorer: ({ output }) => {
+    try {
+      const session = JSON.parse(String(output)) as CoordinatorSession;
       // Count worker_spawned events
       const spawned = session.events.filter(
         (e) =>
           e.event_type === "DECISION" && e.decision_type === "worker_spawned"
       ).length;
-      const score = spawned / subtaskCount;
+      if (spawned === 0) {
+        return {
+          score: 1.0,
+          message: "No workers spawned",
+        };
+      }
+      // Count review_completed events
+      const reviewed = session.events.filter(
+        (e) =>
+          e.event_type === "DECISION" && e.decision_type === "review_completed"
+      ).length;
+      const ratio = reviewed / spawned;
+      // Scoring:
+      // - ratio <= 1.0: perfect (1.0)
+      // - ratio <= 2.0: linear decay from 1.0 to 0.5
+      // - ratio > 2.0: linear penalty from 0.5 toward 0.0
+      let score: number;
+      if (ratio <= 1.0) {
+        score = 1.0;
+      } else if (ratio <= 2.0) {
+        // Linear decay: 1.0 at ratio=1.0, 0.5 at ratio=2.0
+        score = 1.0 - (ratio - 1.0) * 0.5;
+      } else {
+        // Penalty for extreme over-reviewing: 0.5 at ratio=2.0, 0.0 at ratio=4.0
+        score = Math.max(0, 0.5 - (ratio - 2.0) * 0.25);
+      }
       return {
         score,
-        message: `${spawned}/${subtaskCount} workers spawned (${(score * 100).toFixed(0)}%)`,
+        message: `${reviewed} reviews / ${spawned} spawns (${ratio.toFixed(1)}:1 ratio)`,
       };
     } catch (error) {
       return {
@@ -254,6 +334,259 @@ export const timeToFirstSpawn = createScorer({
   },
 });
+/**
+ * Researcher Spawn Rate Scorer
+ *
+ * Measures whether coordinator spawns researchers for unfamiliar technology.
+ * Coordinators should delegate research instead of calling pdf-brain/context7 directly.
+ *
+ * Score: 1.0 if researcher_spawned events exist, 0.0 otherwise
+ */
+export const researcherSpawnRate = createScorer({
+  name: "Researcher Spawn Rate",
+  description: "Coordinator spawned researchers for unfamiliar tech",
+  scorer: ({ output }) => {
+    try {
+      const session = JSON.parse(String(output)) as CoordinatorSession;
+      // Count researcher_spawned events
+      const researchers = session.events.filter(
+        (e) =>
+          e.event_type === "DECISION" && e.decision_type === "researcher_spawned"
+      );
+      const count = researchers.length;
+      if (count === 0) {
+        return {
+          score: 0.0,
+          message: "No researchers spawned (may indicate coordinator queried docs directly)",
+        };
+      }
+      return {
+        score: 1.0,
+        message: `${count} researcher(s) spawned`,
+      };
+    } catch (error) {
+      return {
+        score: 0,
+        message: `Failed to parse CoordinatorSession: ${error}`,
+      };
+    }
+  },
+});
+/**
+ * Skill Loading Rate Scorer
+ *
+ * Measures whether coordinator loads relevant skills via skills_use().
+ * Shows knowledge-seeking behavior.
+ *
+ * Score: 1.0 if skill_loaded events exist, 0.5 otherwise (not critical, but helpful)
+ */
+export const skillLoadingRate = createScorer({
+  name: "Skill Loading Rate",
+  description: "Coordinator loaded relevant skills for domain knowledge",
+  scorer: ({ output }) => {
+    try {
+      const session = JSON.parse(String(output)) as CoordinatorSession;
+      // Count skill_loaded events
+      const skills = session.events.filter(
+        (e) =>
+          e.event_type === "DECISION" && e.decision_type === "skill_loaded"
+      );
+      const count = skills.length;
+      if (count === 0) {
+        return {
+          score: 0.5,
+          message: "No skills loaded (not critical, but helpful)",
+        };
+      }
+      return {
+        score: 1.0,
+        message: `${count} skill(s) loaded`,
+      };
+    } catch (error) {
+      return {
+        score: 0,
+        message: `Failed to parse CoordinatorSession: ${error}`,
+      };
+    }
+  },
+});
+/**
+ * Inbox Monitoring Rate Scorer
+ *
+ * Measures how frequently coordinator checks inbox for worker messages.
+ * Regular monitoring (every ~15min or when workers finish) shows good coordination.
+ *
+ * Score based on inbox_checked events relative to worker activity:
+ * - no worker activity = 1.0 (nothing to monitor)
+ * - 0 checks = 0.0 (coordinator not monitoring)
+ * - 1+ checks = 1.0 (coordinator is responsive)
+ */
+export const inboxMonitoringRate = createScorer({
+  name: "Inbox Monitoring Rate",
+  description: "Coordinator checked inbox regularly for worker messages",
+  scorer: ({ output }) => {
+    try {
+      const session = JSON.parse(String(output)) as CoordinatorSession;
+      // Count inbox_checked events
+      const checks = session.events.filter(
+        (e) =>
+          e.event_type === "DECISION" && e.decision_type === "inbox_checked"
+      );
+      // Count worker activity (spawns + outcomes)
+      const workerActivity = session.events.filter(
+        (e) =>
+          (e.event_type === "DECISION" && e.decision_type === "worker_spawned") ||
+          (e.event_type === "OUTCOME" &&
+            ["subtask_success", "subtask_failed", "blocker_detected"].includes(
+              e.outcome_type
+            ))
+      );
+      const checkCount = checks.length;
+      const activityCount = workerActivity.length;
+      if (activityCount === 0) {
+        return {
+          score: 1.0,
+          message: "No worker activity to monitor",
+        };
+      }
+      if (checkCount === 0) {
+        return {
+          score: 0.0,
+          message: `${activityCount} worker events, 0 inbox checks (not monitoring)`,
+        };
+      }
+      return {
+        score: 1.0,
+        message: `${checkCount} inbox check(s) for ${activityCount} worker events`,
+      };
+    } catch (error) {
+      return {
+        score: 0,
+        message: `Failed to parse CoordinatorSession: ${error}`,
+      };
+    }
+  },
+});
+/**
+ * Blocker Response Time Scorer
+ *
+ * Measures how quickly coordinator responds to blocked workers.
+ * Time between blocker_detected (OUTCOME) and blocker_resolved (DECISION).
+ *
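+ * Normalization (applied to the average response time):
+ * - < 5min: 1.0 (excellent)
+ * - 5-15min: linear decay from 1.0 to 0.0
+ * - > 15min: 0.0 (too slow, worker is idle)
+ *
+ * Score: Derived from the average response time across blockers that have a
+ * matching resolution (matched by subtask_id)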
+ */
+export const blockerResponseTime = createScorer({
+  name: "Blocker Response Time",
+  description: "Coordinator unblocked workers quickly",
+  scorer: ({ output }) => {
+    try {
+      const session = JSON.parse(String(output)) as CoordinatorSession;
+      // Find blocker_detected events
+      const blockers = session.events.filter(
+        (e) =>
+          e.event_type === "OUTCOME" && e.outcome_type === "blocker_detected"
+      );
+      if (blockers.length === 0) {
+        return {
+          score: 1.0,
+          message: "No blockers detected",
+        };
+      }
+      // Find blocker_resolved events
+      const resolutions = session.events.filter(
+        (e) =>
+          e.event_type === "DECISION" && e.decision_type === "blocker_resolved"
+      );
+      if (resolutions.length === 0) {
+        return {
+          score: 0.0,
+          message: `${blockers.length} blocker(s) detected, 0 resolved (workers still blocked)`,
+        };
+      }
+      // Match blockers to resolutions by subtask_id and calculate response times
+      const responseTimes: number[] = [];
+      for (const blocker of blockers) {
+        const subtaskId = (blocker.payload as any).subtask_id;
+        const blockerTime = new Date(blocker.timestamp).getTime();
+        // Find resolution for this subtask
+        const resolution = resolutions.find(
+          (r) => (r.payload as any).subtask_id === subtaskId
+        );
+        if (resolution) {
+          const resolutionTime = new Date(resolution.timestamp).getTime();
+          const deltaMs = resolutionTime - blockerTime;
+          responseTimes.push(deltaMs);
+        }
+      }
+      if (responseTimes.length === 0) {
+        return {
+          score: 0.5,
+          message: `${blockers.length} blocker(s) detected, ${resolutions.length} resolution(s), but no matches by subtask_id`,
+        };
+      }
+      // Calculate average response time
+      const avgResponseMs =
+        responseTimes.reduce((sum, t) => sum + t, 0) / responseTimes.length;
+      // Normalize: < 5min = 1.0, > 15min = 0.0, linear in between
+      const EXCELLENT_MS = 5 * 60 * 1000; // 5 min
+      const POOR_MS = 15 * 60 * 1000; // 15 min
+      let score: number;
+      if (avgResponseMs < EXCELLENT_MS) {
+        score = 1.0;
+      } else if (avgResponseMs > POOR_MS) {
+        score = 0.0;
+      } else {
+        // Linear decay from 1.0 to 0.0
+        score = 1.0 - (avgResponseMs - EXCELLENT_MS) / (POOR_MS - EXCELLENT_MS);
+      }
+      const avgMinutes = Math.round(avgResponseMs / 1000 / 60);
+      return {
+        score,
+        message: `Avg response time: ${avgMinutes}min (${responseTimes.length}/${blockers.length} blockers resolved)`,
+      };
+    } catch (error) {
+      return {
+        score: 0,
+        message: `Failed to parse CoordinatorSession: ${error}`,
+      };
+    }
+  },
+});
 /**
  * Overall Discipline Scorer
  *