npm - opencode-swarm-plugin - Versions diffs - 0.37.0 → 0.39.1 - Mend

opencode-swarm-plugin 0.37.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

package/.env +2 -0
package/.hive/eval-results.json +26 -0
package/.hive/issues.jsonl +20 -5
package/.hive/memories.jsonl +35 -1
package/.opencode/eval-history.jsonl +12 -0
package/.turbo/turbo-build.log +4 -4
package/.turbo/turbo-test.log +319 -319
package/CHANGELOG.md +258 -0
package/README.md +50 -0
package/bin/swarm.test.ts +475 -0
package/bin/swarm.ts +385 -208
package/dist/compaction-hook.d.ts +1 -1
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/compaction-prompt-scoring.d.ts +124 -0
package/dist/compaction-prompt-scoring.d.ts.map +1 -0
package/dist/eval-capture.d.ts +81 -1
package/dist/eval-capture.d.ts.map +1 -1
package/dist/eval-gates.d.ts +84 -0
package/dist/eval-gates.d.ts.map +1 -0
package/dist/eval-history.d.ts +117 -0
package/dist/eval-history.d.ts.map +1 -0
package/dist/eval-learning.d.ts +216 -0
package/dist/eval-learning.d.ts.map +1 -0
package/dist/hive.d.ts +59 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +87 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +823 -131
package/dist/plugin.js +655 -131
package/dist/post-compaction-tracker.d.ts +133 -0
package/dist/post-compaction-tracker.d.ts.map +1 -0
package/dist/swarm-decompose.d.ts +30 -0
package/dist/swarm-decompose.d.ts.map +1 -1
package/dist/swarm-orchestrate.d.ts +23 -0
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts +25 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm.d.ts +19 -0
package/dist/swarm.d.ts.map +1 -1
package/evals/README.md +595 -94
package/evals/compaction-prompt.eval.ts +149 -0
package/evals/coordinator-behavior.eval.ts +8 -8
package/evals/fixtures/compaction-prompt-cases.ts +305 -0
package/evals/lib/compaction-loader.test.ts +248 -0
package/evals/lib/compaction-loader.ts +320 -0
package/evals/lib/data-loader.test.ts +345 -0
package/evals/lib/data-loader.ts +107 -6
package/evals/scorers/compaction-prompt-scorers.ts +145 -0
package/evals/scorers/compaction-scorers.ts +13 -13
package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
package/evals/scorers/coordinator-discipline.ts +13 -13
package/examples/plugin-wrapper-template.ts +177 -8
package/package.json +7 -2
package/scripts/migrate-unknown-sessions.ts +349 -0
package/src/compaction-capture.integration.test.ts +257 -0
package/src/compaction-hook.test.ts +139 -2
package/src/compaction-hook.ts +113 -2
package/src/compaction-prompt-scorers.test.ts +299 -0
package/src/compaction-prompt-scoring.ts +298 -0
package/src/eval-capture.test.ts +422 -0
package/src/eval-capture.ts +94 -2
package/src/eval-gates.test.ts +306 -0
package/src/eval-gates.ts +218 -0
package/src/eval-history.test.ts +508 -0
package/src/eval-history.ts +214 -0
package/src/eval-learning.test.ts +378 -0
package/src/eval-learning.ts +360 -0
package/src/index.ts +61 -1
package/src/post-compaction-tracker.test.ts +251 -0
package/src/post-compaction-tracker.ts +237 -0
package/src/swarm-decompose.test.ts +40 -47
package/src/swarm-decompose.ts +2 -2
package/src/swarm-orchestrate.test.ts +270 -7
package/src/swarm-orchestrate.ts +100 -13
package/src/swarm-prompts.test.ts +121 -0
package/src/swarm-prompts.ts +297 -4
package/src/swarm-research.integration.test.ts +157 -0
package/src/swarm-review.ts +3 -3
/package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0

package/src/post-compaction-tracker.ts ADDED Viewed

@@ -0,0 +1,237 @@
+/**
+ * Post-Compaction Tool Call Tracker
+ *
+ * Tracks tool calls after compaction resumption to detect coordinator violations
+ * and provide learning signals for eval-driven development.
+ *
+ * ## Purpose
+ *
+ * When context is compacted, the continuation agent needs observation to learn
+ * if it's following coordinator discipline. This tracker:
+ *
+ * 1. Emits resumption_started on first tool call (marks compaction exit)
+ * 2. Tracks up to N tool calls (default 20) with violation detection
+ * 3. Stops tracking after limit to avoid noise in long sessions
+ *
+ * ## Coordinator Violations Detected
+ *
+ * - **edit/write**: Coordinators NEVER edit or write files - spawn worker instead
+ * - **swarmmail_reserve/agentmail_reserve**: Workers reserve, not coordinators
+ *
+ * ## Integration
+ *
+ * Used by compaction hook to wire tool.call events → eval capture.
+ *
+ * @example
+ * ```typescript
+ * const tracker = createPostCompactionTracker({
+ *   sessionId: "session-123",
+ *   epicId: "bd-epic-456",
+ *   onEvent: captureCompactionEvent,
+ * });
+ *
+ * // Wire to OpenCode hook
+ * hooks["tool.call"] = (input) => {
+ *   tracker.trackToolCall({
+ *     tool: input.tool,
+ *     args: input.args,
+ *     timestamp: Date.now(),
+ *   });
+ * };
+ * ```
+ */
+/**
+ * Tool call event structure
+ *
+ * One tool invocation handed to the tracker through `trackToolCall`.
+ *
+ * - `tool`: name of the invoked tool; this value is what gets checked by
+ *   `isCoordinatorViolation`.
+ * - `args`: arguments of the call, forwarded unchanged into the emitted
+ *   `tool_call_tracked` payload.
+ * - `timestamp`: time of the call, copied into emitted payloads. The usage
+ *   examples in this file pass `Date.now()` (epoch milliseconds) - presumably
+ *   the expected unit; confirm against callers.
+ */
+export interface ToolCallEvent {
+  tool: string;
+  args: Record<string, unknown>;
+  timestamp: number;
+}
+/**
+ * Compaction event payload (matches eval-capture.ts structure)
+ *
+ * This is the event shape delivered to `PostCompactionTrackerConfig.onEvent`.
+ * The "matches eval-capture.ts" note is the original author's; that file is
+ * not shown here, so verify the shapes agree before relying on it.
+ *
+ * - `session_id` / `epic_id`: identify the session and epic being observed.
+ * - `compaction_type`: lifecycle stage of the event. The tracker defined in
+ *   this file only emits `resumption_started` and `tool_call_tracked`; the
+ *   remaining members are not produced here.
+ * - `payload`: stage-specific details, every field optional.
+ *   - `resumption_started` events carry `session_id`, `epic_id` and
+ *     `timestamp`.
+ *   - `tool_call_tracked` events carry `tool`, `args`, `call_number`
+ *     (1-based), `is_coordinator_violation`, `violation_reason` (undefined
+ *     when the call is not a violation) and `timestamp`.
+ */
+export interface CompactionEvent {
+  session_id: string;
+  epic_id: string;
+  compaction_type:
+    | "detection_complete"
+    | "prompt_generated"
+    | "context_injected"
+    | "resumption_started"
+    | "tool_call_tracked";
+  payload: {
+    session_id?: string;
+    epic_id?: string;
+    tool?: string;
+    args?: Record<string, unknown>;
+    call_number?: number;
+    is_coordinator_violation?: boolean;
+    violation_reason?: string;
+    timestamp?: number;
+  };
+}
+/**
+ * Tracker configuration
+ *
+ * - `sessionId` / `epicId`: copied into every event the tracker emits.
+ * - `onEvent`: callback invoked synchronously from `trackToolCall` for each
+ *   emitted event.
+ * - `maxCalls`: maximum number of tool calls to track; falls back to
+ *   `DEFAULT_MAX_TRACKED_CALLS` when omitted.
+ */
+export interface PostCompactionTrackerConfig {
+  sessionId: string;
+  epicId: string;
+  onEvent: (event: CompactionEvent) => void;
+  maxCalls?: number;
+}
+/**
+ * Post-compaction tracker instance
+ *
+ * As implemented by `createPostCompactionTracker`:
+ * - `trackToolCall`: reports one tool call; calls arriving after the
+ *   configured limit has been reached are ignored.
+ * - `isTracking`: true while fewer calls than the limit have been tracked.
+ */
+export interface PostCompactionTracker {
+  trackToolCall(event: ToolCallEvent): void;
+  isTracking(): boolean;
+}
+// ============================================================================
+// Constants
+// ============================================================================
+/**
+ * Default maximum number of tool calls to track
+ *
+ * Used by `createPostCompactionTracker` when `maxCalls` is not supplied.
+ *
+ * Chosen to balance:
+ * - Enough data for pattern detection (20 calls is ~2-3 minutes of coordinator work)
+ * - Avoiding noise pollution in long sessions
+ */
+export const DEFAULT_MAX_TRACKED_CALLS = 20;
+// ============================================================================
+// Coordinator Violation Detection
+// ============================================================================
+/**
+ * Tools that coordinators are NEVER allowed to use, mapped to the reason
+ * reported when one of them is called.
+ *
+ * Key insight from semantic memory: coordinators lose identity after compaction
+ * and start doing implementation work. These violations are observable signals
+ * that the coordinator mandate wasn't preserved in continuation prompt.
+ *
+ * Keys are compared exactly (case-sensitive) against the tool name.
+ */
+const FORBIDDEN_COORDINATOR_TOOLS: Record<string, string> = {
+  edit: "Coordinators NEVER edit files - spawn worker instead",
+  write: "Coordinators NEVER write files - spawn worker instead",
+  swarmmail_reserve: "Coordinators NEVER reserve files - workers reserve files",
+  agentmail_reserve: "Coordinators NEVER reserve files - workers reserve files",
+};
+/**
+ * Check if tool call is a coordinator violation
+ *
+ * Only keys defined directly on the forbidden-tool table count. Names that
+ * merely exist on `Object.prototype` (`constructor`, `toString`,
+ * `__proto__`, ...) are reported as non-violations.
+ *
+ * @param tool - Tool name from OpenCode tool.call hook
+ * @returns Violation status; `reason` is set only when the tool is forbidden
+ *
+ * @example
+ * ```typescript
+ * isCoordinatorViolation("edit");
+ * // { isViolation: true, reason: "Coordinators NEVER edit..." }
+ *
+ * isCoordinatorViolation("read");
+ * // { isViolation: false, reason: undefined }
+ * ```
+ */
+export function isCoordinatorViolation(tool: string): {
+  isViolation: boolean;
+  reason?: string;
+} {
+  // A plain index lookup would also resolve inherited members of the object
+  // literal, flagging tool names like "constructor" as violations and
+  // returning a non-string reason. Restrict the lookup to own keys.
+  const reason = Object.prototype.hasOwnProperty.call(
+    FORBIDDEN_COORDINATOR_TOOLS,
+    tool,
+  )
+    ? FORBIDDEN_COORDINATOR_TOOLS[tool]
+    : undefined;
+  return {
+    isViolation: reason !== undefined,
+    reason,
+  };
+}
+// ============================================================================
+// Tracker Factory
+// ============================================================================
+/**
+ * Build a tracker that observes the first tool calls made after a
+ * compaction resumption.
+ *
+ * The returned tracker forwards events to `config.onEvent`:
+ * - one `resumption_started` event, emitted right before the first tracked
+ *   call is reported;
+ * - one `tool_call_tracked` event per call, numbered from 1 and annotated
+ *   with the result of `isCoordinatorViolation`.
+ *
+ * Once `maxCalls` calls (default `DEFAULT_MAX_TRACKED_CALLS`) have been
+ * tracked, further calls are dropped without emitting anything.
+ *
+ * @param config - Session/epic identifiers, event sink and optional call limit
+ * @returns Tracker exposing `trackToolCall` and `isTracking`
+ *
+ * @example
+ * ```typescript
+ * const tracker = createPostCompactionTracker({
+ *   sessionId: "session-123",
+ *   epicId: "bd-epic-456",
+ *   onEvent: (event) => captureCompactionEvent(event),
+ *   maxCalls: 20
+ * });
+ *
+ * tracker.trackToolCall({
+ *   tool: "read",
+ *   args: { filePath: "/test.ts" },
+ *   timestamp: Date.now()
+ * });
+ * ```
+ */
+export function createPostCompactionTracker(
+  config: PostCompactionTrackerConfig,
+): PostCompactionTracker {
+  const {
+    sessionId: session,
+    epicId: epic,
+    onEvent: emit,
+    maxCalls: limit = DEFAULT_MAX_TRACKED_CALLS,
+  } = config;
+  let tracked = 0;
+  let resumptionAnnounced = false;
+  // One-off marker for leaving compaction; the flag flips only after the
+  // sink has returned.
+  const announceResumption = (at: number): void => {
+    emit({
+      session_id: session,
+      epic_id: epic,
+      compaction_type: "resumption_started",
+      payload: {
+        session_id: session,
+        epic_id: epic,
+        timestamp: at,
+      },
+    });
+    resumptionAnnounced = true;
+  };
+  const tracker: PostCompactionTracker = {
+    trackToolCall(event: ToolCallEvent): void {
+      // Past the limit: drop silently
+      if (tracked >= limit) {
+        return;
+      }
+      if (!resumptionAnnounced) {
+        announceResumption(event.timestamp);
+      }
+      // Count first so the emitted call_number starts at 1
+      tracked += 1;
+      const { isViolation, reason } = isCoordinatorViolation(event.tool);
+      emit({
+        session_id: session,
+        epic_id: epic,
+        compaction_type: "tool_call_tracked",
+        payload: {
+          tool: event.tool,
+          args: event.args,
+          call_number: tracked,
+          is_coordinator_violation: isViolation,
+          violation_reason: reason,
+          timestamp: event.timestamp,
+        },
+      });
+    },
+    isTracking: (): boolean => tracked < limit,
+  };
+  return tracker;
+}

package/src/swarm-decompose.test.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  * TDD: Testing eval capture integration - verifies captureDecomposition() is called
  * after successful validation with correct parameters.
  */
-import { afterEach, beforeEach, describe, expect, test, mock } from "bun:test";
+import { afterEach, beforeEach, describe, expect, test, spyOn } from "bun:test";
 import * as fs from "node:fs";
 import { swarm_validate_decomposition } from "./swarm-decompose";
 import * as evalCapture from "./eval-capture.js";
@@ -41,15 +41,8 @@ afterEach(() => {
 describe("captureDecomposition integration", () => {
   test("calls captureDecomposition after successful validation with all params", async () => {
-    // Mock captureDecomposition to spy on calls
-    const captureDecompositionSpy = mock(() => ({
-      id: "test-epic-123",
-      timestamp: new Date().toISOString(),
-      task: "Add user authentication",
-    }));
-    const original = evalCapture.captureDecomposition;
-    // @ts-expect-error - mocking for test
-    evalCapture.captureDecomposition = captureDecompositionSpy;
+    // Spy on captureDecomposition
+    const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
     const validCellTree = JSON.stringify({
       epic: {
@@ -91,27 +84,37 @@ describe("captureDecomposition integration", () => {
     // Verify captureDecomposition was called with correct params
     expect(captureDecompositionSpy).toHaveBeenCalledTimes(1);
-    const callArgs = captureDecompositionSpy.mock.calls[0][0];
-    expect(callArgs.epicId).toBe("test-epic-123");
-    expect(callArgs.projectPath).toBe(testProjectPath);
-    expect(callArgs.task).toBe("Add user authentication");
-    expect(callArgs.context).toBe("Using NextAuth.js");
-    expect(callArgs.strategy).toBe("feature-based");
-    expect(callArgs.epicTitle).toBe("Add OAuth");
-    expect(callArgs.epicDescription).toBe("Implement OAuth authentication");
-    expect(callArgs.subtasks).toHaveLength(2);
-    expect(callArgs.subtasks[0].title).toBe("Add OAuth provider config");
-    // Restore
-    // @ts-expect-error - restoring mock
-    evalCapture.captureDecomposition = original;
+    expect(captureDecompositionSpy).toHaveBeenCalledWith({
+      epicId: "test-epic-123",
+      projectPath: testProjectPath,
+      task: "Add user authentication",
+      context: "Using NextAuth.js",
+      strategy: "feature-based",
+      epicTitle: "Add OAuth",
+      epicDescription: "Implement OAuth authentication",
+      subtasks: [
+        {
+          title: "Add OAuth provider config",
+          description: "Set up Google OAuth",
+          files: ["src/auth/google.ts", "src/auth/config.ts"],
+          dependencies: [],
+          estimated_complexity: 2,
+        },
+        {
+          title: "Add login UI",
+          description: "Create login button component",
+          files: ["src/components/LoginButton.tsx"],
+          dependencies: [0],
+          estimated_complexity: 1,
+        },
+      ],
+    });
+    captureDecompositionSpy.mockRestore();
   });
   test("does not call captureDecomposition when validation fails", async () => {
-    const captureDecompositionSpy = mock(() => ({}));
-    const original = evalCapture.captureDecomposition;
-    // @ts-expect-error - mocking for test
-    evalCapture.captureDecomposition = captureDecompositionSpy;
+    const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
     // Invalid CellTree - missing required fields
     const invalidCellTree = JSON.stringify({
@@ -136,20 +139,11 @@ describe("captureDecomposition integration", () => {
     // Verify captureDecomposition was NOT called
     expect(captureDecompositionSpy).not.toHaveBeenCalled();
-    // Restore
-    // @ts-expect-error - restoring mock
-    evalCapture.captureDecomposition = original;
+    captureDecompositionSpy.mockRestore();
   });
   test("handles optional context and description fields", async () => {
-    const captureDecompositionSpy = mock(() => ({
-      id: "test-epic-789",
-      timestamp: new Date().toISOString(),
-      task: "Fix the auth bug",
-    }));
-    const original = evalCapture.captureDecomposition;
-    // @ts-expect-error - mocking for test
-    evalCapture.captureDecomposition = captureDecompositionSpy;
+    const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
     const validCellTree = JSON.stringify({
       epic: {
@@ -183,13 +177,12 @@ describe("captureDecomposition integration", () => {
     // Verify captureDecomposition was called without optional fields
     expect(captureDecompositionSpy).toHaveBeenCalledTimes(1);
-    const callArgs = captureDecompositionSpy.mock.calls[0][0];
-    expect(callArgs.epicId).toBe("test-epic-789");
-    expect(callArgs.context).toBeUndefined();
-    expect(callArgs.epicDescription).toBeUndefined();
-    // Restore
-    // @ts-expect-error - restoring mock
-    evalCapture.captureDecomposition = original;
+    const call = captureDecompositionSpy.mock.calls[0];
+    expect(call[0].epicId).toBe("test-epic-789");
+    expect(call[0].context).toBeUndefined();
+    // Schema default makes description empty string instead of undefined
+    expect(call[0].epicDescription).toBe("");
+    captureDecompositionSpy.mockRestore();
   });
 });

package/src/swarm-decompose.ts CHANGED Viewed

@@ -753,7 +753,7 @@ export const swarm_delegate_planning = tool({
       .default(true)
       .describe("Query CASS for similar past tasks (default: true)"),
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     // Import needed modules
     const { selectStrategy, formatStrategyGuidelines } =
       await import("./swarm-strategies");
@@ -777,7 +777,7 @@ export const swarm_delegate_planning = tool({
     // Capture strategy selection decision
     try {
       captureCoordinatorEvent({
-        session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+        session_id: _ctx.sessionID || "unknown",
         epic_id: "planning", // No epic ID yet - this is pre-decomposition
         timestamp: new Date().toISOString(),
         event_type: "DECISION",

package/src/swarm-orchestrate.test.ts CHANGED Viewed

@@ -6,10 +6,13 @@
  * - Researcher spawning for identified technologies
  * - Summary collection from semantic-memory
  * - Research result aggregation
+ * - Eval capture integration (captureSubtaskOutcome wiring)
  */
-import { describe, test, expect, beforeEach } from "bun:test";
-import { runResearchPhase, extractTechStack } from "./swarm-orchestrate";
+import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test";
+import { runResearchPhase, extractTechStack, swarm_complete } from "./swarm-orchestrate";
+import * as evalCapture from "./eval-capture.js";
+import * as fs from "node:fs";
 describe("extractTechStack", () => {
   test("extracts Next.js from task description", () => {
@@ -115,9 +118,269 @@ describe("runResearchPhase", () => {
   });
 });
-describe("swarm_research_phase tool", () => {
-  test.todo("exposes research phase as plugin tool");
-  test.todo("validates task parameter");
-  test.todo("validates project_path parameter");
-  test.todo("returns JSON string with research results");
+// describe("swarm_research_phase tool", () => {
+//   test.todo("exposes research phase as plugin tool");
+//   test.todo("validates task parameter");
+//   test.todo("validates project_path parameter");
+//   test.todo("returns JSON string with research results");
+// });
+// ============================================================================
+// Eval Capture Integration Tests (swarm_complete)
+// ============================================================================
+describe("captureSubtaskOutcome integration", () => {
+  const mockContext = {
+    sessionID: `test-complete-${Date.now()}`,
+    messageID: `test-message-${Date.now()}`,
+    agent: "test-agent",
+    abort: new AbortController().signal,
+  };
+  let testProjectPath: string;
+  beforeEach(async () => {
+    testProjectPath = `/tmp/test-swarm-complete-${Date.now()}`;
+    fs.mkdirSync(testProjectPath, { recursive: true });
+    // Create .hive directory and issues.jsonl
+    const hiveDir = `${testProjectPath}/.hive`;
+    fs.mkdirSync(hiveDir, { recursive: true });
+    fs.writeFileSync(`${hiveDir}/issues.jsonl`, "", "utf-8");
+    // Set hive working directory to testProjectPath
+    const { setHiveWorkingDirectory } = await import("./hive");
+    setHiveWorkingDirectory(testProjectPath);
+  });
+  afterEach(() => {
+    if (fs.existsSync(testProjectPath)) {
+      fs.rmSync(testProjectPath, { recursive: true, force: true });
+    }
+  });
+  // Verifies that a successful swarm_complete call forwards epic, bead, file
+  // and timing details to captureSubtaskOutcome.
+  test("calls captureSubtaskOutcome after successful completion with all params", async () => {
+    // Import hive tools
+    const { hive_create_epic } = await import("./hive");
+    // Spy on captureSubtaskOutcome
+    const captureOutcomeSpy = spyOn(evalCapture, "captureSubtaskOutcome");
+    try {
+      // Create an epic with a subtask using hive_create_epic
+      const epicResult = await hive_create_epic.execute({
+        epic_title: "Add OAuth",
+        epic_description: "Implement OAuth authentication",
+        subtasks: [
+          {
+            title: "Add auth service",
+            priority: 2,
+            files: ["src/auth/service.ts", "src/auth/schema.ts"],
+          },
+        ],
+      }, mockContext);
+      const epicData = JSON.parse(epicResult);
+      expect(epicData.success).toBe(true);
+      const epicId = epicData.epic.id;
+      const beadId = epicData.subtasks[0].id;
+      const startTime = Date.now() - 120000; // Started 2 minutes ago
+      const plannedFiles = ["src/auth/service.ts", "src/auth/schema.ts"];
+      const actualFiles = ["src/auth/service.ts", "src/auth/schema.ts", "src/auth/types.ts"];
+      // Call swarm_complete
+      const result = await swarm_complete.execute(
+        {
+          project_key: testProjectPath,
+          agent_name: "TestAgent",
+          bead_id: beadId,
+          summary: "Implemented OAuth service with JWT strategy",
+          files_touched: actualFiles,
+          skip_verification: true, // Skip verification for test
+          skip_review: true, // Skip review for test
+          planned_files: plannedFiles,
+          start_time: startTime,
+          error_count: 0,
+          retry_count: 0,
+        },
+        mockContext,
+      );
+      const parsed = JSON.parse(result);
+      expect(parsed.success).toBe(true);
+      // Verify captureSubtaskOutcome was called with correct params
+      expect(captureOutcomeSpy).toHaveBeenCalledTimes(1);
+      const call = captureOutcomeSpy.mock.calls[0][0];
+      expect(call.epicId).toBe(epicId);
+      expect(call.projectPath).toBe(testProjectPath);
+      expect(call.beadId).toBe(beadId);
+      expect(call.title).toBe("Add auth service");
+      expect(call.plannedFiles).toEqual(plannedFiles);
+      expect(call.actualFiles).toEqual(actualFiles);
+      expect(call.durationMs).toBeGreaterThan(0);
+      expect(call.errorCount).toBe(0);
+      expect(call.retryCount).toBe(0);
+      expect(call.success).toBe(true);
+    } finally {
+      // Restore even when an assertion above throws, so the spy (and its
+      // recorded calls) cannot leak into later tests.
+      captureOutcomeSpy.mockRestore();
+    }
+  });
+  // Verifies that capture still happens when the optional planned_files and
+  // start_time arguments are omitted from swarm_complete.
+  test("still calls captureSubtaskOutcome when optional params are missing", async () => {
+    const { hive_create_epic } = await import("./hive");
+    const captureOutcomeSpy = spyOn(evalCapture, "captureSubtaskOutcome");
+    try {
+      // Create an epic with a subtask
+      const epicResult = await hive_create_epic.execute({
+        epic_title: "Fix bug",
+        subtasks: [
+          {
+            title: "Fix auth bug",
+            priority: 1,
+            files: ["src/auth.ts"],
+          },
+        ],
+      }, mockContext);
+      const epicData = JSON.parse(epicResult);
+      const beadId = epicData.subtasks[0].id;
+      // Call without planned_files or start_time
+      const result = await swarm_complete.execute(
+        {
+          project_key: testProjectPath,
+          agent_name: "TestAgent",
+          bead_id: beadId,
+          summary: "Fixed the bug",
+          skip_verification: true,
+          skip_review: true,
+          // No planned_files, start_time
+        },
+        mockContext,
+      );
+      const parsed = JSON.parse(result);
+      expect(parsed.success).toBe(true);
+      // Capture should still be called, but with default values
+      // (The function is called in all success cases, it just handles missing params)
+      expect(captureOutcomeSpy).toHaveBeenCalledTimes(1);
+    } finally {
+      // Restore even when an assertion above throws, so the spy cannot leak
+      // into later tests.
+      captureOutcomeSpy.mockRestore();
+    }
+  });
+});
+// ============================================================================
+// Eval Capture Integration Tests (swarm_record_outcome)
+// ============================================================================
+describe("finalizeEvalRecord integration", () => {
+  const mockContext = {
+    sessionID: `test-finalize-${Date.now()}`,
+    messageID: `test-message-${Date.now()}`,
+    agent: "test-agent",
+    abort: new AbortController().signal,
+  };
+  // Verifies that swarm_record_outcome forwards epic_id and project_path to
+  // finalizeEvalRecord when both are supplied.
+  test("calls finalizeEvalRecord when project_path and epic_id provided", async () => {
+    const { swarm_record_outcome } = await import("./swarm-orchestrate");
+    // Spy on finalizeEvalRecord
+    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");
+    try {
+      finalizeEvalSpy.mockReturnValue(null); // Mock return value
+      const testProjectPath = "/tmp/test-project";
+      const testEpicId = "bd-test123";
+      const testBeadId = `${testEpicId}.0`;
+      // Call swarm_record_outcome with epic_id and project_path
+      await swarm_record_outcome.execute({
+        bead_id: testBeadId,
+        duration_ms: 120000,
+        error_count: 0,
+        retry_count: 0,
+        success: true,
+        files_touched: ["src/test.ts"],
+        epic_id: testEpicId,
+        project_path: testProjectPath,
+      }, mockContext);
+      // Verify finalizeEvalRecord was called
+      expect(finalizeEvalSpy).toHaveBeenCalledTimes(1);
+      expect(finalizeEvalSpy).toHaveBeenCalledWith({
+        epicId: testEpicId,
+        projectPath: testProjectPath,
+      });
+    } finally {
+      // Restore even when an assertion above throws, so the mocked return
+      // value cannot leak into later tests.
+      finalizeEvalSpy.mockRestore();
+    }
+  });
+  // Verifies that finalizeEvalRecord is skipped when swarm_record_outcome is
+  // called without epic_id and project_path.
+  test("does not call finalizeEvalRecord when epic_id or project_path missing", async () => {
+    const { swarm_record_outcome } = await import("./swarm-orchestrate");
+    // Spy on finalizeEvalRecord
+    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");
+    try {
+      const testBeadId = "bd-test123.0";
+      // Call without epic_id or project_path
+      await swarm_record_outcome.execute({
+        bead_id: testBeadId,
+        duration_ms: 120000,
+        error_count: 0,
+        retry_count: 0,
+        success: true,
+      }, mockContext);
+      // Verify finalizeEvalRecord was NOT called
+      expect(finalizeEvalSpy).toHaveBeenCalledTimes(0);
+    } finally {
+      // Restore even when the assertion above throws, so the spy cannot leak
+      // into later tests.
+      finalizeEvalSpy.mockRestore();
+    }
+  });
+  // Verifies that a record returned by finalizeEvalRecord is echoed back in
+  // the swarm_record_outcome response as `finalized_eval_record`.
+  test("includes finalized record in response when available", async () => {
+    const { swarm_record_outcome } = await import("./swarm-orchestrate");
+    // Mock finalizeEvalRecord to return a record
+    const mockFinalRecord = {
+      id: "bd-test123",
+      timestamp: new Date().toISOString(),
+      project_path: "/tmp/test-project",
+      task: "Test task",
+      strategy: "file-based" as const,
+      subtask_count: 2,
+      epic_title: "Test Epic",
+      subtasks: [],
+      overall_success: true,
+      total_duration_ms: 240000,
+      total_errors: 0,
+    };
+    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");
+    try {
+      finalizeEvalSpy.mockReturnValue(mockFinalRecord);
+      const testProjectPath = "/tmp/test-project";
+      const testEpicId = "bd-test123";
+      const testBeadId = `${testEpicId}.0`;
+      // Call with epic_id and project_path
+      const result = await swarm_record_outcome.execute({
+        bead_id: testBeadId,
+        duration_ms: 120000,
+        error_count: 0,
+        retry_count: 0,
+        success: true,
+        epic_id: testEpicId,
+        project_path: testProjectPath,
+      }, mockContext);
+      // Parse result and check for finalized record
+      const parsed = JSON.parse(result);
+      expect(parsed).toHaveProperty("finalized_eval_record");
+      expect(parsed.finalized_eval_record).toEqual(mockFinalRecord);
+    } finally {
+      // Restore even when an assertion above throws, so the mocked return
+      // value cannot leak into later tests.
+      finalizeEvalSpy.mockRestore();
+    }
+  });
 });