opencode-swarm-plugin 0.37.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/.env +2 -0
  2. package/.hive/eval-results.json +26 -0
  3. package/.hive/issues.jsonl +20 -5
  4. package/.hive/memories.jsonl +35 -1
  5. package/.opencode/eval-history.jsonl +12 -0
  6. package/.turbo/turbo-build.log +4 -4
  7. package/.turbo/turbo-test.log +319 -319
  8. package/CHANGELOG.md +258 -0
  9. package/README.md +50 -0
  10. package/bin/swarm.test.ts +475 -0
  11. package/bin/swarm.ts +385 -208
  12. package/dist/compaction-hook.d.ts +1 -1
  13. package/dist/compaction-hook.d.ts.map +1 -1
  14. package/dist/compaction-prompt-scoring.d.ts +124 -0
  15. package/dist/compaction-prompt-scoring.d.ts.map +1 -0
  16. package/dist/eval-capture.d.ts +81 -1
  17. package/dist/eval-capture.d.ts.map +1 -1
  18. package/dist/eval-gates.d.ts +84 -0
  19. package/dist/eval-gates.d.ts.map +1 -0
  20. package/dist/eval-history.d.ts +117 -0
  21. package/dist/eval-history.d.ts.map +1 -0
  22. package/dist/eval-learning.d.ts +216 -0
  23. package/dist/eval-learning.d.ts.map +1 -0
  24. package/dist/hive.d.ts +59 -0
  25. package/dist/hive.d.ts.map +1 -1
  26. package/dist/index.d.ts +87 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +823 -131
  29. package/dist/plugin.js +655 -131
  30. package/dist/post-compaction-tracker.d.ts +133 -0
  31. package/dist/post-compaction-tracker.d.ts.map +1 -0
  32. package/dist/swarm-decompose.d.ts +30 -0
  33. package/dist/swarm-decompose.d.ts.map +1 -1
  34. package/dist/swarm-orchestrate.d.ts +23 -0
  35. package/dist/swarm-orchestrate.d.ts.map +1 -1
  36. package/dist/swarm-prompts.d.ts +25 -1
  37. package/dist/swarm-prompts.d.ts.map +1 -1
  38. package/dist/swarm.d.ts +19 -0
  39. package/dist/swarm.d.ts.map +1 -1
  40. package/evals/README.md +595 -94
  41. package/evals/compaction-prompt.eval.ts +149 -0
  42. package/evals/coordinator-behavior.eval.ts +8 -8
  43. package/evals/fixtures/compaction-prompt-cases.ts +305 -0
  44. package/evals/lib/compaction-loader.test.ts +248 -0
  45. package/evals/lib/compaction-loader.ts +320 -0
  46. package/evals/lib/data-loader.test.ts +345 -0
  47. package/evals/lib/data-loader.ts +107 -6
  48. package/evals/scorers/compaction-prompt-scorers.ts +145 -0
  49. package/evals/scorers/compaction-scorers.ts +13 -13
  50. package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
  51. package/evals/scorers/coordinator-discipline.ts +13 -13
  52. package/examples/plugin-wrapper-template.ts +177 -8
  53. package/package.json +7 -2
  54. package/scripts/migrate-unknown-sessions.ts +349 -0
  55. package/src/compaction-capture.integration.test.ts +257 -0
  56. package/src/compaction-hook.test.ts +139 -2
  57. package/src/compaction-hook.ts +113 -2
  58. package/src/compaction-prompt-scorers.test.ts +299 -0
  59. package/src/compaction-prompt-scoring.ts +298 -0
  60. package/src/eval-capture.test.ts +422 -0
  61. package/src/eval-capture.ts +94 -2
  62. package/src/eval-gates.test.ts +306 -0
  63. package/src/eval-gates.ts +218 -0
  64. package/src/eval-history.test.ts +508 -0
  65. package/src/eval-history.ts +214 -0
  66. package/src/eval-learning.test.ts +378 -0
  67. package/src/eval-learning.ts +360 -0
  68. package/src/index.ts +61 -1
  69. package/src/post-compaction-tracker.test.ts +251 -0
  70. package/src/post-compaction-tracker.ts +237 -0
  71. package/src/swarm-decompose.test.ts +40 -47
  72. package/src/swarm-decompose.ts +2 -2
  73. package/src/swarm-orchestrate.test.ts +270 -7
  74. package/src/swarm-orchestrate.ts +100 -13
  75. package/src/swarm-prompts.test.ts +121 -0
  76. package/src/swarm-prompts.ts +297 -4
  77. package/src/swarm-research.integration.test.ts +157 -0
  78. package/src/swarm-review.ts +3 -3
  79. /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
@@ -0,0 +1,237 @@
1
+ /**
2
+ * Post-Compaction Tool Call Tracker
3
+ *
4
+ * Tracks tool calls after compaction resumption to detect coordinator violations
5
+ * and provide learning signals for eval-driven development.
6
+ *
7
+ * ## Purpose
8
+ *
9
+ * When context is compacted, the continuation agent needs observation to learn
10
+ * if it's following coordinator discipline. This tracker:
11
+ *
12
+ * 1. Emits resumption_started on first tool call (marks compaction exit)
13
+ * 2. Tracks up to N tool calls (default 20) with violation detection
14
+ * 3. Stops tracking after limit to avoid noise in long sessions
15
+ *
16
+ * ## Coordinator Violations Detected
17
+ *
18
+ * - **Edit/Write**: Coordinators NEVER edit files - spawn worker instead
19
+ * - **swarmmail_reserve/agentmail_reserve**: Workers reserve, not coordinators
20
+ *
21
+ * ## Integration
22
+ *
23
+ * Used by compaction hook to wire tool.call events → eval capture.
24
+ *
25
+ * @example
26
+ * ```typescript
27
+ * const tracker = createPostCompactionTracker({
28
+ * sessionId: "session-123",
29
+ * epicId: "bd-epic-456",
30
+ * onEvent: captureCompactionEvent,
31
+ * });
32
+ *
33
+ * // Wire to OpenCode hook
34
+ * hooks["tool.call"] = (input) => {
35
+ * tracker.trackToolCall({
36
+ * tool: input.tool,
37
+ * args: input.args,
38
+ * timestamp: Date.now(),
39
+ * });
40
+ * };
41
+ * ```
42
+ */
43
+
44
/**
 * One tool invocation as reported to the post-compaction tracker.
 */
export interface ToolCallEvent {
  /** Name of the tool that was invoked (for example "read" or "edit"). */
  tool: string;
  /** Arguments the tool was invoked with. */
  args: Record<string, unknown>;
  /** Caller-supplied time of the call; the module example passes `Date.now()`. */
  timestamp: number;
}
52
+
53
/**
 * Event handed to the tracker's `onEvent` callback.
 *
 * The original note states this matches the structure used by eval-capture.ts;
 * that module is not visible here, so treat the correspondence as intended
 * rather than verified.
 */
export interface CompactionEvent {
  /** Session the event belongs to. */
  session_id: string;
  /** Epic the event belongs to. */
  epic_id: string;
  /** Which stage of the compaction lifecycle this event describes. */
  compaction_type:
    | "detection_complete"
    | "prompt_generated"
    | "context_injected"
    | "resumption_started"
    | "tool_call_tracked";
  /** Stage-specific details; every field is optional. */
  payload: {
    session_id?: string;
    epic_id?: string;
    /** Name of the tracked tool. */
    tool?: string;
    /** Arguments of the tracked tool call. */
    args?: Record<string, unknown>;
    /** 1-based position of the call since tracking began. */
    call_number?: number;
    /** True when the tool is one coordinators must not use. */
    is_coordinator_violation?: boolean;
    /** Human-readable explanation accompanying a violation. */
    violation_reason?: string;
    timestamp?: number;
  };
}
76
+
77
/**
 * Options accepted by `createPostCompactionTracker`.
 */
export interface PostCompactionTrackerConfig {
  /** Session identifier stamped onto every emitted event. */
  sessionId: string;
  /** Epic identifier stamped onto every emitted event. */
  epicId: string;
  /** Sink that receives each emitted compaction event. */
  onEvent: (event: CompactionEvent) => void;
  /** Upper bound on tracked tool calls; when omitted the tracker uses its default. */
  maxCalls?: number;
}
86
+
87
/**
 * Handle returned by `createPostCompactionTracker`.
 */
export interface PostCompactionTracker {
  /** Record one tool call; it is ignored once the tracking limit has been reached. */
  trackToolCall(event: ToolCallEvent): void;
  /** Whether further tool calls would still be recorded. */
  isTracking(): boolean;
}
94
+
95
+ // ============================================================================
96
+ // Constants
97
+ // ============================================================================
98
+
99
/**
 * How many tool calls a tracker records when no explicit limit is configured.
 *
 * The value is a trade-off:
 * - large enough to reveal patterns (per the original note, 20 calls is
 *   roughly 2-3 minutes of coordinator work)
 * - small enough that long sessions do not flood the event stream
 */
export const DEFAULT_MAX_TRACKED_CALLS = 20;
107
+
108
+ // ============================================================================
109
+ // Coordinator Violation Detection
110
+ // ============================================================================
111
+
112
/**
 * Tools that coordinators are NEVER allowed to use, mapped to the reason
 * reported when one of them is called.
 *
 * Key insight from semantic memory: coordinators lose identity after compaction
 * and start doing implementation work. These violations are observable signals
 * that the coordinator mandate wasn't preserved in continuation prompt.
 */
const FORBIDDEN_COORDINATOR_TOOLS: Record<string, string> = {
  edit: "Coordinators NEVER edit files - spawn worker instead",
  write: "Coordinators NEVER write files - spawn worker instead",
  swarmmail_reserve: "Coordinators NEVER reserve files - workers reserve files",
  agentmail_reserve: "Coordinators NEVER reserve files - workers reserve files",
};

/**
 * Check if tool call is a coordinator violation.
 *
 * Matching is exact and case-sensitive against the forbidden-tool table.
 *
 * @param tool - Tool name from OpenCode tool.call hook
 * @returns `isViolation` plus, for forbidden tools, the `reason` string.
 *   For allowed tools `reason` is `undefined`.
 *
 * @example
 * ```typescript
 * isCoordinatorViolation("edit");
 * // { isViolation: true, reason: "Coordinators NEVER edit..." }
 *
 * isCoordinatorViolation("read");
 * // { isViolation: false, reason: undefined }
 * ```
 */
export function isCoordinatorViolation(tool: string): {
  isViolation: boolean;
  reason?: string;
} {
  // Own-property check: a bare `FORBIDDEN_COORDINATOR_TOOLS[tool]` lookup also
  // resolves names inherited from Object.prototype ("constructor", "toString",
  // "__proto__", ...), which would be misreported as violations with a
  // non-string reason.
  const isViolation = Object.prototype.hasOwnProperty.call(
    FORBIDDEN_COORDINATOR_TOOLS,
    tool,
  );

  return {
    isViolation,
    reason: isViolation ? FORBIDDEN_COORDINATOR_TOOLS[tool] : undefined,
  };
}
151
+
152
+ // ============================================================================
153
+ // Tracker Factory
154
+ // ============================================================================
155
+
156
/**
 * Create a post-compaction tool call tracker.
 *
 * The returned tracker emits events through `config.onEvent`:
 * - one `resumption_started` event immediately before the first tracked call
 * - one `tool_call_tracked` event per call, carrying a 1-based `call_number`
 *   and the coordinator-violation verdict for the tool
 *
 * Once `maxCalls` calls have been tracked, further calls are ignored and
 * `isTracking()` returns false. A `maxCalls` that is not a number, or is NaN,
 * falls back to `DEFAULT_MAX_TRACKED_CALLS`; previously NaN disabled the limit
 * entirely while `isTracking()` reported false. A limit of 0 or below means
 * nothing is tracked.
 *
 * Exceptions thrown by `onEvent` are not caught and propagate to the caller.
 *
 * @param config - Session/epic identifiers, the event sink and optional limit
 * @returns Tracker exposing `trackToolCall` and `isTracking`
 *
 * @example
 * ```typescript
 * const tracker = createPostCompactionTracker({
 *   sessionId: "session-123",
 *   epicId: "bd-epic-456",
 *   onEvent: (event) => captureCompactionEvent(event),
 *   maxCalls: 20
 * });
 *
 * // Track tool calls
 * tracker.trackToolCall({
 *   tool: "read",
 *   args: { filePath: "/test.ts" },
 *   timestamp: Date.now()
 * });
 * ```
 */
export function createPostCompactionTracker(
  config: PostCompactionTrackerConfig,
): PostCompactionTracker {
  const { sessionId, epicId, onEvent } = config;

  // Normalise the limit once so both methods below agree on it. NaN (or a
  // non-number from an untyped caller) would otherwise make every comparison
  // false or coerce unexpectedly.
  const requestedMax: unknown = config.maxCalls;
  const maxCalls =
    typeof requestedMax === "number" && !Number.isNaN(requestedMax)
      ? requestedMax
      : DEFAULT_MAX_TRACKED_CALLS;

  let callCount = 0;
  let resumptionEmitted = false;

  // Single source of truth for "has the tracking budget been used up?"
  const limitReached = (): boolean => callCount >= maxCalls;

  return {
    trackToolCall(event: ToolCallEvent): void {
      // Stop tracking after max calls reached
      if (limitReached()) {
        return;
      }

      // Emit resumption_started on first call (marks compaction exit)
      if (!resumptionEmitted) {
        onEvent({
          session_id: sessionId,
          epic_id: epicId,
          compaction_type: "resumption_started",
          payload: {
            session_id: sessionId,
            epic_id: epicId,
            timestamp: event.timestamp,
          },
        });
        resumptionEmitted = true;
      }

      // Increment before emitting so call_number is 1-based
      callCount++;

      // Check for coordinator violations
      const violation = isCoordinatorViolation(event.tool);

      // Emit tool_call_tracked event
      onEvent({
        session_id: sessionId,
        epic_id: epicId,
        compaction_type: "tool_call_tracked",
        payload: {
          tool: event.tool,
          args: event.args,
          call_number: callCount,
          is_coordinator_violation: violation.isViolation,
          violation_reason: violation.reason,
          timestamp: event.timestamp,
        },
      });
    },

    isTracking(): boolean {
      return !limitReached();
    },
  };
}
@@ -6,7 +6,7 @@
6
6
  * TDD: Testing eval capture integration - verifies captureDecomposition() is called
7
7
  * after successful validation with correct parameters.
8
8
  */
9
- import { afterEach, beforeEach, describe, expect, test, mock } from "bun:test";
9
+ import { afterEach, beforeEach, describe, expect, test, spyOn } from "bun:test";
10
10
  import * as fs from "node:fs";
11
11
  import { swarm_validate_decomposition } from "./swarm-decompose";
12
12
  import * as evalCapture from "./eval-capture.js";
@@ -41,15 +41,8 @@ afterEach(() => {
41
41
 
42
42
  describe("captureDecomposition integration", () => {
43
43
  test("calls captureDecomposition after successful validation with all params", async () => {
44
- // Mock captureDecomposition to spy on calls
45
- const captureDecompositionSpy = mock(() => ({
46
- id: "test-epic-123",
47
- timestamp: new Date().toISOString(),
48
- task: "Add user authentication",
49
- }));
50
- const original = evalCapture.captureDecomposition;
51
- // @ts-expect-error - mocking for test
52
- evalCapture.captureDecomposition = captureDecompositionSpy;
44
+ // Spy on captureDecomposition
45
+ const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
53
46
 
54
47
  const validCellTree = JSON.stringify({
55
48
  epic: {
@@ -91,27 +84,37 @@ describe("captureDecomposition integration", () => {
91
84
 
92
85
  // Verify captureDecomposition was called with correct params
93
86
  expect(captureDecompositionSpy).toHaveBeenCalledTimes(1);
94
- const callArgs = captureDecompositionSpy.mock.calls[0][0];
95
- expect(callArgs.epicId).toBe("test-epic-123");
96
- expect(callArgs.projectPath).toBe(testProjectPath);
97
- expect(callArgs.task).toBe("Add user authentication");
98
- expect(callArgs.context).toBe("Using NextAuth.js");
99
- expect(callArgs.strategy).toBe("feature-based");
100
- expect(callArgs.epicTitle).toBe("Add OAuth");
101
- expect(callArgs.epicDescription).toBe("Implement OAuth authentication");
102
- expect(callArgs.subtasks).toHaveLength(2);
103
- expect(callArgs.subtasks[0].title).toBe("Add OAuth provider config");
104
-
105
- // Restore
106
- // @ts-expect-error - restoring mock
107
- evalCapture.captureDecomposition = original;
87
+ expect(captureDecompositionSpy).toHaveBeenCalledWith({
88
+ epicId: "test-epic-123",
89
+ projectPath: testProjectPath,
90
+ task: "Add user authentication",
91
+ context: "Using NextAuth.js",
92
+ strategy: "feature-based",
93
+ epicTitle: "Add OAuth",
94
+ epicDescription: "Implement OAuth authentication",
95
+ subtasks: [
96
+ {
97
+ title: "Add OAuth provider config",
98
+ description: "Set up Google OAuth",
99
+ files: ["src/auth/google.ts", "src/auth/config.ts"],
100
+ dependencies: [],
101
+ estimated_complexity: 2,
102
+ },
103
+ {
104
+ title: "Add login UI",
105
+ description: "Create login button component",
106
+ files: ["src/components/LoginButton.tsx"],
107
+ dependencies: [0],
108
+ estimated_complexity: 1,
109
+ },
110
+ ],
111
+ });
112
+
113
+ captureDecompositionSpy.mockRestore();
108
114
  });
109
115
 
110
116
  test("does not call captureDecomposition when validation fails", async () => {
111
- const captureDecompositionSpy = mock(() => ({}));
112
- const original = evalCapture.captureDecomposition;
113
- // @ts-expect-error - mocking for test
114
- evalCapture.captureDecomposition = captureDecompositionSpy;
117
+ const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
115
118
 
116
119
  // Invalid CellTree - missing required fields
117
120
  const invalidCellTree = JSON.stringify({
@@ -136,20 +139,11 @@ describe("captureDecomposition integration", () => {
136
139
  // Verify captureDecomposition was NOT called
137
140
  expect(captureDecompositionSpy).not.toHaveBeenCalled();
138
141
 
139
- // Restore
140
- // @ts-expect-error - restoring mock
141
- evalCapture.captureDecomposition = original;
142
+ captureDecompositionSpy.mockRestore();
142
143
  });
143
144
 
144
145
  test("handles optional context and description fields", async () => {
145
- const captureDecompositionSpy = mock(() => ({
146
- id: "test-epic-789",
147
- timestamp: new Date().toISOString(),
148
- task: "Fix the auth bug",
149
- }));
150
- const original = evalCapture.captureDecomposition;
151
- // @ts-expect-error - mocking for test
152
- evalCapture.captureDecomposition = captureDecompositionSpy;
146
+ const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
153
147
 
154
148
  const validCellTree = JSON.stringify({
155
149
  epic: {
@@ -183,13 +177,12 @@ describe("captureDecomposition integration", () => {
183
177
 
184
178
  // Verify captureDecomposition was called without optional fields
185
179
  expect(captureDecompositionSpy).toHaveBeenCalledTimes(1);
186
- const callArgs = captureDecompositionSpy.mock.calls[0][0];
187
- expect(callArgs.epicId).toBe("test-epic-789");
188
- expect(callArgs.context).toBeUndefined();
189
- expect(callArgs.epicDescription).toBeUndefined();
190
-
191
- // Restore
192
- // @ts-expect-error - restoring mock
193
- evalCapture.captureDecomposition = original;
180
+ const call = captureDecompositionSpy.mock.calls[0];
181
+ expect(call[0].epicId).toBe("test-epic-789");
182
+ expect(call[0].context).toBeUndefined();
183
+ // Schema default makes description empty string instead of undefined
184
+ expect(call[0].epicDescription).toBe("");
185
+
186
+ captureDecompositionSpy.mockRestore();
194
187
  });
195
188
  });
@@ -753,7 +753,7 @@ export const swarm_delegate_planning = tool({
753
753
  .default(true)
754
754
  .describe("Query CASS for similar past tasks (default: true)"),
755
755
  },
756
- async execute(args) {
756
+ async execute(args, _ctx) {
757
757
  // Import needed modules
758
758
  const { selectStrategy, formatStrategyGuidelines } =
759
759
  await import("./swarm-strategies");
@@ -777,7 +777,7 @@ export const swarm_delegate_planning = tool({
777
777
  // Capture strategy selection decision
778
778
  try {
779
779
  captureCoordinatorEvent({
780
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
780
+ session_id: _ctx.sessionID || "unknown",
781
781
  epic_id: "planning", // No epic ID yet - this is pre-decomposition
782
782
  timestamp: new Date().toISOString(),
783
783
  event_type: "DECISION",
@@ -6,10 +6,13 @@
6
6
  * - Researcher spawning for identified technologies
7
7
  * - Summary collection from semantic-memory
8
8
  * - Research result aggregation
9
+ * - Eval capture integration (captureSubtaskOutcome wiring)
9
10
  */
10
11
 
11
- import { describe, test, expect, beforeEach } from "bun:test";
12
- import { runResearchPhase, extractTechStack } from "./swarm-orchestrate";
12
+ import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test";
13
+ import { runResearchPhase, extractTechStack, swarm_complete } from "./swarm-orchestrate";
14
+ import * as evalCapture from "./eval-capture.js";
15
+ import * as fs from "node:fs";
13
16
 
14
17
  describe("extractTechStack", () => {
15
18
  test("extracts Next.js from task description", () => {
@@ -115,9 +118,269 @@ describe("runResearchPhase", () => {
115
118
  });
116
119
  });
117
120
 
118
- describe("swarm_research_phase tool", () => {
119
- test.todo("exposes research phase as plugin tool");
120
- test.todo("validates task parameter");
121
- test.todo("validates project_path parameter");
122
- test.todo("returns JSON string with research results");
121
+ // describe("swarm_research_phase tool", () => {
122
+ // test.todo("exposes research phase as plugin tool");
123
+ // test.todo("validates task parameter");
124
+ // test.todo("validates project_path parameter");
125
+ // test.todo("returns JSON string with research results");
126
+ // });
127
+
128
+ // ============================================================================
129
+ // Eval Capture Integration Tests (swarm_complete)
130
+ // ============================================================================
131
+
132
/**
 * Verifies that swarm_complete forwards subtask outcome data to
 * evalCapture.captureSubtaskOutcome.
 *
 * Each test runs against a throwaway project directory containing an empty
 * .hive/issues.jsonl, and restores its spy in a `finally` block so a failed
 * assertion cannot leave the spy installed for later tests.
 */
describe("captureSubtaskOutcome integration", () => {
  const mockContext = {
    sessionID: `test-complete-${Date.now()}`,
    messageID: `test-message-${Date.now()}`,
    agent: "test-agent",
    abort: new AbortController().signal,
  };

  let testProjectPath: string;

  beforeEach(async () => {
    testProjectPath = `/tmp/test-swarm-complete-${Date.now()}`;
    fs.mkdirSync(testProjectPath, { recursive: true });

    // Create .hive directory and issues.jsonl
    const hiveDir = `${testProjectPath}/.hive`;
    fs.mkdirSync(hiveDir, { recursive: true });
    fs.writeFileSync(`${hiveDir}/issues.jsonl`, "", "utf-8");

    // Set hive working directory to testProjectPath
    const { setHiveWorkingDirectory } = await import("./hive");
    setHiveWorkingDirectory(testProjectPath);
  });

  afterEach(() => {
    if (fs.existsSync(testProjectPath)) {
      fs.rmSync(testProjectPath, { recursive: true, force: true });
    }
  });

  test("calls captureSubtaskOutcome after successful completion with all params", async () => {
    // Import hive tools
    const { hive_create_epic } = await import("./hive");

    // Spy on captureSubtaskOutcome
    const captureOutcomeSpy = spyOn(evalCapture, "captureSubtaskOutcome");

    try {
      // Create an epic with a subtask using hive_create_epic
      const epicResult = await hive_create_epic.execute({
        epic_title: "Add OAuth",
        epic_description: "Implement OAuth authentication",
        subtasks: [
          {
            title: "Add auth service",
            priority: 2,
            files: ["src/auth/service.ts", "src/auth/schema.ts"],
          },
        ],
      }, mockContext);

      const epicData = JSON.parse(epicResult);
      expect(epicData.success).toBe(true);

      const epicId = epicData.epic.id;
      const beadId = epicData.subtasks[0].id;

      const startTime = Date.now() - 120000; // Started 2 minutes ago
      const plannedFiles = ["src/auth/service.ts", "src/auth/schema.ts"];
      const actualFiles = ["src/auth/service.ts", "src/auth/schema.ts", "src/auth/types.ts"];

      // Call swarm_complete
      const result = await swarm_complete.execute(
        {
          project_key: testProjectPath,
          agent_name: "TestAgent",
          bead_id: beadId,
          summary: "Implemented OAuth service with JWT strategy",
          files_touched: actualFiles,
          skip_verification: true, // Skip verification for test
          skip_review: true, // Skip review for test
          planned_files: plannedFiles,
          start_time: startTime,
          error_count: 0,
          retry_count: 0,
        },
        mockContext,
      );

      const parsed = JSON.parse(result);
      expect(parsed.success).toBe(true);

      // Verify captureSubtaskOutcome was called with correct params
      expect(captureOutcomeSpy).toHaveBeenCalledTimes(1);

      const call = captureOutcomeSpy.mock.calls[0][0];
      expect(call.epicId).toBe(epicId);
      expect(call.projectPath).toBe(testProjectPath);
      expect(call.beadId).toBe(beadId);
      expect(call.title).toBe("Add auth service");
      expect(call.plannedFiles).toEqual(plannedFiles);
      expect(call.actualFiles).toEqual(actualFiles);
      expect(call.durationMs).toBeGreaterThan(0);
      expect(call.errorCount).toBe(0);
      expect(call.retryCount).toBe(0);
      expect(call.success).toBe(true);
    } finally {
      // Always restore, even when an assertion above throws
      captureOutcomeSpy.mockRestore();
    }
  });

  // Title corrected: the assertion below expects the capture to happen.
  test("still calls captureSubtaskOutcome when optional params are missing", async () => {
    const { hive_create_epic } = await import("./hive");
    const captureOutcomeSpy = spyOn(evalCapture, "captureSubtaskOutcome");

    try {
      // Create an epic with a subtask
      const epicResult = await hive_create_epic.execute({
        epic_title: "Fix bug",
        subtasks: [
          {
            title: "Fix auth bug",
            priority: 1,
            files: ["src/auth.ts"],
          },
        ],
      }, mockContext);

      const epicData = JSON.parse(epicResult);
      const beadId = epicData.subtasks[0].id;

      // Call without planned_files or start_time
      const result = await swarm_complete.execute(
        {
          project_key: testProjectPath,
          agent_name: "TestAgent",
          bead_id: beadId,
          summary: "Fixed the bug",
          skip_verification: true,
          skip_review: true,
          // No planned_files, start_time
        },
        mockContext,
      );

      const parsed = JSON.parse(result);
      expect(parsed.success).toBe(true);

      // Capture should still be called, but with default values
      // (The function is called in all success cases, it just handles missing params)
      expect(captureOutcomeSpy).toHaveBeenCalledTimes(1);
    } finally {
      captureOutcomeSpy.mockRestore();
    }
  });
});
274
+
275
+ // ============================================================================
276
+ // Eval Capture Integration Tests (swarm_record_outcome)
277
+ // ============================================================================
278
+
279
/**
 * Verifies that swarm_record_outcome calls evalCapture.finalizeEvalRecord only
 * when both epic_id and project_path are supplied, and that a returned record
 * is surfaced in the tool's JSON response.
 *
 * Each spy is restored in a `finally` block so a failed assertion cannot leave
 * it installed for later tests.
 */
describe("finalizeEvalRecord integration", () => {
  const mockContext = {
    sessionID: `test-finalize-${Date.now()}`,
    messageID: `test-message-${Date.now()}`,
    agent: "test-agent",
    abort: new AbortController().signal,
  };

  test("calls finalizeEvalRecord when project_path and epic_id provided", async () => {
    const { swarm_record_outcome } = await import("./swarm-orchestrate");

    // Spy on finalizeEvalRecord
    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");

    try {
      finalizeEvalSpy.mockReturnValue(null); // Mock return value

      const testProjectPath = "/tmp/test-project";
      const testEpicId = "bd-test123";
      const testBeadId = `${testEpicId}.0`;

      // Call swarm_record_outcome with epic_id and project_path
      await swarm_record_outcome.execute({
        bead_id: testBeadId,
        duration_ms: 120000,
        error_count: 0,
        retry_count: 0,
        success: true,
        files_touched: ["src/test.ts"],
        epic_id: testEpicId,
        project_path: testProjectPath,
      }, mockContext);

      // Verify finalizeEvalRecord was called
      expect(finalizeEvalSpy).toHaveBeenCalledTimes(1);
      expect(finalizeEvalSpy).toHaveBeenCalledWith({
        epicId: testEpicId,
        projectPath: testProjectPath,
      });
    } finally {
      // Always restore, even when an assertion above throws
      finalizeEvalSpy.mockRestore();
    }
  });

  test("does not call finalizeEvalRecord when epic_id or project_path missing", async () => {
    const { swarm_record_outcome } = await import("./swarm-orchestrate");

    // Spy on finalizeEvalRecord
    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");

    try {
      const testBeadId = "bd-test123.0";

      // Call without epic_id or project_path
      await swarm_record_outcome.execute({
        bead_id: testBeadId,
        duration_ms: 120000,
        error_count: 0,
        retry_count: 0,
        success: true,
      }, mockContext);

      // Verify finalizeEvalRecord was NOT called
      expect(finalizeEvalSpy).toHaveBeenCalledTimes(0);
    } finally {
      finalizeEvalSpy.mockRestore();
    }
  });

  test("includes finalized record in response when available", async () => {
    const { swarm_record_outcome } = await import("./swarm-orchestrate");

    // Mock finalizeEvalRecord to return a record
    const mockFinalRecord = {
      id: "bd-test123",
      timestamp: new Date().toISOString(),
      project_path: "/tmp/test-project",
      task: "Test task",
      strategy: "file-based" as const,
      subtask_count: 2,
      epic_title: "Test Epic",
      subtasks: [],
      overall_success: true,
      total_duration_ms: 240000,
      total_errors: 0,
    };

    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");

    try {
      finalizeEvalSpy.mockReturnValue(mockFinalRecord);

      const testProjectPath = "/tmp/test-project";
      const testEpicId = "bd-test123";
      const testBeadId = `${testEpicId}.0`;

      // Call with epic_id and project_path
      const result = await swarm_record_outcome.execute({
        bead_id: testBeadId,
        duration_ms: 120000,
        error_count: 0,
        retry_count: 0,
        success: true,
        epic_id: testEpicId,
        project_path: testProjectPath,
      }, mockContext);

      // Parse result and check for finalized record
      const parsed = JSON.parse(result);
      expect(parsed).toHaveProperty("finalized_eval_record");
      expect(parsed.finalized_eval_record).toEqual(mockFinalRecord);
    } finally {
      finalizeEvalSpy.mockRestore();
    }
  });
});