opencode-swarm-plugin 0.37.0 → 0.39.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +20 -5
- package/.hive/memories.jsonl +35 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/.turbo/turbo-build.log +4 -4
- package/.turbo/turbo-test.log +319 -319
- package/CHANGELOG.md +258 -0
- package/README.md +50 -0
- package/bin/swarm.test.ts +475 -0
- package/bin/swarm.ts +385 -208
- package/dist/compaction-hook.d.ts +1 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +81 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/hive.d.ts +59 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +87 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +823 -131
- package/dist/plugin.js +655 -131
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/swarm-decompose.d.ts +30 -0
- package/dist/swarm-decompose.d.ts.map +1 -1
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +19 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +595 -94
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
- package/evals/scorers/coordinator-discipline.ts +13 -13
- package/examples/plugin-wrapper-template.ts +177 -8
- package/package.json +7 -2
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +139 -2
- package/src/compaction-hook.ts +113 -2
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +422 -0
- package/src/eval-capture.ts +94 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/index.ts +61 -1
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.test.ts +40 -47
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.test.ts +270 -7
- package/src/swarm-orchestrate.ts +100 -13
- package/src/swarm-prompts.test.ts +121 -0
- package/src/swarm-prompts.ts +297 -4
- package/src/swarm-research.integration.test.ts +157 -0
- package/src/swarm-review.ts +3 -3
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-Compaction Tool Call Tracker
|
|
3
|
+
*
|
|
4
|
+
* Tracks tool calls after compaction resumption to detect coordinator violations
|
|
5
|
+
* and provide learning signals for eval-driven development.
|
|
6
|
+
*
|
|
7
|
+
* ## Purpose
|
|
8
|
+
*
|
|
9
|
+
* When context is compacted, the continuation agent needs observation to learn
|
|
10
|
+
* if it's following coordinator discipline. This tracker:
|
|
11
|
+
*
|
|
12
|
+
* 1. Emits resumption_started on first tool call (marks compaction exit)
|
|
13
|
+
* 2. Tracks up to N tool calls (default 20) with violation detection
|
|
14
|
+
* 3. Stops tracking after limit to avoid noise in long sessions
|
|
15
|
+
*
|
|
16
|
+
* ## Coordinator Violations Detected
|
|
17
|
+
*
|
|
18
|
+
* - **Edit/Write**: Coordinators NEVER edit files - spawn worker instead
|
|
19
|
+
* - **swarmmail_reserve/agentmail_reserve**: Workers reserve, not coordinators
|
|
20
|
+
*
|
|
21
|
+
* ## Integration
|
|
22
|
+
*
|
|
23
|
+
* Used by compaction hook to wire tool.call events → eval capture.
|
|
24
|
+
*
|
|
25
|
+
* @example
|
|
26
|
+
* ```typescript
|
|
27
|
+
* const tracker = createPostCompactionTracker({
|
|
28
|
+
* sessionId: "session-123",
|
|
29
|
+
* epicId: "bd-epic-456",
|
|
30
|
+
* onEvent: captureCompactionEvent,
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* // Wire to OpenCode hook
|
|
34
|
+
* hooks["tool.call"] = (input) => {
|
|
35
|
+
* tracker.trackToolCall({
|
|
36
|
+
* tool: input.tool,
|
|
37
|
+
* args: input.args,
|
|
38
|
+
* timestamp: Date.now(),
|
|
39
|
+
* });
|
|
40
|
+
* };
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
/**
 * Tool call event structure
 *
 * The input handed to `PostCompactionTracker.trackToolCall()` for every tool
 * invocation that should be observed.
 */
export interface ToolCallEvent {
  /** Tool name; this is the value checked by `isCoordinatorViolation()`. */
  tool: string;
  /** Arguments of the call, forwarded unchanged into the emitted event payload. */
  args: Record<string, unknown>;
  /** Time of the call; the examples in this file pass `Date.now()`. */
  timestamp: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Compaction event payload (matches eval-capture.ts structure)
 *
 * This is the shape passed to `PostCompactionTrackerConfig.onEvent`. The
 * tracker in this file only emits the `"resumption_started"` and
 * `"tool_call_tracked"` types; the other `compaction_type` values are declared
 * here but not produced by this module.
 */
export interface CompactionEvent {
  session_id: string;
  epic_id: string;
  compaction_type:
    | "detection_complete"
    | "prompt_generated"
    | "context_injected"
    | "resumption_started"
    | "tool_call_tracked";
  /**
   * Type-specific details. All fields are optional because each event type
   * fills in a different subset:
   * - `resumption_started`: `session_id`, `epic_id`, `timestamp`
   * - `tool_call_tracked`: `tool`, `args`, `call_number`,
   *   `is_coordinator_violation`, `violation_reason`, `timestamp`
   */
  payload: {
    session_id?: string;
    epic_id?: string;
    tool?: string;
    args?: Record<string, unknown>;
    /** 1-based position of the tool call within the tracked window. */
    call_number?: number;
    is_coordinator_violation?: boolean;
    /** Set only when the tool is a forbidden coordinator tool. */
    violation_reason?: string;
    timestamp?: number;
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Tracker configuration
 *
 * Options accepted by `createPostCompactionTracker()`.
 */
export interface PostCompactionTrackerConfig {
  /** Copied into `session_id` of every emitted event. */
  sessionId: string;
  /** Copied into `epic_id` of every emitted event. */
  epicId: string;
  /** Sink invoked synchronously for each emitted event. */
  onEvent: (event: CompactionEvent) => void;
  /** Number of tool calls to track; defaults to `DEFAULT_MAX_TRACKED_CALLS`. */
  maxCalls?: number;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Post-compaction tracker instance
 *
 * Returned by `createPostCompactionTracker()`.
 */
export interface PostCompactionTracker {
  /** Record one tool call; ignored once the configured call limit is reached. */
  trackToolCall(event: ToolCallEvent): void;
  /** `true` while fewer calls than the configured limit have been tracked. */
  isTracking(): boolean;
}
|
|
94
|
+
|
|
95
|
+
// ============================================================================
|
|
96
|
+
// Constants
|
|
97
|
+
// ============================================================================
|
|
98
|
+
|
|
99
|
+
/**
 * Default maximum number of tool calls to track
 *
 * Used by `createPostCompactionTracker()` when `maxCalls` is not supplied.
 *
 * Chosen to balance:
 * - Enough data for pattern detection (20 calls is ~2-3 minutes of coordinator work)
 * - Avoiding noise pollution in long sessions
 */
export const DEFAULT_MAX_TRACKED_CALLS = 20;
|
|
107
|
+
|
|
108
|
+
// ============================================================================
|
|
109
|
+
// Coordinator Violation Detection
|
|
110
|
+
// ============================================================================
|
|
111
|
+
|
|
112
|
+
/**
 * Tools that coordinators are NEVER allowed to use, mapped to the reason that
 * is reported when one of them is called.
 *
 * Key insight from semantic memory: coordinators lose identity after compaction
 * and start doing implementation work. These violations are observable signals
 * that the coordinator mandate wasn't preserved in continuation prompt.
 *
 * Keys are matched exactly (case-sensitive) against the tool name.
 */
const FORBIDDEN_COORDINATOR_TOOLS: Record<string, string> = {
  edit: "Coordinators NEVER edit files - spawn worker instead",
  write: "Coordinators NEVER write files - spawn worker instead",
  swarmmail_reserve: "Coordinators NEVER reserve files - workers reserve files",
  agentmail_reserve: "Coordinators NEVER reserve files - workers reserve files",
};

/**
 * Check if tool call is a coordinator violation
 *
 * Only tools listed in `FORBIDDEN_COORDINATOR_TOOLS` count as violations.
 * Names that merely collide with properties inherited from `Object.prototype`
 * (e.g. `"constructor"`, `"toString"`, `"__proto__"`) are NOT violations.
 *
 * @param tool - Tool name from OpenCode tool.call hook
 * @returns Violation status with reason if forbidden; `reason` is `undefined`
 *   when the tool is allowed
 *
 * @example
 * ```typescript
 * const result = isCoordinatorViolation("edit");
 * // { isViolation: true, reason: "Coordinators NEVER edit..." }
 *
 * const result = isCoordinatorViolation("read");
 * // { isViolation: false, reason: undefined }
 * ```
 */
export function isCoordinatorViolation(tool: string): {
  isViolation: boolean;
  reason?: string;
} {
  // Own-property check: a bare `FORBIDDEN_COORDINATOR_TOOLS[tool]` lookup also
  // resolves inherited members such as `constructor`, which are truthy and
  // would be reported as violations with a non-string reason.
  const isListed = Object.prototype.hasOwnProperty.call(
    FORBIDDEN_COORDINATOR_TOOLS,
    tool,
  );
  const reason = isListed ? FORBIDDEN_COORDINATOR_TOOLS[tool] : undefined;

  return {
    isViolation: !!reason,
    reason,
  };
}
|
|
151
|
+
|
|
152
|
+
// ============================================================================
|
|
153
|
+
// Tracker Factory
|
|
154
|
+
// ============================================================================
|
|
155
|
+
|
|
156
|
+
/**
 * Build a tracker for the tool calls that follow a compaction.
 *
 * Behaviour of the returned tracker:
 * - The first tracked call is preceded by a single `resumption_started` event.
 * - Every tracked call produces a `tool_call_tracked` event carrying a 1-based
 *   `call_number` and the result of `isCoordinatorViolation()`.
 * - After `maxCalls` calls (default `DEFAULT_MAX_TRACKED_CALLS`) further calls
 *   are ignored and `isTracking()` returns `false`.
 *
 * @param config - Session/epic identifiers, the event sink and an optional call limit
 * @returns Tracker exposing `trackToolCall()` and `isTracking()`
 *
 * @example
 * ```typescript
 * const tracker = createPostCompactionTracker({
 *   sessionId: "session-123",
 *   epicId: "bd-epic-456",
 *   onEvent: (event) => captureCompactionEvent(event),
 *   maxCalls: 20
 * });
 *
 * // Track tool calls
 * tracker.trackToolCall({
 *   tool: "read",
 *   args: { filePath: "/test.ts" },
 *   timestamp: Date.now()
 * });
 * ```
 */
export function createPostCompactionTracker(
  config: PostCompactionTrackerConfig,
): PostCompactionTracker {
  const sessionId = config.sessionId;
  const epicId = config.epicId;
  const onEvent = config.onEvent;
  const configuredLimit = config.maxCalls;
  const limit =
    configuredLimit === undefined ? DEFAULT_MAX_TRACKED_CALLS : configuredLimit;

  let trackedSoFar = 0;
  let announcedResumption = false;

  // Wraps a payload in the common session/epic envelope and hands it to the sink.
  const emit = (
    type: CompactionEvent["compaction_type"],
    payload: CompactionEvent["payload"],
  ): void => {
    onEvent({
      session_id: sessionId,
      epic_id: epicId,
      compaction_type: type,
      payload,
    });
  };

  const trackToolCall = (event: ToolCallEvent): void => {
    // Tracking window exhausted: ignore the call entirely.
    if (trackedSoFar >= limit) {
      return;
    }

    // The very first tracked call marks the exit from compaction.
    if (!announcedResumption) {
      emit("resumption_started", {
        session_id: sessionId,
        epic_id: epicId,
        timestamp: event.timestamp,
      });
      // Flag is flipped only after the sink returned.
      announcedResumption = true;
    }

    // Bump first so the reported call_number starts at 1.
    trackedSoFar += 1;

    const verdict = isCoordinatorViolation(event.tool);

    emit("tool_call_tracked", {
      tool: event.tool,
      args: event.args,
      call_number: trackedSoFar,
      is_coordinator_violation: verdict.isViolation,
      violation_reason: verdict.reason,
      timestamp: event.timestamp,
    });
  };

  const isTracking = (): boolean => trackedSoFar < limit;

  return { trackToolCall, isTracking };
}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* TDD: Testing eval capture integration - verifies captureDecomposition() is called
|
|
7
7
|
* after successful validation with correct parameters.
|
|
8
8
|
*/
|
|
9
|
-
import { afterEach, beforeEach, describe, expect, test,
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, test, spyOn } from "bun:test";
|
|
10
10
|
import * as fs from "node:fs";
|
|
11
11
|
import { swarm_validate_decomposition } from "./swarm-decompose";
|
|
12
12
|
import * as evalCapture from "./eval-capture.js";
|
|
@@ -41,15 +41,8 @@ afterEach(() => {
|
|
|
41
41
|
|
|
42
42
|
describe("captureDecomposition integration", () => {
|
|
43
43
|
test("calls captureDecomposition after successful validation with all params", async () => {
|
|
44
|
-
//
|
|
45
|
-
const captureDecompositionSpy =
|
|
46
|
-
id: "test-epic-123",
|
|
47
|
-
timestamp: new Date().toISOString(),
|
|
48
|
-
task: "Add user authentication",
|
|
49
|
-
}));
|
|
50
|
-
const original = evalCapture.captureDecomposition;
|
|
51
|
-
// @ts-expect-error - mocking for test
|
|
52
|
-
evalCapture.captureDecomposition = captureDecompositionSpy;
|
|
44
|
+
// Spy on captureDecomposition
|
|
45
|
+
const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
|
|
53
46
|
|
|
54
47
|
const validCellTree = JSON.stringify({
|
|
55
48
|
epic: {
|
|
@@ -91,27 +84,37 @@ describe("captureDecomposition integration", () => {
|
|
|
91
84
|
|
|
92
85
|
// Verify captureDecomposition was called with correct params
|
|
93
86
|
expect(captureDecompositionSpy).toHaveBeenCalledTimes(1);
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
87
|
+
expect(captureDecompositionSpy).toHaveBeenCalledWith({
|
|
88
|
+
epicId: "test-epic-123",
|
|
89
|
+
projectPath: testProjectPath,
|
|
90
|
+
task: "Add user authentication",
|
|
91
|
+
context: "Using NextAuth.js",
|
|
92
|
+
strategy: "feature-based",
|
|
93
|
+
epicTitle: "Add OAuth",
|
|
94
|
+
epicDescription: "Implement OAuth authentication",
|
|
95
|
+
subtasks: [
|
|
96
|
+
{
|
|
97
|
+
title: "Add OAuth provider config",
|
|
98
|
+
description: "Set up Google OAuth",
|
|
99
|
+
files: ["src/auth/google.ts", "src/auth/config.ts"],
|
|
100
|
+
dependencies: [],
|
|
101
|
+
estimated_complexity: 2,
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
title: "Add login UI",
|
|
105
|
+
description: "Create login button component",
|
|
106
|
+
files: ["src/components/LoginButton.tsx"],
|
|
107
|
+
dependencies: [0],
|
|
108
|
+
estimated_complexity: 1,
|
|
109
|
+
},
|
|
110
|
+
],
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
captureDecompositionSpy.mockRestore();
|
|
108
114
|
});
|
|
109
115
|
|
|
110
116
|
test("does not call captureDecomposition when validation fails", async () => {
|
|
111
|
-
const captureDecompositionSpy =
|
|
112
|
-
const original = evalCapture.captureDecomposition;
|
|
113
|
-
// @ts-expect-error - mocking for test
|
|
114
|
-
evalCapture.captureDecomposition = captureDecompositionSpy;
|
|
117
|
+
const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
|
|
115
118
|
|
|
116
119
|
// Invalid CellTree - missing required fields
|
|
117
120
|
const invalidCellTree = JSON.stringify({
|
|
@@ -136,20 +139,11 @@ describe("captureDecomposition integration", () => {
|
|
|
136
139
|
// Verify captureDecomposition was NOT called
|
|
137
140
|
expect(captureDecompositionSpy).not.toHaveBeenCalled();
|
|
138
141
|
|
|
139
|
-
|
|
140
|
-
// @ts-expect-error - restoring mock
|
|
141
|
-
evalCapture.captureDecomposition = original;
|
|
142
|
+
captureDecompositionSpy.mockRestore();
|
|
142
143
|
});
|
|
143
144
|
|
|
144
145
|
test("handles optional context and description fields", async () => {
|
|
145
|
-
const captureDecompositionSpy =
|
|
146
|
-
id: "test-epic-789",
|
|
147
|
-
timestamp: new Date().toISOString(),
|
|
148
|
-
task: "Fix the auth bug",
|
|
149
|
-
}));
|
|
150
|
-
const original = evalCapture.captureDecomposition;
|
|
151
|
-
// @ts-expect-error - mocking for test
|
|
152
|
-
evalCapture.captureDecomposition = captureDecompositionSpy;
|
|
146
|
+
const captureDecompositionSpy = spyOn(evalCapture, "captureDecomposition");
|
|
153
147
|
|
|
154
148
|
const validCellTree = JSON.stringify({
|
|
155
149
|
epic: {
|
|
@@ -183,13 +177,12 @@ describe("captureDecomposition integration", () => {
|
|
|
183
177
|
|
|
184
178
|
// Verify captureDecomposition was called without optional fields
|
|
185
179
|
expect(captureDecompositionSpy).toHaveBeenCalledTimes(1);
|
|
186
|
-
const
|
|
187
|
-
expect(
|
|
188
|
-
expect(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
evalCapture.captureDecomposition = original;
|
|
180
|
+
const call = captureDecompositionSpy.mock.calls[0];
|
|
181
|
+
expect(call[0].epicId).toBe("test-epic-789");
|
|
182
|
+
expect(call[0].context).toBeUndefined();
|
|
183
|
+
// Schema default makes description empty string instead of undefined
|
|
184
|
+
expect(call[0].epicDescription).toBe("");
|
|
185
|
+
|
|
186
|
+
captureDecompositionSpy.mockRestore();
|
|
194
187
|
});
|
|
195
188
|
});
|
package/src/swarm-decompose.ts
CHANGED
|
@@ -753,7 +753,7 @@ export const swarm_delegate_planning = tool({
|
|
|
753
753
|
.default(true)
|
|
754
754
|
.describe("Query CASS for similar past tasks (default: true)"),
|
|
755
755
|
},
|
|
756
|
-
async execute(args) {
|
|
756
|
+
async execute(args, _ctx) {
|
|
757
757
|
// Import needed modules
|
|
758
758
|
const { selectStrategy, formatStrategyGuidelines } =
|
|
759
759
|
await import("./swarm-strategies");
|
|
@@ -777,7 +777,7 @@ export const swarm_delegate_planning = tool({
|
|
|
777
777
|
// Capture strategy selection decision
|
|
778
778
|
try {
|
|
779
779
|
captureCoordinatorEvent({
|
|
780
|
-
session_id:
|
|
780
|
+
session_id: _ctx.sessionID || "unknown",
|
|
781
781
|
epic_id: "planning", // No epic ID yet - this is pre-decomposition
|
|
782
782
|
timestamp: new Date().toISOString(),
|
|
783
783
|
event_type: "DECISION",
|
|
@@ -6,10 +6,13 @@
|
|
|
6
6
|
* - Researcher spawning for identified technologies
|
|
7
7
|
* - Summary collection from semantic-memory
|
|
8
8
|
* - Research result aggregation
|
|
9
|
+
* - Eval capture integration (captureSubtaskOutcome wiring)
|
|
9
10
|
*/
|
|
10
11
|
|
|
11
|
-
import { describe, test, expect, beforeEach } from "bun:test";
|
|
12
|
-
import { runResearchPhase, extractTechStack } from "./swarm-orchestrate";
|
|
12
|
+
import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test";
|
|
13
|
+
import { runResearchPhase, extractTechStack, swarm_complete } from "./swarm-orchestrate";
|
|
14
|
+
import * as evalCapture from "./eval-capture.js";
|
|
15
|
+
import * as fs from "node:fs";
|
|
13
16
|
|
|
14
17
|
describe("extractTechStack", () => {
|
|
15
18
|
test("extracts Next.js from task description", () => {
|
|
@@ -115,9 +118,269 @@ describe("runResearchPhase", () => {
|
|
|
115
118
|
});
|
|
116
119
|
});
|
|
117
120
|
|
|
118
|
-
describe("swarm_research_phase tool", () => {
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
121
|
+
// describe("swarm_research_phase tool", () => {
|
|
122
|
+
// test.todo("exposes research phase as plugin tool");
|
|
123
|
+
// test.todo("validates task parameter");
|
|
124
|
+
// test.todo("validates project_path parameter");
|
|
125
|
+
// test.todo("returns JSON string with research results");
|
|
126
|
+
// });
|
|
127
|
+
|
|
128
|
+
// ============================================================================
|
|
129
|
+
// Eval Capture Integration Tests (swarm_complete)
|
|
130
|
+
// ============================================================================
|
|
131
|
+
|
|
132
|
+
/**
 * Verifies that swarm_complete forwards outcome data to
 * evalCapture.captureSubtaskOutcome.
 *
 * Each test creates a fresh project directory with an empty
 * `.hive/issues.jsonl`, points the hive working directory at it, and removes
 * the directory afterwards. Spies are restored in `finally` so a failing
 * assertion cannot leak a spy into later tests.
 */
describe("captureSubtaskOutcome integration", () => {
  // Context object passed as the second argument to tool.execute().
  const mockContext = {
    sessionID: `test-complete-${Date.now()}`,
    messageID: `test-message-${Date.now()}`,
    agent: "test-agent",
    abort: new AbortController().signal,
  };

  let testProjectPath: string;

  beforeEach(async () => {
    testProjectPath = `/tmp/test-swarm-complete-${Date.now()}`;
    fs.mkdirSync(testProjectPath, { recursive: true });

    // Create .hive directory and issues.jsonl
    const hiveDir = `${testProjectPath}/.hive`;
    fs.mkdirSync(hiveDir, { recursive: true });
    fs.writeFileSync(`${hiveDir}/issues.jsonl`, "", "utf-8");

    // Set hive working directory to testProjectPath
    const { setHiveWorkingDirectory } = await import("./hive");
    setHiveWorkingDirectory(testProjectPath);
  });

  afterEach(() => {
    if (fs.existsSync(testProjectPath)) {
      fs.rmSync(testProjectPath, { recursive: true, force: true });
    }
  });

  test("calls captureSubtaskOutcome after successful completion with all params", async () => {
    // Import hive tools
    const { hive_create_epic } = await import("./hive");

    // Spy on captureSubtaskOutcome
    const captureOutcomeSpy = spyOn(evalCapture, "captureSubtaskOutcome");

    try {
      // Create an epic with a subtask using hive_create_epic
      const epicResult = await hive_create_epic.execute({
        epic_title: "Add OAuth",
        epic_description: "Implement OAuth authentication",
        subtasks: [
          {
            title: "Add auth service",
            priority: 2,
            files: ["src/auth/service.ts", "src/auth/schema.ts"],
          },
        ],
      }, mockContext);

      const epicData = JSON.parse(epicResult);
      expect(epicData.success).toBe(true);

      const epicId = epicData.epic.id;
      const beadId = epicData.subtasks[0].id;

      const startTime = Date.now() - 120000; // Started 2 minutes ago
      const plannedFiles = ["src/auth/service.ts", "src/auth/schema.ts"];
      const actualFiles = ["src/auth/service.ts", "src/auth/schema.ts", "src/auth/types.ts"];

      // Call swarm_complete
      const result = await swarm_complete.execute(
        {
          project_key: testProjectPath,
          agent_name: "TestAgent",
          bead_id: beadId,
          summary: "Implemented OAuth service with JWT strategy",
          files_touched: actualFiles,
          skip_verification: true, // Skip verification for test
          skip_review: true, // Skip review for test
          planned_files: plannedFiles,
          start_time: startTime,
          error_count: 0,
          retry_count: 0,
        },
        mockContext,
      );

      const parsed = JSON.parse(result);
      expect(parsed.success).toBe(true);

      // Verify captureSubtaskOutcome was called with correct params
      expect(captureOutcomeSpy).toHaveBeenCalledTimes(1);

      const call = captureOutcomeSpy.mock.calls[0][0];
      expect(call.epicId).toBe(epicId);
      expect(call.projectPath).toBe(testProjectPath);
      expect(call.beadId).toBe(beadId);
      expect(call.title).toBe("Add auth service");
      expect(call.plannedFiles).toEqual(plannedFiles);
      expect(call.actualFiles).toEqual(actualFiles);
      expect(call.durationMs).toBeGreaterThan(0);
      expect(call.errorCount).toBe(0);
      expect(call.retryCount).toBe(0);
      expect(call.success).toBe(true);
    } finally {
      // Always restore, even when an assertion above throws.
      captureOutcomeSpy.mockRestore();
    }
  });

  // Title states what is asserted below: the capture still happens exactly once.
  test("still calls captureSubtaskOutcome when optional params are missing", async () => {
    const { hive_create_epic } = await import("./hive");
    const captureOutcomeSpy = spyOn(evalCapture, "captureSubtaskOutcome");

    try {
      // Create an epic with a subtask
      const epicResult = await hive_create_epic.execute({
        epic_title: "Fix bug",
        subtasks: [
          {
            title: "Fix auth bug",
            priority: 1,
            files: ["src/auth.ts"],
          },
        ],
      }, mockContext);

      const epicData = JSON.parse(epicResult);
      const beadId = epicData.subtasks[0].id;

      // Call without planned_files or start_time
      const result = await swarm_complete.execute(
        {
          project_key: testProjectPath,
          agent_name: "TestAgent",
          bead_id: beadId,
          summary: "Fixed the bug",
          skip_verification: true,
          skip_review: true,
          // No planned_files, start_time
        },
        mockContext,
      );

      const parsed = JSON.parse(result);
      expect(parsed.success).toBe(true);

      // Capture should still be called, but with default values
      // (The function is called in all success cases, it just handles missing params)
      expect(captureOutcomeSpy).toHaveBeenCalledTimes(1);
    } finally {
      // Always restore, even when an assertion above throws.
      captureOutcomeSpy.mockRestore();
    }
  });
});
|
|
274
|
+
|
|
275
|
+
// ============================================================================
|
|
276
|
+
// Eval Capture Integration Tests (swarm_record_outcome)
|
|
277
|
+
// ============================================================================
|
|
278
|
+
|
|
279
|
+
/**
 * Verifies the wiring between swarm_record_outcome and
 * evalCapture.finalizeEvalRecord.
 *
 * Spies are restored in `finally` so a failing assertion cannot leak a spy
 * into later tests.
 */
describe("finalizeEvalRecord integration", () => {
  // Context object passed as the second argument to tool.execute().
  const mockContext = {
    sessionID: `test-finalize-${Date.now()}`,
    messageID: `test-message-${Date.now()}`,
    agent: "test-agent",
    abort: new AbortController().signal,
  };

  test("calls finalizeEvalRecord when project_path and epic_id provided", async () => {
    const { swarm_record_outcome } = await import("./swarm-orchestrate");

    // Spy on finalizeEvalRecord
    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");

    try {
      finalizeEvalSpy.mockReturnValue(null); // Mock return value

      const testProjectPath = "/tmp/test-project";
      const testEpicId = "bd-test123";
      const testBeadId = `${testEpicId}.0`;

      // Call swarm_record_outcome with epic_id and project_path
      await swarm_record_outcome.execute({
        bead_id: testBeadId,
        duration_ms: 120000,
        error_count: 0,
        retry_count: 0,
        success: true,
        files_touched: ["src/test.ts"],
        epic_id: testEpicId,
        project_path: testProjectPath,
      }, mockContext);

      // Verify finalizeEvalRecord was called
      expect(finalizeEvalSpy).toHaveBeenCalledTimes(1);
      expect(finalizeEvalSpy).toHaveBeenCalledWith({
        epicId: testEpicId,
        projectPath: testProjectPath,
      });
    } finally {
      // Always restore, even when an assertion above throws.
      finalizeEvalSpy.mockRestore();
    }
  });

  test("does not call finalizeEvalRecord when epic_id or project_path missing", async () => {
    const { swarm_record_outcome } = await import("./swarm-orchestrate");

    // Spy on finalizeEvalRecord
    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");

    try {
      const testBeadId = "bd-test123.0";

      // Call without epic_id or project_path
      await swarm_record_outcome.execute({
        bead_id: testBeadId,
        duration_ms: 120000,
        error_count: 0,
        retry_count: 0,
        success: true,
      }, mockContext);

      // Verify finalizeEvalRecord was NOT called
      expect(finalizeEvalSpy).toHaveBeenCalledTimes(0);
    } finally {
      // Always restore, even when an assertion above throws.
      finalizeEvalSpy.mockRestore();
    }
  });

  test("includes finalized record in response when available", async () => {
    const { swarm_record_outcome } = await import("./swarm-orchestrate");

    // Mock finalizeEvalRecord to return a record
    const mockFinalRecord = {
      id: "bd-test123",
      timestamp: new Date().toISOString(),
      project_path: "/tmp/test-project",
      task: "Test task",
      strategy: "file-based" as const,
      subtask_count: 2,
      epic_title: "Test Epic",
      subtasks: [],
      overall_success: true,
      total_duration_ms: 240000,
      total_errors: 0,
    };

    const finalizeEvalSpy = spyOn(evalCapture, "finalizeEvalRecord");

    try {
      finalizeEvalSpy.mockReturnValue(mockFinalRecord);

      const testProjectPath = "/tmp/test-project";
      const testEpicId = "bd-test123";
      const testBeadId = `${testEpicId}.0`;

      // Call with epic_id and project_path
      const result = await swarm_record_outcome.execute({
        bead_id: testBeadId,
        duration_ms: 120000,
        error_count: 0,
        retry_count: 0,
        success: true,
        epic_id: testEpicId,
        project_path: testProjectPath,
      }, mockContext);

      // Parse result and check for finalized record
      const parsed = JSON.parse(result);
      expect(parsed).toHaveProperty("finalized_eval_record");
      expect(parsed.finalized_eval_record).toEqual(mockFinalRecord);
    } finally {
      // Always restore, even when an assertion above throws.
      finalizeEvalSpy.mockRestore();
    }
  });
});
|