opencode-swarm-plugin 0.38.0 → 0.39.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +11 -0
- package/.hive/memories.jsonl +23 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/CHANGELOG.md +130 -0
- package/README.md +29 -12
- package/bin/swarm.test.ts +475 -0
- package/bin/swarm.ts +383 -0
- package/dist/compaction-hook.d.ts +1 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +81 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +370 -13
- package/dist/plugin.js +203 -13
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +4 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +589 -105
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
- package/evals/scorers/coordinator-discipline.ts +13 -13
- package/examples/plugin-wrapper-template.ts +117 -0
- package/package.json +7 -5
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +42 -0
- package/src/compaction-hook.ts +81 -0
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +422 -0
- package/src/eval-capture.ts +94 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/index.ts +61 -1
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.ts +2 -2
- package/src/swarm-prompts.ts +2 -2
- package/src/swarm-review.ts +3 -3
- package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
|
package/src/compaction-capture.integration.test.ts
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test for compaction event capture
|
|
3
|
+
*
|
|
4
|
+
* Verifies that captureCompactionEvent writes events to session JSONL
|
|
5
|
+
* and that all event types are captured with correct data.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, expect, it, afterAll } from "bun:test";
|
|
9
|
+
import { existsSync, unlinkSync } from "node:fs";
|
|
10
|
+
import {
|
|
11
|
+
captureCompactionEvent,
|
|
12
|
+
readSessionEvents,
|
|
13
|
+
getSessionPath,
|
|
14
|
+
} from "./eval-capture";
|
|
15
|
+
|
|
16
|
+
describe("Compaction Event Capture Integration", () => {
|
|
17
|
+
const testSessionId = `test-compaction-${Date.now()}`;
|
|
18
|
+
const sessionPath = getSessionPath(testSessionId);
|
|
19
|
+
|
|
20
|
+
afterAll(() => {
|
|
21
|
+
// Clean up test session file
|
|
22
|
+
if (existsSync(sessionPath)) {
|
|
23
|
+
unlinkSync(sessionPath);
|
|
24
|
+
}
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
it("captures detection_complete event with confidence and reasons", () => {
|
|
28
|
+
captureCompactionEvent({
|
|
29
|
+
session_id: testSessionId,
|
|
30
|
+
epic_id: "bd-test-123",
|
|
31
|
+
compaction_type: "detection_complete",
|
|
32
|
+
payload: {
|
|
33
|
+
confidence: "high",
|
|
34
|
+
detected: true,
|
|
35
|
+
reasons: ["3 cells in_progress", "2 open subtasks"],
|
|
36
|
+
session_scan_contributed: true,
|
|
37
|
+
session_scan_reasons: ["swarm tool calls found in session"],
|
|
38
|
+
epic_id: "bd-test-123",
|
|
39
|
+
epic_title: "Test Epic",
|
|
40
|
+
subtask_count: 5,
|
|
41
|
+
},
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
// Verify event was written to session file
|
|
45
|
+
expect(existsSync(sessionPath)).toBe(true);
|
|
46
|
+
|
|
47
|
+
// Read events from session
|
|
48
|
+
const events = readSessionEvents(testSessionId);
|
|
49
|
+
expect(events.length).toBe(1);
|
|
50
|
+
|
|
51
|
+
const event = events[0];
|
|
52
|
+
expect(event.session_id).toBe(testSessionId);
|
|
53
|
+
expect(event.epic_id).toBe("bd-test-123");
|
|
54
|
+
expect(event.event_type).toBe("COMPACTION");
|
|
55
|
+
expect(event.compaction_type).toBe("detection_complete");
|
|
56
|
+
|
|
57
|
+
// Verify payload structure
|
|
58
|
+
expect(event.payload.confidence).toBe("high");
|
|
59
|
+
expect(event.payload.detected).toBe(true);
|
|
60
|
+
expect(event.payload.reasons).toEqual(["3 cells in_progress", "2 open subtasks"]);
|
|
61
|
+
expect(event.payload.epic_id).toBe("bd-test-123");
|
|
62
|
+
expect(event.payload.epic_title).toBe("Test Epic");
|
|
63
|
+
expect(event.payload.subtask_count).toBe(5);
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
it("captures prompt_generated event with FULL prompt content", () => {
|
|
67
|
+
const fullPrompt = `
|
|
68
|
+
┌─────────────────────────────────────────┐
|
|
69
|
+
│ 🐝 YOU ARE THE COORDINATOR 🐝 │
|
|
70
|
+
└─────────────────────────────────────────┘
|
|
71
|
+
|
|
72
|
+
# Swarm Continuation
|
|
73
|
+
|
|
74
|
+
**NON-NEGOTIABLE: YOU ARE THE COORDINATOR.**
|
|
75
|
+
|
|
76
|
+
## Epic State
|
|
77
|
+
**ID:** bd-epic-456
|
|
78
|
+
**Title:** Refactor authentication
|
|
79
|
+
**Status:** 2/5 subtasks complete
|
|
80
|
+
|
|
81
|
+
## Next Actions
|
|
82
|
+
1. Check swarm_status(epic_id="bd-epic-456")
|
|
83
|
+
2. Review completed work
|
|
84
|
+
3. Spawn remaining subtasks
|
|
85
|
+
`.trim();
|
|
86
|
+
|
|
87
|
+
captureCompactionEvent({
|
|
88
|
+
session_id: testSessionId,
|
|
89
|
+
epic_id: "bd-epic-456",
|
|
90
|
+
compaction_type: "prompt_generated",
|
|
91
|
+
payload: {
|
|
92
|
+
prompt_length: fullPrompt.length,
|
|
93
|
+
full_prompt: fullPrompt, // FULL content, not truncated
|
|
94
|
+
context_type: "llm_generated",
|
|
95
|
+
duration_ms: 1234,
|
|
96
|
+
},
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
const events = readSessionEvents(testSessionId);
|
|
100
|
+
const promptEvent = events.find((e) => e.compaction_type === "prompt_generated");
|
|
101
|
+
|
|
102
|
+
expect(promptEvent).toBeDefined();
|
|
103
|
+
if (promptEvent) {
|
|
104
|
+
expect(promptEvent.payload.full_prompt).toBe(fullPrompt);
|
|
105
|
+
expect(promptEvent.payload.prompt_length).toBe(fullPrompt.length);
|
|
106
|
+
expect(promptEvent.payload.context_type).toBe("llm_generated");
|
|
107
|
+
expect(promptEvent.payload.duration_ms).toBe(1234);
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it("captures context_injected event with FULL content", () => {
|
|
112
|
+
const fullContent = `[Swarm compaction: LLM-generated, high confidence]
|
|
113
|
+
|
|
114
|
+
# 🐝 Swarm State
|
|
115
|
+
|
|
116
|
+
**Epic:** bd-epic-789 - Add user permissions
|
|
117
|
+
**Project:** /Users/test/project
|
|
118
|
+
|
|
119
|
+
**Subtasks:**
|
|
120
|
+
- 2 closed
|
|
121
|
+
- 1 in_progress
|
|
122
|
+
- 2 open
|
|
123
|
+
|
|
124
|
+
## COORDINATOR MANDATES
|
|
125
|
+
|
|
126
|
+
⛔ NEVER use edit/write directly - SPAWN A WORKER
|
|
127
|
+
✅ ALWAYS use swarm_spawn_subtask for implementation
|
|
128
|
+
✅ ALWAYS review with swarm_review
|
|
129
|
+
`;
|
|
130
|
+
|
|
131
|
+
captureCompactionEvent({
|
|
132
|
+
session_id: testSessionId,
|
|
133
|
+
epic_id: "bd-epic-789",
|
|
134
|
+
compaction_type: "context_injected",
|
|
135
|
+
payload: {
|
|
136
|
+
full_content: fullContent, // FULL content, not truncated
|
|
137
|
+
content_length: fullContent.length,
|
|
138
|
+
injection_method: "output.prompt",
|
|
139
|
+
context_type: "llm_generated",
|
|
140
|
+
},
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
const events = readSessionEvents(testSessionId);
|
|
144
|
+
const injectEvent = events.find((e) => e.compaction_type === "context_injected");
|
|
145
|
+
|
|
146
|
+
expect(injectEvent).toBeDefined();
|
|
147
|
+
if (injectEvent) {
|
|
148
|
+
expect(injectEvent.payload.full_content).toBe(fullContent);
|
|
149
|
+
expect(injectEvent.payload.content_length).toBe(fullContent.length);
|
|
150
|
+
expect(injectEvent.payload.injection_method).toBe("output.prompt");
|
|
151
|
+
expect(injectEvent.payload.context_type).toBe("llm_generated");
|
|
152
|
+
}
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
it("captures all three event types in sequence", () => {
|
|
156
|
+
const sequenceSessionId = `test-sequence-${Date.now()}`;
|
|
157
|
+
const sequencePath = getSessionPath(sequenceSessionId);
|
|
158
|
+
|
|
159
|
+
try {
|
|
160
|
+
// Simulate compaction lifecycle
|
|
161
|
+
|
|
162
|
+
// 1. Detection
|
|
163
|
+
captureCompactionEvent({
|
|
164
|
+
session_id: sequenceSessionId,
|
|
165
|
+
epic_id: "bd-seq-123",
|
|
166
|
+
compaction_type: "detection_complete",
|
|
167
|
+
payload: {
|
|
168
|
+
confidence: "medium",
|
|
169
|
+
detected: true,
|
|
170
|
+
reasons: ["1 unclosed epic"],
|
|
171
|
+
},
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
// 2. Prompt generation
|
|
175
|
+
captureCompactionEvent({
|
|
176
|
+
session_id: sequenceSessionId,
|
|
177
|
+
epic_id: "bd-seq-123",
|
|
178
|
+
compaction_type: "prompt_generated",
|
|
179
|
+
payload: {
|
|
180
|
+
full_prompt: "Test prompt content",
|
|
181
|
+
prompt_length: 19,
|
|
182
|
+
},
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
// 3. Context injection
|
|
186
|
+
captureCompactionEvent({
|
|
187
|
+
session_id: sequenceSessionId,
|
|
188
|
+
epic_id: "bd-seq-123",
|
|
189
|
+
compaction_type: "context_injected",
|
|
190
|
+
payload: {
|
|
191
|
+
full_content: "Test context content",
|
|
192
|
+
content_length: 20,
|
|
193
|
+
},
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
// Verify all three events captured
|
|
197
|
+
const events = readSessionEvents(sequenceSessionId);
|
|
198
|
+
expect(events.length).toBe(3);
|
|
199
|
+
|
|
200
|
+
const types = events.map((e) => e.compaction_type);
|
|
201
|
+
expect(types).toContain("detection_complete");
|
|
202
|
+
expect(types).toContain("prompt_generated");
|
|
203
|
+
expect(types).toContain("context_injected");
|
|
204
|
+
|
|
205
|
+
// Verify order (chronological by timestamp)
|
|
206
|
+
const timestamps = events.map((e) => new Date(e.timestamp).getTime());
|
|
207
|
+
expect(timestamps[0]).toBeLessThanOrEqual(timestamps[1]);
|
|
208
|
+
expect(timestamps[1]).toBeLessThanOrEqual(timestamps[2]);
|
|
209
|
+
} finally {
|
|
210
|
+
// Clean up
|
|
211
|
+
if (existsSync(sequencePath)) {
|
|
212
|
+
unlinkSync(sequencePath);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
it("validates event schema with Zod", () => {
|
|
218
|
+
// This should not throw - captureCompactionEvent validates internally
|
|
219
|
+
expect(() => {
|
|
220
|
+
captureCompactionEvent({
|
|
221
|
+
session_id: testSessionId,
|
|
222
|
+
epic_id: "bd-validate-123",
|
|
223
|
+
compaction_type: "detection_complete",
|
|
224
|
+
payload: { confidence: "high" },
|
|
225
|
+
});
|
|
226
|
+
}).not.toThrow();
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
it("rejects invalid compaction_type", () => {
|
|
230
|
+
expect(() => {
|
|
231
|
+
captureCompactionEvent({
|
|
232
|
+
session_id: testSessionId,
|
|
233
|
+
epic_id: "bd-invalid-123",
|
|
234
|
+
// @ts-expect-error - intentionally invalid type
|
|
235
|
+
compaction_type: "invalid_type",
|
|
236
|
+
payload: {},
|
|
237
|
+
});
|
|
238
|
+
}).toThrow();
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
it("handles empty epic_id gracefully", () => {
|
|
242
|
+
captureCompactionEvent({
|
|
243
|
+
session_id: testSessionId,
|
|
244
|
+
epic_id: "unknown",
|
|
245
|
+
compaction_type: "detection_complete",
|
|
246
|
+
payload: {
|
|
247
|
+
confidence: "none",
|
|
248
|
+
detected: false,
|
|
249
|
+
reasons: [],
|
|
250
|
+
},
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
const events = readSessionEvents(testSessionId);
|
|
254
|
+
const unknownEvent = events.find((e) => e.epic_id === "unknown");
|
|
255
|
+
expect(unknownEvent).toBeDefined();
|
|
256
|
+
});
|
|
257
|
+
});
|
|
package/src/compaction-hook.test.ts
CHANGED
|
@@ -85,6 +85,48 @@ describe("Compaction Hook", () => {
|
|
|
85
85
|
expect(SWARM_COMPACTION_CONTEXT).toContain("Blocked:");
|
|
86
86
|
expect(SWARM_COMPACTION_CONTEXT).toContain("Completed:");
|
|
87
87
|
});
|
|
88
|
+
|
|
89
|
+
// NEW: Full coordinator workflow must be present post-compaction
|
|
90
|
+
it("contains FULL coordinator workflow phases", () => {
|
|
91
|
+
// Phase 1.5: Research Phase
|
|
92
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_spawn_researcher");
|
|
93
|
+
|
|
94
|
+
// Phase 3: Decompose
|
|
95
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_select_strategy");
|
|
96
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_plan_prompt");
|
|
97
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_validate_decomposition");
|
|
98
|
+
|
|
99
|
+
// Phase 4: Create Cells
|
|
100
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("hive_create_epic");
|
|
101
|
+
|
|
102
|
+
// Phase 6: Spawn Workers
|
|
103
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_spawn_subtask");
|
|
104
|
+
|
|
105
|
+
// Phase 7: Review Loop
|
|
106
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_review");
|
|
107
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_review_feedback");
|
|
108
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("swarm_spawn_retry");
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it("contains forbidden tools section with ALL forbidden tools", () => {
|
|
112
|
+
// Repository fetching
|
|
113
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("repo-crawl_file");
|
|
114
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("repo-autopsy");
|
|
115
|
+
|
|
116
|
+
// Web/documentation fetching
|
|
117
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("webfetch");
|
|
118
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("fetch_fetch");
|
|
119
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("context7");
|
|
120
|
+
|
|
121
|
+
// Knowledge base
|
|
122
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("pdf-brain");
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
it("contains strategy reference table", () => {
|
|
126
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("file-based");
|
|
127
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("feature-based");
|
|
128
|
+
expect(SWARM_COMPACTION_CONTEXT).toContain("risk-based");
|
|
129
|
+
});
|
|
88
130
|
});
|
|
89
131
|
|
|
90
132
|
describe("SWARM_DETECTION_FALLBACK", () => {
|
package/src/compaction-hook.ts
CHANGED
|
@@ -163,6 +163,87 @@ Extract from session context:
|
|
|
163
163
|
- **Review work** - Use \`swarm_review\` and \`swarm_review_feedback\` for completed work
|
|
164
164
|
- **Close the loop** - When all subtasks done, verify and close the epic
|
|
165
165
|
|
|
166
|
+
**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## 📋 FULL COORDINATOR WORKFLOW (Reference)
|
|
171
|
+
|
|
172
|
+
You are ALWAYS swarming. Here is the complete workflow for any new work:
|
|
173
|
+
|
|
174
|
+
### Phase 1.5: Research Phase (FOR COMPLEX TASKS)
|
|
175
|
+
|
|
176
|
+
**If the task requires understanding unfamiliar technologies, spawn a researcher FIRST:**
|
|
177
|
+
|
|
178
|
+
\`\`\`
|
|
179
|
+
swarm_spawn_researcher(
|
|
180
|
+
research_id="research-<topic>",
|
|
181
|
+
epic_id="<epic-id>",
|
|
182
|
+
tech_stack=["<technology>"],
|
|
183
|
+
project_path="<path>"
|
|
184
|
+
)
|
|
185
|
+
// Then spawn with Task(subagent_type="swarm/researcher", prompt="<from above>")
|
|
186
|
+
\`\`\`
|
|
187
|
+
|
|
188
|
+
### Phase 2: Knowledge Gathering
|
|
189
|
+
|
|
190
|
+
\`\`\`
|
|
191
|
+
semantic-memory_find(query="<task keywords>", limit=5) # Past learnings
|
|
192
|
+
cass_search(query="<task description>", limit=5) # Similar past tasks
|
|
193
|
+
skills_list() # Available skills
|
|
194
|
+
\`\`\`
|
|
195
|
+
|
|
196
|
+
### Phase 3: Decompose
|
|
197
|
+
|
|
198
|
+
\`\`\`
|
|
199
|
+
swarm_select_strategy(task="<task>")
|
|
200
|
+
swarm_plan_prompt(task="<task>", context="<synthesized knowledge>")
|
|
201
|
+
swarm_validate_decomposition(response="<CellTree JSON>")
|
|
202
|
+
\`\`\`
|
|
203
|
+
|
|
204
|
+
### Phase 4: Create Cells
|
|
205
|
+
|
|
206
|
+
\`hive_create_epic(epic_title="<task>", subtasks=[...])\`
|
|
207
|
+
|
|
208
|
+
### Phase 5: DO NOT Reserve Files
|
|
209
|
+
|
|
210
|
+
> **⚠️ Coordinator NEVER reserves files.** Workers reserve their own files.
|
|
211
|
+
|
|
212
|
+
### Phase 6: Spawn Workers
|
|
213
|
+
|
|
214
|
+
\`\`\`
|
|
215
|
+
swarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)
|
|
216
|
+
Task(subagent_type="swarm/worker", prompt="<from above>")
|
|
217
|
+
\`\`\`
|
|
218
|
+
|
|
219
|
+
### Phase 7: MANDATORY Review Loop
|
|
220
|
+
|
|
221
|
+
**AFTER EVERY Task() RETURNS:**
|
|
222
|
+
|
|
223
|
+
1. \`swarmmail_inbox()\` - Check for messages
|
|
224
|
+
2. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Generate review
|
|
225
|
+
3. Evaluate against epic goals
|
|
226
|
+
4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\`
|
|
227
|
+
|
|
228
|
+
**If needs_changes:**
|
|
229
|
+
\`\`\`
|
|
230
|
+
swarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)
|
|
231
|
+
// Spawn NEW worker with Task() using retry prompt
|
|
232
|
+
// Max 3 attempts before marking task blocked
|
|
233
|
+
\`\`\`
|
|
234
|
+
|
|
235
|
+
### Phase 8: Complete
|
|
236
|
+
|
|
237
|
+
\`hive_sync()\` - Sync all cells to git
|
|
238
|
+
|
|
239
|
+
## Strategy Reference
|
|
240
|
+
|
|
241
|
+
| Strategy | Best For | Keywords |
|
|
242
|
+
| -------------- | ------------------------ | -------------------------------------- |
|
|
243
|
+
| file-based | Refactoring, migrations | refactor, migrate, rename, update all |
|
|
244
|
+
| feature-based | New features | add, implement, build, create, feature |
|
|
245
|
+
| risk-based | Bug fixes, security | fix, bug, security, critical, urgent |
|
|
246
|
+
|
|
166
247
|
**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
|
|
167
248
|
`;
|
|
168
249
|
|
|
package/src/compaction-prompt-scorers.test.ts
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for compaction prompt quality scorers
|
|
3
|
+
*
|
|
4
|
+
* TDD approach - tests written FIRST to define scorer behavior
|
|
5
|
+
* Tests the PURE scoring functions (not evalite wrappers)
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, expect, test } from "bun:test";
|
|
9
|
+
import type { CompactionPrompt } from "./compaction-prompt-scoring.js";
|
|
10
|
+
import {
|
|
11
|
+
scoreActionability,
|
|
12
|
+
scoreCoordinatorIdentity,
|
|
13
|
+
scoreEpicIdSpecificity,
|
|
14
|
+
scoreForbiddenToolsPresent,
|
|
15
|
+
scorePostCompactionDiscipline,
|
|
16
|
+
} from "./compaction-prompt-scoring.js";
|
|
17
|
+
|
|
18
|
+
describe("epicIdSpecificity scorer", () => {
|
|
19
|
+
test("scores 1.0 for real epic IDs", () => {
|
|
20
|
+
const prompt: CompactionPrompt = {
|
|
21
|
+
content: "Continue coordinating epic mjkw81rkq4c",
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
const result = scoreEpicIdSpecificity(prompt);
|
|
25
|
+
|
|
26
|
+
expect(result.score).toBe(1.0);
|
|
27
|
+
expect(result.message).toContain("real epic ID");
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("scores 0.0 for placeholder IDs like <epic-id>", () => {
|
|
31
|
+
const prompt: CompactionPrompt = {
|
|
32
|
+
content: "Continue coordinating epic <epic-id>",
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
const result = scoreEpicIdSpecificity(prompt);
|
|
36
|
+
|
|
37
|
+
expect(result.score).toBe(0.0);
|
|
38
|
+
expect(result.message).toContain("placeholder");
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test("scores 0.0 for bd-xxx placeholders", () => {
|
|
42
|
+
const prompt: CompactionPrompt = {
|
|
43
|
+
content: "Check status of bd-xxx",
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
const result = scoreEpicIdSpecificity(prompt);
|
|
47
|
+
|
|
48
|
+
expect(result.score).toBe(0.0);
|
|
49
|
+
expect(result.message).toContain("placeholder");
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
test("scores 0.0 for generic <path> placeholders", () => {
|
|
53
|
+
const prompt: CompactionPrompt = {
|
|
54
|
+
content: "Project at <path>",
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
const result = scoreEpicIdSpecificity(prompt);
|
|
58
|
+
|
|
59
|
+
expect(result.score).toBe(0.0);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
test("scores 0.0 when no epic ID found", () => {
|
|
63
|
+
const prompt: CompactionPrompt = {
|
|
64
|
+
content: "Continue working on the task",
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
const result = scoreEpicIdSpecificity(prompt);
|
|
68
|
+
|
|
69
|
+
expect(result.score).toBe(0.0);
|
|
70
|
+
expect(result.message).toContain("No epic ID");
|
|
71
|
+
});
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
describe("actionability scorer", () => {
|
|
75
|
+
test("scores 1.0 when swarm_status has real epic ID", () => {
|
|
76
|
+
const prompt: CompactionPrompt = {
|
|
77
|
+
content: `First action:
|
|
78
|
+
swarm_status(epic_id='mjkw81rkq4c', project_key='/path/to/project')`,
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
const result = scoreActionability(prompt);
|
|
82
|
+
|
|
83
|
+
expect(result.score).toBe(1.0);
|
|
84
|
+
expect(result.message).toContain("actionable tool call");
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test("scores 1.0 when swarmmail_inbox is present", () => {
|
|
88
|
+
const prompt: CompactionPrompt = {
|
|
89
|
+
content: `Check messages:
|
|
90
|
+
swarmmail_inbox()`,
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
const result = scoreActionability(prompt);
|
|
94
|
+
|
|
95
|
+
expect(result.score).toBe(1.0);
|
|
96
|
+
expect(result.message).toContain("actionable tool call");
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
test("scores 0.0 for generic instructions without tool calls", () => {
|
|
100
|
+
const prompt: CompactionPrompt = {
|
|
101
|
+
content: "Check the status of workers and review progress",
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
const result = scoreActionability(prompt);
|
|
105
|
+
|
|
106
|
+
expect(result.score).toBe(0.0);
|
|
107
|
+
expect(result.message).toContain("No actionable");
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
test("scores 0.0 for swarm_status with placeholders", () => {
|
|
111
|
+
const prompt: CompactionPrompt = {
|
|
112
|
+
content: `swarm_status(epic_id='<epic-id>', project_key='<path>')`,
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
const result = scoreActionability(prompt);
|
|
116
|
+
|
|
117
|
+
expect(result.score).toBe(0.0);
|
|
118
|
+
expect(result.message).toContain("placeholder");
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
describe("coordinatorIdentity scorer", () => {
|
|
123
|
+
test("scores 1.0 with ASCII header and strong mandates", () => {
|
|
124
|
+
const prompt: CompactionPrompt = {
|
|
125
|
+
content: `┌─────────────────────────────────────────┐
|
|
126
|
+
│ YOU ARE THE COORDINATOR │
|
|
127
|
+
│ │
|
|
128
|
+
│ NEVER spawn workers yourself │
|
|
129
|
+
│ ALWAYS review worker output │
|
|
130
|
+
└─────────────────────────────────────────┘
|
|
131
|
+
|
|
132
|
+
Continue coordinating the swarm.`,
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
const result = scoreCoordinatorIdentity(prompt);
|
|
136
|
+
|
|
137
|
+
expect(result.score).toBe(1.0);
|
|
138
|
+
expect(result.message).toContain("ASCII header");
|
|
139
|
+
expect(result.message).toContain("strong mandates");
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
test("scores 0.5 with ASCII header but weak language", () => {
|
|
143
|
+
const prompt: CompactionPrompt = {
|
|
144
|
+
content: `┌─────────────────────────────────────────┐
|
|
145
|
+
│ COORDINATOR MODE │
|
|
146
|
+
└─────────────────────────────────────────┘
|
|
147
|
+
|
|
148
|
+
You should consider delegating work.`,
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
const result = scoreCoordinatorIdentity(prompt);
|
|
152
|
+
|
|
153
|
+
expect(result.score).toBe(0.5);
|
|
154
|
+
expect(result.message).toContain("weak language");
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
test("scores 0.0 without ASCII header", () => {
|
|
158
|
+
const prompt: CompactionPrompt = {
|
|
159
|
+
content: `You are the coordinator. NEVER do work directly. ALWAYS delegate.`,
|
|
160
|
+
};
|
|
161
|
+
|
|
162
|
+
const result = scoreCoordinatorIdentity(prompt);
|
|
163
|
+
|
|
164
|
+
expect(result.score).toBe(0.0);
|
|
165
|
+
expect(result.message).toContain("No ASCII header");
|
|
166
|
+
});
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
describe("forbiddenToolsPresent scorer", () => {
|
|
170
|
+
test("scores 1.0 when all forbidden tools listed", () => {
|
|
171
|
+
const prompt: CompactionPrompt = {
|
|
172
|
+
content: `🚫 FORBIDDEN TOOLS - NEVER call these:
|
|
173
|
+
- Edit (use swarm_spawn_subtask)
|
|
174
|
+
- Write (use swarm_spawn_subtask)
|
|
175
|
+
- swarmmail_reserve (only workers reserve)
|
|
176
|
+
- bash with git commit (workers commit)`,
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
const result = scoreForbiddenToolsPresent(prompt);
|
|
180
|
+
|
|
181
|
+
expect(result.score).toBe(1.0);
|
|
182
|
+
expect(result.message).toContain("All 4 forbidden tools");
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
test("scores 0.75 when 3 out of 4 tools listed", () => {
|
|
186
|
+
const prompt: CompactionPrompt = {
|
|
187
|
+
content: `🚫 FORBIDDEN TOOLS:
|
|
188
|
+
- Edit
|
|
189
|
+
- Write
|
|
190
|
+
- swarmmail_reserve`,
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
const result = scoreForbiddenToolsPresent(prompt);
|
|
194
|
+
|
|
195
|
+
expect(result.score).toBe(0.75);
|
|
196
|
+
expect(result.message).toContain("3/4");
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
test("scores 0.5 when 2 out of 4 tools listed", () => {
|
|
200
|
+
const prompt: CompactionPrompt = {
|
|
201
|
+
content: `Don't use Edit or Write directly.`,
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
const result = scoreForbiddenToolsPresent(prompt);
|
|
205
|
+
|
|
206
|
+
expect(result.score).toBe(0.5);
|
|
207
|
+
expect(result.message).toContain("2/4");
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
test("scores 0.0 when no forbidden tools listed", () => {
|
|
211
|
+
const prompt: CompactionPrompt = {
|
|
212
|
+
content: "Continue coordinating the epic",
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
const result = scoreForbiddenToolsPresent(prompt);
|
|
216
|
+
|
|
217
|
+
expect(result.score).toBe(0.0);
|
|
218
|
+
expect(result.message).toContain("0/4");
|
|
219
|
+
});
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
describe("postCompactionDiscipline scorer", () => {
|
|
223
|
+
test("scores 1.0 when first tool is swarm_status", () => {
|
|
224
|
+
const prompt: CompactionPrompt = {
|
|
225
|
+
content: `Resume coordination:
|
|
226
|
+
|
|
227
|
+
1. swarm_status(epic_id='mjkw81rkq4c')
|
|
228
|
+
2. Check inbox
|
|
229
|
+
3. Review progress`,
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
const result = scorePostCompactionDiscipline(prompt);
|
|
233
|
+
|
|
234
|
+
expect(result.score).toBe(1.0);
|
|
235
|
+
expect(result.message).toContain("swarm_status");
|
|
236
|
+
expect(result.message).toContain("correct");
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
test("scores 1.0 when first tool is swarmmail_inbox", () => {
|
|
240
|
+
const prompt: CompactionPrompt = {
|
|
241
|
+
content: `Next steps:
|
|
242
|
+
1. swarmmail_inbox()
|
|
243
|
+
2. Review messages`,
|
|
244
|
+
};
|
|
245
|
+
|
|
246
|
+
const result = scorePostCompactionDiscipline(prompt);
|
|
247
|
+
|
|
248
|
+
expect(result.score).toBe(1.0);
|
|
249
|
+
expect(result.message).toContain("inbox");
|
|
250
|
+
expect(result.message).toContain("correct");
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
test("scores 0.0 when first tool is Edit", () => {
|
|
254
|
+
const prompt: CompactionPrompt = {
|
|
255
|
+
content: `Resume:
|
|
256
|
+
1. Edit(file='src/auth.ts', ...)
|
|
257
|
+
2. Check status`,
|
|
258
|
+
};
|
|
259
|
+
|
|
260
|
+
const result = scorePostCompactionDiscipline(prompt);
|
|
261
|
+
|
|
262
|
+
expect(result.score).toBe(0.0);
|
|
263
|
+
expect(result.message).toContain("Edit");
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
test("scores 0.0 when first tool is Write", () => {
|
|
267
|
+
const prompt: CompactionPrompt = {
|
|
268
|
+
content: `1. Write(file='README.md', ...)`,
|
|
269
|
+
};
|
|
270
|
+
|
|
271
|
+
const result = scorePostCompactionDiscipline(prompt);
|
|
272
|
+
|
|
273
|
+
expect(result.score).toBe(0.0);
|
|
274
|
+
expect(result.message).toContain("Write");
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
test("scores 0.0 when first tool is Read", () => {
|
|
278
|
+
const prompt: CompactionPrompt = {
|
|
279
|
+
content: `1. Read(file='src/index.ts')
|
|
280
|
+
2. swarm_status()`,
|
|
281
|
+
};
|
|
282
|
+
|
|
283
|
+
const result = scorePostCompactionDiscipline(prompt);
|
|
284
|
+
|
|
285
|
+
expect(result.score).toBe(0.0);
|
|
286
|
+
expect(result.message).toContain("Read");
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
test("scores 0.0 when no tool calls mentioned", () => {
|
|
290
|
+
const prompt: CompactionPrompt = {
|
|
291
|
+
content: "Continue coordinating the epic",
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
const result = scorePostCompactionDiscipline(prompt);
|
|
295
|
+
|
|
296
|
+
expect(result.score).toBe(0.0);
|
|
297
|
+
expect(result.message).toContain("No tool");
|
|
298
|
+
});
|
|
299
|
+
});
|