opencode-swarm-plugin 0.44.0 → 0.44.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/swarm.serve.test.ts +6 -4
- package/bin/swarm.ts +16 -10
- package/dist/compaction-prompt-scoring.js +139 -0
- package/dist/eval-capture.js +12811 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.js +7644 -62599
- package/dist/plugin.js +23766 -78721
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm-review.d.ts.map +1 -1
- package/package.json +17 -5
- package/.changeset/swarm-insights-data-layer.md +0 -63
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
- package/.hive/analysis/session-data-quality-audit.md +0 -320
- package/.hive/eval-results.json +0 -483
- package/.hive/issues.jsonl +0 -138
- package/.hive/memories.jsonl +0 -729
- package/.opencode/eval-history.jsonl +0 -327
- package/.turbo/turbo-build.log +0 -9
- package/CHANGELOG.md +0 -2286
- package/SCORER-ANALYSIS.md +0 -598
- package/docs/analysis/subagent-coordination-patterns.md +0 -902
- package/docs/analysis-socratic-planner-pattern.md +0 -504
- package/docs/planning/ADR-001-monorepo-structure.md +0 -171
- package/docs/planning/ADR-002-package-extraction.md +0 -393
- package/docs/planning/ADR-003-performance-improvements.md +0 -451
- package/docs/planning/ADR-004-message-queue-features.md +0 -187
- package/docs/planning/ADR-005-devtools-observability.md +0 -202
- package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
- package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
- package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
- package/docs/planning/ROADMAP.md +0 -368
- package/docs/semantic-memory-cli-syntax.md +0 -123
- package/docs/swarm-mail-architecture.md +0 -1147
- package/docs/testing/context-recovery-test.md +0 -470
- package/evals/ARCHITECTURE.md +0 -1189
- package/evals/README.md +0 -768
- package/evals/compaction-prompt.eval.ts +0 -149
- package/evals/compaction-resumption.eval.ts +0 -289
- package/evals/coordinator-behavior.eval.ts +0 -307
- package/evals/coordinator-session.eval.ts +0 -154
- package/evals/evalite.config.ts.bak +0 -15
- package/evals/example.eval.ts +0 -31
- package/evals/fixtures/cass-baseline.ts +0 -217
- package/evals/fixtures/compaction-cases.ts +0 -350
- package/evals/fixtures/compaction-prompt-cases.ts +0 -311
- package/evals/fixtures/coordinator-sessions.ts +0 -328
- package/evals/fixtures/decomposition-cases.ts +0 -105
- package/evals/lib/compaction-loader.test.ts +0 -248
- package/evals/lib/compaction-loader.ts +0 -320
- package/evals/lib/data-loader.evalite-test.ts +0 -289
- package/evals/lib/data-loader.test.ts +0 -345
- package/evals/lib/data-loader.ts +0 -281
- package/evals/lib/llm.ts +0 -115
- package/evals/scorers/compaction-prompt-scorers.ts +0 -145
- package/evals/scorers/compaction-scorers.ts +0 -305
- package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
- package/evals/scorers/coordinator-discipline.ts +0 -325
- package/evals/scorers/index.test.ts +0 -146
- package/evals/scorers/index.ts +0 -328
- package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
- package/evals/scorers/outcome-scorers.ts +0 -349
- package/evals/swarm-decomposition.eval.ts +0 -121
- package/examples/commands/swarm.md +0 -745
- package/examples/plugin-wrapper-template.ts +0 -2515
- package/examples/skills/hive-workflow/SKILL.md +0 -212
- package/examples/skills/skill-creator/SKILL.md +0 -223
- package/examples/skills/swarm-coordination/SKILL.md +0 -292
- package/global-skills/cli-builder/SKILL.md +0 -344
- package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
- package/global-skills/learning-systems/SKILL.md +0 -644
- package/global-skills/skill-creator/LICENSE.txt +0 -202
- package/global-skills/skill-creator/SKILL.md +0 -352
- package/global-skills/skill-creator/references/output-patterns.md +0 -82
- package/global-skills/skill-creator/references/workflows.md +0 -28
- package/global-skills/swarm-coordination/SKILL.md +0 -995
- package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
- package/global-skills/swarm-coordination/references/strategies.md +0 -138
- package/global-skills/system-design/SKILL.md +0 -213
- package/global-skills/testing-patterns/SKILL.md +0 -430
- package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
- package/opencode-swarm-plugin-0.30.7.tgz +0 -0
- package/opencode-swarm-plugin-0.31.0.tgz +0 -0
- package/scripts/cleanup-test-memories.ts +0 -346
- package/scripts/init-skill.ts +0 -222
- package/scripts/migrate-unknown-sessions.ts +0 -349
- package/scripts/validate-skill.ts +0 -204
- package/src/agent-mail.ts +0 -1724
- package/src/anti-patterns.test.ts +0 -1167
- package/src/anti-patterns.ts +0 -448
- package/src/compaction-capture.integration.test.ts +0 -257
- package/src/compaction-hook.test.ts +0 -838
- package/src/compaction-hook.ts +0 -1204
- package/src/compaction-observability.integration.test.ts +0 -139
- package/src/compaction-observability.test.ts +0 -187
- package/src/compaction-observability.ts +0 -324
- package/src/compaction-prompt-scorers.test.ts +0 -475
- package/src/compaction-prompt-scoring.ts +0 -300
- package/src/contributor-tools.test.ts +0 -133
- package/src/contributor-tools.ts +0 -201
- package/src/dashboard.test.ts +0 -611
- package/src/dashboard.ts +0 -462
- package/src/error-enrichment.test.ts +0 -403
- package/src/error-enrichment.ts +0 -219
- package/src/eval-capture.test.ts +0 -1015
- package/src/eval-capture.ts +0 -929
- package/src/eval-gates.test.ts +0 -306
- package/src/eval-gates.ts +0 -218
- package/src/eval-history.test.ts +0 -508
- package/src/eval-history.ts +0 -214
- package/src/eval-learning.test.ts +0 -378
- package/src/eval-learning.ts +0 -360
- package/src/eval-runner.test.ts +0 -223
- package/src/eval-runner.ts +0 -402
- package/src/export-tools.test.ts +0 -476
- package/src/export-tools.ts +0 -257
- package/src/hive.integration.test.ts +0 -2241
- package/src/hive.ts +0 -1628
- package/src/index.ts +0 -940
- package/src/learning.integration.test.ts +0 -1815
- package/src/learning.ts +0 -1079
- package/src/logger.test.ts +0 -189
- package/src/logger.ts +0 -135
- package/src/mandate-promotion.test.ts +0 -473
- package/src/mandate-promotion.ts +0 -239
- package/src/mandate-storage.integration.test.ts +0 -601
- package/src/mandate-storage.test.ts +0 -578
- package/src/mandate-storage.ts +0 -794
- package/src/mandates.ts +0 -540
- package/src/memory-tools.test.ts +0 -195
- package/src/memory-tools.ts +0 -344
- package/src/memory.integration.test.ts +0 -334
- package/src/memory.test.ts +0 -158
- package/src/memory.ts +0 -527
- package/src/model-selection.test.ts +0 -188
- package/src/model-selection.ts +0 -68
- package/src/observability-tools.test.ts +0 -359
- package/src/observability-tools.ts +0 -871
- package/src/output-guardrails.test.ts +0 -438
- package/src/output-guardrails.ts +0 -381
- package/src/pattern-maturity.test.ts +0 -1160
- package/src/pattern-maturity.ts +0 -525
- package/src/planning-guardrails.test.ts +0 -491
- package/src/planning-guardrails.ts +0 -438
- package/src/plugin.ts +0 -23
- package/src/post-compaction-tracker.test.ts +0 -251
- package/src/post-compaction-tracker.ts +0 -237
- package/src/query-tools.test.ts +0 -636
- package/src/query-tools.ts +0 -324
- package/src/rate-limiter.integration.test.ts +0 -466
- package/src/rate-limiter.ts +0 -774
- package/src/replay-tools.test.ts +0 -496
- package/src/replay-tools.ts +0 -240
- package/src/repo-crawl.integration.test.ts +0 -441
- package/src/repo-crawl.ts +0 -610
- package/src/schemas/cell-events.test.ts +0 -347
- package/src/schemas/cell-events.ts +0 -807
- package/src/schemas/cell.ts +0 -257
- package/src/schemas/evaluation.ts +0 -166
- package/src/schemas/index.test.ts +0 -199
- package/src/schemas/index.ts +0 -286
- package/src/schemas/mandate.ts +0 -232
- package/src/schemas/swarm-context.ts +0 -115
- package/src/schemas/task.ts +0 -161
- package/src/schemas/worker-handoff.test.ts +0 -302
- package/src/schemas/worker-handoff.ts +0 -131
- package/src/sessions/agent-discovery.test.ts +0 -137
- package/src/sessions/agent-discovery.ts +0 -112
- package/src/sessions/index.ts +0 -15
- package/src/skills.integration.test.ts +0 -1192
- package/src/skills.test.ts +0 -643
- package/src/skills.ts +0 -1549
- package/src/storage.integration.test.ts +0 -341
- package/src/storage.ts +0 -884
- package/src/structured.integration.test.ts +0 -817
- package/src/structured.test.ts +0 -1046
- package/src/structured.ts +0 -762
- package/src/swarm-decompose.test.ts +0 -188
- package/src/swarm-decompose.ts +0 -1302
- package/src/swarm-deferred.integration.test.ts +0 -157
- package/src/swarm-deferred.test.ts +0 -38
- package/src/swarm-insights.test.ts +0 -214
- package/src/swarm-insights.ts +0 -459
- package/src/swarm-mail.integration.test.ts +0 -970
- package/src/swarm-mail.ts +0 -739
- package/src/swarm-orchestrate.integration.test.ts +0 -282
- package/src/swarm-orchestrate.test.ts +0 -548
- package/src/swarm-orchestrate.ts +0 -3084
- package/src/swarm-prompts.test.ts +0 -1270
- package/src/swarm-prompts.ts +0 -2077
- package/src/swarm-research.integration.test.ts +0 -701
- package/src/swarm-research.test.ts +0 -698
- package/src/swarm-research.ts +0 -472
- package/src/swarm-review.integration.test.ts +0 -285
- package/src/swarm-review.test.ts +0 -879
- package/src/swarm-review.ts +0 -709
- package/src/swarm-strategies.ts +0 -407
- package/src/swarm-worktree.test.ts +0 -501
- package/src/swarm-worktree.ts +0 -575
- package/src/swarm.integration.test.ts +0 -2377
- package/src/swarm.ts +0 -38
- package/src/tool-adapter.integration.test.ts +0 -1221
- package/src/tool-availability.ts +0 -461
- package/tsconfig.json +0 -28
|
@@ -1,311 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Test cases for compaction prompt quality evaluation
|
|
3
|
-
*
|
|
4
|
-
* Each case represents a continuation prompt that should be generated
|
|
5
|
-
* after context compaction. Tests validate that prompts have:
|
|
6
|
-
* - Real epic IDs (not placeholders)
|
|
7
|
-
* - Actionable tool calls with specific values
|
|
8
|
-
* - Strong coordinator identity
|
|
9
|
-
* - Explicit forbidden tools list
|
|
10
|
-
* - Correct first tool suggestion
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import type { CompactionPrompt } from "../../src/compaction-prompt-scoring.js";
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* Compaction prompt test case structure
|
|
17
|
-
*/
|
|
18
|
-
export interface CompactionPromptTestCase {
|
|
19
|
-
name: string;
|
|
20
|
-
description: string;
|
|
21
|
-
/**
|
|
22
|
-
* The generated continuation prompt
|
|
23
|
-
*/
|
|
24
|
-
prompt: CompactionPrompt;
|
|
25
|
-
/**
|
|
26
|
-
* Expected scoring outcomes
|
|
27
|
-
*/
|
|
28
|
-
expected: {
|
|
29
|
-
/**
|
|
30
|
-
* Should have real epic IDs (not placeholders)
|
|
31
|
-
*/
|
|
32
|
-
hasRealEpicId: boolean;
|
|
33
|
-
/**
|
|
34
|
-
* Should have actionable tool calls
|
|
35
|
-
*/
|
|
36
|
-
isActionable: boolean;
|
|
37
|
-
/**
|
|
38
|
-
* Should have strong coordinator identity
|
|
39
|
-
*/
|
|
40
|
-
hasCoordinatorIdentity: boolean;
|
|
41
|
-
/**
|
|
42
|
-
* Should list forbidden tools by name
|
|
43
|
-
*/
|
|
44
|
-
listsForbiddenTools: boolean;
|
|
45
|
-
/**
|
|
46
|
-
* First suggested tool should be correct
|
|
47
|
-
*/
|
|
48
|
-
hasCorrectFirstTool: boolean;
|
|
49
|
-
};
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export const compactionPromptCases: CompactionPromptTestCase[] = [
|
|
53
|
-
// ============================================================================
|
|
54
|
-
// PERFECT PROMPT: All criteria met
|
|
55
|
-
// ============================================================================
|
|
56
|
-
{
|
|
57
|
-
name: "Perfect coordinator resumption prompt",
|
|
58
|
-
description:
|
|
59
|
-
"Ideal continuation prompt with all quality criteria met: real IDs, actionable tools, strong identity, forbidden list, correct first tool",
|
|
60
|
-
prompt: {
|
|
61
|
-
content: `
|
|
62
|
-
┌─────────────────────────────────────────────────────────────┐
|
|
63
|
-
│ 🐝 COORDINATOR RESUMPTION │
|
|
64
|
-
│ Context Compacted │
|
|
65
|
-
└─────────────────────────────────────────────────────────────┘
|
|
66
|
-
|
|
67
|
-
You are the COORDINATOR of swarm epic mjkweh2p4u5.
|
|
68
|
-
|
|
69
|
-
## IMMEDIATE ACTIONS (Do These FIRST)
|
|
70
|
-
|
|
71
|
-
1. swarm_status(epic_id="mjkweh2p4u5", project_key="/Users/joel/Code/myapp")
|
|
72
|
-
2. swarmmail_inbox(limit=5)
|
|
73
|
-
3. Review any completed work
|
|
74
|
-
|
|
75
|
-
## FORBIDDEN TOOLS (NEVER Use These)
|
|
76
|
-
|
|
77
|
-
Coordinators do NOT edit code directly. These tools are FORBIDDEN:
|
|
78
|
-
- edit
|
|
79
|
-
- write
|
|
80
|
-
- bash (for file modifications)
|
|
81
|
-
- swarmmail_reserve (only workers reserve)
|
|
82
|
-
- git commit (workers commit)
|
|
83
|
-
|
|
84
|
-
Use swarm_spawn_subtask to delegate work to workers.
|
|
85
|
-
|
|
86
|
-
## Your Role
|
|
87
|
-
|
|
88
|
-
You orchestrate. You do NOT implement. Spawn workers, monitor progress, unblock, ship.
|
|
89
|
-
|
|
90
|
-
ALWAYS spawn workers for file modifications.
|
|
91
|
-
NEVER edit files yourself.
|
|
92
|
-
NON-NEGOTIABLE: Check status and inbox before making decisions.
|
|
93
|
-
`,
|
|
94
|
-
},
|
|
95
|
-
expected: {
|
|
96
|
-
hasRealEpicId: true,
|
|
97
|
-
isActionable: true,
|
|
98
|
-
hasCoordinatorIdentity: true,
|
|
99
|
-
listsForbiddenTools: true,
|
|
100
|
-
hasCorrectFirstTool: true,
|
|
101
|
-
},
|
|
102
|
-
},
|
|
103
|
-
|
|
104
|
-
// ============================================================================
|
|
105
|
-
// BAD PROMPT: Placeholder epic ID
|
|
106
|
-
// ============================================================================
|
|
107
|
-
{
|
|
108
|
-
name: "Prompt with placeholder epic ID",
|
|
109
|
-
description:
|
|
110
|
-
"Contains placeholder <epic-id> instead of real ID - fails specificity check",
|
|
111
|
-
prompt: {
|
|
112
|
-
content: `
|
|
113
|
-
## Coordinator Resumption
|
|
114
|
-
|
|
115
|
-
You are coordinating epic <epic-id>.
|
|
116
|
-
|
|
117
|
-
Check the status with:
|
|
118
|
-
1. swarm_status(epic_id="<epic-id>", project_key="<path>")
|
|
119
|
-
2. swarmmail_inbox()
|
|
120
|
-
|
|
121
|
-
Continue orchestrating the swarm.
|
|
122
|
-
`,
|
|
123
|
-
},
|
|
124
|
-
expected: {
|
|
125
|
-
hasRealEpicId: false, // <epic-id> is a placeholder
|
|
126
|
-
isActionable: false, // Has placeholders in tool calls
|
|
127
|
-
hasCoordinatorIdentity: false, // No ASCII header or strong language
|
|
128
|
-
listsForbiddenTools: false, // Doesn't list forbidden tools
|
|
129
|
-
hasCorrectFirstTool: true, // First tool is swarm_status (correct)
|
|
130
|
-
},
|
|
131
|
-
},
|
|
132
|
-
|
|
133
|
-
// ============================================================================
|
|
134
|
-
// BAD PROMPT: Generic instructions, no actionable tools
|
|
135
|
-
// ============================================================================
|
|
136
|
-
{
|
|
137
|
-
name: "Generic instructions without specific tools",
|
|
138
|
-
description:
|
|
139
|
-
"Vague language like 'check status' without actual tool calls - fails actionability",
|
|
140
|
-
prompt: {
|
|
141
|
-
content: `
|
|
142
|
-
You were coordinating a swarm before compaction.
|
|
143
|
-
|
|
144
|
-
To resume:
|
|
145
|
-
- Check the status of workers
|
|
146
|
-
- Read your messages
|
|
147
|
-
- Continue where you left off
|
|
148
|
-
|
|
149
|
-
Remember, you're the coordinator. Keep the work moving forward.
|
|
150
|
-
`,
|
|
151
|
-
},
|
|
152
|
-
expected: {
|
|
153
|
-
hasRealEpicId: false, // No epic ID at all
|
|
154
|
-
isActionable: false, // No specific tool calls
|
|
155
|
-
hasCoordinatorIdentity: false, // No strong identity reinforcement
|
|
156
|
-
listsForbiddenTools: false, // No forbidden tools list
|
|
157
|
-
hasCorrectFirstTool: false, // No first tool specified
|
|
158
|
-
},
|
|
159
|
-
},
|
|
160
|
-
|
|
161
|
-
// ============================================================================
|
|
162
|
-
// BAD PROMPT: Weak coordinator identity
|
|
163
|
-
// ============================================================================
|
|
164
|
-
{
|
|
165
|
-
name: "Weak coordinator identity",
|
|
166
|
-
description:
|
|
167
|
-
"Has real ID and tools but lacks strong identity reinforcement - fails coordinator identity check",
|
|
168
|
-
prompt: {
|
|
169
|
-
content: `
|
|
170
|
-
## Swarm Resumption
|
|
171
|
-
|
|
172
|
-
Epic ID: mjkweh9x2a1
|
|
173
|
-
Project: /Users/joel/Code/myapp
|
|
174
|
-
|
|
175
|
-
You can check status with:
|
|
176
|
-
swarm_status(epic_id="mjkweh9x2a1", project_key="/Users/joel/Code/myapp")
|
|
177
|
-
|
|
178
|
-
And read messages:
|
|
179
|
-
swarmmail_inbox(limit=5)
|
|
180
|
-
|
|
181
|
-
Please continue coordinating.
|
|
182
|
-
`,
|
|
183
|
-
},
|
|
184
|
-
expected: {
|
|
185
|
-
hasRealEpicId: true, // Has real ID
|
|
186
|
-
isActionable: true, // Has specific tool calls
|
|
187
|
-
hasCoordinatorIdentity: false, // No ASCII header, no NEVER/ALWAYS/NON-NEGOTIABLE
|
|
188
|
-
listsForbiddenTools: false, // No forbidden tools list
|
|
189
|
-
hasCorrectFirstTool: true, // First tool is swarm_status
|
|
190
|
-
},
|
|
191
|
-
},
|
|
192
|
-
|
|
193
|
-
// ============================================================================
|
|
194
|
-
// BAD PROMPT: Missing forbidden tools list
|
|
195
|
-
// ============================================================================
|
|
196
|
-
{
|
|
197
|
-
name: "Missing forbidden tools list",
|
|
198
|
-
description:
|
|
199
|
-
"Good prompt but doesn't explicitly list forbidden tools - coordinators need this reminder",
|
|
200
|
-
prompt: {
|
|
201
|
-
content: `
|
|
202
|
-
┌─────────────────────────────────────────────────────────────┐
|
|
203
|
-
│ 🐝 COORDINATOR RESUMPTION │
|
|
204
|
-
└─────────────────────────────────────────────────────────────┘
|
|
205
|
-
|
|
206
|
-
You are the COORDINATOR of epic mjkweh3k8p2.
|
|
207
|
-
|
|
208
|
-
## IMMEDIATE ACTIONS
|
|
209
|
-
|
|
210
|
-
1. swarm_status(epic_id="mjkweh3k8p2", project_key="/Users/joel/Code/myapp")
|
|
211
|
-
2. swarmmail_inbox(limit=5)
|
|
212
|
-
|
|
213
|
-
## Your Role
|
|
214
|
-
|
|
215
|
-
ALWAYS delegate to workers.
|
|
216
|
-
NEVER edit files directly.
|
|
217
|
-
|
|
218
|
-
Coordinators orchestrate, workers implement.
|
|
219
|
-
`,
|
|
220
|
-
},
|
|
221
|
-
expected: {
|
|
222
|
-
hasRealEpicId: true,
|
|
223
|
-
isActionable: true,
|
|
224
|
-
hasCoordinatorIdentity: true, // Has ASCII + NEVER/ALWAYS
|
|
225
|
-
listsForbiddenTools: false, // Doesn't list "edit", "write", "bash" by name
|
|
226
|
-
hasCorrectFirstTool: true,
|
|
227
|
-
},
|
|
228
|
-
},
|
|
229
|
-
|
|
230
|
-
// ============================================================================
|
|
231
|
-
// BAD PROMPT: Wrong first tool (edit instead of swarm_status)
|
|
232
|
-
// ============================================================================
|
|
233
|
-
{
|
|
234
|
-
name: "Wrong first tool suggestion",
|
|
235
|
-
description:
|
|
236
|
-
"Suggests edit/write as first action - coordinator discipline failure",
|
|
237
|
-
prompt: {
|
|
238
|
-
content: `
|
|
239
|
-
┌─────────────────────────────────────────────────────────────┐
|
|
240
|
-
│ 🐝 COORDINATOR RESUMPTION │
|
|
241
|
-
└─────────────────────────────────────────────────────────────┘
|
|
242
|
-
|
|
243
|
-
You are the COORDINATOR of epic mjkweh7q9n4.
|
|
244
|
-
|
|
245
|
-
## IMMEDIATE ACTIONS
|
|
246
|
-
|
|
247
|
-
1. edit(filePath="/src/app.ts", oldString="...", newString="...")
|
|
248
|
-
2. swarm_status(epic_id="mjkweh7q9n4", project_key="/Users/joel/Code/myapp")
|
|
249
|
-
|
|
250
|
-
## FORBIDDEN TOOLS
|
|
251
|
-
- edit
|
|
252
|
-
- write
|
|
253
|
-
- bash (for file mods)
|
|
254
|
-
- swarmmail_reserve (only workers)
|
|
255
|
-
- git commit (workers only)
|
|
256
|
-
|
|
257
|
-
NEVER edit files yourself.
|
|
258
|
-
ALWAYS delegate to workers.
|
|
259
|
-
`,
|
|
260
|
-
},
|
|
261
|
-
expected: {
|
|
262
|
-
hasRealEpicId: true,
|
|
263
|
-
isActionable: true,
|
|
264
|
-
hasCoordinatorIdentity: true,
|
|
265
|
-
listsForbiddenTools: true,
|
|
266
|
-
hasCorrectFirstTool: false, // First tool is edit, should be swarm_status/inbox
|
|
267
|
-
},
|
|
268
|
-
},
|
|
269
|
-
|
|
270
|
-
// ============================================================================
|
|
271
|
-
// EDGE CASE: Multiple epics mentioned
|
|
272
|
-
// ============================================================================
|
|
273
|
-
{
|
|
274
|
-
name: "Multiple epic IDs in prompt",
|
|
275
|
-
description:
|
|
276
|
-
"Prompt references multiple epics - should still pass if at least one is real",
|
|
277
|
-
prompt: {
|
|
278
|
-
content: `
|
|
279
|
-
┌─────────────────────────────────────────────────────────────┐
|
|
280
|
-
│ 🐝 COORDINATOR RESUMPTION │
|
|
281
|
-
└─────────────────────────────────────────────────────────────┘
|
|
282
|
-
|
|
283
|
-
You are coordinating epics:
|
|
284
|
-
- mjkweh5t2x8 (in progress)
|
|
285
|
-
- mjkweh6u3y9 (blocked)
|
|
286
|
-
|
|
287
|
-
## IMMEDIATE ACTIONS
|
|
288
|
-
|
|
289
|
-
1. swarm_status(epic_id="mjkweh5t2x8", project_key="/Users/joel/Code/myapp")
|
|
290
|
-
2. swarmmail_inbox(limit=5)
|
|
291
|
-
|
|
292
|
-
## FORBIDDEN TOOLS
|
|
293
|
-
- edit
|
|
294
|
-
- write
|
|
295
|
-
- bash
|
|
296
|
-
- swarmmail_reserve
|
|
297
|
-
- git commit
|
|
298
|
-
|
|
299
|
-
ALWAYS check status first.
|
|
300
|
-
NEVER edit files directly.
|
|
301
|
-
`,
|
|
302
|
-
},
|
|
303
|
-
expected: {
|
|
304
|
-
hasRealEpicId: true, // Has real IDs
|
|
305
|
-
isActionable: true,
|
|
306
|
-
hasCoordinatorIdentity: true,
|
|
307
|
-
listsForbiddenTools: true,
|
|
308
|
-
hasCorrectFirstTool: true,
|
|
309
|
-
},
|
|
310
|
-
},
|
|
311
|
-
];
|
|
@@ -1,328 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Coordinator Session Test Fixtures
|
|
3
|
-
*
|
|
4
|
-
* Synthetic coordinator sessions for testing coordinator-discipline scorers.
|
|
5
|
-
* Each fixture demonstrates good or bad coordinator behavior.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { CoordinatorSession } from "../../src/eval-capture.js";
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* PERFECT COORDINATOR
|
|
12
|
-
*
|
|
13
|
-
* - No violations (no direct edits, tests, or reservations)
|
|
14
|
-
* - 100% spawn efficiency (3/3 workers spawned)
|
|
15
|
-
* - 100% review thoroughness (all workers reviewed)
|
|
16
|
-
* - Fast time to first spawn (30s)
|
|
17
|
-
*/
|
|
18
|
-
export const perfectCoordinator: CoordinatorSession = {
|
|
19
|
-
session_id: "test-session-perfect",
|
|
20
|
-
epic_id: "test-epic-perfect",
|
|
21
|
-
start_time: "2025-01-01T10:00:00.000Z",
|
|
22
|
-
end_time: "2025-01-01T10:30:00.000Z",
|
|
23
|
-
events: [
|
|
24
|
-
// 1. Decomposition complete
|
|
25
|
-
{
|
|
26
|
-
session_id: "test-session-perfect",
|
|
27
|
-
epic_id: "test-epic-perfect",
|
|
28
|
-
timestamp: "2025-01-01T10:00:00.000Z",
|
|
29
|
-
event_type: "DECISION",
|
|
30
|
-
decision_type: "decomposition_complete",
|
|
31
|
-
payload: { subtask_count: 3 },
|
|
32
|
-
},
|
|
33
|
-
// 2. First spawn (30s after decomp)
|
|
34
|
-
{
|
|
35
|
-
session_id: "test-session-perfect",
|
|
36
|
-
epic_id: "test-epic-perfect",
|
|
37
|
-
timestamp: "2025-01-01T10:00:30.000Z",
|
|
38
|
-
event_type: "DECISION",
|
|
39
|
-
decision_type: "worker_spawned",
|
|
40
|
-
payload: { worker: "BlueLake", bead_id: "test-epic-perfect.1" },
|
|
41
|
-
},
|
|
42
|
-
// 3. Second spawn
|
|
43
|
-
{
|
|
44
|
-
session_id: "test-session-perfect",
|
|
45
|
-
epic_id: "test-epic-perfect",
|
|
46
|
-
timestamp: "2025-01-01T10:01:00.000Z",
|
|
47
|
-
event_type: "DECISION",
|
|
48
|
-
decision_type: "worker_spawned",
|
|
49
|
-
payload: { worker: "GreenMountain", bead_id: "test-epic-perfect.2" },
|
|
50
|
-
},
|
|
51
|
-
// 4. Third spawn
|
|
52
|
-
{
|
|
53
|
-
session_id: "test-session-perfect",
|
|
54
|
-
epic_id: "test-epic-perfect",
|
|
55
|
-
timestamp: "2025-01-01T10:01:30.000Z",
|
|
56
|
-
event_type: "DECISION",
|
|
57
|
-
decision_type: "worker_spawned",
|
|
58
|
-
payload: { worker: "RedForest", bead_id: "test-epic-perfect.3" },
|
|
59
|
-
},
|
|
60
|
-
// 5. First worker completes
|
|
61
|
-
{
|
|
62
|
-
session_id: "test-session-perfect",
|
|
63
|
-
epic_id: "test-epic-perfect",
|
|
64
|
-
timestamp: "2025-01-01T10:10:00.000Z",
|
|
65
|
-
event_type: "OUTCOME",
|
|
66
|
-
outcome_type: "subtask_success",
|
|
67
|
-
payload: { bead_id: "test-epic-perfect.1", worker: "BlueLake" },
|
|
68
|
-
},
|
|
69
|
-
// 6. First review
|
|
70
|
-
{
|
|
71
|
-
session_id: "test-session-perfect",
|
|
72
|
-
epic_id: "test-epic-perfect",
|
|
73
|
-
timestamp: "2025-01-01T10:11:00.000Z",
|
|
74
|
-
event_type: "DECISION",
|
|
75
|
-
decision_type: "review_completed",
|
|
76
|
-
payload: {
|
|
77
|
-
bead_id: "test-epic-perfect.1",
|
|
78
|
-
approved: true,
|
|
79
|
-
issues: [],
|
|
80
|
-
},
|
|
81
|
-
},
|
|
82
|
-
// 7. Second worker completes
|
|
83
|
-
{
|
|
84
|
-
session_id: "test-session-perfect",
|
|
85
|
-
epic_id: "test-epic-perfect",
|
|
86
|
-
timestamp: "2025-01-01T10:15:00.000Z",
|
|
87
|
-
event_type: "OUTCOME",
|
|
88
|
-
outcome_type: "subtask_success",
|
|
89
|
-
payload: { bead_id: "test-epic-perfect.2", worker: "GreenMountain" },
|
|
90
|
-
},
|
|
91
|
-
// 8. Second review
|
|
92
|
-
{
|
|
93
|
-
session_id: "test-session-perfect",
|
|
94
|
-
epic_id: "test-epic-perfect",
|
|
95
|
-
timestamp: "2025-01-01T10:16:00.000Z",
|
|
96
|
-
event_type: "DECISION",
|
|
97
|
-
decision_type: "review_completed",
|
|
98
|
-
payload: {
|
|
99
|
-
bead_id: "test-epic-perfect.2",
|
|
100
|
-
approved: true,
|
|
101
|
-
issues: [],
|
|
102
|
-
},
|
|
103
|
-
},
|
|
104
|
-
// 9. Third worker completes
|
|
105
|
-
{
|
|
106
|
-
session_id: "test-session-perfect",
|
|
107
|
-
epic_id: "test-epic-perfect",
|
|
108
|
-
timestamp: "2025-01-01T10:20:00.000Z",
|
|
109
|
-
event_type: "OUTCOME",
|
|
110
|
-
outcome_type: "subtask_success",
|
|
111
|
-
payload: { bead_id: "test-epic-perfect.3", worker: "RedForest" },
|
|
112
|
-
},
|
|
113
|
-
// 10. Third review
|
|
114
|
-
{
|
|
115
|
-
session_id: "test-session-perfect",
|
|
116
|
-
epic_id: "test-epic-perfect",
|
|
117
|
-
timestamp: "2025-01-01T10:21:00.000Z",
|
|
118
|
-
event_type: "DECISION",
|
|
119
|
-
decision_type: "review_completed",
|
|
120
|
-
payload: {
|
|
121
|
-
bead_id: "test-epic-perfect.3",
|
|
122
|
-
approved: true,
|
|
123
|
-
issues: [],
|
|
124
|
-
},
|
|
125
|
-
},
|
|
126
|
-
// 11. Epic complete
|
|
127
|
-
{
|
|
128
|
-
session_id: "test-session-perfect",
|
|
129
|
-
epic_id: "test-epic-perfect",
|
|
130
|
-
timestamp: "2025-01-01T10:30:00.000Z",
|
|
131
|
-
event_type: "OUTCOME",
|
|
132
|
-
outcome_type: "epic_complete",
|
|
133
|
-
payload: { epic_id: "test-epic-perfect", total_subtasks: 3 },
|
|
134
|
-
},
|
|
135
|
-
],
|
|
136
|
-
};
|
|
137
|
-
|
|
138
|
-
/**
|
|
139
|
-
* BAD COORDINATOR - Multiple Violations
|
|
140
|
-
*
|
|
141
|
-
* - 3 violations (edited file, ran tests, reserved files)
|
|
142
|
-
* - 33% spawn efficiency (only 1/3 workers spawned)
|
|
143
|
-
* - 0% review thoroughness (no reviews)
|
|
144
|
-
* - Slow time to first spawn (10 minutes)
|
|
145
|
-
*/
|
|
146
|
-
export const badCoordinator: CoordinatorSession = {
|
|
147
|
-
session_id: "test-session-bad",
|
|
148
|
-
epic_id: "test-epic-bad",
|
|
149
|
-
start_time: "2025-01-01T10:00:00.000Z",
|
|
150
|
-
end_time: "2025-01-01T11:00:00.000Z",
|
|
151
|
-
events: [
|
|
152
|
-
// 1. Decomposition complete
|
|
153
|
-
{
|
|
154
|
-
session_id: "test-session-bad",
|
|
155
|
-
epic_id: "test-epic-bad",
|
|
156
|
-
timestamp: "2025-01-01T10:00:00.000Z",
|
|
157
|
-
event_type: "DECISION",
|
|
158
|
-
decision_type: "decomposition_complete",
|
|
159
|
-
payload: { subtask_count: 3 },
|
|
160
|
-
},
|
|
161
|
-
// 2. VIOLATION: Coordinator edited file directly
|
|
162
|
-
{
|
|
163
|
-
session_id: "test-session-bad",
|
|
164
|
-
epic_id: "test-epic-bad",
|
|
165
|
-
timestamp: "2025-01-01T10:01:00.000Z",
|
|
166
|
-
event_type: "VIOLATION",
|
|
167
|
-
violation_type: "coordinator_edited_file",
|
|
168
|
-
payload: { file: "src/auth.ts", reason: "should spawn worker instead" },
|
|
169
|
-
},
|
|
170
|
-
// 3. VIOLATION: Coordinator ran tests
|
|
171
|
-
{
|
|
172
|
-
session_id: "test-session-bad",
|
|
173
|
-
epic_id: "test-epic-bad",
|
|
174
|
-
timestamp: "2025-01-01T10:02:00.000Z",
|
|
175
|
-
event_type: "VIOLATION",
|
|
176
|
-
violation_type: "coordinator_ran_tests",
|
|
177
|
-
payload: { command: "bun test", reason: "workers do verification" },
|
|
178
|
-
},
|
|
179
|
-
// 4. VIOLATION: Coordinator reserved files
|
|
180
|
-
{
|
|
181
|
-
session_id: "test-session-bad",
|
|
182
|
-
epic_id: "test-epic-bad",
|
|
183
|
-
timestamp: "2025-01-01T10:03:00.000Z",
|
|
184
|
-
event_type: "VIOLATION",
|
|
185
|
-
violation_type: "coordinator_reserved_files",
|
|
186
|
-
payload: { paths: ["src/**"], reason: "only workers reserve" },
|
|
187
|
-
},
|
|
188
|
-
// 5. First spawn (10 minutes after decomp - way too slow)
|
|
189
|
-
{
|
|
190
|
-
session_id: "test-session-bad",
|
|
191
|
-
epic_id: "test-epic-bad",
|
|
192
|
-
timestamp: "2025-01-01T10:10:00.000Z",
|
|
193
|
-
event_type: "DECISION",
|
|
194
|
-
decision_type: "worker_spawned",
|
|
195
|
-
payload: { worker: "BlueLake", bead_id: "test-epic-bad.1" },
|
|
196
|
-
},
|
|
197
|
-
// 6. Worker completes (but no review!)
|
|
198
|
-
{
|
|
199
|
-
session_id: "test-session-bad",
|
|
200
|
-
epic_id: "test-epic-bad",
|
|
201
|
-
timestamp: "2025-01-01T10:20:00.000Z",
|
|
202
|
-
event_type: "OUTCOME",
|
|
203
|
-
outcome_type: "subtask_success",
|
|
204
|
-
payload: { bead_id: "test-epic-bad.1", worker: "BlueLake" },
|
|
205
|
-
},
|
|
206
|
-
// 7. VIOLATION: No worker spawned for subtask 2
|
|
207
|
-
{
|
|
208
|
-
session_id: "test-session-bad",
|
|
209
|
-
epic_id: "test-epic-bad",
|
|
210
|
-
timestamp: "2025-01-01T10:30:00.000Z",
|
|
211
|
-
event_type: "VIOLATION",
|
|
212
|
-
violation_type: "no_worker_spawned",
|
|
213
|
-
payload: { bead_id: "test-epic-bad.2", reason: "coordinator did work directly" },
|
|
214
|
-
},
|
|
215
|
-
// 8. VIOLATION: No worker spawned for subtask 3
|
|
216
|
-
{
|
|
217
|
-
session_id: "test-session-bad",
|
|
218
|
-
epic_id: "test-epic-bad",
|
|
219
|
-
timestamp: "2025-01-01T10:40:00.000Z",
|
|
220
|
-
event_type: "VIOLATION",
|
|
221
|
-
violation_type: "no_worker_spawned",
|
|
222
|
-
payload: { bead_id: "test-epic-bad.3", reason: "coordinator did work directly" },
|
|
223
|
-
},
|
|
224
|
-
],
|
|
225
|
-
};
|
|
226
|
-
|
|
227
|
-
/**
|
|
228
|
-
* DECENT COORDINATOR - Some Issues
|
|
229
|
-
*
|
|
230
|
-
* - 1 violation (ran tests once)
|
|
231
|
-
* - 100% spawn efficiency (2/2 workers spawned)
|
|
232
|
-
* - 50% review thoroughness (reviewed only 1/2)
|
|
233
|
-
* - Good time to first spawn (45s)
|
|
234
|
-
*/
|
|
235
|
-
export const decentCoordinator: CoordinatorSession = {
|
|
236
|
-
session_id: "test-session-decent",
|
|
237
|
-
epic_id: "test-epic-decent",
|
|
238
|
-
start_time: "2025-01-01T10:00:00.000Z",
|
|
239
|
-
end_time: "2025-01-01T10:25:00.000Z",
|
|
240
|
-
events: [
|
|
241
|
-
// 1. Decomposition complete
|
|
242
|
-
{
|
|
243
|
-
session_id: "test-session-decent",
|
|
244
|
-
epic_id: "test-epic-decent",
|
|
245
|
-
timestamp: "2025-01-01T10:00:00.000Z",
|
|
246
|
-
event_type: "DECISION",
|
|
247
|
-
decision_type: "decomposition_complete",
|
|
248
|
-
payload: { subtask_count: 2 },
|
|
249
|
-
},
|
|
250
|
-
// 2. First spawn (45s - acceptable)
|
|
251
|
-
{
|
|
252
|
-
session_id: "test-session-decent",
|
|
253
|
-
epic_id: "test-epic-decent",
|
|
254
|
-
timestamp: "2025-01-01T10:00:45.000Z",
|
|
255
|
-
event_type: "DECISION",
|
|
256
|
-
decision_type: "worker_spawned",
|
|
257
|
-
payload: { worker: "BlueLake", bead_id: "test-epic-decent.1" },
|
|
258
|
-
},
|
|
259
|
-
// 3. Second spawn
|
|
260
|
-
{
|
|
261
|
-
session_id: "test-session-decent",
|
|
262
|
-
epic_id: "test-epic-decent",
|
|
263
|
-
timestamp: "2025-01-01T10:01:00.000Z",
|
|
264
|
-
event_type: "DECISION",
|
|
265
|
-
decision_type: "worker_spawned",
|
|
266
|
-
payload: { worker: "GreenMountain", bead_id: "test-epic-decent.2" },
|
|
267
|
-
},
|
|
268
|
-
// 4. First worker completes
|
|
269
|
-
{
|
|
270
|
-
session_id: "test-session-decent",
|
|
271
|
-
epic_id: "test-epic-decent",
|
|
272
|
-
timestamp: "2025-01-01T10:10:00.000Z",
|
|
273
|
-
event_type: "OUTCOME",
|
|
274
|
-
outcome_type: "subtask_success",
|
|
275
|
-
payload: { bead_id: "test-epic-decent.1", worker: "BlueLake" },
|
|
276
|
-
},
|
|
277
|
-
// 5. First review
|
|
278
|
-
{
|
|
279
|
-
session_id: "test-session-decent",
|
|
280
|
-
epic_id: "test-epic-decent",
|
|
281
|
-
timestamp: "2025-01-01T10:11:00.000Z",
|
|
282
|
-
event_type: "DECISION",
|
|
283
|
-
decision_type: "review_completed",
|
|
284
|
-
payload: {
|
|
285
|
-
bead_id: "test-epic-decent.1",
|
|
286
|
-
approved: true,
|
|
287
|
-
issues: [],
|
|
288
|
-
},
|
|
289
|
-
},
|
|
290
|
-
// 6. VIOLATION: Ran tests (one slip-up)
|
|
291
|
-
{
|
|
292
|
-
session_id: "test-session-decent",
|
|
293
|
-
epic_id: "test-epic-decent",
|
|
294
|
-
timestamp: "2025-01-01T10:15:00.000Z",
|
|
295
|
-
event_type: "VIOLATION",
|
|
296
|
-
violation_type: "coordinator_ran_tests",
|
|
297
|
-
payload: { command: "bun test", reason: "should let worker verify" },
|
|
298
|
-
},
|
|
299
|
-
// 7. Second worker completes
|
|
300
|
-
{
|
|
301
|
-
session_id: "test-session-decent",
|
|
302
|
-
epic_id: "test-epic-decent",
|
|
303
|
-
timestamp: "2025-01-01T10:20:00.000Z",
|
|
304
|
-
event_type: "OUTCOME",
|
|
305
|
-
outcome_type: "subtask_success",
|
|
306
|
-
payload: { bead_id: "test-epic-decent.2", worker: "GreenMountain" },
|
|
307
|
-
},
|
|
308
|
-
// 8. No review for second worker (50% review rate)
|
|
309
|
-
// 9. Epic complete
|
|
310
|
-
{
|
|
311
|
-
session_id: "test-session-decent",
|
|
312
|
-
epic_id: "test-epic-decent",
|
|
313
|
-
timestamp: "2025-01-01T10:25:00.000Z",
|
|
314
|
-
event_type: "OUTCOME",
|
|
315
|
-
outcome_type: "epic_complete",
|
|
316
|
-
payload: { epic_id: "test-epic-decent", total_subtasks: 2 },
|
|
317
|
-
},
|
|
318
|
-
],
|
|
319
|
-
};
|
|
320
|
-
|
|
321
|
-
/**
|
|
322
|
-
* All test fixtures
|
|
323
|
-
*/
|
|
324
|
-
export const coordinatorSessionFixtures = [
|
|
325
|
-
perfectCoordinator,
|
|
326
|
-
badCoordinator,
|
|
327
|
-
decentCoordinator,
|
|
328
|
-
];
|