opencode-swarm-plugin 0.43.0 → 0.44.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cass.characterization.test.ts +422 -0
- package/bin/swarm.serve.test.ts +6 -4
- package/bin/swarm.test.ts +68 -0
- package/bin/swarm.ts +81 -8
- package/dist/compaction-prompt-scoring.js +139 -0
- package/dist/contributor-tools.d.ts +42 -0
- package/dist/contributor-tools.d.ts.map +1 -0
- package/dist/eval-capture.js +12811 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7728 -62590
- package/dist/plugin.js +23833 -78695
- package/dist/sessions/agent-discovery.d.ts +59 -0
- package/dist/sessions/agent-discovery.d.ts.map +1 -0
- package/dist/sessions/index.d.ts +10 -0
- package/dist/sessions/index.d.ts.map +1 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm-review.d.ts.map +1 -1
- package/package.json +17 -5
- package/.changeset/swarm-insights-data-layer.md +0 -63
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
- package/.hive/analysis/session-data-quality-audit.md +0 -320
- package/.hive/eval-results.json +0 -483
- package/.hive/issues.jsonl +0 -138
- package/.hive/memories.jsonl +0 -729
- package/.opencode/eval-history.jsonl +0 -327
- package/.turbo/turbo-build.log +0 -9
- package/CHANGELOG.md +0 -2255
- package/SCORER-ANALYSIS.md +0 -598
- package/docs/analysis/subagent-coordination-patterns.md +0 -902
- package/docs/analysis-socratic-planner-pattern.md +0 -504
- package/docs/planning/ADR-001-monorepo-structure.md +0 -171
- package/docs/planning/ADR-002-package-extraction.md +0 -393
- package/docs/planning/ADR-003-performance-improvements.md +0 -451
- package/docs/planning/ADR-004-message-queue-features.md +0 -187
- package/docs/planning/ADR-005-devtools-observability.md +0 -202
- package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
- package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
- package/docs/planning/ROADMAP.md +0 -368
- package/docs/semantic-memory-cli-syntax.md +0 -123
- package/docs/swarm-mail-architecture.md +0 -1147
- package/docs/testing/context-recovery-test.md +0 -470
- package/evals/ARCHITECTURE.md +0 -1189
- package/evals/README.md +0 -768
- package/evals/compaction-prompt.eval.ts +0 -149
- package/evals/compaction-resumption.eval.ts +0 -289
- package/evals/coordinator-behavior.eval.ts +0 -307
- package/evals/coordinator-session.eval.ts +0 -154
- package/evals/evalite.config.ts.bak +0 -15
- package/evals/example.eval.ts +0 -31
- package/evals/fixtures/compaction-cases.ts +0 -350
- package/evals/fixtures/compaction-prompt-cases.ts +0 -311
- package/evals/fixtures/coordinator-sessions.ts +0 -328
- package/evals/fixtures/decomposition-cases.ts +0 -105
- package/evals/lib/compaction-loader.test.ts +0 -248
- package/evals/lib/compaction-loader.ts +0 -320
- package/evals/lib/data-loader.evalite-test.ts +0 -289
- package/evals/lib/data-loader.test.ts +0 -345
- package/evals/lib/data-loader.ts +0 -281
- package/evals/lib/llm.ts +0 -115
- package/evals/scorers/compaction-prompt-scorers.ts +0 -145
- package/evals/scorers/compaction-scorers.ts +0 -305
- package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
- package/evals/scorers/coordinator-discipline.ts +0 -325
- package/evals/scorers/index.test.ts +0 -146
- package/evals/scorers/index.ts +0 -328
- package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
- package/evals/scorers/outcome-scorers.ts +0 -349
- package/evals/swarm-decomposition.eval.ts +0 -121
- package/examples/commands/swarm.md +0 -745
- package/examples/plugin-wrapper-template.ts +0 -2426
- package/examples/skills/hive-workflow/SKILL.md +0 -212
- package/examples/skills/skill-creator/SKILL.md +0 -223
- package/examples/skills/swarm-coordination/SKILL.md +0 -292
- package/global-skills/cli-builder/SKILL.md +0 -344
- package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
- package/global-skills/learning-systems/SKILL.md +0 -644
- package/global-skills/skill-creator/LICENSE.txt +0 -202
- package/global-skills/skill-creator/SKILL.md +0 -352
- package/global-skills/skill-creator/references/output-patterns.md +0 -82
- package/global-skills/skill-creator/references/workflows.md +0 -28
- package/global-skills/swarm-coordination/SKILL.md +0 -995
- package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
- package/global-skills/swarm-coordination/references/strategies.md +0 -138
- package/global-skills/system-design/SKILL.md +0 -213
- package/global-skills/testing-patterns/SKILL.md +0 -430
- package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
- package/opencode-swarm-plugin-0.30.7.tgz +0 -0
- package/opencode-swarm-plugin-0.31.0.tgz +0 -0
- package/scripts/cleanup-test-memories.ts +0 -346
- package/scripts/init-skill.ts +0 -222
- package/scripts/migrate-unknown-sessions.ts +0 -349
- package/scripts/validate-skill.ts +0 -204
- package/src/agent-mail.ts +0 -1724
- package/src/anti-patterns.test.ts +0 -1167
- package/src/anti-patterns.ts +0 -448
- package/src/compaction-capture.integration.test.ts +0 -257
- package/src/compaction-hook.test.ts +0 -838
- package/src/compaction-hook.ts +0 -1204
- package/src/compaction-observability.integration.test.ts +0 -139
- package/src/compaction-observability.test.ts +0 -187
- package/src/compaction-observability.ts +0 -324
- package/src/compaction-prompt-scorers.test.ts +0 -475
- package/src/compaction-prompt-scoring.ts +0 -300
- package/src/dashboard.test.ts +0 -611
- package/src/dashboard.ts +0 -462
- package/src/error-enrichment.test.ts +0 -403
- package/src/error-enrichment.ts +0 -219
- package/src/eval-capture.test.ts +0 -1015
- package/src/eval-capture.ts +0 -929
- package/src/eval-gates.test.ts +0 -306
- package/src/eval-gates.ts +0 -218
- package/src/eval-history.test.ts +0 -508
- package/src/eval-history.ts +0 -214
- package/src/eval-learning.test.ts +0 -378
- package/src/eval-learning.ts +0 -360
- package/src/eval-runner.test.ts +0 -223
- package/src/eval-runner.ts +0 -402
- package/src/export-tools.test.ts +0 -476
- package/src/export-tools.ts +0 -257
- package/src/hive.integration.test.ts +0 -2241
- package/src/hive.ts +0 -1628
- package/src/index.ts +0 -935
- package/src/learning.integration.test.ts +0 -1815
- package/src/learning.ts +0 -1079
- package/src/logger.test.ts +0 -189
- package/src/logger.ts +0 -135
- package/src/mandate-promotion.test.ts +0 -473
- package/src/mandate-promotion.ts +0 -239
- package/src/mandate-storage.integration.test.ts +0 -601
- package/src/mandate-storage.test.ts +0 -578
- package/src/mandate-storage.ts +0 -794
- package/src/mandates.ts +0 -540
- package/src/memory-tools.test.ts +0 -195
- package/src/memory-tools.ts +0 -344
- package/src/memory.integration.test.ts +0 -334
- package/src/memory.test.ts +0 -158
- package/src/memory.ts +0 -527
- package/src/model-selection.test.ts +0 -188
- package/src/model-selection.ts +0 -68
- package/src/observability-tools.test.ts +0 -359
- package/src/observability-tools.ts +0 -871
- package/src/output-guardrails.test.ts +0 -438
- package/src/output-guardrails.ts +0 -381
- package/src/pattern-maturity.test.ts +0 -1160
- package/src/pattern-maturity.ts +0 -525
- package/src/planning-guardrails.test.ts +0 -491
- package/src/planning-guardrails.ts +0 -438
- package/src/plugin.ts +0 -23
- package/src/post-compaction-tracker.test.ts +0 -251
- package/src/post-compaction-tracker.ts +0 -237
- package/src/query-tools.test.ts +0 -636
- package/src/query-tools.ts +0 -324
- package/src/rate-limiter.integration.test.ts +0 -466
- package/src/rate-limiter.ts +0 -774
- package/src/replay-tools.test.ts +0 -496
- package/src/replay-tools.ts +0 -240
- package/src/repo-crawl.integration.test.ts +0 -441
- package/src/repo-crawl.ts +0 -610
- package/src/schemas/cell-events.test.ts +0 -347
- package/src/schemas/cell-events.ts +0 -807
- package/src/schemas/cell.ts +0 -257
- package/src/schemas/evaluation.ts +0 -166
- package/src/schemas/index.test.ts +0 -199
- package/src/schemas/index.ts +0 -286
- package/src/schemas/mandate.ts +0 -232
- package/src/schemas/swarm-context.ts +0 -115
- package/src/schemas/task.ts +0 -161
- package/src/schemas/worker-handoff.test.ts +0 -302
- package/src/schemas/worker-handoff.ts +0 -131
- package/src/skills.integration.test.ts +0 -1192
- package/src/skills.test.ts +0 -643
- package/src/skills.ts +0 -1549
- package/src/storage.integration.test.ts +0 -341
- package/src/storage.ts +0 -884
- package/src/structured.integration.test.ts +0 -817
- package/src/structured.test.ts +0 -1046
- package/src/structured.ts +0 -762
- package/src/swarm-decompose.test.ts +0 -188
- package/src/swarm-decompose.ts +0 -1302
- package/src/swarm-deferred.integration.test.ts +0 -157
- package/src/swarm-deferred.test.ts +0 -38
- package/src/swarm-insights.test.ts +0 -214
- package/src/swarm-insights.ts +0 -459
- package/src/swarm-mail.integration.test.ts +0 -970
- package/src/swarm-mail.ts +0 -739
- package/src/swarm-orchestrate.integration.test.ts +0 -282
- package/src/swarm-orchestrate.test.ts +0 -548
- package/src/swarm-orchestrate.ts +0 -3084
- package/src/swarm-prompts.test.ts +0 -1270
- package/src/swarm-prompts.ts +0 -2077
- package/src/swarm-research.integration.test.ts +0 -701
- package/src/swarm-research.test.ts +0 -698
- package/src/swarm-research.ts +0 -472
- package/src/swarm-review.integration.test.ts +0 -285
- package/src/swarm-review.test.ts +0 -879
- package/src/swarm-review.ts +0 -709
- package/src/swarm-strategies.ts +0 -407
- package/src/swarm-worktree.test.ts +0 -501
- package/src/swarm-worktree.ts +0 -575
- package/src/swarm.integration.test.ts +0 -2377
- package/src/swarm.ts +0 -38
- package/src/tool-adapter.integration.test.ts +0 -1221
- package/src/tool-availability.ts +0 -461
- package/tsconfig.json +0 -28
|
@@ -1,328 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Coordinator Session Test Fixtures
|
|
3
|
-
*
|
|
4
|
-
* Synthetic coordinator sessions for testing coordinator-discipline scorers.
|
|
5
|
-
* Each fixture demonstrates good or bad coordinator behavior.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { CoordinatorSession } from "../../src/eval-capture.js";
|
|
9
|
-
|
|
10
|
-
// Identifiers repeated on the session record and on every one of its events.
const perfectIds = {
  session_id: "test-session-perfect",
  epic_id: "test-epic-perfect",
};

/**
 * PERFECT COORDINATOR
 *
 * Reference session in which the coordinator only delegates:
 * - contains no VIOLATION events
 * - a worker_spawned decision for each of the 3 subtasks
 * - a review_completed decision after every subtask_success outcome
 * - first worker_spawned is stamped 30s after decomposition_complete
 */
export const perfectCoordinator: CoordinatorSession = {
  ...perfectIds,
  start_time: "2025-01-01T10:00:00.000Z",
  end_time: "2025-01-01T10:30:00.000Z",
  events: [
    // Step 1: decomposition into 3 subtasks
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:00:00.000Z",
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: { subtask_count: 3 },
    },
    // Step 2: worker for subtask 1 (30s after decomposition)
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:00:30.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "BlueLake", bead_id: "test-epic-perfect.1" },
    },
    // Step 3: worker for subtask 2
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:01:00.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "GreenMountain", bead_id: "test-epic-perfect.2" },
    },
    // Step 4: worker for subtask 3
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:01:30.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "RedForest", bead_id: "test-epic-perfect.3" },
    },
    // Step 5: subtask 1 succeeds
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:10:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-perfect.1", worker: "BlueLake" },
    },
    // Step 6: subtask 1 reviewed and approved
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:11:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-perfect.1", approved: true, issues: [] },
    },
    // Step 7: subtask 2 succeeds
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:15:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-perfect.2", worker: "GreenMountain" },
    },
    // Step 8: subtask 2 reviewed and approved
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:16:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-perfect.2", approved: true, issues: [] },
    },
    // Step 9: subtask 3 succeeds
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:20:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-perfect.3", worker: "RedForest" },
    },
    // Step 10: subtask 3 reviewed and approved
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:21:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-perfect.3", approved: true, issues: [] },
    },
    // Step 11: epic closes at the session end time
    {
      ...perfectIds,
      timestamp: "2025-01-01T10:30:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "epic_complete",
      payload: { epic_id: "test-epic-perfect", total_subtasks: 3 },
    },
  ],
};
|
|
137
|
-
|
|
138
|
-
// Identifiers repeated on the session record and on every one of its events.
const badIds = {
  session_id: "test-session-bad",
  epic_id: "test-epic-bad",
};

/**
 * BAD COORDINATOR - Multiple Violations
 *
 * Session in which the coordinator does most of the work itself:
 * - 5 VIOLATION events: edited a file, ran tests, reserved files,
 *   and two no_worker_spawned entries (subtasks 2 and 3)
 * - only 1 worker_spawned decision for 3 subtasks
 * - no review_completed decisions at all
 * - first worker_spawned is stamped 10 minutes after decomposition_complete
 */
export const badCoordinator: CoordinatorSession = {
  ...badIds,
  start_time: "2025-01-01T10:00:00.000Z",
  end_time: "2025-01-01T11:00:00.000Z",
  events: [
    // Step 1: decomposition into 3 subtasks
    {
      ...badIds,
      timestamp: "2025-01-01T10:00:00.000Z",
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: { subtask_count: 3 },
    },
    // Step 2: violation - direct file edit
    {
      ...badIds,
      timestamp: "2025-01-01T10:01:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_edited_file",
      payload: { file: "src/auth.ts", reason: "should spawn worker instead" },
    },
    // Step 3: violation - ran the test suite itself
    {
      ...badIds,
      timestamp: "2025-01-01T10:02:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_ran_tests",
      payload: { command: "bun test", reason: "workers do verification" },
    },
    // Step 4: violation - reserved files
    {
      ...badIds,
      timestamp: "2025-01-01T10:03:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_reserved_files",
      payload: { paths: ["src/**"], reason: "only workers reserve" },
    },
    // Step 5: the only spawn, 10 minutes after decomposition
    {
      ...badIds,
      timestamp: "2025-01-01T10:10:00.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "BlueLake", bead_id: "test-epic-bad.1" },
    },
    // Step 6: subtask 1 succeeds; deliberately not followed by a review
    {
      ...badIds,
      timestamp: "2025-01-01T10:20:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-bad.1", worker: "BlueLake" },
    },
    // Step 7: violation - subtask 2 never got a worker
    {
      ...badIds,
      timestamp: "2025-01-01T10:30:00.000Z",
      event_type: "VIOLATION",
      violation_type: "no_worker_spawned",
      payload: { bead_id: "test-epic-bad.2", reason: "coordinator did work directly" },
    },
    // Step 8: violation - subtask 3 never got a worker
    {
      ...badIds,
      timestamp: "2025-01-01T10:40:00.000Z",
      event_type: "VIOLATION",
      violation_type: "no_worker_spawned",
      payload: { bead_id: "test-epic-bad.3", reason: "coordinator did work directly" },
    },
  ],
};
|
|
226
|
-
|
|
227
|
-
// Identifiers repeated on the session record and on every one of its events.
const decentIds = {
  session_id: "test-session-decent",
  epic_id: "test-epic-decent",
};

/**
 * DECENT COORDINATOR - Some Issues
 *
 * Mostly well-behaved session with two blemishes:
 * - 1 VIOLATION event (coordinator_ran_tests)
 * - a worker_spawned decision for both of the 2 subtasks
 * - only 1 of the 2 finished subtasks has a review_completed decision
 * - first worker_spawned is stamped 45s after decomposition_complete
 */
export const decentCoordinator: CoordinatorSession = {
  ...decentIds,
  start_time: "2025-01-01T10:00:00.000Z",
  end_time: "2025-01-01T10:25:00.000Z",
  events: [
    // Step 1: decomposition into 2 subtasks
    {
      ...decentIds,
      timestamp: "2025-01-01T10:00:00.000Z",
      event_type: "DECISION",
      decision_type: "decomposition_complete",
      payload: { subtask_count: 2 },
    },
    // Step 2: worker for subtask 1 (45s after decomposition)
    {
      ...decentIds,
      timestamp: "2025-01-01T10:00:45.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "BlueLake", bead_id: "test-epic-decent.1" },
    },
    // Step 3: worker for subtask 2
    {
      ...decentIds,
      timestamp: "2025-01-01T10:01:00.000Z",
      event_type: "DECISION",
      decision_type: "worker_spawned",
      payload: { worker: "GreenMountain", bead_id: "test-epic-decent.2" },
    },
    // Step 4: subtask 1 succeeds
    {
      ...decentIds,
      timestamp: "2025-01-01T10:10:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-decent.1", worker: "BlueLake" },
    },
    // Step 5: subtask 1 reviewed and approved
    {
      ...decentIds,
      timestamp: "2025-01-01T10:11:00.000Z",
      event_type: "DECISION",
      decision_type: "review_completed",
      payload: { bead_id: "test-epic-decent.1", approved: true, issues: [] },
    },
    // Step 6: the single violation - coordinator ran tests
    {
      ...decentIds,
      timestamp: "2025-01-01T10:15:00.000Z",
      event_type: "VIOLATION",
      violation_type: "coordinator_ran_tests",
      payload: { command: "bun test", reason: "should let worker verify" },
    },
    // Step 7: subtask 2 succeeds
    {
      ...decentIds,
      timestamp: "2025-01-01T10:20:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "subtask_success",
      payload: { bead_id: "test-epic-decent.2", worker: "GreenMountain" },
    },
    // Step 8: intentionally no review for subtask 2 (1 of 2 reviewed)
    // Step 9: epic closes at the session end time
    {
      ...decentIds,
      timestamp: "2025-01-01T10:25:00.000Z",
      event_type: "OUTCOME",
      outcome_type: "epic_complete",
      payload: { epic_id: "test-epic-decent", total_subtasks: 2 },
    },
  ],
};
|
|
320
|
-
|
|
321
|
-
/**
 * Every coordinator session fixture, in the order: perfect, bad, decent.
 */
export const coordinatorSessionFixtures = [perfectCoordinator, badCoordinator, decentCoordinator];
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Test cases for swarm task decomposition
|
|
3
|
-
*
|
|
4
|
-
* Each case includes:
|
|
5
|
-
* - input: task description and optional context
|
|
6
|
-
* - expected: validation criteria (min/max subtasks, required files)
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
/** What gets decomposed: the task description plus optional context. */
type DecompositionInput = {
  task: string;
  context?: string;
};

/** Validation criteria: subtask-count bounds and, optionally, required files. */
type DecompositionExpectation = {
  minSubtasks: number;
  maxSubtasks: number;
  requiredFiles?: string[];
};

/** One decomposition case: an input paired with its validation criteria. */
export interface DecompositionTestCase {
  input: DecompositionInput;
  expected: DecompositionExpectation;
}
|
|
20
|
-
|
|
21
|
-
/**
 * Assemble one decomposition case from its scalar parts.
 *
 * @param task - task description
 * @param context - context string for the task
 * @param bounds - `[minSubtasks, maxSubtasks]`
 * @param requiredFiles - file paths listed as required
 */
function decompositionCase(
  task: string,
  context: string,
  [minSubtasks, maxSubtasks]: [number, number],
  requiredFiles: string[],
): DecompositionTestCase {
  return {
    input: { task, context },
    expected: { minSubtasks, maxSubtasks, requiredFiles },
  };
}

/** Decomposition cases, one `decompositionCase(...)` call per scenario. */
export const decompositionCases: DecompositionTestCase[] = [
  decompositionCase(
    "Add user authentication with OAuth",
    "Next.js App Router application with existing user model",
    [3, 6],
    [
      "src/auth/oauth.ts",
      "src/auth/middleware.ts",
      "app/api/auth/[...nextauth]/route.ts",
    ],
  ),
  decompositionCase(
    "Implement rate limiting for API endpoints",
    "Express.js API with Redis available",
    [2, 4],
    ["src/middleware/rate-limit.ts", "src/utils/redis-client.ts"],
  ),
  decompositionCase(
    "Add TypeScript strict mode to legacy JavaScript project",
    "Large codebase with 50+ JS files, currently untyped",
    [4, 8],
    ["tsconfig.json"],
  ),
  decompositionCase(
    "Create admin dashboard for user management",
    "React app with existing component library and API client",
    [4, 7],
    [
      "src/pages/admin/Dashboard.tsx",
      "src/components/admin/UserTable.tsx",
      "src/api/admin.ts",
    ],
  ),
  decompositionCase(
    "Fix memory leak in long-running background job",
    "Node.js worker that processes queue messages, memory grows over time",
    [2, 4],
    ["src/workers/queue-processor.ts"],
  ),
  decompositionCase(
    "Implement feature flag system with remote config",
    "Microservices architecture, need runtime toggles without deploys",
    [3, 6],
    [
      "src/feature-flags/client.ts",
      "src/feature-flags/middleware.ts",
      "src/feature-flags/types.ts",
    ],
  ),
];
|
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Compaction Data Loader Tests
|
|
3
|
-
*
|
|
4
|
-
* Tests loading COMPACTION events from session JSONL files.
|
|
5
|
-
*/
|
|
6
|
-
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
7
|
-
import * as fs from "node:fs";
|
|
8
|
-
import * as os from "node:os";
|
|
9
|
-
import * as path from "node:path";
|
|
10
|
-
import type { CoordinatorEvent } from "../../src/eval-capture.js";
|
|
11
|
-
import {
|
|
12
|
-
loadCompactionEvents,
|
|
13
|
-
loadCompactionSessions,
|
|
14
|
-
} from "./compaction-loader.js";
|
|
15
|
-
|
|
16
|
-
// Test fixtures directory.
// mkdtempSync creates the directory immediately and appends a random suffix,
// so two test processes starting in the same millisecond can never share
// (and later delete) each other's fixture directory, which a Date.now()-based
// name allowed.
const TEST_SESSION_DIR = fs.mkdtempSync(
  path.join(os.tmpdir(), "test-sessions-"),
);
|
|
21
|
-
|
|
22
|
-
/**
 * Write a session fixture to `<TEST_SESSION_DIR>/<sessionId>.jsonl`.
 *
 * Each event is serialized as one JSON document per line, and the file
 * always ends with a trailing newline.
 */
function createSessionFile(
  sessionId: string,
  events: CoordinatorEvent[],
): void {
  const serialized = events.map((event) => JSON.stringify(event));
  const contents = serialized.join("\n").concat("\n");
  const target = path.join(TEST_SESSION_DIR, `${sessionId}.jsonl`);
  fs.writeFileSync(target, contents, { encoding: "utf-8" });
}
|
|
33
|
-
|
|
34
|
-
beforeAll(() => {
  // The fixture directory must exist before any session file is written.
  fs.mkdirSync(TEST_SESSION_DIR, { recursive: true });

  // One entry per session file: [session id, events written to <id>.jsonl].
  const sessionFiles: Array<[string, CoordinatorEvent[]]> = [
    // session-1: one DECISION followed by two COMPACTION events
    [
      "session-1",
      [
        {
          session_id: "session-1",
          epic_id: "epic-1",
          timestamp: "2025-01-01T10:00:00.000Z",
          event_type: "DECISION",
          decision_type: "decomposition_complete",
          payload: { subtask_count: 3 },
        },
        {
          session_id: "session-1",
          epic_id: "epic-1",
          timestamp: "2025-01-01T10:05:00.000Z",
          event_type: "COMPACTION",
          compaction_type: "detection_complete",
          payload: { confidence: "high", context_type: "full", epic_id: "epic-1" },
        },
        {
          session_id: "session-1",
          epic_id: "epic-1",
          timestamp: "2025-01-01T10:06:00.000Z",
          event_type: "COMPACTION",
          compaction_type: "prompt_generated",
          payload: {
            prompt_length: 5000,
            full_prompt: "You are a coordinator...",
            context_type: "full",
          },
        },
      ],
    ],
    // session-2: two COMPACTION events only
    [
      "session-2",
      [
        {
          session_id: "session-2",
          epic_id: "epic-2",
          timestamp: "2025-01-02T10:00:00.000Z",
          event_type: "COMPACTION",
          compaction_type: "context_injected",
          payload: { injection_point: "tool_call", context_length: 3000 },
        },
        {
          session_id: "session-2",
          epic_id: "epic-2",
          timestamp: "2025-01-02T10:01:00.000Z",
          event_type: "COMPACTION",
          compaction_type: "resumption_started",
          payload: { epic_id: "epic-2", resumption_type: "coordinator" },
        },
      ],
    ],
    // session-3: no COMPACTION events at all
    [
      "session-3",
      [
        {
          session_id: "session-3",
          epic_id: "epic-3",
          timestamp: "2025-01-03T10:00:00.000Z",
          event_type: "DECISION",
          decision_type: "worker_spawned",
          payload: { worker: "BlueLake", bead_id: "epic-3.1" },
        },
      ],
    ],
  ];

  for (const [sessionId, events] of sessionFiles) {
    createSessionFile(sessionId, events);
  }
});
|
|
111
|
-
|
|
112
|
-
afterAll(() => {
  // Remove the fixture directory and everything in it; `force` makes this a
  // no-op when the directory is already gone.
  fs.rmSync(TEST_SESSION_DIR, { recursive: true, force: true });
});
|
|
118
|
-
|
|
119
|
-
// Exercises loadCompactionEvents against the three session files written in
// beforeAll: 2 COMPACTION events in session-1, 2 in session-2, none in session-3.
describe("loadCompactionEvents", () => {
  test("loads all COMPACTION events from session directory", async () => {
    const events = await loadCompactionEvents(TEST_SESSION_DIR);

    expect(events.length).toBe(4);
    expect(events.every((e) => e.event_type === "COMPACTION")).toBe(true);
  });

  test("filters by compaction_type", async () => {
    const events = await loadCompactionEvents(TEST_SESSION_DIR, {
      compaction_type: "detection_complete",
    });

    expect(events.length).toBe(1);
    expect(events[0].compaction_type).toBe("detection_complete");
  });

  test("filters by session_ids", async () => {
    const events = await loadCompactionEvents(TEST_SESSION_DIR, {
      sessionIds: ["session-1"],
    });

    expect(events.length).toBe(2);
    expect(events.every((e) => e.session_id === "session-1")).toBe(true);
  });

  test("applies limit", async () => {
    const events = await loadCompactionEvents(TEST_SESSION_DIR, {
      limit: 2,
    });

    expect(events.length).toBe(2);
  });

  test("combines filters", async () => {
    const events = await loadCompactionEvents(TEST_SESSION_DIR, {
      compaction_type: "prompt_generated",
      sessionIds: ["session-1"],
      limit: 1,
    });

    expect(events.length).toBe(1);
    expect(events[0].compaction_type).toBe("prompt_generated");
    expect(events[0].session_id).toBe("session-1");
  });

  test("returns empty array for non-existent directory", async () => {
    const events = await loadCompactionEvents("/non/existent/path");

    expect(events).toEqual([]);
  });

  test("skips invalid JSONL lines", async () => {
    // Create session with one unparseable line followed by one valid event
    const invalidPath = path.join(TEST_SESSION_DIR, "session-invalid.jsonl");
    fs.writeFileSync(
      invalidPath,
      'invalid json\n{"session_id": "session-valid", "event_type": "COMPACTION", "compaction_type": "detection_complete", "epic_id": "epic-4", "timestamp": "2025-01-04T10:00:00.000Z", "payload": {}}\n',
      "utf-8",
    );

    try {
      const events = await loadCompactionEvents(TEST_SESSION_DIR);

      // Should skip invalid line but include valid one
      expect(events.some((e) => e.session_id === "session-valid")).toBe(true);
    } finally {
      // Clean up even when the load rejects or the assertion fails; a leftover
      // file would add an extra session/event to every later test that counts
      // the contents of TEST_SESSION_DIR.
      fs.unlinkSync(invalidPath);
    }
  });
});
|
|
189
|
-
|
|
190
|
-
// Exercises loadCompactionSessions against the session files written in
// beforeAll; only session-1 and session-2 contain COMPACTION events.
describe("loadCompactionSessions", () => {
  test("groups events by session_id", async () => {
    const grouped = await loadCompactionSessions(TEST_SESSION_DIR);
    const [first] = grouped;

    // session-1 and session-2 (session-3 has no COMPACTION events)
    expect(grouped.length).toBe(2);
    expect(first.session_id).toBeDefined();
    expect(first.events.length).toBeGreaterThan(0);
  });

  test("includes session metadata", async () => {
    const grouped = await loadCompactionSessions(TEST_SESSION_DIR);
    const session1 = grouped.find(({ session_id }) => session_id === "session-1");

    expect(session1).toBeDefined();
    if (!session1) {
      return;
    }
    expect(session1.epic_id).toBe("epic-1");
    expect(session1.start_time).toBeDefined();
    expect(session1.end_time).toBeDefined();
  });

  test("filters by compaction_type", async () => {
    const options = { compaction_type: "detection_complete" } as const;
    const grouped = await loadCompactionSessions(TEST_SESSION_DIR, options);

    expect(grouped.length).toBe(1);
    expect(grouped[0].session_id).toBe("session-1");
  });

  test("filters by session_ids", async () => {
    const options = { sessionIds: ["session-2"] };
    const grouped = await loadCompactionSessions(TEST_SESSION_DIR, options);

    expect(grouped.length).toBe(1);
    expect(grouped[0].session_id).toBe("session-2");
  });

  test("applies limit", async () => {
    const options = { limit: 1 };
    const grouped = await loadCompactionSessions(TEST_SESSION_DIR, options);

    expect(grouped.length).toBe(1);
  });

  test("returns empty array for non-existent directory", async () => {
    const grouped = await loadCompactionSessions("/non/existent/path");

    expect(grouped).toEqual([]);
  });

  test("excludes sessions with no COMPACTION events", async () => {
    const grouped = await loadCompactionSessions(TEST_SESSION_DIR);
    const containsSession3 = grouped.some(({ session_id }) => session_id === "session-3");

    expect(!containsSession3).toBe(true);
  });
});
|