opencode-swarm-plugin 0.44.0 → 0.44.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/swarm.serve.test.ts +6 -4
- package/bin/swarm.ts +18 -12
- package/dist/compaction-prompt-scoring.js +139 -0
- package/dist/eval-capture.js +12811 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/hive.js +14834 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7743 -62593
- package/dist/plugin.js +24052 -78907
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm-prompts.js +39407 -0
- package/dist/swarm-review.d.ts.map +1 -1
- package/dist/swarm-validation.d.ts +127 -0
- package/dist/swarm-validation.d.ts.map +1 -0
- package/dist/validators/index.d.ts +7 -0
- package/dist/validators/index.d.ts.map +1 -0
- package/dist/validators/schema-validator.d.ts +58 -0
- package/dist/validators/schema-validator.d.ts.map +1 -0
- package/package.json +17 -5
- package/.changeset/swarm-insights-data-layer.md +0 -63
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
- package/.hive/analysis/session-data-quality-audit.md +0 -320
- package/.hive/eval-results.json +0 -483
- package/.hive/issues.jsonl +0 -138
- package/.hive/memories.jsonl +0 -729
- package/.opencode/eval-history.jsonl +0 -327
- package/.turbo/turbo-build.log +0 -9
- package/CHANGELOG.md +0 -2286
- package/SCORER-ANALYSIS.md +0 -598
- package/docs/analysis/subagent-coordination-patterns.md +0 -902
- package/docs/analysis-socratic-planner-pattern.md +0 -504
- package/docs/planning/ADR-001-monorepo-structure.md +0 -171
- package/docs/planning/ADR-002-package-extraction.md +0 -393
- package/docs/planning/ADR-003-performance-improvements.md +0 -451
- package/docs/planning/ADR-004-message-queue-features.md +0 -187
- package/docs/planning/ADR-005-devtools-observability.md +0 -202
- package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
- package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
- package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
- package/docs/planning/ROADMAP.md +0 -368
- package/docs/semantic-memory-cli-syntax.md +0 -123
- package/docs/swarm-mail-architecture.md +0 -1147
- package/docs/testing/context-recovery-test.md +0 -470
- package/evals/ARCHITECTURE.md +0 -1189
- package/evals/README.md +0 -768
- package/evals/compaction-prompt.eval.ts +0 -149
- package/evals/compaction-resumption.eval.ts +0 -289
- package/evals/coordinator-behavior.eval.ts +0 -307
- package/evals/coordinator-session.eval.ts +0 -154
- package/evals/evalite.config.ts.bak +0 -15
- package/evals/example.eval.ts +0 -31
- package/evals/fixtures/cass-baseline.ts +0 -217
- package/evals/fixtures/compaction-cases.ts +0 -350
- package/evals/fixtures/compaction-prompt-cases.ts +0 -311
- package/evals/fixtures/coordinator-sessions.ts +0 -328
- package/evals/fixtures/decomposition-cases.ts +0 -105
- package/evals/lib/compaction-loader.test.ts +0 -248
- package/evals/lib/compaction-loader.ts +0 -320
- package/evals/lib/data-loader.evalite-test.ts +0 -289
- package/evals/lib/data-loader.test.ts +0 -345
- package/evals/lib/data-loader.ts +0 -281
- package/evals/lib/llm.ts +0 -115
- package/evals/scorers/compaction-prompt-scorers.ts +0 -145
- package/evals/scorers/compaction-scorers.ts +0 -305
- package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
- package/evals/scorers/coordinator-discipline.ts +0 -325
- package/evals/scorers/index.test.ts +0 -146
- package/evals/scorers/index.ts +0 -328
- package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
- package/evals/scorers/outcome-scorers.ts +0 -349
- package/evals/swarm-decomposition.eval.ts +0 -121
- package/examples/commands/swarm.md +0 -745
- package/examples/plugin-wrapper-template.ts +0 -2515
- package/examples/skills/hive-workflow/SKILL.md +0 -212
- package/examples/skills/skill-creator/SKILL.md +0 -223
- package/examples/skills/swarm-coordination/SKILL.md +0 -292
- package/global-skills/cli-builder/SKILL.md +0 -344
- package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
- package/global-skills/learning-systems/SKILL.md +0 -644
- package/global-skills/skill-creator/LICENSE.txt +0 -202
- package/global-skills/skill-creator/SKILL.md +0 -352
- package/global-skills/skill-creator/references/output-patterns.md +0 -82
- package/global-skills/skill-creator/references/workflows.md +0 -28
- package/global-skills/swarm-coordination/SKILL.md +0 -995
- package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
- package/global-skills/swarm-coordination/references/strategies.md +0 -138
- package/global-skills/system-design/SKILL.md +0 -213
- package/global-skills/testing-patterns/SKILL.md +0 -430
- package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
- package/opencode-swarm-plugin-0.30.7.tgz +0 -0
- package/opencode-swarm-plugin-0.31.0.tgz +0 -0
- package/scripts/cleanup-test-memories.ts +0 -346
- package/scripts/init-skill.ts +0 -222
- package/scripts/migrate-unknown-sessions.ts +0 -349
- package/scripts/validate-skill.ts +0 -204
- package/src/agent-mail.ts +0 -1724
- package/src/anti-patterns.test.ts +0 -1167
- package/src/anti-patterns.ts +0 -448
- package/src/compaction-capture.integration.test.ts +0 -257
- package/src/compaction-hook.test.ts +0 -838
- package/src/compaction-hook.ts +0 -1204
- package/src/compaction-observability.integration.test.ts +0 -139
- package/src/compaction-observability.test.ts +0 -187
- package/src/compaction-observability.ts +0 -324
- package/src/compaction-prompt-scorers.test.ts +0 -475
- package/src/compaction-prompt-scoring.ts +0 -300
- package/src/contributor-tools.test.ts +0 -133
- package/src/contributor-tools.ts +0 -201
- package/src/dashboard.test.ts +0 -611
- package/src/dashboard.ts +0 -462
- package/src/error-enrichment.test.ts +0 -403
- package/src/error-enrichment.ts +0 -219
- package/src/eval-capture.test.ts +0 -1015
- package/src/eval-capture.ts +0 -929
- package/src/eval-gates.test.ts +0 -306
- package/src/eval-gates.ts +0 -218
- package/src/eval-history.test.ts +0 -508
- package/src/eval-history.ts +0 -214
- package/src/eval-learning.test.ts +0 -378
- package/src/eval-learning.ts +0 -360
- package/src/eval-runner.test.ts +0 -223
- package/src/eval-runner.ts +0 -402
- package/src/export-tools.test.ts +0 -476
- package/src/export-tools.ts +0 -257
- package/src/hive.integration.test.ts +0 -2241
- package/src/hive.ts +0 -1628
- package/src/index.ts +0 -940
- package/src/learning.integration.test.ts +0 -1815
- package/src/learning.ts +0 -1079
- package/src/logger.test.ts +0 -189
- package/src/logger.ts +0 -135
- package/src/mandate-promotion.test.ts +0 -473
- package/src/mandate-promotion.ts +0 -239
- package/src/mandate-storage.integration.test.ts +0 -601
- package/src/mandate-storage.test.ts +0 -578
- package/src/mandate-storage.ts +0 -794
- package/src/mandates.ts +0 -540
- package/src/memory-tools.test.ts +0 -195
- package/src/memory-tools.ts +0 -344
- package/src/memory.integration.test.ts +0 -334
- package/src/memory.test.ts +0 -158
- package/src/memory.ts +0 -527
- package/src/model-selection.test.ts +0 -188
- package/src/model-selection.ts +0 -68
- package/src/observability-tools.test.ts +0 -359
- package/src/observability-tools.ts +0 -871
- package/src/output-guardrails.test.ts +0 -438
- package/src/output-guardrails.ts +0 -381
- package/src/pattern-maturity.test.ts +0 -1160
- package/src/pattern-maturity.ts +0 -525
- package/src/planning-guardrails.test.ts +0 -491
- package/src/planning-guardrails.ts +0 -438
- package/src/plugin.ts +0 -23
- package/src/post-compaction-tracker.test.ts +0 -251
- package/src/post-compaction-tracker.ts +0 -237
- package/src/query-tools.test.ts +0 -636
- package/src/query-tools.ts +0 -324
- package/src/rate-limiter.integration.test.ts +0 -466
- package/src/rate-limiter.ts +0 -774
- package/src/replay-tools.test.ts +0 -496
- package/src/replay-tools.ts +0 -240
- package/src/repo-crawl.integration.test.ts +0 -441
- package/src/repo-crawl.ts +0 -610
- package/src/schemas/cell-events.test.ts +0 -347
- package/src/schemas/cell-events.ts +0 -807
- package/src/schemas/cell.ts +0 -257
- package/src/schemas/evaluation.ts +0 -166
- package/src/schemas/index.test.ts +0 -199
- package/src/schemas/index.ts +0 -286
- package/src/schemas/mandate.ts +0 -232
- package/src/schemas/swarm-context.ts +0 -115
- package/src/schemas/task.ts +0 -161
- package/src/schemas/worker-handoff.test.ts +0 -302
- package/src/schemas/worker-handoff.ts +0 -131
- package/src/sessions/agent-discovery.test.ts +0 -137
- package/src/sessions/agent-discovery.ts +0 -112
- package/src/sessions/index.ts +0 -15
- package/src/skills.integration.test.ts +0 -1192
- package/src/skills.test.ts +0 -643
- package/src/skills.ts +0 -1549
- package/src/storage.integration.test.ts +0 -341
- package/src/storage.ts +0 -884
- package/src/structured.integration.test.ts +0 -817
- package/src/structured.test.ts +0 -1046
- package/src/structured.ts +0 -762
- package/src/swarm-decompose.test.ts +0 -188
- package/src/swarm-decompose.ts +0 -1302
- package/src/swarm-deferred.integration.test.ts +0 -157
- package/src/swarm-deferred.test.ts +0 -38
- package/src/swarm-insights.test.ts +0 -214
- package/src/swarm-insights.ts +0 -459
- package/src/swarm-mail.integration.test.ts +0 -970
- package/src/swarm-mail.ts +0 -739
- package/src/swarm-orchestrate.integration.test.ts +0 -282
- package/src/swarm-orchestrate.test.ts +0 -548
- package/src/swarm-orchestrate.ts +0 -3084
- package/src/swarm-prompts.test.ts +0 -1270
- package/src/swarm-prompts.ts +0 -2077
- package/src/swarm-research.integration.test.ts +0 -701
- package/src/swarm-research.test.ts +0 -698
- package/src/swarm-research.ts +0 -472
- package/src/swarm-review.integration.test.ts +0 -285
- package/src/swarm-review.test.ts +0 -879
- package/src/swarm-review.ts +0 -709
- package/src/swarm-strategies.ts +0 -407
- package/src/swarm-worktree.test.ts +0 -501
- package/src/swarm-worktree.ts +0 -575
- package/src/swarm.integration.test.ts +0 -2377
- package/src/swarm.ts +0 -38
- package/src/tool-adapter.integration.test.ts +0 -1221
- package/src/tool-availability.ts +0 -461
- package/tsconfig.json +0 -28
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Compaction Prompt Quality Evaluation
|
|
3
|
-
*
|
|
4
|
-
* Tests that continuation prompts generated after context compaction meet
|
|
5
|
-
* quality criteria for coordinator resumption:
|
|
6
|
-
*
|
|
7
|
-
* 1. Epic ID Specificity (20%) - Real IDs not placeholders
|
|
8
|
-
* 2. Actionability (20%) - Specific tool calls with real values
|
|
9
|
-
* 3. Coordinator Identity (25%) - ASCII header + strong mandates
|
|
10
|
-
* 4. Forbidden Tools (15%) - Lists forbidden tools by name
|
|
11
|
-
* 5. Post-Compaction Discipline (20%) - First tool is correct
|
|
12
|
-
*
|
|
13
|
-
* ## Why This Matters
|
|
14
|
-
*
|
|
15
|
-
* After compaction, coordinators lose context. The continuation prompt is
|
|
16
|
-
* their ONLY guide to resume. Bad prompts cause:
|
|
17
|
-
* - Coordinators editing files (should delegate to workers)
|
|
18
|
-
* - Generic "check status" instead of actual tool calls
|
|
19
|
-
* - Lost epic IDs (can't resume coordination)
|
|
20
|
-
*
|
|
21
|
-
* ## Test Strategy
|
|
22
|
-
*
|
|
23
|
-
* - 6 synthetic fixtures covering perfect/bad prompts
|
|
24
|
-
* - Each fixture tests specific failure modes
|
|
25
|
-
* - Composite scorer validates overall quality
|
|
26
|
-
*
|
|
27
|
-
* Run with: bun run eval:compaction
|
|
28
|
-
*/
|
|
29
|
-
|
|
30
|
-
import { evalite } from "evalite";
|
|
31
|
-
import { compactionPromptCases } from "./fixtures/compaction-prompt-cases.js";
|
|
32
|
-
import {
|
|
33
|
-
actionability,
|
|
34
|
-
coordinatorIdentity,
|
|
35
|
-
epicIdSpecificity,
|
|
36
|
-
forbiddenToolsPresent,
|
|
37
|
-
postCompactionDiscipline,
|
|
38
|
-
} from "./scorers/compaction-prompt-scorers.js";
|
|
39
|
-
|
|
40
|
-
/**
 * Primary eval: Compaction Prompt Quality
 *
 * Runs every fixture from fixtures/compaction-prompt-cases.ts through all
 * five prompt-quality scorers.
 */
evalite("Compaction Prompt Quality", {
  // One row per fixture: the fixture's prompt is the input, and its
  // `expected` object is handed to the scorers unchanged.
  data: async () =>
    compactionPromptCases.map(({ prompt, expected }) => ({
      input: prompt,
      expected,
    })),

  // Pass-through task: the fixture already carries the prompt, so it is only
  // serialized here. In real usage an LLM call would generate the prompt.
  task: async (input) => JSON.stringify(input),

  scorers: [
    epicIdSpecificity,
    actionability,
    coordinatorIdentity,
    forbiddenToolsPresent,
    postCompactionDiscipline,
  ],
});
|
|
66
|
-
|
|
67
|
-
/**
 * Perfect Prompt Verification
 *
 * Sanity check that the fixture labelled "perfect" satisfies every
 * expectation flag when run through all five scorers.
 */
evalite("Perfect Prompt Scores 100%", {
  data: async () => [
    {
      // Fixture index 0 is the "perfect" prompt.
      input: compactionPromptCases[0].prompt,
      expected: {
        hasRealEpicId: true,
        isActionable: true,
        hasCoordinatorIdentity: true,
        listsForbiddenTools: true,
        hasCorrectFirstTool: true,
      },
    },
  ],

  // Pass-through: the prompt is serialized, not generated.
  task: async (input) => {
    return JSON.stringify(input);
  },

  scorers: [
    epicIdSpecificity,
    actionability,
    coordinatorIdentity,
    forbiddenToolsPresent,
    postCompactionDiscipline,
  ],
});
|
|
96
|
-
|
|
97
|
-
/**
 * Placeholder Detection
 *
 * Checks that the epic-ID scorer flags a prompt that uses common
 * placeholder patterns instead of a real epic ID.
 */
evalite("Placeholder Detection", {
  data: async () => [
    {
      // Fixture index 1 is the placeholder case.
      input: compactionPromptCases[1].prompt,
      expected: { hasRealEpicId: false },
    },
  ],

  // Pass-through: the prompt is serialized, not generated.
  task: async (input) => {
    return JSON.stringify(input);
  },

  scorers: [epicIdSpecificity],
});
|
|
114
|
-
|
|
115
|
-
/**
 * Generic Instructions Detection
 *
 * Checks that a prompt made of vague language rather than concrete tool
 * calls is scored as not actionable.
 */
evalite("Generic Instructions Fail", {
  data: async () => [
    {
      // Fixture index 2 is the generic-instructions case.
      input: compactionPromptCases[2].prompt,
      expected: { isActionable: false },
    },
  ],

  // Pass-through: the prompt is serialized, not generated.
  task: async (input) => {
    return JSON.stringify(input);
  },

  scorers: [actionability],
});
|
|
132
|
-
|
|
133
|
-
/**
 * First Tool Discipline
 *
 * Checks that the first tool a prompt suggests is the correct one
 * (swarm_status/inbox, not edit).
 */
evalite("First Tool Discipline", {
  data: async () => [
    {
      // Fixture index 5 is the wrong-first-tool case.
      input: compactionPromptCases[5].prompt,
      expected: { hasCorrectFirstTool: false },
    },
  ],

  // Pass-through: the prompt is serialized, not generated.
  task: async (input) => {
    return JSON.stringify(input);
  },

  scorers: [postCompactionDiscipline],
});
|
|
@@ -1,289 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Compaction Hook Coordinator Resumption Eval
|
|
3
|
-
*
|
|
4
|
-
* Tests that the compaction hook correctly detects swarm state and injects
|
|
5
|
-
* appropriate context for coordinator resumption.
|
|
6
|
-
*
|
|
7
|
-
* ## Bug Being Tested
|
|
8
|
-
*
|
|
9
|
-
* Root cause: The compaction hook injects generic "you are a coordinator"
|
|
10
|
-
* context but doesn't include the SPECIFIC epic ID, subtask status, or
|
|
11
|
-
* project path. This causes coordinators to lose identity after compaction.
|
|
12
|
-
*
|
|
13
|
-
* ## Test Cases
|
|
14
|
-
*
|
|
15
|
-
* 1. Active swarm with in_progress epic - should inject full context with epic ID
|
|
16
|
-
* 2. Multiple epics - should identify the in_progress one
|
|
17
|
-
* 3. No active swarm - should not inject coordinator context
|
|
18
|
-
* 4. Blocked epic - should still detect as active swarm
|
|
19
|
-
*
|
|
20
|
-
* Run with: pnpm eval:dev (watch mode) or pnpm eval:run (once)
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
import { evalite } from "evalite";
|
|
24
|
-
import type { Cell } from "swarm-mail";
|
|
25
|
-
import { compactionCases } from "./fixtures/compaction-cases.js";
|
|
26
|
-
import type { CompactionResult } from "./scorers/compaction-scorers.js";
|
|
27
|
-
import {
|
|
28
|
-
compactionQuality,
|
|
29
|
-
confidenceAccuracy,
|
|
30
|
-
contextInjectionCorrectness,
|
|
31
|
-
forbiddenPatternsAbsent,
|
|
32
|
-
requiredPatternsPresent,
|
|
33
|
-
} from "./scorers/index.js";
|
|
34
|
-
|
|
35
|
-
// Context constants duplicated from compaction-hook.ts to avoid import issues.
// Each is assembled line by line; the trailing empty element yields the final
// newline the strings end with.

/** Full coordinator context, used when a swarm is detected. */
const SWARM_COMPACTION_CONTEXT = [
  "## 🐝 SWARM ACTIVE - Keep Cooking",
  "",
  "You are the **COORDINATOR** of an active swarm. Context was compacted but the swarm is still running.",
  "",
  "**YOUR JOB:** Keep orchestrating. Spawn agents. Monitor progress. Unblock work. Ship it.",
  "",
  "### On Resume - IMMEDIATELY",
  "",
  '1. `swarm_status(epic_id="<epic>", project_key="<path>")` - Get current state',
  "2. `swarmmail_inbox(limit=5)` - Check for agent messages",
  "3. `swarm_review(project_key, epic_id, task_id, files_touched)` - Review any completed work",
  "4. **Spawn ready subtasks** - Don't wait, fire them off",
  "",
  "### Keep the Swarm Cooking",
  "",
  "- **Spawn aggressively** - If a subtask is ready and unblocked, spawn an agent",
  "- **Monitor actively** - Check status, read messages, respond to blockers",
  "- **Close the loop** - When all subtasks done, verify and close the epic",
  "",
  "**You are not waiting for instructions. You are the coordinator. Coordinate.**",
  "",
].join("\n");

/** Fallback context, used when cells exist but no active work is detected. */
const SWARM_DETECTION_FALLBACK = [
  "## 🐝 Swarm Detection - Check Your Context",
  "",
  "**IMPORTANT:** Before summarizing, check if this session involves an active swarm.",
  "",
  "Look for ANY of these patterns in the conversation:",
  "",
  "### Tool Calls (definite swarm sign)",
  "- `swarm_decompose`, `swarm_spawn_subtask`, `swarm_status`, `swarm_complete`",
  "- `swarmmail_init`, `swarmmail_reserve`, `swarmmail_send`",
  "- `hive_create_epic`, `hive_start`, `hive_close`",
  "",
  "### If You Find Swarm Evidence",
  "",
  "Include this in your summary and tell the resumed session:",
  '"This is an active swarm. Check swarm_status and swarmmail_inbox immediately."',
  "",
].join("\n");
|
|
74
|
-
|
|
75
|
-
/**
 * Simulate compaction hook execution with given hive state.
 *
 * Simplified stand-in that derives a detection result from the test case
 * without running the full hook, so the evals exercise the injected CONTEXT
 * CONTENT rather than the real detection logic.
 *
 * Signals are checked in priority order:
 *  - any `in_progress` cell or any reservation -> "high" confidence, full context
 *  - any open cell with a parent, or any non-closed epic -> "medium", full context
 *  - any cell at all -> "low", fallback context
 *  - otherwise -> "none", nothing injected
 *
 * @param testCase - Hive cells and swarm-mail counters describing the state.
 *   Only `swarmMailState.reservations` is read; `agents` and `messages` are
 *   accepted but unused.
 * @returns The simulated detection result, including the exact text that
 *   would be injected (empty string when nothing is injected).
 */
async function runCompactionHook(testCase: {
  hiveCells: Array<Omit<Cell, "created_at" | "updated_at" | "closed_at">>;
  swarmMailState: {
    agents: number;
    reservations: number;
    messages: number;
  };
}): Promise<CompactionResult> {
  const { hiveCells, swarmMailState } = testCase;

  // Simulate detection logic based on test case state
  const hasInProgressCells = hiveCells.some((c) => c.status === "in_progress");
  const hasReservations = swarmMailState.reservations > 0;
  const hasOpenSubtasks = hiveCells.some(
    (c) => c.status === "open" && c.parent_id,
  );
  const hasOpenEpics = hiveCells.some(
    (c) => c.type === "epic" && c.status !== "closed",
  );
  const hasCells = hiveCells.length > 0;

  // Determine confidence based on signals
  let confidence: "high" | "medium" | "low" | "none" = "none";
  let contextType: "full" | "fallback" | "none" = "none";
  let injectedContext = "";

  if (hasInProgressCells || hasReservations) {
    // List only the signals that actually fired, so a single signal does not
    // leave a dangling ", " in the banner.
    const signals: string[] = [];
    if (hasInProgressCells) signals.push("cells in_progress");
    if (hasReservations) signals.push("active reservations");

    confidence = "high";
    contextType = "full";
    injectedContext = `[Swarm detected: ${signals.join(", ")}]\n\n${SWARM_COMPACTION_CONTEXT}`;
  } else if (hasOpenSubtasks || hasOpenEpics) {
    confidence = "medium";
    contextType = "full";
    injectedContext = `[Swarm detected: ${hasOpenSubtasks ? "open subtasks" : "unclosed epic"}]\n\n${SWARM_COMPACTION_CONTEXT}`;
  } else if (hasCells) {
    confidence = "low";
    contextType = "fallback";
    injectedContext = `[Possible swarm: cells exist]\n\n${SWARM_DETECTION_FALLBACK}`;
  }

  return {
    detected: confidence !== "none",
    confidence,
    contextInjected: contextType !== "none",
    contextType,
    injectedContext,
  };
}
|
|
129
|
-
|
|
130
|
-
/**
 * Primary eval: Compaction Hook Coordinator Resumption
 *
 * Runs every case from fixtures/compaction-cases.ts through the simulated
 * hook and scores the result.
 */
evalite("Compaction Hook Coordinator Resumption", {
  // The whole fixture is the input; its `expected` object is also surfaced
  // for the scorers.
  data: async () =>
    compactionCases.map((testCase) => {
      return { input: testCase, expected: testCase.expected };
    }),

  // The scorers take a JSON string, so the simulated result is serialized.
  task: async ({ hiveCells, swarmMailState }) =>
    JSON.stringify(await runCompactionHook({ hiveCells, swarmMailState })),

  scorers: [
    confidenceAccuracy,
    contextInjectionCorrectness,
    requiredPatternsPresent,
    forbiddenPatternsAbsent,
    compactionQuality,
  ],
});
|
|
160
|
-
|
|
161
|
-
/**
 * Edge Case Eval: Epic ID Specificity
 *
 * Checks that the injected context carries SPECIFIC epic IDs rather than
 * placeholder tokens.
 */
evalite("Epic ID Specificity", {
  data: async () => [
    {
      input: {
        name: "Epic ID must be specific, not placeholder",
        // One in-progress epic with a concrete ID and project path.
        hiveCells: [
          {
            id: "my-app-lf2p4u-epic999",
            project_key: "/my/app",
            type: "epic" as const,
            status: "in_progress" as const,
            title: "Implement feature X",
            description: "Description here",
            priority: 2,
            parent_id: null,
            assignee: "coordinator",
            closed_reason: null,
            deleted_at: null,
            deleted_by: null,
            delete_reason: null,
            created_by: "coordinator",
          },
        ],
        swarmMailState: { agents: 1, reservations: 1, messages: 2 },
      },
      expected: {
        confidence: "high" as const,
        contextInjected: true,
        contextType: "full" as const,
        mustContain: ["SWARM ACTIVE", "COORDINATOR"],
        // The bug under test: generic placeholders must NOT appear in the
        // injected context.
        mustNotContain: ["bd-xxx", "<epic>", "<path>", "placeholder"],
      },
    },
  ],

  // The scorers take a JSON string, so the simulated result is serialized.
  task: async ({ hiveCells, swarmMailState }) =>
    JSON.stringify(await runCompactionHook({ hiveCells, swarmMailState })),

  scorers: [requiredPatternsPresent, forbiddenPatternsAbsent],
});
|
|
216
|
-
|
|
217
|
-
/**
 * Edge Case Eval: No False Positives
 *
 * Checks that coordinator context is not injected when no swarm is running.
 */
evalite("No False Positives", {
  data: async () => [
    // Case 1: nothing in the hive and no mail activity -> no injection at all.
    {
      input: {
        name: "Empty hive should not trigger injection",
        hiveCells: [],
        swarmMailState: { agents: 0, reservations: 0, messages: 0 },
      },
      expected: {
        confidence: "none" as const,
        contextInjected: false,
        contextType: "none" as const,
        mustContain: [],
        mustNotContain: ["SWARM", "COORDINATOR", "swarm_status"],
      },
    },
    // Case 2: only a closed epic exists -> fallback context, never the full
    // coordinator context.
    {
      input: {
        name: "Closed epic should not trigger full context",
        hiveCells: [
          {
            id: "test-project-lf2p4u-epic100",
            project_key: "/test/project",
            type: "epic" as const,
            status: "closed" as const,
            title: "Completed epic",
            description: null,
            priority: 2,
            parent_id: null,
            assignee: null,
            closed_reason: "Done",
            deleted_at: null,
            deleted_by: null,
            delete_reason: null,
            created_by: null,
          },
        ],
        swarmMailState: { agents: 0, reservations: 0, messages: 0 },
      },
      expected: {
        // Low confidence: cells exist but there is no active work.
        confidence: "low" as const,
        contextInjected: true,
        contextType: "fallback" as const,
        mustContain: ["Swarm Detection", "Check Your Context"],
        mustNotContain: ["SWARM ACTIVE", "COORDINATOR"],
      },
    },
  ],

  // The scorers take a JSON string, so the simulated result is serialized.
  task: async ({ hiveCells, swarmMailState }) =>
    JSON.stringify(await runCompactionHook({ hiveCells, swarmMailState })),

  scorers: [confidenceAccuracy, forbiddenPatternsAbsent],
});
|