opencode-swarm-plugin 0.44.0 → 0.44.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/swarm.serve.test.ts +6 -4
- package/bin/swarm.ts +18 -12
- package/dist/compaction-prompt-scoring.js +139 -0
- package/dist/eval-capture.js +12811 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/hive.js +14834 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7743 -62593
- package/dist/plugin.js +24052 -78907
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm-prompts.js +39407 -0
- package/dist/swarm-review.d.ts.map +1 -1
- package/dist/swarm-validation.d.ts +127 -0
- package/dist/swarm-validation.d.ts.map +1 -0
- package/dist/validators/index.d.ts +7 -0
- package/dist/validators/index.d.ts.map +1 -0
- package/dist/validators/schema-validator.d.ts +58 -0
- package/dist/validators/schema-validator.d.ts.map +1 -0
- package/package.json +17 -5
- package/.changeset/swarm-insights-data-layer.md +0 -63
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
- package/.hive/analysis/session-data-quality-audit.md +0 -320
- package/.hive/eval-results.json +0 -483
- package/.hive/issues.jsonl +0 -138
- package/.hive/memories.jsonl +0 -729
- package/.opencode/eval-history.jsonl +0 -327
- package/.turbo/turbo-build.log +0 -9
- package/CHANGELOG.md +0 -2286
- package/SCORER-ANALYSIS.md +0 -598
- package/docs/analysis/subagent-coordination-patterns.md +0 -902
- package/docs/analysis-socratic-planner-pattern.md +0 -504
- package/docs/planning/ADR-001-monorepo-structure.md +0 -171
- package/docs/planning/ADR-002-package-extraction.md +0 -393
- package/docs/planning/ADR-003-performance-improvements.md +0 -451
- package/docs/planning/ADR-004-message-queue-features.md +0 -187
- package/docs/planning/ADR-005-devtools-observability.md +0 -202
- package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
- package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
- package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
- package/docs/planning/ROADMAP.md +0 -368
- package/docs/semantic-memory-cli-syntax.md +0 -123
- package/docs/swarm-mail-architecture.md +0 -1147
- package/docs/testing/context-recovery-test.md +0 -470
- package/evals/ARCHITECTURE.md +0 -1189
- package/evals/README.md +0 -768
- package/evals/compaction-prompt.eval.ts +0 -149
- package/evals/compaction-resumption.eval.ts +0 -289
- package/evals/coordinator-behavior.eval.ts +0 -307
- package/evals/coordinator-session.eval.ts +0 -154
- package/evals/evalite.config.ts.bak +0 -15
- package/evals/example.eval.ts +0 -31
- package/evals/fixtures/cass-baseline.ts +0 -217
- package/evals/fixtures/compaction-cases.ts +0 -350
- package/evals/fixtures/compaction-prompt-cases.ts +0 -311
- package/evals/fixtures/coordinator-sessions.ts +0 -328
- package/evals/fixtures/decomposition-cases.ts +0 -105
- package/evals/lib/compaction-loader.test.ts +0 -248
- package/evals/lib/compaction-loader.ts +0 -320
- package/evals/lib/data-loader.evalite-test.ts +0 -289
- package/evals/lib/data-loader.test.ts +0 -345
- package/evals/lib/data-loader.ts +0 -281
- package/evals/lib/llm.ts +0 -115
- package/evals/scorers/compaction-prompt-scorers.ts +0 -145
- package/evals/scorers/compaction-scorers.ts +0 -305
- package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
- package/evals/scorers/coordinator-discipline.ts +0 -325
- package/evals/scorers/index.test.ts +0 -146
- package/evals/scorers/index.ts +0 -328
- package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
- package/evals/scorers/outcome-scorers.ts +0 -349
- package/evals/swarm-decomposition.eval.ts +0 -121
- package/examples/commands/swarm.md +0 -745
- package/examples/plugin-wrapper-template.ts +0 -2515
- package/examples/skills/hive-workflow/SKILL.md +0 -212
- package/examples/skills/skill-creator/SKILL.md +0 -223
- package/examples/skills/swarm-coordination/SKILL.md +0 -292
- package/global-skills/cli-builder/SKILL.md +0 -344
- package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
- package/global-skills/learning-systems/SKILL.md +0 -644
- package/global-skills/skill-creator/LICENSE.txt +0 -202
- package/global-skills/skill-creator/SKILL.md +0 -352
- package/global-skills/skill-creator/references/output-patterns.md +0 -82
- package/global-skills/skill-creator/references/workflows.md +0 -28
- package/global-skills/swarm-coordination/SKILL.md +0 -995
- package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
- package/global-skills/swarm-coordination/references/strategies.md +0 -138
- package/global-skills/system-design/SKILL.md +0 -213
- package/global-skills/testing-patterns/SKILL.md +0 -430
- package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
- package/opencode-swarm-plugin-0.30.7.tgz +0 -0
- package/opencode-swarm-plugin-0.31.0.tgz +0 -0
- package/scripts/cleanup-test-memories.ts +0 -346
- package/scripts/init-skill.ts +0 -222
- package/scripts/migrate-unknown-sessions.ts +0 -349
- package/scripts/validate-skill.ts +0 -204
- package/src/agent-mail.ts +0 -1724
- package/src/anti-patterns.test.ts +0 -1167
- package/src/anti-patterns.ts +0 -448
- package/src/compaction-capture.integration.test.ts +0 -257
- package/src/compaction-hook.test.ts +0 -838
- package/src/compaction-hook.ts +0 -1204
- package/src/compaction-observability.integration.test.ts +0 -139
- package/src/compaction-observability.test.ts +0 -187
- package/src/compaction-observability.ts +0 -324
- package/src/compaction-prompt-scorers.test.ts +0 -475
- package/src/compaction-prompt-scoring.ts +0 -300
- package/src/contributor-tools.test.ts +0 -133
- package/src/contributor-tools.ts +0 -201
- package/src/dashboard.test.ts +0 -611
- package/src/dashboard.ts +0 -462
- package/src/error-enrichment.test.ts +0 -403
- package/src/error-enrichment.ts +0 -219
- package/src/eval-capture.test.ts +0 -1015
- package/src/eval-capture.ts +0 -929
- package/src/eval-gates.test.ts +0 -306
- package/src/eval-gates.ts +0 -218
- package/src/eval-history.test.ts +0 -508
- package/src/eval-history.ts +0 -214
- package/src/eval-learning.test.ts +0 -378
- package/src/eval-learning.ts +0 -360
- package/src/eval-runner.test.ts +0 -223
- package/src/eval-runner.ts +0 -402
- package/src/export-tools.test.ts +0 -476
- package/src/export-tools.ts +0 -257
- package/src/hive.integration.test.ts +0 -2241
- package/src/hive.ts +0 -1628
- package/src/index.ts +0 -940
- package/src/learning.integration.test.ts +0 -1815
- package/src/learning.ts +0 -1079
- package/src/logger.test.ts +0 -189
- package/src/logger.ts +0 -135
- package/src/mandate-promotion.test.ts +0 -473
- package/src/mandate-promotion.ts +0 -239
- package/src/mandate-storage.integration.test.ts +0 -601
- package/src/mandate-storage.test.ts +0 -578
- package/src/mandate-storage.ts +0 -794
- package/src/mandates.ts +0 -540
- package/src/memory-tools.test.ts +0 -195
- package/src/memory-tools.ts +0 -344
- package/src/memory.integration.test.ts +0 -334
- package/src/memory.test.ts +0 -158
- package/src/memory.ts +0 -527
- package/src/model-selection.test.ts +0 -188
- package/src/model-selection.ts +0 -68
- package/src/observability-tools.test.ts +0 -359
- package/src/observability-tools.ts +0 -871
- package/src/output-guardrails.test.ts +0 -438
- package/src/output-guardrails.ts +0 -381
- package/src/pattern-maturity.test.ts +0 -1160
- package/src/pattern-maturity.ts +0 -525
- package/src/planning-guardrails.test.ts +0 -491
- package/src/planning-guardrails.ts +0 -438
- package/src/plugin.ts +0 -23
- package/src/post-compaction-tracker.test.ts +0 -251
- package/src/post-compaction-tracker.ts +0 -237
- package/src/query-tools.test.ts +0 -636
- package/src/query-tools.ts +0 -324
- package/src/rate-limiter.integration.test.ts +0 -466
- package/src/rate-limiter.ts +0 -774
- package/src/replay-tools.test.ts +0 -496
- package/src/replay-tools.ts +0 -240
- package/src/repo-crawl.integration.test.ts +0 -441
- package/src/repo-crawl.ts +0 -610
- package/src/schemas/cell-events.test.ts +0 -347
- package/src/schemas/cell-events.ts +0 -807
- package/src/schemas/cell.ts +0 -257
- package/src/schemas/evaluation.ts +0 -166
- package/src/schemas/index.test.ts +0 -199
- package/src/schemas/index.ts +0 -286
- package/src/schemas/mandate.ts +0 -232
- package/src/schemas/swarm-context.ts +0 -115
- package/src/schemas/task.ts +0 -161
- package/src/schemas/worker-handoff.test.ts +0 -302
- package/src/schemas/worker-handoff.ts +0 -131
- package/src/sessions/agent-discovery.test.ts +0 -137
- package/src/sessions/agent-discovery.ts +0 -112
- package/src/sessions/index.ts +0 -15
- package/src/skills.integration.test.ts +0 -1192
- package/src/skills.test.ts +0 -643
- package/src/skills.ts +0 -1549
- package/src/storage.integration.test.ts +0 -341
- package/src/storage.ts +0 -884
- package/src/structured.integration.test.ts +0 -817
- package/src/structured.test.ts +0 -1046
- package/src/structured.ts +0 -762
- package/src/swarm-decompose.test.ts +0 -188
- package/src/swarm-decompose.ts +0 -1302
- package/src/swarm-deferred.integration.test.ts +0 -157
- package/src/swarm-deferred.test.ts +0 -38
- package/src/swarm-insights.test.ts +0 -214
- package/src/swarm-insights.ts +0 -459
- package/src/swarm-mail.integration.test.ts +0 -970
- package/src/swarm-mail.ts +0 -739
- package/src/swarm-orchestrate.integration.test.ts +0 -282
- package/src/swarm-orchestrate.test.ts +0 -548
- package/src/swarm-orchestrate.ts +0 -3084
- package/src/swarm-prompts.test.ts +0 -1270
- package/src/swarm-prompts.ts +0 -2077
- package/src/swarm-research.integration.test.ts +0 -701
- package/src/swarm-research.test.ts +0 -698
- package/src/swarm-research.ts +0 -472
- package/src/swarm-review.integration.test.ts +0 -285
- package/src/swarm-review.test.ts +0 -879
- package/src/swarm-review.ts +0 -709
- package/src/swarm-strategies.ts +0 -407
- package/src/swarm-worktree.test.ts +0 -501
- package/src/swarm-worktree.ts +0 -575
- package/src/swarm.integration.test.ts +0 -2377
- package/src/swarm.ts +0 -38
- package/src/tool-adapter.integration.test.ts +0 -1221
- package/src/tool-availability.ts +0 -461
- package/tsconfig.json +0 -28
|
@@ -1,289 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Data Loader Tests
|
|
3
|
-
*
|
|
4
|
-
* Tests the PGlite-backed eval data loader functions.
|
|
5
|
-
* Uses a real in-memory PGlite database for accurate testing.
|
|
6
|
-
*/
|
|
7
|
-
import { describe, it, expect, beforeAll, afterAll } from "bun:test";
|
|
8
|
-
import {
|
|
9
|
-
loadEvalCases,
|
|
10
|
-
hasRealEvalData,
|
|
11
|
-
getEvalDataSummary,
|
|
12
|
-
} from "./data-loader.js";
|
|
13
|
-
import {
|
|
14
|
-
appendEvent,
|
|
15
|
-
getDatabase,
|
|
16
|
-
closeDatabase,
|
|
17
|
-
type DecompositionGeneratedEvent,
|
|
18
|
-
type SubtaskOutcomeEvent,
|
|
19
|
-
} from "swarm-mail";
|
|
20
|
-
import * as fs from "node:fs";
|
|
21
|
-
import * as path from "node:path";
|
|
22
|
-
import * as os from "node:os";
|
|
23
|
-
|
|
24
|
-
const TEST_PROJECT_KEY = "test-project-eval-loader";
|
|
25
|
-
|
|
26
|
-
// Create a unique temp directory for this test run
|
|
27
|
-
let testDir: string;
|
|
28
|
-
|
|
29
|
-
describe("Data Loader", () => {
|
|
30
|
-
beforeAll(async () => {
|
|
31
|
-
// Create temp directory for test database
|
|
32
|
-
testDir = fs.mkdtempSync(path.join(os.tmpdir(), "eval-loader-test-"));
|
|
33
|
-
|
|
34
|
-
// Initialize database by getting it (lazy init)
|
|
35
|
-
await getDatabase(testDir);
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
afterAll(async () => {
|
|
39
|
-
await closeDatabase(testDir);
|
|
40
|
-
// Clean up temp directory
|
|
41
|
-
fs.rmSync(testDir, { recursive: true, force: true });
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
describe("loadEvalCases", () => {
|
|
45
|
-
it("transforms eval records to EvalCase format", async () => {
|
|
46
|
-
// Insert a decomposition event
|
|
47
|
-
const decompositionEvent: DecompositionGeneratedEvent = {
|
|
48
|
-
type: "decomposition_generated",
|
|
49
|
-
timestamp: Date.now(),
|
|
50
|
-
project_key: TEST_PROJECT_KEY,
|
|
51
|
-
epic_id: "epic-load-1",
|
|
52
|
-
task: "Add authentication",
|
|
53
|
-
context: "Next.js app",
|
|
54
|
-
strategy: "feature-based",
|
|
55
|
-
epic_title: "Auth Epic",
|
|
56
|
-
subtasks: [
|
|
57
|
-
{ title: "OAuth setup", files: ["src/auth/oauth.ts"], priority: 1 },
|
|
58
|
-
{
|
|
59
|
-
title: "Session management",
|
|
60
|
-
files: ["src/auth/session.ts"],
|
|
61
|
-
priority: 2,
|
|
62
|
-
},
|
|
63
|
-
],
|
|
64
|
-
};
|
|
65
|
-
await appendEvent(decompositionEvent, testDir);
|
|
66
|
-
|
|
67
|
-
// Insert outcome events for both subtasks
|
|
68
|
-
const outcome1: SubtaskOutcomeEvent = {
|
|
69
|
-
type: "subtask_outcome",
|
|
70
|
-
timestamp: Date.now(),
|
|
71
|
-
project_key: TEST_PROJECT_KEY,
|
|
72
|
-
epic_id: "epic-load-1",
|
|
73
|
-
bead_id: "epic-load-1.1",
|
|
74
|
-
planned_files: ["src/auth/oauth.ts"],
|
|
75
|
-
actual_files: ["src/auth/oauth.ts"],
|
|
76
|
-
duration_ms: 5000,
|
|
77
|
-
error_count: 0,
|
|
78
|
-
retry_count: 0,
|
|
79
|
-
success: true,
|
|
80
|
-
};
|
|
81
|
-
await appendEvent(outcome1, testDir);
|
|
82
|
-
|
|
83
|
-
const outcome2: SubtaskOutcomeEvent = {
|
|
84
|
-
type: "subtask_outcome",
|
|
85
|
-
timestamp: Date.now(),
|
|
86
|
-
project_key: TEST_PROJECT_KEY,
|
|
87
|
-
epic_id: "epic-load-1",
|
|
88
|
-
bead_id: "epic-load-1.2",
|
|
89
|
-
planned_files: ["src/auth/session.ts"],
|
|
90
|
-
actual_files: ["src/auth/session.ts"],
|
|
91
|
-
duration_ms: 3000,
|
|
92
|
-
error_count: 0,
|
|
93
|
-
retry_count: 0,
|
|
94
|
-
success: true,
|
|
95
|
-
};
|
|
96
|
-
await appendEvent(outcome2, testDir);
|
|
97
|
-
|
|
98
|
-
const cases = await loadEvalCases(TEST_PROJECT_KEY, {
|
|
99
|
-
projectPath: testDir,
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
expect(cases.length).toBeGreaterThanOrEqual(1);
|
|
103
|
-
const authCase = cases.find((c) => c.input.task === "Add authentication");
|
|
104
|
-
expect(authCase).toBeDefined();
|
|
105
|
-
expect(authCase!.input.context).toBe("Next.js app");
|
|
106
|
-
expect(authCase!.expected.minSubtasks).toBe(2);
|
|
107
|
-
expect(authCase!.expected.maxSubtasks).toBe(2);
|
|
108
|
-
expect(authCase!.expected.requiredFiles).toContain("src/auth/oauth.ts");
|
|
109
|
-
expect(authCase!.expected.requiredFiles).toContain("src/auth/session.ts");
|
|
110
|
-
expect(authCase!.actual).toBeDefined();
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
it("filters by success when successOnly is true", async () => {
|
|
114
|
-
// Insert a successful decomposition
|
|
115
|
-
const successEvent: DecompositionGeneratedEvent = {
|
|
116
|
-
type: "decomposition_generated",
|
|
117
|
-
timestamp: Date.now(),
|
|
118
|
-
project_key: TEST_PROJECT_KEY,
|
|
119
|
-
epic_id: "epic-success-filter",
|
|
120
|
-
task: "Success task for filter",
|
|
121
|
-
strategy: "feature-based",
|
|
122
|
-
epic_title: "Success Epic",
|
|
123
|
-
subtasks: [{ title: "Sub", files: ["src/success.ts"], priority: 1 }],
|
|
124
|
-
};
|
|
125
|
-
await appendEvent(successEvent, testDir);
|
|
126
|
-
|
|
127
|
-
// Mark it successful
|
|
128
|
-
const successOutcome: SubtaskOutcomeEvent = {
|
|
129
|
-
type: "subtask_outcome",
|
|
130
|
-
timestamp: Date.now(),
|
|
131
|
-
project_key: TEST_PROJECT_KEY,
|
|
132
|
-
epic_id: "epic-success-filter",
|
|
133
|
-
bead_id: "epic-success-filter.1",
|
|
134
|
-
planned_files: ["src/success.ts"],
|
|
135
|
-
actual_files: ["src/success.ts"],
|
|
136
|
-
duration_ms: 1000,
|
|
137
|
-
error_count: 0,
|
|
138
|
-
retry_count: 0,
|
|
139
|
-
success: true,
|
|
140
|
-
};
|
|
141
|
-
await appendEvent(successOutcome, testDir);
|
|
142
|
-
|
|
143
|
-
// Insert a failed decomposition
|
|
144
|
-
const failEvent: DecompositionGeneratedEvent = {
|
|
145
|
-
type: "decomposition_generated",
|
|
146
|
-
timestamp: Date.now(),
|
|
147
|
-
project_key: TEST_PROJECT_KEY,
|
|
148
|
-
epic_id: "epic-fail-filter",
|
|
149
|
-
task: "Failed task for filter",
|
|
150
|
-
strategy: "feature-based",
|
|
151
|
-
epic_title: "Failed Epic",
|
|
152
|
-
subtasks: [{ title: "Sub", files: ["src/fail.ts"], priority: 1 }],
|
|
153
|
-
};
|
|
154
|
-
await appendEvent(failEvent, testDir);
|
|
155
|
-
|
|
156
|
-
// Mark it failed
|
|
157
|
-
const failOutcome: SubtaskOutcomeEvent = {
|
|
158
|
-
type: "subtask_outcome",
|
|
159
|
-
timestamp: Date.now(),
|
|
160
|
-
project_key: TEST_PROJECT_KEY,
|
|
161
|
-
epic_id: "epic-fail-filter",
|
|
162
|
-
bead_id: "epic-fail-filter.1",
|
|
163
|
-
planned_files: ["src/fail.ts"],
|
|
164
|
-
actual_files: [],
|
|
165
|
-
duration_ms: 500,
|
|
166
|
-
error_count: 3,
|
|
167
|
-
retry_count: 2,
|
|
168
|
-
success: false,
|
|
169
|
-
};
|
|
170
|
-
await appendEvent(failOutcome, testDir);
|
|
171
|
-
|
|
172
|
-
const successCases = await loadEvalCases(TEST_PROJECT_KEY, {
|
|
173
|
-
successOnly: true,
|
|
174
|
-
projectPath: testDir,
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
// Should only include successful cases
|
|
178
|
-
const failedCase = successCases.find(
|
|
179
|
-
(c) => c.input.task === "Failed task for filter",
|
|
180
|
-
);
|
|
181
|
-
expect(failedCase).toBeUndefined();
|
|
182
|
-
});
|
|
183
|
-
|
|
184
|
-
it("passes strategy filter to getEvalRecords", async () => {
|
|
185
|
-
// Insert file-based decomposition
|
|
186
|
-
const fileBasedEvent: DecompositionGeneratedEvent = {
|
|
187
|
-
type: "decomposition_generated",
|
|
188
|
-
timestamp: Date.now(),
|
|
189
|
-
project_key: TEST_PROJECT_KEY,
|
|
190
|
-
epic_id: "epic-file-based",
|
|
191
|
-
task: "File-based task",
|
|
192
|
-
strategy: "file-based",
|
|
193
|
-
epic_title: "File Epic",
|
|
194
|
-
subtasks: [{ title: "Sub", files: ["src/file.ts"], priority: 1 }],
|
|
195
|
-
};
|
|
196
|
-
await appendEvent(fileBasedEvent, testDir);
|
|
197
|
-
|
|
198
|
-
const fileBasedCases = await loadEvalCases(TEST_PROJECT_KEY, {
|
|
199
|
-
strategy: "file-based",
|
|
200
|
-
projectPath: testDir,
|
|
201
|
-
});
|
|
202
|
-
|
|
203
|
-
// All returned cases should be file-based
|
|
204
|
-
for (const c of fileBasedCases) {
|
|
205
|
-
expect(c.actual?.strategy).toBe("file-based");
|
|
206
|
-
}
|
|
207
|
-
});
|
|
208
|
-
|
|
209
|
-
it("passes limit to getEvalRecords", async () => {
|
|
210
|
-
const cases = await loadEvalCases(TEST_PROJECT_KEY, {
|
|
211
|
-
limit: 2,
|
|
212
|
-
projectPath: testDir,
|
|
213
|
-
});
|
|
214
|
-
|
|
215
|
-
expect(cases.length).toBeLessThanOrEqual(2);
|
|
216
|
-
});
|
|
217
|
-
|
|
218
|
-
it("handles records with no context", async () => {
|
|
219
|
-
const noContextEvent: DecompositionGeneratedEvent = {
|
|
220
|
-
type: "decomposition_generated",
|
|
221
|
-
timestamp: Date.now(),
|
|
222
|
-
project_key: TEST_PROJECT_KEY,
|
|
223
|
-
epic_id: "epic-no-context",
|
|
224
|
-
task: "Task without context",
|
|
225
|
-
// context is undefined
|
|
226
|
-
strategy: "feature-based",
|
|
227
|
-
epic_title: "No Context Epic",
|
|
228
|
-
subtasks: [{ title: "Sub", files: [], priority: 1 }],
|
|
229
|
-
};
|
|
230
|
-
await appendEvent(noContextEvent, testDir);
|
|
231
|
-
|
|
232
|
-
const cases = await loadEvalCases(TEST_PROJECT_KEY, {
|
|
233
|
-
projectPath: testDir,
|
|
234
|
-
});
|
|
235
|
-
const noContextCase = cases.find(
|
|
236
|
-
(c) => c.input.task === "Task without context",
|
|
237
|
-
);
|
|
238
|
-
|
|
239
|
-
expect(noContextCase).toBeDefined();
|
|
240
|
-
expect(noContextCase!.input.context).toBeUndefined();
|
|
241
|
-
});
|
|
242
|
-
});
|
|
243
|
-
|
|
244
|
-
describe("hasRealEvalData", () => {
|
|
245
|
-
it("returns true when enough records exist", async () => {
|
|
246
|
-
// We've inserted several records above, should have enough
|
|
247
|
-
const hasData = await hasRealEvalData(TEST_PROJECT_KEY, 1, testDir);
|
|
248
|
-
expect(hasData).toBe(true);
|
|
249
|
-
});
|
|
250
|
-
|
|
251
|
-
it("returns false when not enough records exist", async () => {
|
|
252
|
-
// Use a project key with no data
|
|
253
|
-
const hasData = await hasRealEvalData("nonexistent-project", 5, testDir);
|
|
254
|
-
expect(hasData).toBe(false);
|
|
255
|
-
});
|
|
256
|
-
|
|
257
|
-
it("uses custom minRecords threshold", async () => {
|
|
258
|
-
// Should have at least 1 record
|
|
259
|
-
const hasData = await hasRealEvalData(TEST_PROJECT_KEY, 1, testDir);
|
|
260
|
-
expect(hasData).toBe(true);
|
|
261
|
-
|
|
262
|
-
// Should not have 1000 records
|
|
263
|
-
const hasLotsOfData = await hasRealEvalData(
|
|
264
|
-
TEST_PROJECT_KEY,
|
|
265
|
-
1000,
|
|
266
|
-
testDir,
|
|
267
|
-
);
|
|
268
|
-
expect(hasLotsOfData).toBe(false);
|
|
269
|
-
});
|
|
270
|
-
});
|
|
271
|
-
|
|
272
|
-
describe("getEvalDataSummary", () => {
|
|
273
|
-
it("returns formatted summary with hasEnoughData flag", async () => {
|
|
274
|
-
const summary = await getEvalDataSummary(TEST_PROJECT_KEY, testDir);
|
|
275
|
-
|
|
276
|
-
expect(summary.totalRecords).toBeGreaterThanOrEqual(1);
|
|
277
|
-
expect(typeof summary.successRate).toBe("number");
|
|
278
|
-
expect(typeof summary.byStrategy).toBe("object");
|
|
279
|
-
expect(typeof summary.hasEnoughData).toBe("boolean");
|
|
280
|
-
});
|
|
281
|
-
|
|
282
|
-
it("sets hasEnoughData based on record count", async () => {
|
|
283
|
-
// Empty project should not have enough data
|
|
284
|
-
const emptySummary = await getEvalDataSummary("empty-project", testDir);
|
|
285
|
-
expect(emptySummary.hasEnoughData).toBe(false);
|
|
286
|
-
expect(emptySummary.totalRecords).toBe(0);
|
|
287
|
-
});
|
|
288
|
-
});
|
|
289
|
-
});
|
|
@@ -1,345 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for data-loader quality filters
|
|
3
|
-
*
|
|
4
|
-
* TDD approach: RED → GREEN → REFACTOR
|
|
5
|
-
*/
|
|
6
|
-
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
7
|
-
import * as fs from "node:fs";
|
|
8
|
-
import * as os from "node:os";
|
|
9
|
-
import * as path from "node:path";
|
|
10
|
-
import type { CoordinatorEvent } from "../../src/eval-capture.js";
|
|
11
|
-
import { loadCapturedSessions } from "./data-loader.js";
|
|
12
|
-
|
|
13
|
-
// Test helper: create a temp session directory
|
|
14
|
-
let tempSessionDir: string;
|
|
15
|
-
|
|
16
|
-
beforeEach(() => {
|
|
17
|
-
tempSessionDir = fs.mkdtempSync(path.join(os.tmpdir(), "test-sessions-"));
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
afterEach(() => {
|
|
21
|
-
if (fs.existsSync(tempSessionDir)) {
|
|
22
|
-
fs.rmSync(tempSessionDir, { recursive: true });
|
|
23
|
-
}
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Helper: create a session JSONL file with events
|
|
28
|
-
*/
|
|
29
|
-
function createSessionFile(
|
|
30
|
-
sessionId: string,
|
|
31
|
-
events: CoordinatorEvent[],
|
|
32
|
-
): void {
|
|
33
|
-
const filePath = path.join(tempSessionDir, `${sessionId}.jsonl`);
|
|
34
|
-
const lines = events.map((e) => JSON.stringify(e)).join("\n") + "\n";
|
|
35
|
-
fs.writeFileSync(filePath, lines, "utf-8");
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
/**
|
|
39
|
-
* Helper: create minimal events
|
|
40
|
-
*/
|
|
41
|
-
function createEvent(
|
|
42
|
-
sessionId: string,
|
|
43
|
-
epicId: string,
|
|
44
|
-
type: "DECISION" | "VIOLATION" | "OUTCOME",
|
|
45
|
-
subtype: string,
|
|
46
|
-
): CoordinatorEvent {
|
|
47
|
-
const base = {
|
|
48
|
-
session_id: sessionId,
|
|
49
|
-
epic_id: epicId,
|
|
50
|
-
timestamp: new Date().toISOString(),
|
|
51
|
-
payload: {},
|
|
52
|
-
};
|
|
53
|
-
|
|
54
|
-
if (type === "DECISION") {
|
|
55
|
-
return {
|
|
56
|
-
...base,
|
|
57
|
-
event_type: "DECISION" as const,
|
|
58
|
-
decision_type: subtype as any,
|
|
59
|
-
};
|
|
60
|
-
} else if (type === "VIOLATION") {
|
|
61
|
-
return {
|
|
62
|
-
...base,
|
|
63
|
-
event_type: "VIOLATION" as const,
|
|
64
|
-
violation_type: subtype as any,
|
|
65
|
-
};
|
|
66
|
-
} else {
|
|
67
|
-
return {
|
|
68
|
-
...base,
|
|
69
|
-
event_type: "OUTCOME" as const,
|
|
70
|
-
outcome_type: subtype as any,
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
describe("loadCapturedSessions - quality filters", () => {
|
|
76
|
-
test("filters out sessions with fewer than minEvents (default: 3)", async () => {
|
|
77
|
-
// Create sessions with different event counts
|
|
78
|
-
createSessionFile("session-2-events", [
|
|
79
|
-
createEvent("session-2-events", "epic-1", "DECISION", "worker_spawned"),
|
|
80
|
-
createEvent("session-2-events", "epic-1", "OUTCOME", "subtask_success"),
|
|
81
|
-
]);
|
|
82
|
-
|
|
83
|
-
createSessionFile("session-3-events", [
|
|
84
|
-
createEvent("session-3-events", "epic-2", "DECISION", "worker_spawned"),
|
|
85
|
-
createEvent("session-3-events", "epic-2", "DECISION", "review_completed"),
|
|
86
|
-
createEvent("session-3-events", "epic-2", "OUTCOME", "subtask_success"),
|
|
87
|
-
]);
|
|
88
|
-
|
|
89
|
-
createSessionFile("session-5-events", [
|
|
90
|
-
createEvent("session-5-events", "epic-3", "DECISION", "worker_spawned"),
|
|
91
|
-
createEvent("session-5-events", "epic-3", "DECISION", "review_completed"),
|
|
92
|
-
createEvent("session-5-events", "epic-3", "OUTCOME", "subtask_success"),
|
|
93
|
-
createEvent("session-5-events", "epic-3", "OUTCOME", "subtask_success"),
|
|
94
|
-
createEvent("session-5-events", "epic-3", "OUTCOME", "epic_complete"),
|
|
95
|
-
]);
|
|
96
|
-
|
|
97
|
-
const sessions = await loadCapturedSessions({
|
|
98
|
-
minEvents: 3,
|
|
99
|
-
sessionDir: tempSessionDir,
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
// Should only get sessions with >= 3 events
|
|
103
|
-
expect(sessions.length).toBe(2);
|
|
104
|
-
expect(
|
|
105
|
-
sessions.some((s) => s.session.session_id === "session-3-events"),
|
|
106
|
-
).toBe(true);
|
|
107
|
-
expect(
|
|
108
|
-
sessions.some((s) => s.session.session_id === "session-5-events"),
|
|
109
|
-
).toBe(true);
|
|
110
|
-
expect(
|
|
111
|
-
sessions.some((s) => s.session.session_id === "session-2-events"),
|
|
112
|
-
).toBe(false);
|
|
113
|
-
});
|
|
114
|
-
|
|
115
|
-
test("filters out sessions without worker_spawned event when requireWorkerSpawn=true", async () => {
|
|
116
|
-
// Session WITH worker_spawned
|
|
117
|
-
createSessionFile("session-with-spawn", [
|
|
118
|
-
createEvent("session-with-spawn", "epic-1", "DECISION", "worker_spawned"),
|
|
119
|
-
createEvent(
|
|
120
|
-
"session-with-spawn",
|
|
121
|
-
"epic-1",
|
|
122
|
-
"DECISION",
|
|
123
|
-
"review_completed",
|
|
124
|
-
),
|
|
125
|
-
createEvent("session-with-spawn", "epic-1", "OUTCOME", "subtask_success"),
|
|
126
|
-
]);
|
|
127
|
-
|
|
128
|
-
// Session WITHOUT worker_spawned
|
|
129
|
-
createSessionFile("session-no-spawn", [
|
|
130
|
-
createEvent(
|
|
131
|
-
"session-no-spawn",
|
|
132
|
-
"epic-2",
|
|
133
|
-
"DECISION",
|
|
134
|
-
"strategy_selected",
|
|
135
|
-
),
|
|
136
|
-
createEvent(
|
|
137
|
-
"session-no-spawn",
|
|
138
|
-
"epic-2",
|
|
139
|
-
"DECISION",
|
|
140
|
-
"decomposition_complete",
|
|
141
|
-
),
|
|
142
|
-
createEvent("session-no-spawn", "epic-2", "OUTCOME", "epic_complete"),
|
|
143
|
-
]);
|
|
144
|
-
|
|
145
|
-
const sessions = await loadCapturedSessions({
|
|
146
|
-
requireWorkerSpawn: true,
|
|
147
|
-
sessionDir: tempSessionDir,
|
|
148
|
-
});
|
|
149
|
-
|
|
150
|
-
expect(sessions.length).toBe(1);
|
|
151
|
-
expect(sessions[0]?.session.session_id).toBe("session-with-spawn");
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
test("filters out sessions without review_completed event when requireReview=true", async () => {
|
|
155
|
-
// Session WITH review
|
|
156
|
-
createSessionFile("session-with-review", [
|
|
157
|
-
createEvent(
|
|
158
|
-
"session-with-review",
|
|
159
|
-
"epic-1",
|
|
160
|
-
"DECISION",
|
|
161
|
-
"worker_spawned",
|
|
162
|
-
),
|
|
163
|
-
createEvent(
|
|
164
|
-
"session-with-review",
|
|
165
|
-
"epic-1",
|
|
166
|
-
"DECISION",
|
|
167
|
-
"review_completed",
|
|
168
|
-
),
|
|
169
|
-
createEvent("session-with-review", "epic-1", "OUTCOME", "subtask_success"),
|
|
170
|
-
]);
|
|
171
|
-
|
|
172
|
-
// Session WITHOUT review
|
|
173
|
-
createSessionFile("session-no-review", [
|
|
174
|
-
createEvent("session-no-review", "epic-2", "DECISION", "worker_spawned"),
|
|
175
|
-
createEvent("session-no-review", "epic-2", "OUTCOME", "subtask_success"),
|
|
176
|
-
createEvent("session-no-review", "epic-2", "OUTCOME", "epic_complete"),
|
|
177
|
-
]);
|
|
178
|
-
|
|
179
|
-
const sessions = await loadCapturedSessions({
|
|
180
|
-
requireReview: true,
|
|
181
|
-
sessionDir: tempSessionDir,
|
|
182
|
-
});
|
|
183
|
-
|
|
184
|
-
expect(sessions.length).toBe(1);
|
|
185
|
-
expect(sessions[0]?.session.session_id).toBe("session-with-review");
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
test("allows disabling filters individually", async () => {
|
|
189
|
-
// Session with only 2 events, no worker_spawned, no review
|
|
190
|
-
createSessionFile("session-low-quality", [
|
|
191
|
-
createEvent(
|
|
192
|
-
"session-low-quality",
|
|
193
|
-
"epic-1",
|
|
194
|
-
"DECISION",
|
|
195
|
-
"strategy_selected",
|
|
196
|
-
),
|
|
197
|
-
createEvent("session-low-quality", "epic-1", "OUTCOME", "epic_complete"),
|
|
198
|
-
]);
|
|
199
|
-
|
|
200
|
-
// Disable all filters
|
|
201
|
-
const sessions = await loadCapturedSessions({
|
|
202
|
-
minEvents: 0,
|
|
203
|
-
requireWorkerSpawn: false,
|
|
204
|
-
requireReview: false,
|
|
205
|
-
sessionDir: tempSessionDir,
|
|
206
|
-
});
|
|
207
|
-
|
|
208
|
-
expect(sessions.length).toBe(1);
|
|
209
|
-
expect(sessions[0]?.session.session_id).toBe("session-low-quality");
|
|
210
|
-
});
|
|
211
|
-
|
|
212
|
-
test("applies limit AFTER filtering", async () => {
|
|
213
|
-
// Create 5 high-quality sessions
|
|
214
|
-
for (let i = 1; i <= 5; i++) {
|
|
215
|
-
createSessionFile(`session-${i}`, [
|
|
216
|
-
createEvent(`session-${i}`, `epic-${i}`, "DECISION", "worker_spawned"),
|
|
217
|
-
createEvent(
|
|
218
|
-
`session-${i}`,
|
|
219
|
-
`epic-${i}`,
|
|
220
|
-
"DECISION",
|
|
221
|
-
"review_completed",
|
|
222
|
-
),
|
|
223
|
-
createEvent(`session-${i}`, `epic-${i}`, "OUTCOME", "subtask_success"),
|
|
224
|
-
]);
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
// Create 3 low-quality sessions (will be filtered out)
|
|
228
|
-
for (let i = 6; i <= 8; i++) {
|
|
229
|
-
createSessionFile(`session-${i}`, [
|
|
230
|
-
createEvent(`session-${i}`, `epic-${i}`, "DECISION", "strategy_selected"),
|
|
231
|
-
]);
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// Filter first (remove 3 low-quality), then limit to 2
|
|
235
|
-
const sessions = await loadCapturedSessions({
|
|
236
|
-
minEvents: 3,
|
|
237
|
-
requireWorkerSpawn: true,
|
|
238
|
-
requireReview: true,
|
|
239
|
-
limit: 2,
|
|
240
|
-
sessionDir: tempSessionDir,
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
// Should get 2 sessions from the 5 high-quality ones
|
|
244
|
-
expect(sessions.length).toBe(2);
|
|
245
|
-
expect(sessions.every((s) => s.session.events.length >= 3)).toBe(true);
|
|
246
|
-
});
|
|
247
|
-
|
|
248
|
-
test("combines all filters correctly", async () => {
|
|
249
|
-
// High-quality session (passes all filters)
|
|
250
|
-
createSessionFile("session-high-quality", [
|
|
251
|
-
createEvent(
|
|
252
|
-
"session-high-quality",
|
|
253
|
-
"epic-1",
|
|
254
|
-
"DECISION",
|
|
255
|
-
"worker_spawned",
|
|
256
|
-
),
|
|
257
|
-
createEvent(
|
|
258
|
-
"session-high-quality",
|
|
259
|
-
"epic-1",
|
|
260
|
-
"DECISION",
|
|
261
|
-
"review_completed",
|
|
262
|
-
),
|
|
263
|
-
createEvent("session-high-quality", "epic-1", "OUTCOME", "subtask_success"),
|
|
264
|
-
createEvent("session-high-quality", "epic-1", "OUTCOME", "epic_complete"),
|
|
265
|
-
]);
|
|
266
|
-
|
|
267
|
-
// Missing worker_spawned
|
|
268
|
-
createSessionFile("session-no-spawn", [
|
|
269
|
-
createEvent(
|
|
270
|
-
"session-no-spawn",
|
|
271
|
-
"epic-2",
|
|
272
|
-
"DECISION",
|
|
273
|
-
"review_completed",
|
|
274
|
-
),
|
|
275
|
-
createEvent("session-no-spawn", "epic-2", "OUTCOME", "subtask_success"),
|
|
276
|
-
createEvent("session-no-spawn", "epic-2", "OUTCOME", "epic_complete"),
|
|
277
|
-
]);
|
|
278
|
-
|
|
279
|
-
// Missing review_completed
|
|
280
|
-
createSessionFile("session-no-review", [
|
|
281
|
-
createEvent("session-no-review", "epic-3", "DECISION", "worker_spawned"),
|
|
282
|
-
createEvent("session-no-review", "epic-3", "OUTCOME", "subtask_success"),
|
|
283
|
-
createEvent("session-no-review", "epic-3", "OUTCOME", "epic_complete"),
|
|
284
|
-
]);
|
|
285
|
-
|
|
286
|
-
// Too few events
|
|
287
|
-
createSessionFile("session-too-few", [
|
|
288
|
-
createEvent("session-too-few", "epic-4", "DECISION", "worker_spawned"),
|
|
289
|
-
createEvent("session-too-few", "epic-4", "DECISION", "review_completed"),
|
|
290
|
-
]);
|
|
291
|
-
|
|
292
|
-
const sessions = await loadCapturedSessions({
|
|
293
|
-
minEvents: 3,
|
|
294
|
-
requireWorkerSpawn: true,
|
|
295
|
-
requireReview: true,
|
|
296
|
-
sessionDir: tempSessionDir,
|
|
297
|
-
});
|
|
298
|
-
|
|
299
|
-
// Only high-quality session should pass
|
|
300
|
-
expect(sessions.length).toBe(1);
|
|
301
|
-
expect(sessions[0]?.session.session_id).toBe("session-high-quality");
|
|
302
|
-
});
|
|
303
|
-
|
|
304
|
-
test("defaults are: minEvents=3, requireWorkerSpawn=true, requireReview=true", async () => {
|
|
305
|
-
// Create one session that meets defaults
|
|
306
|
-
createSessionFile("session-meets-defaults", [
|
|
307
|
-
createEvent(
|
|
308
|
-
"session-meets-defaults",
|
|
309
|
-
"epic-1",
|
|
310
|
-
"DECISION",
|
|
311
|
-
"worker_spawned",
|
|
312
|
-
),
|
|
313
|
-
createEvent(
|
|
314
|
-
"session-meets-defaults",
|
|
315
|
-
"epic-1",
|
|
316
|
-
"DECISION",
|
|
317
|
-
"review_completed",
|
|
318
|
-
),
|
|
319
|
-
createEvent(
|
|
320
|
-
"session-meets-defaults",
|
|
321
|
-
"epic-1",
|
|
322
|
-
"OUTCOME",
|
|
323
|
-
"subtask_success",
|
|
324
|
-
),
|
|
325
|
-
]);
|
|
326
|
-
|
|
327
|
-
// Create one that doesn't
|
|
328
|
-
createSessionFile("session-fails-defaults", [
|
|
329
|
-
createEvent(
|
|
330
|
-
"session-fails-defaults",
|
|
331
|
-
"epic-2",
|
|
332
|
-
"DECISION",
|
|
333
|
-
"strategy_selected",
|
|
334
|
-
),
|
|
335
|
-
]);
|
|
336
|
-
|
|
337
|
-
// Call with NO options except sessionDir - should use defaults
|
|
338
|
-
const sessions = await loadCapturedSessions({
|
|
339
|
-
sessionDir: tempSessionDir,
|
|
340
|
-
});
|
|
341
|
-
|
|
342
|
-
expect(sessions.length).toBe(1);
|
|
343
|
-
expect(sessions[0]?.session.session_id).toBe("session-meets-defaults");
|
|
344
|
-
});
|
|
345
|
-
});
|