opencode-swarm-plugin 0.40.0 → 0.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
- package/.hive/analysis/session-data-quality-audit.md +320 -0
- package/.hive/eval-results.json +481 -24
- package/.hive/issues.jsonl +67 -16
- package/.hive/memories.jsonl +159 -1
- package/.opencode/eval-history.jsonl +315 -0
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +165 -0
- package/README.md +2 -0
- package/SCORER-ANALYSIS.md +598 -0
- package/bin/eval-gate.test.ts +158 -0
- package/bin/eval-gate.ts +74 -0
- package/bin/swarm.serve.test.ts +46 -0
- package/bin/swarm.test.ts +661 -732
- package/bin/swarm.ts +335 -0
- package/dist/compaction-hook.d.ts +7 -5
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-prompt-scoring.d.ts +1 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -1
- package/dist/eval-runner.d.ts +134 -0
- package/dist/eval-runner.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +29 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +99741 -58858
- package/dist/memory-tools.d.ts +70 -2
- package/dist/memory-tools.d.ts.map +1 -1
- package/dist/memory.d.ts +37 -0
- package/dist/memory.d.ts.map +1 -1
- package/dist/observability-tools.d.ts +64 -0
- package/dist/observability-tools.d.ts.map +1 -1
- package/dist/plugin.js +99356 -58318
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +32 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
- package/evals/ARCHITECTURE.md +1189 -0
- package/evals/example.eval.ts +3 -4
- package/evals/fixtures/compaction-prompt-cases.ts +6 -0
- package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
- package/evals/scorers/coordinator-discipline.ts +0 -323
- package/evals/swarm-decomposition.eval.ts +4 -2
- package/package.json +4 -3
- package/src/compaction-prompt-scorers.test.ts +185 -9
- package/src/compaction-prompt-scoring.ts +7 -5
- package/src/eval-runner.test.ts +128 -1
- package/src/eval-runner.ts +46 -0
- package/src/hive.ts +43 -42
- package/src/memory-tools.test.ts +84 -0
- package/src/memory-tools.ts +68 -3
- package/src/memory.test.ts +2 -112
- package/src/memory.ts +88 -49
- package/src/observability-tools.test.ts +13 -0
- package/src/observability-tools.ts +277 -0
- package/src/swarm-orchestrate.test.ts +162 -0
- package/src/swarm-orchestrate.ts +7 -5
- package/src/swarm-prompts.test.ts +168 -4
- package/src/swarm-prompts.ts +228 -7
- package/.env +0 -2
- package/.turbo/turbo-test.log +0 -481
- package/.turbo/turbo-typecheck.log +0 -1
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Tests for eval-gate CLI
|
|
4
|
+
*
|
|
5
|
+
* TDD: Write tests first to verify behavior before implementing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, test, expect, beforeEach, mock } from "bun:test";
|
|
9
|
+
import type { RunEvalsResult } from "../src/eval-runner.js";
|
|
10
|
+
|
|
11
|
+
// Stand-in for process.exit: rather than terminating the test runner it throws
// an Error whose message is "EXIT:<code>" (the code defaults to 0 when omitted).
// NOTE(review): nothing in this file assigns the mock to process.exit — confirm
// whether that wiring is still planned.
const mockExit = mock((status?: number) => {
  const reported = status ?? 0;
  throw new Error(`EXIT:${reported}`);
});

// Clear recorded calls so each test starts with a fresh mock.
beforeEach(() => {
  mockExit.mockClear();
});
|
|
19
|
+
|
|
20
|
+
describe("eval-gate CLI", () => {
  /**
   * Mirror of the exit decision in bin/eval-gate.ts: exit 1 when any gate
   * failed or the run reported `success: false`, otherwise exit 0.
   *
   * The CLI runs (and calls process.exit) at import time, so these tests
   * exercise a copy of its logic rather than the script itself.
   */
  const exitCodeFor = (result: RunEvalsResult): number => {
    const failedGates = result.gateResults?.filter((g) => !g.passed) ?? [];
    return failedGates.length > 0 || !result.success ? 1 : 0;
  };

  /**
   * Mirror of the CLI's flag loop: `--suite <name>` and `--threshold <n>`
   * each consume the following argument; a flag without a value is ignored.
   */
  const parseArgs = (
    args: string[],
  ): { suiteFilter: string | undefined; scoreThreshold: number | undefined } => {
    let suiteFilter: string | undefined;
    let scoreThreshold: number | undefined;

    for (let i = 0; i < args.length; i++) {
      const value = args[i + 1];
      if (!value) continue;

      if (args[i] === "--suite") {
        suiteFilter = value;
        i++;
      } else if (args[i] === "--threshold") {
        scoreThreshold = parseInt(value, 10);
        i++;
      }
    }

    return { suiteFilter, scoreThreshold };
  };

  test("exits 0 when all gates pass", () => {
    const mockResult: RunEvalsResult = {
      success: true,
      totalSuites: 2,
      totalEvals: 10,
      averageScore: 0.95,
      suites: [],
      gateResults: [
        {
          suite: "example",
          passed: true,
          phase: "production",
          message: "Passed",
          currentScore: 0.95,
        },
      ],
    };

    expect(exitCodeFor(mockResult)).toBe(0);
  });

  test("exits 1 when gates fail", () => {
    const mockResult: RunEvalsResult = {
      success: false,
      totalSuites: 2,
      totalEvals: 10,
      averageScore: 0.45,
      suites: [],
      gateResults: [
        {
          suite: "coordinator",
          passed: false,
          phase: "production",
          message: "Regression detected",
          currentScore: 0.45,
          baseline: 0.85,
          regressionPercent: -47,
        },
      ],
    };

    expect(exitCodeFor(mockResult)).toBe(1);
  });

  test("exits 1 when threshold check fails", () => {
    const mockResult: RunEvalsResult = {
      success: false, // Threshold failed
      totalSuites: 2,
      totalEvals: 10,
      averageScore: 0.65, // Below threshold of 80
      suites: [],
      gateResults: [],
    };

    expect(exitCodeFor(mockResult)).toBe(1);
  });

  test("parses --suite argument", () => {
    const { suiteFilter } = parseArgs(["--suite", "coordinator"]);

    expect(suiteFilter).toBe("coordinator");
  });

  test("parses --threshold argument", () => {
    const { scoreThreshold } = parseArgs(["--threshold", "85"]);

    expect(scoreThreshold).toBe(85);
  });

  test("handles missing arguments gracefully", () => {
    const { suiteFilter, scoreThreshold } = parseArgs([]);

    expect(suiteFilter).toBeUndefined();
    expect(scoreThreshold).toBeUndefined();
  });
});
|
package/bin/eval-gate.ts
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Eval Gate CLI - Run evals and fail on regression
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* bun run bin/eval-gate.ts # Run all evals
|
|
7
|
+
* bun run bin/eval-gate.ts --suite coordinator # Run specific suite
|
|
8
|
+
* bun run bin/eval-gate.ts --threshold 80 # Custom score threshold
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { runEvals } from "../src/eval-runner.js";
|
|
12
|
+
|
|
13
|
+
const args = process.argv.slice(2);

/**
 * Parse the eval-gate command-line flags.
 *
 * Recognised flags, each of which consumes the following argument as its value:
 *   --suite <name>     run a specific suite
 *   --threshold <n>    custom score threshold
 *
 * A flag with no following value, and any unrecognised argument, is ignored.
 *
 * @param argv - Command-line arguments after the script path.
 * @returns The parsed options; a field is `undefined` when its flag was absent.
 * @throws {Error} When the value given to --threshold does not parse as a
 *   number. Previously this produced NaN, which was handed to the eval runner
 *   without any warning.
 */
function parseGateArgs(argv: readonly string[]): {
  suiteFilter: string | undefined;
  scoreThreshold: number | undefined;
} {
  let suite: string | undefined;
  let threshold: number | undefined;

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    const value = argv[i + 1];
    // A flag needs a non-empty value after it; otherwise it is skipped.
    if (!value) continue;

    if (flag === "--suite") {
      suite = value;
      i++; // skip the consumed value
    } else if (flag === "--threshold") {
      const parsed = parseInt(value, 10);
      if (Number.isNaN(parsed)) {
        throw new Error(
          `Invalid --threshold value: "${value}" (expected a number)`,
        );
      }
      threshold = parsed;
      i++; // skip the consumed value
    }
  }

  return { suiteFilter: suite, scoreThreshold: threshold };
}

// Parse args
let suiteFilter: string | undefined;
let scoreThreshold: number | undefined;

try {
  ({ suiteFilter, scoreThreshold } = parseGateArgs(args));
} catch (err) {
  // A malformed gate configuration must fail the run, not weaken the gate.
  console.error(err instanceof Error ? `❌ ${err.message}` : err);
  process.exit(1);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Run the evals, print a summary and per-gate results, then terminate the
 * process: exit code 1 if any gate failed or the run did not succeed,
 * exit code 0 otherwise.
 */
async function main() {
  console.log("🔍 Running eval gates...\n");

  const outcome = await runEvals({
    cwd: process.cwd(),
    suiteFilter,
    scoreThreshold,
  });

  // Summary of the whole run
  const averagePercent = (outcome.averageScore * 100).toFixed(1);
  console.log(`📊 Results:`);
  console.log(` Suites: ${outcome.totalSuites}`);
  console.log(` Evals: ${outcome.totalEvals}`);
  console.log(` Average Score: ${averagePercent}%\n`);

  // One line per gate, only when the runner reported any gates
  const gates = outcome.gateResults ?? [];
  if (gates.length > 0) {
    console.log("🚦 Gate Results:");
    gates.forEach((gate) => {
      const icon = gate.passed ? "✅" : "❌";
      console.log(` ${icon} ${gate.suite}: ${gate.message}`);
    });
    console.log("");
  }

  // Gate failures are reported before the overall threshold failure
  const failedCount = gates.filter((gate) => !gate.passed).length;
  if (failedCount > 0) {
    console.error(`❌ ${failedCount} gate(s) failed!`);
    process.exit(1);
  }

  if (!outcome.success) {
    console.error(`❌ Evals failed threshold check`);
    process.exit(1);
  }

  console.log("✅ All gates passed!");
  process.exit(0);
}

// Any rejection from main() is reported and fails the process.
main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for `swarm serve` command
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, test, expect } from "bun:test";
|
|
6
|
+
import { spawn } from "bun";
|
|
7
|
+
|
|
8
|
+
describe("swarm serve command", () => {
  /** Port the tests expect when no --port flag is supplied. */
  const DEFAULT_PORT = 3001;

  /**
   * Resolve the port from a CLI argument list: the value following `--port`
   * parsed as a base-10 integer, or DEFAULT_PORT when the flag is absent.
   */
  const resolvePort = (args: string[]): number => {
    const flagIndex = args.indexOf("--port");
    if (flagIndex === -1) return DEFAULT_PORT;
    // Explicit radix so the value is always read as decimal.
    return Number.parseInt(args[flagIndex + 1] ?? "", 10);
  };

  test("serve command accepts custom port via --port flag", () => {
    // Verify that CLI parsing works for custom port
    expect(resolvePort(["serve", "--port", "8080"])).toBe(8080);
  });

  test("serve command defaults to port 3001", () => {
    expect(resolvePort(["serve"])).toBe(3001);
  });

  test("serve command uses project path from CWD", () => {
    const projectPath = process.cwd();
    expect(projectPath).toBeDefined();
    expect(typeof projectPath).toBe("string");
  });

  test("serve command appears in help text", async () => {
    const proc = spawn(["bun", "run", "bin/swarm.ts", "help"], {
      stdout: "pipe",
      stderr: "pipe",
    });

    // Drain both pipes and wait for the child to exit so the test neither
    // leaves a dangling process nor stalls on an unread stderr pipe.
    const [output] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ]);

    expect(output).toContain("swarm serve");
    expect(output).toContain("Start SSE server");
    expect(output).toContain("--port");
  });
});
|