opencode-swarm-plugin 0.40.0 → 0.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
  2. package/.hive/analysis/session-data-quality-audit.md +320 -0
  3. package/.hive/eval-results.json +481 -24
  4. package/.hive/issues.jsonl +67 -16
  5. package/.hive/memories.jsonl +159 -1
  6. package/.opencode/eval-history.jsonl +315 -0
  7. package/.turbo/turbo-build.log +5 -5
  8. package/CHANGELOG.md +165 -0
  9. package/README.md +2 -0
  10. package/SCORER-ANALYSIS.md +598 -0
  11. package/bin/eval-gate.test.ts +158 -0
  12. package/bin/eval-gate.ts +74 -0
  13. package/bin/swarm.serve.test.ts +46 -0
  14. package/bin/swarm.test.ts +661 -732
  15. package/bin/swarm.ts +335 -0
  16. package/dist/compaction-hook.d.ts +7 -5
  17. package/dist/compaction-hook.d.ts.map +1 -1
  18. package/dist/compaction-prompt-scoring.d.ts +1 -0
  19. package/dist/compaction-prompt-scoring.d.ts.map +1 -1
  20. package/dist/eval-runner.d.ts +134 -0
  21. package/dist/eval-runner.d.ts.map +1 -0
  22. package/dist/hive.d.ts.map +1 -1
  23. package/dist/index.d.ts +29 -0
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +99741 -58858
  26. package/dist/memory-tools.d.ts +70 -2
  27. package/dist/memory-tools.d.ts.map +1 -1
  28. package/dist/memory.d.ts +37 -0
  29. package/dist/memory.d.ts.map +1 -1
  30. package/dist/observability-tools.d.ts +64 -0
  31. package/dist/observability-tools.d.ts.map +1 -1
  32. package/dist/plugin.js +99356 -58318
  33. package/dist/swarm-orchestrate.d.ts.map +1 -1
  34. package/dist/swarm-prompts.d.ts +32 -1
  35. package/dist/swarm-prompts.d.ts.map +1 -1
  36. package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
  37. package/evals/ARCHITECTURE.md +1189 -0
  38. package/evals/example.eval.ts +3 -4
  39. package/evals/fixtures/compaction-prompt-cases.ts +6 -0
  40. package/evals/scorers/coordinator-discipline.evalite-test.ts +1 -162
  41. package/evals/scorers/coordinator-discipline.ts +0 -323
  42. package/evals/swarm-decomposition.eval.ts +4 -2
  43. package/package.json +4 -3
  44. package/src/compaction-prompt-scorers.test.ts +185 -9
  45. package/src/compaction-prompt-scoring.ts +7 -5
  46. package/src/eval-runner.test.ts +128 -1
  47. package/src/eval-runner.ts +46 -0
  48. package/src/hive.ts +43 -42
  49. package/src/memory-tools.test.ts +84 -0
  50. package/src/memory-tools.ts +68 -3
  51. package/src/memory.test.ts +2 -112
  52. package/src/memory.ts +88 -49
  53. package/src/observability-tools.test.ts +13 -0
  54. package/src/observability-tools.ts +277 -0
  55. package/src/swarm-orchestrate.test.ts +162 -0
  56. package/src/swarm-orchestrate.ts +7 -5
  57. package/src/swarm-prompts.test.ts +168 -4
  58. package/src/swarm-prompts.ts +228 -7
  59. package/.env +0 -2
  60. package/.turbo/turbo-test.log +0 -481
  61. package/.turbo/turbo-typecheck.log +0 -1
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Tests for eval-gate CLI
4
+ *
5
+ * TDD: Write tests first to verify behavior before implementing.
6
+ */
7
+
8
+ import { describe, test, expect, beforeEach, mock } from "bun:test";
9
+ import type { RunEvalsResult } from "../src/eval-runner.js";
10
+
11
/**
 * Test double meant to stand in for `process.exit`: instead of ending the
 * test run it throws an Error whose message is `EXIT:<code>`, using 0 when no
 * code is supplied.
 *
 * NOTE(review): nothing visible in this file installs the mock over
 * `process.exit` — confirm whether that wiring is still pending.
 */
const mockExit = mock(function exitStub(code?: number) {
  const reported = code ?? 0;
  throw new Error(`EXIT:${reported}`);
});

/** Clears the mock's recorded calls so every test starts from a clean slate. */
function resetExitMock(): void {
  mockExit.mockClear();
}

beforeEach(resetExitMock);
19
+
20
describe("eval-gate CLI", () => {
  /**
   * Exit-code rule pinned by these tests: 1 when any gate failed or the
   * overall run is unsuccessful, otherwise 0.
   *
   * This restates the rule locally instead of invoking the CLI; the tests
   * therefore document the expected rule rather than exercise the script.
   */
  function exitCodeFor(result: RunEvalsResult): number {
    const failedGates = result.gateResults?.filter((g) => !g.passed) ?? [];
    return failedGates.length > 0 || !result.success ? 1 : 0;
  }

  /**
   * Argument-parsing rule pinned by these tests: `--suite <name>` and
   * `--threshold <n>` each consume the following token; a flag with no
   * following value is ignored.
   */
  function parseCliArgs(args: readonly string[]): {
    suiteFilter: string | undefined;
    scoreThreshold: number | undefined;
  } {
    let suiteFilter: string | undefined;
    let scoreThreshold: number | undefined;

    for (let i = 0; i < args.length; i++) {
      const value = args[i + 1];
      if (args[i] === "--suite" && value) {
        suiteFilter = value;
        i++; // skip the value we just consumed
      } else if (args[i] === "--threshold" && value) {
        scoreThreshold = parseInt(value, 10);
        i++; // skip the value we just consumed
      }
    }

    return { suiteFilter, scoreThreshold };
  }

  test("exits 0 when all gates pass", () => {
    const mockResult: RunEvalsResult = {
      success: true,
      totalSuites: 2,
      totalEvals: 10,
      averageScore: 0.95,
      suites: [],
      gateResults: [
        {
          suite: "example",
          passed: true,
          phase: "production",
          message: "Passed",
          currentScore: 0.95,
        },
      ],
    };

    expect(exitCodeFor(mockResult)).toBe(0);
  });

  test("exits 1 when gates fail", () => {
    const mockResult: RunEvalsResult = {
      success: false,
      totalSuites: 2,
      totalEvals: 10,
      averageScore: 0.45,
      suites: [],
      gateResults: [
        {
          suite: "coordinator",
          passed: false,
          phase: "production",
          message: "Regression detected",
          currentScore: 0.45,
          baseline: 0.85,
          regressionPercent: -47,
        },
      ],
    };

    expect(exitCodeFor(mockResult)).toBe(1);
  });

  test("exits 1 when threshold check fails", () => {
    const mockResult: RunEvalsResult = {
      success: false, // Threshold failed
      totalSuites: 2,
      totalEvals: 10,
      averageScore: 0.65, // Below threshold of 80
      suites: [],
      gateResults: [],
    };

    expect(exitCodeFor(mockResult)).toBe(1);
  });

  test("parses --suite argument", () => {
    const { suiteFilter } = parseCliArgs(["--suite", "coordinator"]);

    expect(suiteFilter).toBe("coordinator");
  });

  test("parses --threshold argument", () => {
    const { scoreThreshold } = parseCliArgs(["--threshold", "85"]);

    expect(scoreThreshold).toBe(85);
  });

  test("handles missing arguments gracefully", () => {
    const { suiteFilter, scoreThreshold } = parseCliArgs([]);

    expect(suiteFilter).toBeUndefined();
    expect(scoreThreshold).toBeUndefined();
  });
});
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Eval Gate CLI - Run evals and fail on regression
4
+ *
5
+ * Usage:
6
+ * bun run bin/eval-gate.ts # Run all evals
7
+ * bun run bin/eval-gate.ts --suite coordinator # Run specific suite
8
+ * bun run bin/eval-gate.ts --threshold 80 # Custom score threshold
9
+ */
10
+
11
+ import { runEvals } from "../src/eval-runner.js";
12
+
13
const args = process.argv.slice(2);

/** Options recognised on the eval-gate command line. */
interface GateCliOptions {
  /** Token that followed `--suite`, if one was given. */
  suiteFilter: string | undefined;
  /** Integer parsed from the token that followed `--threshold`, if one was given. */
  scoreThreshold: number | undefined;
}

/**
 * Parse `--suite <name>` and `--threshold <n>` out of a raw argument list.
 *
 * Each flag consumes the token after it; a flag with no following token is
 * ignored, unknown tokens are skipped, and the last occurrence of a repeated
 * flag wins.
 *
 * @param argv - Command-line tokens without the runtime and script path.
 * @returns The options found; a field stays `undefined` when its flag is absent.
 * @throws Error when the token after `--threshold` does not start with an
 *   integer. Passing the resulting `NaN` on would silently hand the eval
 *   runner a meaningless threshold.
 */
function parseGateArgs(argv: readonly string[]): GateCliOptions {
  const options: GateCliOptions = {
    suiteFilter: undefined,
    scoreThreshold: undefined,
  };

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    const value = argv[i + 1];
    if (!value) continue; // a flag needs a following value to take effect

    if (flag === "--suite") {
      options.suiteFilter = value;
      i++; // skip the value we just consumed
    } else if (flag === "--threshold") {
      const parsed = parseInt(value, 10);
      if (Number.isNaN(parsed)) {
        throw new Error(
          `Invalid --threshold value "${value}": expected an integer`,
        );
      }
      options.scoreThreshold = parsed;
      i++; // skip the value we just consumed
    }
  }

  return options;
}

// Parse args
const { suiteFilter, scoreThreshold } = parseGateArgs(args);
28
+
29
/**
 * Run the evals with the parsed CLI options, print a summary and the
 * per-suite gate outcomes, then terminate the process: exit status 1 when any
 * gate failed or the run is reported as unsuccessful, otherwise 0.
 */
async function main() {
  console.log("🔍 Running eval gates...\n");

  const outcome = await runEvals({
    cwd: process.cwd(),
    suiteFilter,
    scoreThreshold,
  });

  // Summary block; averageScore is shown as a percentage with one decimal.
  console.log(`📊 Results:`);
  console.log(` Suites: ${outcome.totalSuites}`);
  console.log(` Evals: ${outcome.totalEvals}`);
  console.log(` Average Score: ${(outcome.averageScore * 100).toFixed(1)}%\n`);

  // A run without gate results is treated the same as an empty gate list.
  const gates = outcome.gateResults ?? [];

  if (gates.length > 0) {
    console.log("🚦 Gate Results:");
    gates.forEach((gate) => {
      let icon: string;
      if (gate.passed) {
        icon = "✅";
      } else {
        icon = "❌";
      }
      console.log(` ${icon} ${gate.suite}: ${gate.message}`);
    });
    console.log("");
  }

  // Gate failures are reported before the overall threshold check.
  const failedCount = gates.filter((gate) => !gate.passed).length;
  if (failedCount > 0) {
    console.error(`❌ ${failedCount} gate(s) failed!`);
    process.exit(1);
  }

  if (!outcome.success) {
    console.error(`❌ Evals failed threshold check`);
    process.exit(1);
  }

  console.log("✅ All gates passed!");
  process.exit(0);
}
70
+
71
/**
 * Last-resort handler for a rejection escaping main(): log it with a
 * "Fatal error:" prefix and terminate with exit status 1.
 */
function reportFatal(reason: unknown): never {
  console.error("Fatal error:", reason);
  process.exit(1);
}

main().catch(reportFatal);
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Tests for `swarm serve` command
3
+ */
4
+
5
+ import { describe, test, expect } from "bun:test";
6
+ import { spawn } from "bun";
7
+
8
describe("swarm serve command", () => {
  /** Port the tests expect when `--port` is not supplied. */
  const DEFAULT_PORT = 3001;

  /**
   * Port-selection rule pinned by these tests: the token after `--port`
   * parsed as a base-10 integer, or the default when the flag is absent.
   *
   * This restates the rule locally; it does not call into bin/swarm.ts.
   */
  function resolvePort(args: readonly string[]): number {
    const flagIndex = args.indexOf("--port");
    if (flagIndex === -1) {
      return DEFAULT_PORT;
    }
    // Explicit radix so input such as "0x50" is never read as hexadecimal.
    return Number.parseInt(args[flagIndex + 1] ?? "", 10);
  }

  test("serve command accepts custom port via --port flag", () => {
    // Verify that CLI parsing works for custom port
    expect(resolvePort(["serve", "--port", "8080"])).toBe(8080);
  });

  test("serve command defaults to port 3001", () => {
    expect(resolvePort(["serve"])).toBe(3001);
  });

  test("serve command uses project path from CWD", () => {
    const projectPath = process.cwd();
    expect(projectPath).toBeDefined();
    expect(typeof projectPath).toBe("string");
  });

  test("serve command appears in help text", async () => {
    const proc = spawn(["bun", "run", "bin/swarm.ts", "help"], {
      stdout: "pipe",
      stderr: "pipe",
    });

    const output = await new Response(proc.stdout).text();
    // Wait for the child to finish so it does not outlive the test.
    await proc.exited;

    expect(output).toContain("swarm serve");
    expect(output).toContain("Start SSE server");
    expect(output).toContain("--port");
  });
});