opencode-swarm-plugin 0.40.0 → 0.42.0

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
  2. package/.hive/analysis/session-data-quality-audit.md +320 -0
  3. package/.hive/eval-results.json +481 -24
  4. package/.hive/issues.jsonl +65 -16
  5. package/.hive/memories.jsonl +159 -1
  6. package/.opencode/eval-history.jsonl +315 -0
  7. package/.turbo/turbo-build.log +5 -5
  8. package/CHANGELOG.md +155 -0
  9. package/README.md +2 -0
  10. package/SCORER-ANALYSIS.md +598 -0
  11. package/bin/eval-gate.test.ts +158 -0
  12. package/bin/eval-gate.ts +74 -0
  13. package/bin/swarm.test.ts +661 -732
  14. package/bin/swarm.ts +274 -0
  15. package/dist/compaction-hook.d.ts +7 -5
  16. package/dist/compaction-hook.d.ts.map +1 -1
  17. package/dist/compaction-prompt-scoring.d.ts +1 -0
  18. package/dist/compaction-prompt-scoring.d.ts.map +1 -1
  19. package/dist/eval-runner.d.ts +134 -0
  20. package/dist/eval-runner.d.ts.map +1 -0
  21. package/dist/hive.d.ts.map +1 -1
  22. package/dist/index.d.ts +29 -0
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +99741 -58858
  25. package/dist/memory-tools.d.ts +70 -2
  26. package/dist/memory-tools.d.ts.map +1 -1
  27. package/dist/memory.d.ts +37 -0
  28. package/dist/memory.d.ts.map +1 -1
  29. package/dist/observability-tools.d.ts +64 -0
  30. package/dist/observability-tools.d.ts.map +1 -1
  31. package/dist/plugin.js +99356 -58318
  32. package/dist/swarm-orchestrate.d.ts.map +1 -1
  33. package/dist/swarm-prompts.d.ts +32 -1
  34. package/dist/swarm-prompts.d.ts.map +1 -1
  35. package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
  36. package/evals/ARCHITECTURE.md +1189 -0
  37. package/evals/example.eval.ts +3 -4
  38. package/evals/fixtures/compaction-prompt-cases.ts +6 -0
  39. package/evals/scorers/coordinator-discipline.ts +0 -253
  40. package/evals/swarm-decomposition.eval.ts +4 -2
  41. package/package.json +4 -3
  42. package/src/compaction-prompt-scorers.test.ts +10 -9
  43. package/src/compaction-prompt-scoring.ts +7 -5
  44. package/src/eval-runner.test.ts +128 -1
  45. package/src/eval-runner.ts +46 -0
  46. package/src/hive.ts +43 -42
  47. package/src/memory-tools.test.ts +84 -0
  48. package/src/memory-tools.ts +68 -3
  49. package/src/memory.test.ts +2 -112
  50. package/src/memory.ts +88 -49
  51. package/src/observability-tools.test.ts +13 -0
  52. package/src/observability-tools.ts +277 -0
  53. package/src/swarm-orchestrate.test.ts +162 -0
  54. package/src/swarm-orchestrate.ts +7 -5
  55. package/src/swarm-prompts.test.ts +168 -4
  56. package/src/swarm-prompts.ts +228 -7
  57. package/.env +0 -2
  58. package/.turbo/turbo-test.log +0 -481
  59. package/.turbo/turbo-typecheck.log +0 -1
package/bin/eval-gate.test.ts
@@ -0,0 +1,158 @@
+#!/usr/bin/env bun
+/**
+ * Tests for eval-gate CLI
+ *
+ * TDD: Write tests first to verify behavior before implementing.
+ */
+
+import { describe, test, expect, beforeEach, mock } from "bun:test";
+import type { RunEvalsResult } from "../src/eval-runner.js";
+
+// Mock process.exit to prevent test from actually exiting
+const mockExit = mock((code?: number) => {
+  throw new Error(`EXIT:${code ?? 0}`);
+});
+
+beforeEach(() => {
+  mockExit.mockClear();
+});
+
+describe("eval-gate CLI", () => {
+  test("exits 0 when all gates pass", async () => {
+    const mockResult: RunEvalsResult = {
+      success: true,
+      totalSuites: 2,
+      totalEvals: 10,
+      averageScore: 0.95,
+      suites: [],
+      gateResults: [
+        {
+          suite: "example",
+          passed: true,
+          phase: "production",
+          message: "Passed",
+          currentScore: 0.95,
+        },
+      ],
+    };
+
+    // Simulate main() execution with mocked runEvals
+    let exitCode: number | undefined;
+    try {
+      // Would call main() here if we extract it to a function
+      // For now, verify exit logic manually
+      const failedGates = mockResult.gateResults?.filter((g) => !g.passed) || [];
+      if (failedGates.length > 0 || !mockResult.success) {
+        exitCode = 1;
+      } else {
+        exitCode = 0;
+      }
+    } catch (e) {
+      // Extract exit code from mocked error
+      if (e instanceof Error && e.message.startsWith("EXIT:")) {
+        exitCode = parseInt(e.message.split(":")[1]);
+      }
+    }
+
+    expect(exitCode).toBe(0);
+  });
+
+  test("exits 1 when gates fail", async () => {
+    const mockResult: RunEvalsResult = {
+      success: false,
+      totalSuites: 2,
+      totalEvals: 10,
+      averageScore: 0.45,
+      suites: [],
+      gateResults: [
+        {
+          suite: "coordinator",
+          passed: false,
+          phase: "production",
+          message: "Regression detected",
+          currentScore: 0.45,
+          baseline: 0.85,
+          regressionPercent: -47,
+        },
+      ],
+    };
+
+    let exitCode: number | undefined;
+    const failedGates = mockResult.gateResults?.filter((g) => !g.passed) || [];
+    if (failedGates.length > 0 || !mockResult.success) {
+      exitCode = 1;
+    } else {
+      exitCode = 0;
+    }
+
+    expect(exitCode).toBe(1);
+  });
+
+  test("exits 1 when threshold check fails", async () => {
+    const mockResult: RunEvalsResult = {
+      success: false, // Threshold failed
+      totalSuites: 2,
+      totalEvals: 10,
+      averageScore: 0.65, // Below threshold of 80
+      suites: [],
+      gateResults: [],
+    };
+
+    let exitCode: number | undefined;
+    const failedGates = mockResult.gateResults?.filter((g) => !g.passed) || [];
+    if (failedGates.length > 0 || !mockResult.success) {
+      exitCode = 1;
+    } else {
+      exitCode = 0;
+    }
+
+    expect(exitCode).toBe(1);
+  });
+
+  test("parses --suite argument", () => {
+    const args = ["--suite", "coordinator"];
+    let suiteFilter: string | undefined;
+
+    for (let i = 0; i < args.length; i++) {
+      if (args[i] === "--suite" && args[i + 1]) {
+        suiteFilter = args[i + 1];
+        i++;
+      }
+    }
+
+    expect(suiteFilter).toBe("coordinator");
+  });
+
+  test("parses --threshold argument", () => {
+    const args = ["--threshold", "85"];
+    let scoreThreshold: number | undefined;
+
+    for (let i = 0; i < args.length; i++) {
+      if (args[i] === "--threshold" && args[i + 1]) {
+        scoreThreshold = parseInt(args[i + 1], 10);
+        i++;
+      }
+    }
+
+    expect(scoreThreshold).toBe(85);
+  });
+
+  test("handles missing arguments gracefully", () => {
+    const args: string[] = [];
+    let suiteFilter: string | undefined;
+    let scoreThreshold: number | undefined;
+
+    for (let i = 0; i < args.length; i++) {
+      if (args[i] === "--suite" && args[i + 1]) {
+        suiteFilter = args[i + 1];
+        i++;
+      } else if (args[i] === "--threshold" && args[i + 1]) {
+        scoreThreshold = parseInt(args[i + 1], 10);
+        i++;
+      }
+    }
+
+    expect(suiteFilter).toBeUndefined();
+    expect(scoreThreshold).toBeUndefined();
+  });
+});
package/bin/eval-gate.ts
@@ -0,0 +1,74 @@
+#!/usr/bin/env bun
+/**
+ * Eval Gate CLI - Run evals and fail on regression
+ *
+ * Usage:
+ *   bun run bin/eval-gate.ts                      # Run all evals
+ *   bun run bin/eval-gate.ts --suite coordinator  # Run specific suite
+ *   bun run bin/eval-gate.ts --threshold 80       # Custom score threshold
+ */
+
+import { runEvals } from "../src/eval-runner.js";
+
+const args = process.argv.slice(2);
+
+// Parse args
+let suiteFilter: string | undefined;
+let scoreThreshold: number | undefined;
+
+for (let i = 0; i < args.length; i++) {
+  if (args[i] === "--suite" && args[i + 1]) {
+    suiteFilter = args[i + 1];
+    i++;
+  } else if (args[i] === "--threshold" && args[i + 1]) {
+    scoreThreshold = parseInt(args[i + 1], 10);
+    i++;
+  }
+}
+
+async function main() {
+  console.log("🔍 Running eval gates...\n");
+
+  const result = await runEvals({
+    cwd: process.cwd(),
+    suiteFilter,
+    scoreThreshold,
+  });
+
+  // Print results
+  console.log(`📊 Results:`);
+  console.log(`   Suites: ${result.totalSuites}`);
+  console.log(`   Evals: ${result.totalEvals}`);
+  console.log(`   Average Score: ${(result.averageScore * 100).toFixed(1)}%\n`);
+
+  // Print gate results
+  if (result.gateResults && result.gateResults.length > 0) {
+    console.log("🚦 Gate Results:");
+    for (const gate of result.gateResults) {
+      const icon = gate.passed ? "✅" : "❌";
+      console.log(`   ${icon} ${gate.suite}: ${gate.message}`);
+    }
+    console.log("");
+  }
+
+  // Check for gate failures
+  const failedGates = result.gateResults?.filter(g => !g.passed) || [];
+
+  if (failedGates.length > 0) {
+    console.error(`❌ ${failedGates.length} gate(s) failed!`);
+    process.exit(1);
+  }
+
+  if (!result.success) {
+    console.error(`❌ Evals failed threshold check`);
+    process.exit(1);
+  }
+
+  console.log("✅ All gates passed!");
+  process.exit(0);
+}
+
+main().catch((err) => {
+  console.error("Fatal error:", err);
+  process.exit(1);
+});
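
Note: the tests in bin/eval-gate.test.ts re-implement the CLI's exit-code decision inline because, as their comments state, main() is not yet extracted into an exportable function. A minimal sketch of how that decision could live in one shared, directly testable helper follows; the decideExitCode name and its placement are hypothetical and not part of the published package.

// Hypothetical helper (not in the published package): mirrors the exit logic
// that bin/eval-gate.ts applies and bin/eval-gate.test.ts currently duplicates.
import type { RunEvalsResult } from "../src/eval-runner.js";

export function decideExitCode(result: RunEvalsResult): 0 | 1 {
  // Exit 1 on any failed gate or a failed overall threshold check; otherwise 0.
  const failedGates = result.gateResults?.filter((g) => !g.passed) ?? [];
  return failedGates.length > 0 || !result.success ? 1 : 0;
}

With such a helper, both the CLI and the tests could call decideExitCode(result) instead of repeating the filter-and-branch logic, which is the direction the "Would call main() here if we extract it to a function" comment points.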