opencode-swarm-plugin 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Swarm Decomposition Quality Eval
3
+ *
4
+ * Tests the quality of task decomposition for swarm coordination.
5
+ * Uses real LLM calls via AI SDK + Vercel AI Gateway.
6
+ *
7
+ * Scorers evaluate:
8
+ * - Subtask independence (no file conflicts)
9
+ * - Complexity balance (even distribution)
10
+ * - Coverage completeness (all required files)
11
+ * - Instruction clarity (actionable descriptions)
12
+ *
13
+ * Run with: pnpm evalite evals/swarm-decomposition.eval.ts
14
+ *
15
+ * Requires: ANTHROPIC_API_KEY environment variable
16
+ */
17
+ import { evalite } from "evalite";
18
+ import {
19
+ subtaskIndependence,
20
+ coverageCompleteness,
21
+ instructionClarity,
22
+ } from "./scorers/index.js";
23
+ import { decompositionCases } from "./fixtures/decomposition-cases.js";
24
+ import {
25
+ generateDecomposition,
26
+ formatDecompositionPrompt,
27
+ extractJson,
28
+ } from "./lib/llm.js";
29
+ import {
30
+ loadEvalCases,
31
+ hasRealEvalData,
32
+ getEvalDataSummary,
33
+ } from "./lib/data-loader.js";
34
+
35
+ // Determine project key from current directory
36
+ const PROJECT_KEY = "opencode-swarm-plugin";
37
+ const PROJECT_PATH = process.cwd();
38
+
39
+ // Check if we have enough real data to use instead of fixtures
40
+ const useRealData = await hasRealEvalData(PROJECT_KEY, 5, PROJECT_PATH);
41
+
42
+ // Load data based on availability
43
+ const evalCases = useRealData
44
+ ? await loadEvalCases(PROJECT_KEY, { limit: 20, projectPath: PROJECT_PATH })
45
+ : decompositionCases.map((testCase) => ({
46
+ input: testCase.input,
47
+ expected: testCase.expected,
48
+ }));
49
+
50
+ // Log data source for transparency
51
+ if (useRealData) {
52
+ const summary = await getEvalDataSummary(PROJECT_KEY, PROJECT_PATH);
53
+ console.log(`[eval] Using real data from PGlite:`);
54
+ console.log(` - Total records: ${summary.totalRecords}`);
55
+ console.log(` - Success rate: ${(summary.successRate * 100).toFixed(1)}%`);
56
+ console.log(
57
+ ` - Strategies: ${Object.entries(summary.byStrategy)
58
+ .map(([s, c]) => `${s}(${c})`)
59
+ .join(", ")}`,
60
+ );
61
+ console.log(` - Eval cases: ${evalCases.length}`);
62
+ } else {
63
+ console.log(
64
+ `[eval] Using fixture data (${evalCases.length} cases) - not enough real data yet`,
65
+ );
66
+ }
67
+
68
+ /**
69
+ * Swarm Decomposition Quality Eval
70
+ *
71
+ * Tests decomposition quality with real LLM calls.
72
+ */
73
+ evalite("Swarm Decomposition Quality", {
74
+ // Test data from PGlite or fixtures
75
+ data: async () => evalCases,
76
+
77
+ // Task: generate real decomposition via Claude
78
+ task: async (input) => {
79
+ const prompt = formatDecompositionPrompt(input.task, input.context);
80
+ const response = await generateDecomposition(prompt);
81
+ return extractJson(response);
82
+ },
83
+
84
+ // Scorers evaluate decomposition quality
85
+ scorers: [subtaskIndependence, coverageCompleteness, instructionClarity],
86
+ });
87
+
88
+ /**
89
+ * Edge Case Eval: Minimal and Complex Tasks
90
+ *
91
+ * Tests handling of edge cases in decomposition.
92
+ */
93
+ evalite("Decomposition Edge Cases", {
94
+ data: async () => [
95
+ {
96
+ input: { task: "Fix typo in README.md" },
97
+ expected: { minSubtasks: 1, maxSubtasks: 2 },
98
+ },
99
+ {
100
+ input: { task: "Refactor entire codebase from JavaScript to TypeScript" },
101
+ expected: { minSubtasks: 4, maxSubtasks: 8 },
102
+ },
103
+ ],
104
+
105
+ task: async (input) => {
106
+ const prompt = formatDecompositionPrompt(input.task, undefined, 8);
107
+ const response = await generateDecomposition(prompt);
108
+ return extractJson(response);
109
+ },
110
+
111
+ scorers: [subtaskIndependence, coverageCompleteness],
112
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-swarm-plugin",
3
- "version": "0.19.0",
3
+ "version": "0.21.0",
4
4
  "description": "Multi-agent swarm coordination for OpenCode with learning capabilities, beads integration, and Agent Mail",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -27,6 +27,9 @@
27
27
  "test:all": "bun run test && bun run test:swarm",
28
28
  "typecheck": "tsc --noEmit",
29
29
  "clean": "rm -rf dist",
30
+ "eval:dev": "evalite watch evals/",
31
+ "eval:run": "evalite run evals/",
32
+ "eval:ci": "evalite run evals/ --threshold 80",
30
33
  "release": "npm run build && npm version patch && git push && npm run publish:otp",
31
34
  "release:minor": "npm run build && npm version minor && git push && npm run publish:otp",
32
35
  "release:major": "npm run build && npm version major && git push && npm run publish:otp",
@@ -41,11 +44,15 @@
41
44
  "gray-matter": "^4.0.3",
42
45
  "ioredis": "^5.4.1",
43
46
  "minimatch": "^10.1.1",
47
+ "nanoid": "^5.1.6",
44
48
  "zod": "4.1.8"
45
49
  },
46
50
  "devDependencies": {
47
51
  "@types/bun": "latest",
48
52
  "@types/minimatch": "^6.0.0",
53
+ "ai": "6.0.0-beta.150",
54
+ "bun-types": "^1.3.4",
55
+ "evalite": "^1.0.0-beta.10",
49
56
  "typescript": "^5.7.0",
50
57
  "vitest": "^4.0.15"
51
58
  },
package/src/agent-mail.ts CHANGED
@@ -31,6 +31,7 @@ import { tool } from "@opencode-ai/plugin";
31
31
  import { z } from "zod";
32
32
  import { isToolAvailable, warnMissingTool } from "./tool-availability";
33
33
  import { getRateLimiter, type RateLimiter } from "./rate-limiter";
34
+ import type { MailSessionState } from "./streams/events";
34
35
 
35
36
  // ============================================================================
36
37
  // Configuration
@@ -92,13 +93,12 @@ const RECOVERY_CONFIG = {
92
93
  // Types
93
94
  // ============================================================================
94
95
 
95
- /** Agent Mail session state */
96
- export interface AgentMailState {
97
- projectKey: string;
98
- agentName: string;
99
- reservations: number[];
100
- startedAt: string;
101
- }
96
+ /**
97
+ * Agent Mail session state
98
+ * @deprecated Use MailSessionState from streams/events.ts instead
99
+ * This is kept for backward compatibility and re-exported as an alias
100
+ */
101
+ export type AgentMailState = MailSessionState;
102
102
 
103
103
  // ============================================================================
104
104
  // Module-level state (keyed by sessionID)
package/src/beads.ts CHANGED
@@ -104,6 +104,8 @@ import {
104
104
  type BeadCreateArgs,
105
105
  type EpicCreateResult,
106
106
  } from "./schemas";
107
+ import { createEvent } from "./streams/events";
108
+ import { appendEvent } from "./streams/store";
107
109
 
108
110
  /**
109
111
  * Custom error for bead operations
@@ -321,6 +323,26 @@ export const beads_create_epic = tool({
321
323
  }),
322
324
  )
323
325
  .describe("Subtasks to create under the epic"),
326
+ strategy: tool.schema
327
+ .enum(["file-based", "feature-based", "risk-based"])
328
+ .optional()
329
+ .describe("Decomposition strategy used (default: feature-based)"),
330
+ task: tool.schema
331
+ .string()
332
+ .optional()
333
+ .describe("Original task description that was decomposed"),
334
+ project_key: tool.schema
335
+ .string()
336
+ .optional()
337
+ .describe("Project path for event emission"),
338
+ recovery_context: tool.schema
339
+ .object({
340
+ shared_context: tool.schema.string().optional(),
341
+ skills_to_load: tool.schema.array(tool.schema.string()).optional(),
342
+ coordinator_notes: tool.schema.string().optional(),
343
+ })
344
+ .optional()
345
+ .describe("Recovery context from checkpoint compaction"),
324
346
  },
325
347
  async execute(args, ctx) {
326
348
  const validated = EpicCreateArgsSchema.parse(args);
@@ -386,6 +408,33 @@ export const beads_create_epic = tool({
386
408
  subtasks: created.slice(1),
387
409
  };
388
410
 
411
+ // Emit DecompositionGeneratedEvent for learning system
412
+ if (args.project_key) {
413
+ try {
414
+ const event = createEvent("decomposition_generated", {
415
+ project_key: args.project_key,
416
+ epic_id: epic.id,
417
+ task: args.task || validated.epic_title,
418
+ context: validated.epic_description,
419
+ strategy: args.strategy || "feature-based",
420
+ epic_title: validated.epic_title,
421
+ subtasks: validated.subtasks.map((st) => ({
422
+ title: st.title,
423
+ files: st.files || [],
424
+ priority: st.priority,
425
+ })),
426
+ recovery_context: args.recovery_context,
427
+ });
428
+ await appendEvent(event, args.project_key);
429
+ } catch (error) {
430
+ // Non-fatal - log and continue
431
+ console.warn(
432
+ "[beads_create_epic] Failed to emit DecompositionGeneratedEvent:",
433
+ error,
434
+ );
435
+ }
436
+ }
437
+
389
438
  return JSON.stringify(result, null, 2);
390
439
  } catch (error) {
391
440
  // Partial failure - execute rollback automatically