opencode-swarm-plugin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Evaluation schemas for structured agent output validation
3
+ *
4
+ * These schemas define the expected format for agent self-evaluations
5
+ * and coordinator evaluations of completed work.
6
+ *
7
+ * Includes support for confidence decay - criteria weights fade over time
8
+ * unless revalidated by successful outcomes.
9
+ *
10
+ * @see src/learning.ts for decay calculations
11
+ */
12
+ import { z } from "zod";
13
+
14
/**
 * Verdict for one named criterion.
 *
 * An evaluation carries one of these per criterion key
 * (for example `type_safe` or `no_bugs`).
 */
export const CriterionEvaluationSchema = z.object({
  /** Whether the criterion was met. */
  passed: z.boolean(),
  /** Explanation accompanying the verdict. */
  feedback: z.string(),
  /** Optional normalized score; must lie in the closed range [0, 1] when present. */
  score: z.number().gte(0).lte(1).optional(),
});
export type CriterionEvaluation = z.infer<typeof CriterionEvaluationSchema>;
25
+
26
/** Number restricted to the closed range [0, 1]; reused by the weight fields below. */
const criterionUnitInterval = z.number().min(0).max(1);

/**
 * Criterion verdict annotated with learned-weight information.
 *
 * Builds on CriterionEvaluationSchema by adding `weight`,
 * `weighted_score` and `deprecated`. A lower weight marks a criterion
 * that has been historically unreliable.
 */
export const WeightedCriterionEvaluationSchema = CriterionEvaluationSchema.extend({
  /** Current weight after decay (0-1, lower = less reliable); defaults to 1. */
  weight: criterionUnitInterval.default(1),
  /**
   * Intended to hold score * weight. The schema only range-checks the
   * value; it does not recompute it.
   */
  weighted_score: criterionUnitInterval.optional(),
  /** Set when the criterion is deprecated due to a high failure rate; defaults to false. */
  deprecated: z.boolean().default(false),
});
export type WeightedCriterionEvaluation = z.infer<typeof WeightedCriterionEvaluationSchema>;
44
+
45
/**
 * Complete evaluation of a finished subtask.
 *
 * Agents return this after completing a subtask, and the coordinator
 * uses it to decide whether the work is acceptable.
 */
export const EvaluationSchema = z.object({
  /** Overall verdict. */
  passed: z.boolean(),
  /** Per-criterion verdicts, keyed by criterion name. */
  criteria: z.record(z.string(), CriterionEvaluationSchema),
  overall_feedback: z.string(),
  /** Required key; use `null` when there is nothing to suggest. */
  retry_suggestion: z.nullable(z.string()),
  /** ISO-8601 by convention; the schema accepts any string. */
  timestamp: z.optional(z.string()),
});
export type Evaluation = z.infer<typeof EvaluationSchema>;
59
+
60
/**
 * Criteria applied when a request does not name its own.
 *
 * Declared `as const` so that the element type is the union of the four
 * literal names rather than `string`. Can be overridden per-task or
 * per-project.
 */
export const DEFAULT_CRITERIA = ["type_safe", "no_bugs", "patterns", "readable"] as const;

/** Union of the literal criterion names listed in DEFAULT_CRITERIA. */
export type DefaultCriterion = (typeof DEFAULT_CRITERIA)[number];
73
+
74
/**
 * Arguments accepted when requesting an evaluation of a subtask.
 */
export const EvaluationRequestSchema = z.object({
  subtask_id: z.string(),
  /** Criterion names to evaluate; falls back to a copy of DEFAULT_CRITERIA when omitted. */
  criteria: z.string().array().default([...DEFAULT_CRITERIA]),
  context: z.optional(z.string()),
});
export type EvaluationRequest = z.infer<typeof EvaluationRequestSchema>;
83
+
84
/** Number restricted to the closed range [0, 1]; reused by the aggregate fields below. */
const evaluationUnitInterval = z.number().min(0).max(1);

/**
 * Evaluation result carrying confidence-adjusted scores.
 *
 * Used when learned weights are applied to evaluation criteria. It has
 * the same fields as EvaluationSchema, except that each `criteria` entry
 * is a WeightedCriterionEvaluationSchema, and three optional aggregate
 * numbers are appended.
 */
export const WeightedEvaluationSchema = EvaluationSchema.extend({
  // Overrides the base `criteria` field in place; field order is unchanged.
  criteria: z.record(z.string(), WeightedCriterionEvaluationSchema),
  /** Average weight across all criteria (indicates overall confidence). */
  average_weight: evaluationUnitInterval.optional(),
  /** Raw score before weighting. */
  raw_score: evaluationUnitInterval.optional(),
  /** Weighted score after applying criterion weights. */
  weighted_score: evaluationUnitInterval.optional(),
});
export type WeightedEvaluation = z.infer<typeof WeightedEvaluationSchema>;
103
+
104
/** Non-negative integer used for the total/passed/failed counters. */
const swarmEvaluationCount = z.number().int().min(0);

/** One bead paired with the evaluation recorded for it. */
const beadEvaluationEntry = z.object({
  bead_id: z.string(),
  evaluation: EvaluationSchema,
});

/**
 * Evaluation results aggregated across a swarm.
 *
 * The three counters are validated independently; the schema does not
 * check that `passed + failed` equals `total`.
 */
export const SwarmEvaluationResultSchema = z.object({
  epic_id: z.string(),
  total: swarmEvaluationCount,
  passed: swarmEvaluationCount,
  failed: swarmEvaluationCount,
  evaluations: z.array(beadEvaluationEntry),
  overall_passed: z.boolean(),
  /** Bead IDs that need retry. */
  retry_needed: z.array(z.string()),
});
export type SwarmEvaluationResult = z.infer<typeof SwarmEvaluationResultSchema>;
122
+
123
/**
 * Outcome of a validation run, including retry bookkeeping.
 */
export const ValidationResultSchema = z.object({
  success: z.boolean(),
  /** Validated payload, if any; left untyped at this level. */
  data: z.unknown().optional(),
  /** Number of attempts made; an integer of at least 1. */
  attempts: z.number().int().gte(1),
  /** Error messages, when there are any to report. */
  errors: z.string().array().optional(),
  extractionMethod: z.optional(z.string()),
});
export type ValidationResult = z.infer<typeof ValidationResultSchema>;
@@ -0,0 +1,199 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ BeadSchema,
4
+ BeadTypeSchema,
5
+ BeadCreateArgsSchema,
6
+ EpicCreateArgsSchema,
7
+ EvaluationSchema,
8
+ TaskDecompositionSchema,
9
+ DecomposedSubtaskSchema,
10
+ SwarmStatusSchema,
11
+ ValidationResultSchema,
12
+ } from "./index";
13
+
14
describe("BeadSchema", () => {
  // Well-formed fixture shared by the cases below; a case that needs an
  // invalid bead overrides a single field.
  const wellFormedBead = {
    id: "bd-abc123",
    title: "Fix the thing",
    type: "bug",
    status: "open",
    priority: 1,
    created_at: "2025-01-01T00:00:00Z",
    updated_at: "2025-01-01T00:00:00Z",
  };

  it("validates a complete bead", () => {
    expect(() => BeadSchema.parse(wellFormedBead)).not.toThrow();
  });

  it("rejects invalid priority", () => {
    // Invalid: max is 3
    const outOfRangePriority = { ...wellFormedBead, priority: 5 };
    expect(() => BeadSchema.parse(outOfRangePriority)).toThrow();
  });

  it("accepts all valid types", () => {
    ["bug", "feature", "task", "epic", "chore"].forEach((beadType) => {
      expect(() => BeadTypeSchema.parse(beadType)).not.toThrow();
    });
  });
});
48
+
49
describe("BeadCreateArgsSchema", () => {
  it("validates minimal create args", () => {
    // Only the title is supplied; type and priority must come from defaults.
    const parsed = BeadCreateArgsSchema.parse({ title: "New bead" });

    expect(parsed.title).toBe("New bead");
    expect(parsed.type).toBe("task");
    expect(parsed.priority).toBe(2);
  });

  it("rejects empty title", () => {
    expect(() => BeadCreateArgsSchema.parse({ title: "" })).toThrow();
  });
});
63
+
64
describe("EpicCreateArgsSchema", () => {
  // Builds create args that differ only in their subtask list.
  const epicWith = (subtasks: Array<{ title: string; priority: number }>) => ({
    epic_title: "Big feature",
    subtasks,
  });

  it("validates epic with subtasks", () => {
    const populated = epicWith([
      { title: "Part 1", priority: 2 },
      { title: "Part 2", priority: 3 },
    ]);
    expect(() => EpicCreateArgsSchema.parse(populated)).not.toThrow();
  });

  it("requires at least one subtask", () => {
    expect(() => EpicCreateArgsSchema.parse(epicWith([]))).toThrow();
  });
});
84
+
85
describe("EvaluationSchema", () => {
  it("validates a passing evaluation", () => {
    // A passing result still has to carry retry_suggestion, set to null.
    const passing = {
      passed: true,
      criteria: {
        type_safe: { passed: true, feedback: "All types correct" },
        no_bugs: { passed: true, feedback: "No issues found" },
      },
      overall_feedback: "Good work",
      retry_suggestion: null,
    };

    expect(() => EvaluationSchema.parse(passing)).not.toThrow();
  });

  it("validates a failing evaluation with retry suggestion", () => {
    const failing = {
      passed: false,
      criteria: {
        type_safe: { passed: false, feedback: "Missing types on line 42" },
      },
      overall_feedback: "Needs work",
      retry_suggestion: "Add explicit types to the handler function",
    };

    expect(() => EvaluationSchema.parse(failing)).not.toThrow();
  });
});
111
+
112
describe("TaskDecompositionSchema", () => {
  it("validates a decomposition", () => {
    const oauthSubtask = {
      title: "Add OAuth provider",
      description: "Configure Google OAuth",
      files: ["src/auth/google.ts"],
      estimated_effort: "medium" as const,
    };
    const decomposition = {
      task: "Add OAuth authentication",
      reasoning: "Breaking into provider setup and integration",
      subtasks: [oauthSubtask],
      dependencies: [],
      shared_context: "Using NextAuth.js",
    };

    expect(() => TaskDecompositionSchema.parse(decomposition)).not.toThrow();
  });

  it("validates subtask effort levels", () => {
    // Every effort level must be accepted on an otherwise minimal subtask.
    for (const estimated_effort of ["trivial", "small", "medium", "large"]) {
      const subtask = {
        title: "Test",
        description: "Test description",
        files: [],
        estimated_effort,
      };
      expect(() => DecomposedSubtaskSchema.parse(subtask)).not.toThrow();
    }
  });
});
144
+
145
describe("SwarmStatusSchema", () => {
  it("validates swarm status", () => {
    // Three agents, each in a different lifecycle state.
    const completedAgent = {
      bead_id: "bd-1",
      agent_name: "BlueLake",
      status: "completed" as const,
      files: ["src/a.ts"],
    };
    const runningAgent = {
      bead_id: "bd-2",
      agent_name: "RedStone",
      status: "running" as const,
      files: ["src/b.ts"],
    };
    const pendingAgent = {
      bead_id: "bd-3",
      agent_name: "GreenCastle",
      status: "pending" as const,
      files: ["src/c.ts"],
    };

    const status = {
      epic_id: "bd-epic123",
      total_agents: 3,
      running: 1,
      completed: 1,
      failed: 0,
      blocked: 1,
      agents: [completedAgent, runningAgent, pendingAgent],
      last_update: "2025-01-01T00:00:00Z",
    };

    expect(() => SwarmStatusSchema.parse(status)).not.toThrow();
  });
});
179
+
180
describe("ValidationResultSchema", () => {
  it("validates success result", () => {
    const succeeded = {
      success: true,
      data: { foo: "bar" },
      attempts: 1,
      extractionMethod: "direct",
    };

    expect(() => ValidationResultSchema.parse(succeeded)).not.toThrow();
  });

  it("validates failure result with errors", () => {
    // No data or extractionMethod here: both are optional.
    const failed = {
      success: false,
      attempts: 2,
      errors: ["Missing required field: name", "Invalid type for age"],
    };

    expect(() => ValidationResultSchema.parse(failed)).not.toThrow();
  });
});
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Schema exports
3
+ *
4
+ * Re-export all schemas for convenient importing.
5
+ */
6
+
7
// Bead schemas: runtime validators
export {
  BeadStatusSchema,
  BeadTypeSchema,
  BeadDependencySchema,
  BeadSchema,
  BeadCreateArgsSchema,
  BeadUpdateArgsSchema,
  BeadCloseArgsSchema,
  BeadQueryArgsSchema,
  SubtaskSpecSchema,
  BeadTreeSchema,
  EpicCreateArgsSchema,
  EpicCreateResultSchema,
} from "./bead";

// Bead schemas: type-only re-exports
export type {
  BeadStatus,
  BeadType,
  BeadDependency,
  Bead,
  BeadCreateArgs,
  BeadUpdateArgs,
  BeadCloseArgs,
  BeadQueryArgs,
  SubtaskSpec,
  BeadTree,
  EpicCreateArgs,
  EpicCreateResult,
} from "./bead";
34
+
35
// Evaluation schemas: runtime validators and constants
export {
  CriterionEvaluationSchema,
  WeightedCriterionEvaluationSchema,
  EvaluationSchema,
  WeightedEvaluationSchema,
  EvaluationRequestSchema,
  SwarmEvaluationResultSchema,
  ValidationResultSchema,
  DEFAULT_CRITERIA,
} from "./evaluation";

// Evaluation schemas: type-only re-exports
export type {
  CriterionEvaluation,
  WeightedCriterionEvaluation,
  Evaluation,
  WeightedEvaluation,
  EvaluationRequest,
  SwarmEvaluationResult,
  ValidationResult,
  DefaultCriterion,
} from "./evaluation";
54
+
55
// Task schemas: runtime validators
export {
  EffortLevelSchema,
  DependencyTypeSchema,
  DecomposedSubtaskSchema,
  SubtaskDependencySchema,
  TaskDecompositionSchema,
  DecomposeArgsSchema,
  SpawnedAgentSchema,
  SwarmSpawnResultSchema,
  AgentProgressSchema,
  SwarmStatusSchema,
} from "./task";

// Task schemas: type-only re-exports
export type {
  EffortLevel,
  DependencyType,
  DecomposedSubtask,
  SubtaskDependency,
  TaskDecomposition,
  DecomposeArgs,
  SpawnedAgent,
  SwarmSpawnResult,
  AgentProgress,
  SwarmStatus,
} from "./task";
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Task decomposition schemas
3
+ *
4
+ * These schemas define the structure for breaking down tasks
5
+ * into parallelizable subtasks for swarm execution.
6
+ */
7
+ import { z } from "zod";
8
+
9
/**
 * Effort estimate for a subtask.
 *
 * - `trivial`: < 5 min
 * - `small`: 5-30 min
 * - `medium`: 30 min - 2 hours
 * - `large`: 2+ hours
 */
export const EffortLevelSchema = z.enum(["trivial", "small", "medium", "large"]);
export type EffortLevel = z.infer<typeof EffortLevelSchema>;
19
+
20
/**
 * Kind of relationship between two subtasks.
 *
 * - `blocks`: must complete before the dependent can start
 * - `requires`: needs output from another task
 * - `related`: informational relationship
 */
export const DependencyTypeSchema = z.enum(["blocks", "requires", "related"]);
export type DependencyType = z.infer<typeof DependencyTypeSchema>;
29
+
30
/**
 * One subtask produced by a decomposition.
 */
export const DecomposedSubtaskSchema = z.object({
  /** Must be non-empty. */
  title: z.string().min(1),
  description: z.string(),
  /** File paths this subtask will modify. */
  files: z.string().array(),
  estimated_effort: EffortLevelSchema,
  /** May be omitted on input; parsed output always has an array (empty by default). */
  risks: z.string().array().optional().default([]),
});
export type DecomposedSubtask = z.infer<typeof DecomposedSubtaskSchema>;
41
+
42
/** Subtask index: a non-negative integer. */
const subtaskIndex = z.number().int().min(0);

/**
 * Directed dependency between two subtasks, each referred to by index.
 *
 * The schema checks only that both indices are non-negative integers.
 * It has no access to the subtask list, so it cannot check that they
 * are in range.
 */
export const SubtaskDependencySchema = z.object({
  from: subtaskIndex,
  to: subtaskIndex,
  type: DependencyTypeSchema,
});
export type SubtaskDependency = z.infer<typeof SubtaskDependencySchema>;
51
+
52
/**
 * Complete result of decomposing a task.
 *
 * Returned by the decomposition agent and validated before spawning.
 */
export const TaskDecompositionSchema = z.object({
  /** Original task description. */
  task: z.string(),
  /** Why this decomposition was chosen. */
  reasoning: z.optional(z.string()),
  /** Between 1 and 10 subtasks, inclusive. */
  subtasks: z.array(DecomposedSubtaskSchema).min(1).max(10),
  /** May be omitted on input; parsed output always has an array (empty by default). */
  dependencies: z.array(SubtaskDependencySchema).optional().default([]),
  /** Context to pass to all agents. */
  shared_context: z.optional(z.string()),
});
export type TaskDecomposition = z.infer<typeof TaskDecompositionSchema>;
65
+
66
/**
 * Arguments accepted when asking for a task to be decomposed.
 */
export const DecomposeArgsSchema = z.object({
  /** Must be non-empty. */
  task: z.string().min(1),
  /** Integer from 1 to 10 inclusive; defaults to 5. */
  max_subtasks: z.number().int().gte(1).lte(10).default(5),
  context: z.optional(z.string()),
});
export type DecomposeArgs = z.infer<typeof DecomposeArgsSchema>;
75
+
76
/** Lifecycle states a spawned agent can be recorded in. */
const spawnedAgentStatus = z.enum(["pending", "running", "completed", "failed"]);

/**
 * Record describing a single spawned agent.
 */
export const SpawnedAgentSchema = z.object({
  bead_id: z.string(),
  /** Agent Mail name (e.g., "BlueLake"). */
  agent_name: z.string(),
  /** OpenCode task ID. */
  task_id: z.string().optional(),
  status: spawnedAgentStatus,
  /** Reserved files. */
  files: z.array(z.string()),
  /** Agent Mail reservation IDs. */
  reservation_ids: z.array(z.number()).optional(),
});
export type SpawnedAgent = z.infer<typeof SpawnedAgentSchema>;
88
+
89
/**
 * Outcome of spawning a swarm for an epic.
 */
export const SwarmSpawnResultSchema = z.object({
  epic_id: z.string(),
  /** Agent Mail name of the coordinator. */
  coordinator_name: z.string(),
  /** Agent Mail thread for this swarm. */
  thread_id: z.string(),
  agents: z.array(SpawnedAgentSchema),
  /** ISO-8601 by convention; the schema accepts any string. */
  started_at: z.string(),
});
export type SwarmSpawnResult = z.infer<typeof SwarmSpawnResultSchema>;
100
+
101
/** States an agent may report in a progress update. */
const agentProgressStatus = z.enum(["in_progress", "blocked", "completed", "failed"]);

/**
 * Progress update reported by an agent.
 */
export const AgentProgressSchema = z.object({
  bead_id: z.string(),
  agent_name: z.string(),
  status: agentProgressStatus,
  /** Optional; must be within 0 to 100 inclusive when present. */
  progress_percent: z.number().gte(0).lte(100).optional(),
  message: z.optional(z.string()),
  files_touched: z.string().array().optional(),
  blockers: z.string().array().optional(),
  /** ISO-8601 by convention; the schema accepts any string. */
  timestamp: z.string(),
});
export type AgentProgress = z.infer<typeof AgentProgressSchema>;
115
+
116
/** Non-negative integer used for every agent counter below. */
const swarmAgentCount = z.number().int().min(0);

/**
 * Summary of a swarm's current status.
 *
 * Each counter is validated independently; the schema does not check
 * that the per-state counts add up to `total_agents`.
 */
export const SwarmStatusSchema = z.object({
  epic_id: z.string(),
  total_agents: swarmAgentCount,
  running: swarmAgentCount,
  completed: swarmAgentCount,
  failed: swarmAgentCount,
  blocked: swarmAgentCount,
  agents: z.array(SpawnedAgentSchema),
  /** ISO-8601 by convention; the schema accepts any string. */
  last_update: z.string(),
});
export type SwarmStatus = z.infer<typeof SwarmStatusSchema>;