opencode-swarm-plugin 0.43.0 → 0.44.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/bin/cass.characterization.test.ts +422 -0
  2. package/bin/swarm.serve.test.ts +6 -4
  3. package/bin/swarm.test.ts +68 -0
  4. package/bin/swarm.ts +81 -8
  5. package/dist/compaction-prompt-scoring.js +139 -0
  6. package/dist/contributor-tools.d.ts +42 -0
  7. package/dist/contributor-tools.d.ts.map +1 -0
  8. package/dist/eval-capture.js +12811 -0
  9. package/dist/hive.d.ts.map +1 -1
  10. package/dist/index.d.ts +12 -0
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +7728 -62590
  13. package/dist/plugin.js +23833 -78695
  14. package/dist/sessions/agent-discovery.d.ts +59 -0
  15. package/dist/sessions/agent-discovery.d.ts.map +1 -0
  16. package/dist/sessions/index.d.ts +10 -0
  17. package/dist/sessions/index.d.ts.map +1 -0
  18. package/dist/swarm-orchestrate.d.ts.map +1 -1
  19. package/dist/swarm-prompts.d.ts.map +1 -1
  20. package/dist/swarm-review.d.ts.map +1 -1
  21. package/package.json +17 -5
  22. package/.changeset/swarm-insights-data-layer.md +0 -63
  23. package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
  24. package/.hive/analysis/session-data-quality-audit.md +0 -320
  25. package/.hive/eval-results.json +0 -483
  26. package/.hive/issues.jsonl +0 -138
  27. package/.hive/memories.jsonl +0 -729
  28. package/.opencode/eval-history.jsonl +0 -327
  29. package/.turbo/turbo-build.log +0 -9
  30. package/CHANGELOG.md +0 -2255
  31. package/SCORER-ANALYSIS.md +0 -598
  32. package/docs/analysis/subagent-coordination-patterns.md +0 -902
  33. package/docs/analysis-socratic-planner-pattern.md +0 -504
  34. package/docs/planning/ADR-001-monorepo-structure.md +0 -171
  35. package/docs/planning/ADR-002-package-extraction.md +0 -393
  36. package/docs/planning/ADR-003-performance-improvements.md +0 -451
  37. package/docs/planning/ADR-004-message-queue-features.md +0 -187
  38. package/docs/planning/ADR-005-devtools-observability.md +0 -202
  39. package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
  40. package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
  41. package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
  42. package/docs/planning/ROADMAP.md +0 -368
  43. package/docs/semantic-memory-cli-syntax.md +0 -123
  44. package/docs/swarm-mail-architecture.md +0 -1147
  45. package/docs/testing/context-recovery-test.md +0 -470
  46. package/evals/ARCHITECTURE.md +0 -1189
  47. package/evals/README.md +0 -768
  48. package/evals/compaction-prompt.eval.ts +0 -149
  49. package/evals/compaction-resumption.eval.ts +0 -289
  50. package/evals/coordinator-behavior.eval.ts +0 -307
  51. package/evals/coordinator-session.eval.ts +0 -154
  52. package/evals/evalite.config.ts.bak +0 -15
  53. package/evals/example.eval.ts +0 -31
  54. package/evals/fixtures/compaction-cases.ts +0 -350
  55. package/evals/fixtures/compaction-prompt-cases.ts +0 -311
  56. package/evals/fixtures/coordinator-sessions.ts +0 -328
  57. package/evals/fixtures/decomposition-cases.ts +0 -105
  58. package/evals/lib/compaction-loader.test.ts +0 -248
  59. package/evals/lib/compaction-loader.ts +0 -320
  60. package/evals/lib/data-loader.evalite-test.ts +0 -289
  61. package/evals/lib/data-loader.test.ts +0 -345
  62. package/evals/lib/data-loader.ts +0 -281
  63. package/evals/lib/llm.ts +0 -115
  64. package/evals/scorers/compaction-prompt-scorers.ts +0 -145
  65. package/evals/scorers/compaction-scorers.ts +0 -305
  66. package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
  67. package/evals/scorers/coordinator-discipline.ts +0 -325
  68. package/evals/scorers/index.test.ts +0 -146
  69. package/evals/scorers/index.ts +0 -328
  70. package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
  71. package/evals/scorers/outcome-scorers.ts +0 -349
  72. package/evals/swarm-decomposition.eval.ts +0 -121
  73. package/examples/commands/swarm.md +0 -745
  74. package/examples/plugin-wrapper-template.ts +0 -2426
  75. package/examples/skills/hive-workflow/SKILL.md +0 -212
  76. package/examples/skills/skill-creator/SKILL.md +0 -223
  77. package/examples/skills/swarm-coordination/SKILL.md +0 -292
  78. package/global-skills/cli-builder/SKILL.md +0 -344
  79. package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
  80. package/global-skills/learning-systems/SKILL.md +0 -644
  81. package/global-skills/skill-creator/LICENSE.txt +0 -202
  82. package/global-skills/skill-creator/SKILL.md +0 -352
  83. package/global-skills/skill-creator/references/output-patterns.md +0 -82
  84. package/global-skills/skill-creator/references/workflows.md +0 -28
  85. package/global-skills/swarm-coordination/SKILL.md +0 -995
  86. package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
  87. package/global-skills/swarm-coordination/references/strategies.md +0 -138
  88. package/global-skills/system-design/SKILL.md +0 -213
  89. package/global-skills/testing-patterns/SKILL.md +0 -430
  90. package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
  91. package/opencode-swarm-plugin-0.30.7.tgz +0 -0
  92. package/opencode-swarm-plugin-0.31.0.tgz +0 -0
  93. package/scripts/cleanup-test-memories.ts +0 -346
  94. package/scripts/init-skill.ts +0 -222
  95. package/scripts/migrate-unknown-sessions.ts +0 -349
  96. package/scripts/validate-skill.ts +0 -204
  97. package/src/agent-mail.ts +0 -1724
  98. package/src/anti-patterns.test.ts +0 -1167
  99. package/src/anti-patterns.ts +0 -448
  100. package/src/compaction-capture.integration.test.ts +0 -257
  101. package/src/compaction-hook.test.ts +0 -838
  102. package/src/compaction-hook.ts +0 -1204
  103. package/src/compaction-observability.integration.test.ts +0 -139
  104. package/src/compaction-observability.test.ts +0 -187
  105. package/src/compaction-observability.ts +0 -324
  106. package/src/compaction-prompt-scorers.test.ts +0 -475
  107. package/src/compaction-prompt-scoring.ts +0 -300
  108. package/src/dashboard.test.ts +0 -611
  109. package/src/dashboard.ts +0 -462
  110. package/src/error-enrichment.test.ts +0 -403
  111. package/src/error-enrichment.ts +0 -219
  112. package/src/eval-capture.test.ts +0 -1015
  113. package/src/eval-capture.ts +0 -929
  114. package/src/eval-gates.test.ts +0 -306
  115. package/src/eval-gates.ts +0 -218
  116. package/src/eval-history.test.ts +0 -508
  117. package/src/eval-history.ts +0 -214
  118. package/src/eval-learning.test.ts +0 -378
  119. package/src/eval-learning.ts +0 -360
  120. package/src/eval-runner.test.ts +0 -223
  121. package/src/eval-runner.ts +0 -402
  122. package/src/export-tools.test.ts +0 -476
  123. package/src/export-tools.ts +0 -257
  124. package/src/hive.integration.test.ts +0 -2241
  125. package/src/hive.ts +0 -1628
  126. package/src/index.ts +0 -935
  127. package/src/learning.integration.test.ts +0 -1815
  128. package/src/learning.ts +0 -1079
  129. package/src/logger.test.ts +0 -189
  130. package/src/logger.ts +0 -135
  131. package/src/mandate-promotion.test.ts +0 -473
  132. package/src/mandate-promotion.ts +0 -239
  133. package/src/mandate-storage.integration.test.ts +0 -601
  134. package/src/mandate-storage.test.ts +0 -578
  135. package/src/mandate-storage.ts +0 -794
  136. package/src/mandates.ts +0 -540
  137. package/src/memory-tools.test.ts +0 -195
  138. package/src/memory-tools.ts +0 -344
  139. package/src/memory.integration.test.ts +0 -334
  140. package/src/memory.test.ts +0 -158
  141. package/src/memory.ts +0 -527
  142. package/src/model-selection.test.ts +0 -188
  143. package/src/model-selection.ts +0 -68
  144. package/src/observability-tools.test.ts +0 -359
  145. package/src/observability-tools.ts +0 -871
  146. package/src/output-guardrails.test.ts +0 -438
  147. package/src/output-guardrails.ts +0 -381
  148. package/src/pattern-maturity.test.ts +0 -1160
  149. package/src/pattern-maturity.ts +0 -525
  150. package/src/planning-guardrails.test.ts +0 -491
  151. package/src/planning-guardrails.ts +0 -438
  152. package/src/plugin.ts +0 -23
  153. package/src/post-compaction-tracker.test.ts +0 -251
  154. package/src/post-compaction-tracker.ts +0 -237
  155. package/src/query-tools.test.ts +0 -636
  156. package/src/query-tools.ts +0 -324
  157. package/src/rate-limiter.integration.test.ts +0 -466
  158. package/src/rate-limiter.ts +0 -774
  159. package/src/replay-tools.test.ts +0 -496
  160. package/src/replay-tools.ts +0 -240
  161. package/src/repo-crawl.integration.test.ts +0 -441
  162. package/src/repo-crawl.ts +0 -610
  163. package/src/schemas/cell-events.test.ts +0 -347
  164. package/src/schemas/cell-events.ts +0 -807
  165. package/src/schemas/cell.ts +0 -257
  166. package/src/schemas/evaluation.ts +0 -166
  167. package/src/schemas/index.test.ts +0 -199
  168. package/src/schemas/index.ts +0 -286
  169. package/src/schemas/mandate.ts +0 -232
  170. package/src/schemas/swarm-context.ts +0 -115
  171. package/src/schemas/task.ts +0 -161
  172. package/src/schemas/worker-handoff.test.ts +0 -302
  173. package/src/schemas/worker-handoff.ts +0 -131
  174. package/src/skills.integration.test.ts +0 -1192
  175. package/src/skills.test.ts +0 -643
  176. package/src/skills.ts +0 -1549
  177. package/src/storage.integration.test.ts +0 -341
  178. package/src/storage.ts +0 -884
  179. package/src/structured.integration.test.ts +0 -817
  180. package/src/structured.test.ts +0 -1046
  181. package/src/structured.ts +0 -762
  182. package/src/swarm-decompose.test.ts +0 -188
  183. package/src/swarm-decompose.ts +0 -1302
  184. package/src/swarm-deferred.integration.test.ts +0 -157
  185. package/src/swarm-deferred.test.ts +0 -38
  186. package/src/swarm-insights.test.ts +0 -214
  187. package/src/swarm-insights.ts +0 -459
  188. package/src/swarm-mail.integration.test.ts +0 -970
  189. package/src/swarm-mail.ts +0 -739
  190. package/src/swarm-orchestrate.integration.test.ts +0 -282
  191. package/src/swarm-orchestrate.test.ts +0 -548
  192. package/src/swarm-orchestrate.ts +0 -3084
  193. package/src/swarm-prompts.test.ts +0 -1270
  194. package/src/swarm-prompts.ts +0 -2077
  195. package/src/swarm-research.integration.test.ts +0 -701
  196. package/src/swarm-research.test.ts +0 -698
  197. package/src/swarm-research.ts +0 -472
  198. package/src/swarm-review.integration.test.ts +0 -285
  199. package/src/swarm-review.test.ts +0 -879
  200. package/src/swarm-review.ts +0 -709
  201. package/src/swarm-strategies.ts +0 -407
  202. package/src/swarm-worktree.test.ts +0 -501
  203. package/src/swarm-worktree.ts +0 -575
  204. package/src/swarm.integration.test.ts +0 -2377
  205. package/src/swarm.ts +0 -38
  206. package/src/tool-adapter.integration.test.ts +0 -1221
  207. package/src/tool-availability.ts +0 -461
  208. package/tsconfig.json +0 -28
@@ -1,257 +0,0 @@
1
- /**
2
- * Cell schemas for type-safe cell operations
3
- *
4
- * These schemas validate all data from the `bd` CLI to ensure
5
- * type safety and catch malformed responses early.
6
- *
7
- * Cells are work items in the Hive (honeycomb metaphor).
8
- * Backward compatibility: Bead* aliases provided for gradual migration.
9
- */
10
- import { z } from "zod";
11
-
12
- /** Valid cell statuses */
13
- export const CellStatusSchema = z.enum([
14
- "open",
15
- "in_progress",
16
- "blocked",
17
- "closed",
18
- ]);
19
- export type CellStatus = z.infer<typeof CellStatusSchema>;
20
-
21
- /** Valid cell types */
22
- export const CellTypeSchema = z.enum([
23
- "bug",
24
- "feature",
25
- "task",
26
- "epic",
27
- "chore",
28
- ]);
29
- export type CellType = z.infer<typeof CellTypeSchema>;
30
-
31
- /** Dependency relationship between cells */
32
- export const CellDependencySchema = z.object({
33
- id: z.string(),
34
- type: z.enum(["blocks", "blocked-by", "related", "discovered-from"]),
35
- });
36
- export type CellDependency = z.infer<typeof CellDependencySchema>;
37
-
38
- /**
39
- * Core cell schema - validates bd CLI JSON output
40
- *
41
- * ID format:
42
- * - Standard: `{project}-{hash}` (e.g., `opencode-swarm-plugin-1i8`)
43
- * - Subtask: `{project}-{hash}.{index}` (e.g., `opencode-swarm-plugin-1i8.1`)
44
- * - Custom: `{project}-{custom-id}` (e.g., `migrate-egghead-phase-0`)
45
- * - Custom subtask: `{project}-{custom-id}.{suffix}` (e.g., `migrate-egghead-phase-0.e2e-test`)
46
- */
47
- export const CellSchema = z.object({
48
- /**
49
- * Cell ID format: project-slug-hash with optional subtask index.
50
- *
51
- * Pattern: `project-name-xxxxx` or `project-name-xxxxx.N`
52
- * Examples:
53
- * - `my-project-abc12` (main cell)
54
- * - `my-project-abc12.1` (first subtask)
55
- * - `my-project-abc12.2` (second subtask)
56
- */
57
- id: z
58
- .string()
59
- .regex(
60
- /^[a-z0-9]+(-[a-z0-9]+)+(\.[\w-]+)?$/,
61
- "Invalid cell ID format (expected: project-slug-hash or project-slug-hash.N)",
62
- ),
63
- title: z.string().min(1, "Title required"),
64
- description: z.string().optional().default(""),
65
- status: CellStatusSchema.default("open"),
66
- priority: z.number().int().min(0).max(3).default(2),
67
- issue_type: CellTypeSchema.default("task"),
68
- created_at: z.string().datetime({
69
- offset: true,
70
- message:
71
- "Must be ISO-8601 datetime with timezone (e.g., 2024-01-15T10:30:00Z)",
72
- }),
73
- updated_at: z
74
- .string()
75
- .datetime({
76
- offset: true,
77
- message:
78
- "Must be ISO-8601 datetime with timezone (e.g., 2024-01-15T10:30:00Z)",
79
- })
80
- .optional(),
81
- closed_at: z.string().datetime({ offset: true }).optional(),
82
- parent_id: z.string().optional(),
83
- dependencies: z.array(CellDependencySchema).default([]),
84
- metadata: z.record(z.string(), z.unknown()).optional(),
85
- });
86
- export type Cell = z.infer<typeof CellSchema>;
87
-
88
- /** Arguments for creating a cell */
89
- export const CellCreateArgsSchema = z.object({
90
- title: z.string().min(1, "Title required"),
91
- type: CellTypeSchema.default("task"),
92
- priority: z.number().int().min(0).max(3).default(2),
93
- description: z.string().optional(),
94
- parent_id: z.string().optional(),
95
- /**
96
- * Custom ID for human-readable cell names.
97
- * MUST include project prefix (e.g., 'migrate-egghead-phase-0', not just 'phase-0').
98
- * For subtasks, use dot notation: 'migrate-egghead-phase-0.e2e-test'
99
- */
100
- id: z.string().optional(),
101
- });
102
- export type CellCreateArgs = z.infer<typeof CellCreateArgsSchema>;
103
-
104
- /** Arguments for updating a cell */
105
- export const CellUpdateArgsSchema = z.object({
106
- id: z.string(),
107
- status: CellStatusSchema.optional(),
108
- description: z.string().optional(),
109
- priority: z.number().int().min(0).max(3).optional(),
110
- });
111
- export type CellUpdateArgs = z.infer<typeof CellUpdateArgsSchema>;
112
-
113
- /** Arguments for closing a cell */
114
- export const CellCloseArgsSchema = z.object({
115
- id: z.string(),
116
- reason: z.string().min(1, "Reason required"),
117
- });
118
- export type CellCloseArgs = z.infer<typeof CellCloseArgsSchema>;
119
-
120
- /** Arguments for querying cells */
121
- export const CellQueryArgsSchema = z.object({
122
- status: CellStatusSchema.optional(),
123
- type: CellTypeSchema.optional(),
124
- ready: z.boolean().optional(),
125
- parent_id: z.string().optional(),
126
- limit: z.number().int().positive().default(20),
127
- });
128
- export type CellQueryArgs = z.infer<typeof CellQueryArgsSchema>;
129
-
130
- /**
131
- * Subtask specification for epic decomposition
132
- *
133
- * Used when creating an epic with subtasks in one operation.
134
- * The `files` array is used for Agent Mail file reservations.
135
- */
136
- export const SubtaskSpecSchema = z.object({
137
- title: z.string().min(1),
138
- description: z.string().optional().default(""),
139
- files: z.array(z.string()).default([]),
140
- dependencies: z.array(z.number().int().min(0)).default([]), // Indices of other subtasks
141
- /**
142
- * Complexity estimate on 1-5 scale:
143
- * 1 = trivial (typo fix, simple rename)
144
- * 2 = simple (single function change)
145
- * 3 = moderate (multi-file, some coordination)
146
- * 4 = complex (significant refactoring)
147
- * 5 = very complex (architectural change)
148
- */
149
- estimated_complexity: z.number().int().min(1).max(5).default(3),
150
- });
151
- export type SubtaskSpec = z.infer<typeof SubtaskSpecSchema>;
152
-
153
- /**
154
- * Cell tree for swarm decomposition
155
- *
156
- * Represents an epic with its subtasks, ready for spawning agents.
157
- */
158
- export const CellTreeSchema = z.object({
159
- epic: z.object({
160
- title: z.string().min(1),
161
- description: z.string().optional().default(""),
162
- }),
163
- subtasks: z.array(SubtaskSpecSchema).min(1),
164
- });
165
- export type CellTree = z.infer<typeof CellTreeSchema>;
166
-
167
- /** Arguments for creating an epic with subtasks */
168
- export const EpicCreateArgsSchema = z.object({
169
- epic_title: z.string().min(1),
170
- epic_description: z.string().optional(),
171
- /**
172
- * Custom ID for the epic. MUST include project prefix.
173
- * Example: 'migrate-egghead-phase-0' (not just 'phase-0')
174
- * If not provided, bd generates a random ID.
175
- */
176
- epic_id: z.string().optional(),
177
- subtasks: z
178
- .array(
179
- z.object({
180
- title: z.string().min(1),
181
- priority: z.number().int().min(0).max(3).default(2),
182
- files: z.array(z.string()).optional().default([]),
183
- /**
184
- * Custom ID suffix for subtask. Combined with epic_id using dot notation.
185
- * Example: epic_id='migrate-egghead-phase-0', id_suffix='e2e-test'
186
- * → subtask ID: 'migrate-egghead-phase-0.e2e-test'
187
- */
188
- id_suffix: z.string().optional(),
189
- }),
190
- )
191
- .min(1),
192
- });
193
- export type EpicCreateArgs = z.infer<typeof EpicCreateArgsSchema>;
194
-
195
- /**
196
- * Result of epic creation
197
- *
198
- * Contains the created epic and all subtasks with their IDs.
199
- */
200
- export const EpicCreateResultSchema = z.object({
201
- success: z.boolean(),
202
- epic: CellSchema,
203
- subtasks: z.array(CellSchema),
204
- rollback_hint: z.string().optional(),
205
- });
206
- export type EpicCreateResult = z.infer<typeof EpicCreateResultSchema>;
207
-
208
- // ============================================================================
209
- // BACKWARD COMPATIBILITY ALIASES
210
- // These aliases maintain compatibility with existing code using Bead* names.
211
- // Gradually migrate to Cell* names in new code.
212
- // ============================================================================
213
-
214
- /** @deprecated Use CellStatusSchema instead */
215
- export const BeadStatusSchema = CellStatusSchema;
216
- /** @deprecated Use CellStatus instead */
217
- export type BeadStatus = CellStatus;
218
-
219
- /** @deprecated Use CellTypeSchema instead */
220
- export const BeadTypeSchema = CellTypeSchema;
221
- /** @deprecated Use CellType instead */
222
- export type BeadType = CellType;
223
-
224
- /** @deprecated Use CellDependencySchema instead */
225
- export const BeadDependencySchema = CellDependencySchema;
226
- /** @deprecated Use CellDependency instead */
227
- export type BeadDependency = CellDependency;
228
-
229
- /** @deprecated Use CellSchema instead */
230
- export const BeadSchema = CellSchema;
231
- /** @deprecated Use Cell instead */
232
- export type Bead = Cell;
233
-
234
- /** @deprecated Use CellCreateArgsSchema instead */
235
- export const BeadCreateArgsSchema = CellCreateArgsSchema;
236
- /** @deprecated Use CellCreateArgs instead */
237
- export type BeadCreateArgs = CellCreateArgs;
238
-
239
- /** @deprecated Use CellUpdateArgsSchema instead */
240
- export const BeadUpdateArgsSchema = CellUpdateArgsSchema;
241
- /** @deprecated Use CellUpdateArgs instead */
242
- export type BeadUpdateArgs = CellUpdateArgs;
243
-
244
- /** @deprecated Use CellCloseArgsSchema instead */
245
- export const BeadCloseArgsSchema = CellCloseArgsSchema;
246
- /** @deprecated Use CellCloseArgs instead */
247
- export type BeadCloseArgs = CellCloseArgs;
248
-
249
- /** @deprecated Use CellQueryArgsSchema instead */
250
- export const BeadQueryArgsSchema = CellQueryArgsSchema;
251
- /** @deprecated Use CellQueryArgs instead */
252
- export type BeadQueryArgs = CellQueryArgs;
253
-
254
- /** @deprecated Use CellTreeSchema instead */
255
- export const BeadTreeSchema = CellTreeSchema;
256
- /** @deprecated Use CellTree instead */
257
- export type BeadTree = CellTree;
@@ -1,166 +0,0 @@
1
- /**
2
- * Evaluation schemas for structured agent output validation
3
- *
4
- * These schemas define the expected format for agent self-evaluations
5
- * and coordinator evaluations of completed work.
6
- *
7
- * Includes support for confidence decay - criteria weights fade over time
8
- * unless revalidated by successful outcomes.
9
- *
10
- * @see src/learning.ts for decay calculations
11
- */
12
- import { z } from "zod";
13
-
14
- /**
15
- * Evaluation of a single criterion.
16
- *
17
- * @example
18
- * // Passing criterion
19
- * { passed: true, feedback: "All types validated", score: 0.95 }
20
- *
21
- * @example
22
- * // Failing criterion
23
- * { passed: false, feedback: "Missing error handling in auth flow", score: 0.3 }
24
- */
25
- export const CriterionEvaluationSchema = z.object({
26
- passed: z.boolean(),
27
- feedback: z.string(),
28
- score: z.number().min(0).max(1).optional(), // 0-1 normalized score
29
- });
30
- export type CriterionEvaluation = z.infer<typeof CriterionEvaluationSchema>;
31
-
32
- /**
33
- * Weighted criterion evaluation with confidence decay
34
- *
35
- * Extends CriterionEvaluation with weight information from learning.
36
- * Lower weights indicate criteria that have been historically unreliable.
37
- */
38
- export const WeightedCriterionEvaluationSchema =
39
- CriterionEvaluationSchema.extend({
40
- /**
41
- * Current weight after 90-day half-life decay.
42
- * Range: 0-1 where 1 = recent/validated, 0 = old/unreliable.
43
- * Weights decay over time unless revalidated via semantic-memory_validate.
44
- */
45
- weight: z.number().min(0).max(1).default(1),
46
- /** Weighted score = score * weight */
47
- weighted_score: z.number().min(0).max(1).optional(),
48
- /** Whether this criterion is deprecated due to high failure rate */
49
- deprecated: z.boolean().default(false),
50
- });
51
- export type WeightedCriterionEvaluation = z.infer<
52
- typeof WeightedCriterionEvaluationSchema
53
- >;
54
-
55
- /**
56
- * Full evaluation result
57
- *
58
- * Returned by agents after completing a subtask.
59
- * Used by coordinator to determine if work is acceptable.
60
- */
61
- export const EvaluationSchema = z.object({
62
- passed: z.boolean(),
63
- criteria: z.record(z.string(), CriterionEvaluationSchema),
64
- overall_feedback: z.string(),
65
- retry_suggestion: z.string().nullable(),
66
- timestamp: z.string().datetime({ offset: true }).optional(), // ISO-8601 with timezone
67
- });
68
- export type Evaluation = z.infer<typeof EvaluationSchema>;
69
-
70
- /**
71
- * Default evaluation criteria
72
- *
73
- * These are the standard criteria used when none are specified.
74
- * Can be overridden per-task or per-project.
75
- */
76
- export const DEFAULT_CRITERIA = [
77
- "type_safe",
78
- "no_bugs",
79
- "patterns",
80
- "readable",
81
- ] as const;
82
- export type DefaultCriterion = (typeof DEFAULT_CRITERIA)[number];
83
-
84
- /**
85
- * Evaluation request arguments
86
- */
87
- export const EvaluationRequestSchema = z.object({
88
- bead_id: z.string(),
89
- subtask_title: z.string(),
90
- files_touched: z.array(z.string()),
91
- /** ISO-8601 timestamp when evaluation was requested */
92
- requested_at: z.string().datetime().optional(),
93
- });
94
- export type EvaluationRequest = z.infer<typeof EvaluationRequestSchema>;
95
-
96
- /**
97
- * Weighted evaluation result with confidence-adjusted scores
98
- *
99
- * Used when applying learned weights to evaluation criteria.
100
- */
101
- export const WeightedEvaluationSchema = z.object({
102
- passed: z.boolean(),
103
- criteria: z.record(z.string(), WeightedCriterionEvaluationSchema),
104
- overall_feedback: z.string(),
105
- retry_suggestion: z.string().nullable(),
106
- timestamp: z.string().datetime({ offset: true }).optional(), // ISO-8601 with timezone
107
- /** Average weight across all criteria (indicates overall confidence) */
108
- average_weight: z.number().min(0).max(1).optional(),
109
- /** Raw score before weighting */
110
- raw_score: z.number().min(0).max(1).optional(),
111
- /** Weighted score after applying criterion weights */
112
- weighted_score: z.number().min(0).max(1).optional(),
113
- });
114
- export type WeightedEvaluation = z.infer<typeof WeightedEvaluationSchema>;
115
-
116
- /**
117
- * Aggregated evaluation results for a swarm
118
- */
119
- export const SwarmEvaluationResultSchema = z.object({
120
- epic_id: z.string(),
121
- total: z.number().int().min(0),
122
- passed: z.number().int().min(0),
123
- failed: z.number().int().min(0),
124
- evaluations: z.array(
125
- z.object({
126
- bead_id: z.string(),
127
- evaluation: EvaluationSchema,
128
- }),
129
- ),
130
- overall_passed: z.boolean(),
131
- retry_needed: z.array(z.string()), // Cell IDs that need retry
132
- });
133
- export type SwarmEvaluationResult = z.infer<typeof SwarmEvaluationResultSchema>;
134
-
135
- /**
136
- * Validation result with retry info
137
- */
138
- export const ValidationResultSchema = z.object({
139
- success: z.boolean(),
140
- data: z.unknown().optional(),
141
- attempts: z.number().int().min(1),
142
- errors: z.array(z.string()).optional(),
143
- extractionMethod: z.string().optional(),
144
- });
145
- export type ValidationResult = z.infer<typeof ValidationResultSchema>;
146
-
147
- /**
148
- * Failure mode taxonomy for task failures
149
- *
150
- * Classifies WHY tasks fail, not just that they failed.
151
- * Used in outcome tracking to learn from failure patterns.
152
- *
153
- * @see src/learning.ts OutcomeSignalsSchema
154
- * @see "Patterns for Building AI Agents" p.46
155
- */
156
- export const FailureModeSchema = z.enum([
157
- "timeout", // Task exceeded time limit
158
- "conflict", // File reservation conflict
159
- "validation", // Output failed schema validation
160
- "tool_failure", // Tool call returned error
161
- "context_overflow", // Ran out of context window
162
- "dependency_blocked", // Waiting on another subtask
163
- "user_cancelled", // User interrupted
164
- "unknown", // Unclassified
165
- ]);
166
- export type FailureMode = z.infer<typeof FailureModeSchema>;
@@ -1,199 +0,0 @@
1
- import { describe, expect, it } from "vitest";
2
- import {
3
- BeadSchema,
4
- BeadTypeSchema,
5
- BeadCreateArgsSchema,
6
- EpicCreateArgsSchema,
7
- EvaluationSchema,
8
- TaskDecompositionSchema,
9
- DecomposedSubtaskSchema,
10
- SwarmStatusSchema,
11
- ValidationResultSchema,
12
- } from "./index";
13
-
14
- describe("BeadSchema", () => {
15
- it("validates a complete bead", () => {
16
- const bead = {
17
- id: "bd-abc123",
18
- title: "Fix the thing",
19
- type: "bug",
20
- status: "open",
21
- priority: 1,
22
- created_at: "2025-01-01T00:00:00Z",
23
- updated_at: "2025-01-01T00:00:00Z",
24
- };
25
- expect(() => BeadSchema.parse(bead)).not.toThrow();
26
- });
27
-
28
- it("rejects invalid priority", () => {
29
- const bead = {
30
- id: "bd-abc123",
31
- title: "Fix the thing",
32
- type: "bug",
33
- status: "open",
34
- priority: 5, // Invalid: max is 3
35
- created_at: "2025-01-01T00:00:00Z",
36
- updated_at: "2025-01-01T00:00:00Z",
37
- };
38
- expect(() => BeadSchema.parse(bead)).toThrow();
39
- });
40
-
41
- it("accepts all valid types", () => {
42
- const types = ["bug", "feature", "task", "epic", "chore"];
43
- for (const type of types) {
44
- expect(() => BeadTypeSchema.parse(type)).not.toThrow();
45
- }
46
- });
47
- });
48
-
49
- describe("BeadCreateArgsSchema", () => {
50
- it("validates minimal create args", () => {
51
- const args = { title: "New bead" };
52
- const result = BeadCreateArgsSchema.parse(args);
53
- expect(result.title).toBe("New bead");
54
- expect(result.type).toBe("task"); // default
55
- expect(result.priority).toBe(2); // default
56
- });
57
-
58
- it("rejects empty title", () => {
59
- const args = { title: "" };
60
- expect(() => BeadCreateArgsSchema.parse(args)).toThrow();
61
- });
62
- });
63
-
64
- describe("EpicCreateArgsSchema", () => {
65
- it("validates epic with subtasks", () => {
66
- const args = {
67
- epic_title: "Big feature",
68
- subtasks: [
69
- { title: "Part 1", priority: 2 },
70
- { title: "Part 2", priority: 3 },
71
- ],
72
- };
73
- expect(() => EpicCreateArgsSchema.parse(args)).not.toThrow();
74
- });
75
-
76
- it("requires at least one subtask", () => {
77
- const args = {
78
- epic_title: "Big feature",
79
- subtasks: [],
80
- };
81
- expect(() => EpicCreateArgsSchema.parse(args)).toThrow();
82
- });
83
- });
84
-
85
- describe("EvaluationSchema", () => {
86
- it("validates a passing evaluation", () => {
87
- const evaluation = {
88
- passed: true,
89
- criteria: {
90
- type_safe: { passed: true, feedback: "All types correct" },
91
- no_bugs: { passed: true, feedback: "No issues found" },
92
- },
93
- overall_feedback: "Good work",
94
- retry_suggestion: null,
95
- };
96
- expect(() => EvaluationSchema.parse(evaluation)).not.toThrow();
97
- });
98
-
99
- it("validates a failing evaluation with retry suggestion", () => {
100
- const evaluation = {
101
- passed: false,
102
- criteria: {
103
- type_safe: { passed: false, feedback: "Missing types on line 42" },
104
- },
105
- overall_feedback: "Needs work",
106
- retry_suggestion: "Add explicit types to the handler function",
107
- };
108
- expect(() => EvaluationSchema.parse(evaluation)).not.toThrow();
109
- });
110
- });
111
-
112
- describe("TaskDecompositionSchema", () => {
113
- it("validates a decomposition", () => {
114
- const decomposition = {
115
- task: "Add OAuth authentication",
116
- reasoning: "Breaking into provider setup and integration",
117
- subtasks: [
118
- {
119
- title: "Add OAuth provider",
120
- description: "Configure Google OAuth",
121
- files: ["src/auth/google.ts"],
122
- estimated_effort: "medium" as const,
123
- },
124
- ],
125
- dependencies: [],
126
- shared_context: "Using NextAuth.js",
127
- };
128
- expect(() => TaskDecompositionSchema.parse(decomposition)).not.toThrow();
129
- });
130
-
131
- it("validates subtask effort levels", () => {
132
- const efforts = ["trivial", "small", "medium", "large"];
133
- for (const effort of efforts) {
134
- const subtask = {
135
- title: "Test",
136
- description: "Test description",
137
- files: [],
138
- estimated_effort: effort,
139
- };
140
- expect(() => DecomposedSubtaskSchema.parse(subtask)).not.toThrow();
141
- }
142
- });
143
- });
144
-
145
- describe("SwarmStatusSchema", () => {
146
- it("validates swarm status", () => {
147
- const status = {
148
- epic_id: "bd-epic123",
149
- total_agents: 3,
150
- running: 1,
151
- completed: 1,
152
- failed: 0,
153
- blocked: 1,
154
- agents: [
155
- {
156
- bead_id: "bd-1",
157
- agent_name: "BlueLake",
158
- status: "completed" as const,
159
- files: ["src/a.ts"],
160
- },
161
- {
162
- bead_id: "bd-2",
163
- agent_name: "RedStone",
164
- status: "running" as const,
165
- files: ["src/b.ts"],
166
- },
167
- {
168
- bead_id: "bd-3",
169
- agent_name: "GreenCastle",
170
- status: "pending" as const,
171
- files: ["src/c.ts"],
172
- },
173
- ],
174
- last_update: "2025-01-01T00:00:00Z",
175
- };
176
- expect(() => SwarmStatusSchema.parse(status)).not.toThrow();
177
- });
178
- });
179
-
180
- describe("ValidationResultSchema", () => {
181
- it("validates success result", () => {
182
- const result = {
183
- success: true,
184
- data: { foo: "bar" },
185
- attempts: 1,
186
- extractionMethod: "direct",
187
- };
188
- expect(() => ValidationResultSchema.parse(result)).not.toThrow();
189
- });
190
-
191
- it("validates failure result with errors", () => {
192
- const result = {
193
- success: false,
194
- attempts: 2,
195
- errors: ["Missing required field: name", "Invalid type for age"],
196
- };
197
- expect(() => ValidationResultSchema.parse(result)).not.toThrow();
198
- });
199
- });