opencode-swarm-plugin 0.44.0 → 0.44.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. package/bin/swarm.serve.test.ts +6 -4
  2. package/bin/swarm.ts +18 -12
  3. package/dist/compaction-prompt-scoring.js +139 -0
  4. package/dist/eval-capture.js +12811 -0
  5. package/dist/hive.d.ts.map +1 -1
  6. package/dist/hive.js +14834 -0
  7. package/dist/index.d.ts +18 -0
  8. package/dist/index.d.ts.map +1 -1
  9. package/dist/index.js +7743 -62593
  10. package/dist/plugin.js +24052 -78907
  11. package/dist/swarm-orchestrate.d.ts.map +1 -1
  12. package/dist/swarm-prompts.d.ts.map +1 -1
  13. package/dist/swarm-prompts.js +39407 -0
  14. package/dist/swarm-review.d.ts.map +1 -1
  15. package/dist/swarm-validation.d.ts +127 -0
  16. package/dist/swarm-validation.d.ts.map +1 -0
  17. package/dist/validators/index.d.ts +7 -0
  18. package/dist/validators/index.d.ts.map +1 -0
  19. package/dist/validators/schema-validator.d.ts +58 -0
  20. package/dist/validators/schema-validator.d.ts.map +1 -0
  21. package/package.json +17 -5
  22. package/.changeset/swarm-insights-data-layer.md +0 -63
  23. package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
  24. package/.hive/analysis/session-data-quality-audit.md +0 -320
  25. package/.hive/eval-results.json +0 -483
  26. package/.hive/issues.jsonl +0 -138
  27. package/.hive/memories.jsonl +0 -729
  28. package/.opencode/eval-history.jsonl +0 -327
  29. package/.turbo/turbo-build.log +0 -9
  30. package/CHANGELOG.md +0 -2286
  31. package/SCORER-ANALYSIS.md +0 -598
  32. package/docs/analysis/subagent-coordination-patterns.md +0 -902
  33. package/docs/analysis-socratic-planner-pattern.md +0 -504
  34. package/docs/planning/ADR-001-monorepo-structure.md +0 -171
  35. package/docs/planning/ADR-002-package-extraction.md +0 -393
  36. package/docs/planning/ADR-003-performance-improvements.md +0 -451
  37. package/docs/planning/ADR-004-message-queue-features.md +0 -187
  38. package/docs/planning/ADR-005-devtools-observability.md +0 -202
  39. package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
  40. package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
  41. package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
  42. package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
  43. package/docs/planning/ROADMAP.md +0 -368
  44. package/docs/semantic-memory-cli-syntax.md +0 -123
  45. package/docs/swarm-mail-architecture.md +0 -1147
  46. package/docs/testing/context-recovery-test.md +0 -470
  47. package/evals/ARCHITECTURE.md +0 -1189
  48. package/evals/README.md +0 -768
  49. package/evals/compaction-prompt.eval.ts +0 -149
  50. package/evals/compaction-resumption.eval.ts +0 -289
  51. package/evals/coordinator-behavior.eval.ts +0 -307
  52. package/evals/coordinator-session.eval.ts +0 -154
  53. package/evals/evalite.config.ts.bak +0 -15
  54. package/evals/example.eval.ts +0 -31
  55. package/evals/fixtures/cass-baseline.ts +0 -217
  56. package/evals/fixtures/compaction-cases.ts +0 -350
  57. package/evals/fixtures/compaction-prompt-cases.ts +0 -311
  58. package/evals/fixtures/coordinator-sessions.ts +0 -328
  59. package/evals/fixtures/decomposition-cases.ts +0 -105
  60. package/evals/lib/compaction-loader.test.ts +0 -248
  61. package/evals/lib/compaction-loader.ts +0 -320
  62. package/evals/lib/data-loader.evalite-test.ts +0 -289
  63. package/evals/lib/data-loader.test.ts +0 -345
  64. package/evals/lib/data-loader.ts +0 -281
  65. package/evals/lib/llm.ts +0 -115
  66. package/evals/scorers/compaction-prompt-scorers.ts +0 -145
  67. package/evals/scorers/compaction-scorers.ts +0 -305
  68. package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
  69. package/evals/scorers/coordinator-discipline.ts +0 -325
  70. package/evals/scorers/index.test.ts +0 -146
  71. package/evals/scorers/index.ts +0 -328
  72. package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
  73. package/evals/scorers/outcome-scorers.ts +0 -349
  74. package/evals/swarm-decomposition.eval.ts +0 -121
  75. package/examples/commands/swarm.md +0 -745
  76. package/examples/plugin-wrapper-template.ts +0 -2515
  77. package/examples/skills/hive-workflow/SKILL.md +0 -212
  78. package/examples/skills/skill-creator/SKILL.md +0 -223
  79. package/examples/skills/swarm-coordination/SKILL.md +0 -292
  80. package/global-skills/cli-builder/SKILL.md +0 -344
  81. package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
  82. package/global-skills/learning-systems/SKILL.md +0 -644
  83. package/global-skills/skill-creator/LICENSE.txt +0 -202
  84. package/global-skills/skill-creator/SKILL.md +0 -352
  85. package/global-skills/skill-creator/references/output-patterns.md +0 -82
  86. package/global-skills/skill-creator/references/workflows.md +0 -28
  87. package/global-skills/swarm-coordination/SKILL.md +0 -995
  88. package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
  89. package/global-skills/swarm-coordination/references/strategies.md +0 -138
  90. package/global-skills/system-design/SKILL.md +0 -213
  91. package/global-skills/testing-patterns/SKILL.md +0 -430
  92. package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
  93. package/opencode-swarm-plugin-0.30.7.tgz +0 -0
  94. package/opencode-swarm-plugin-0.31.0.tgz +0 -0
  95. package/scripts/cleanup-test-memories.ts +0 -346
  96. package/scripts/init-skill.ts +0 -222
  97. package/scripts/migrate-unknown-sessions.ts +0 -349
  98. package/scripts/validate-skill.ts +0 -204
  99. package/src/agent-mail.ts +0 -1724
  100. package/src/anti-patterns.test.ts +0 -1167
  101. package/src/anti-patterns.ts +0 -448
  102. package/src/compaction-capture.integration.test.ts +0 -257
  103. package/src/compaction-hook.test.ts +0 -838
  104. package/src/compaction-hook.ts +0 -1204
  105. package/src/compaction-observability.integration.test.ts +0 -139
  106. package/src/compaction-observability.test.ts +0 -187
  107. package/src/compaction-observability.ts +0 -324
  108. package/src/compaction-prompt-scorers.test.ts +0 -475
  109. package/src/compaction-prompt-scoring.ts +0 -300
  110. package/src/contributor-tools.test.ts +0 -133
  111. package/src/contributor-tools.ts +0 -201
  112. package/src/dashboard.test.ts +0 -611
  113. package/src/dashboard.ts +0 -462
  114. package/src/error-enrichment.test.ts +0 -403
  115. package/src/error-enrichment.ts +0 -219
  116. package/src/eval-capture.test.ts +0 -1015
  117. package/src/eval-capture.ts +0 -929
  118. package/src/eval-gates.test.ts +0 -306
  119. package/src/eval-gates.ts +0 -218
  120. package/src/eval-history.test.ts +0 -508
  121. package/src/eval-history.ts +0 -214
  122. package/src/eval-learning.test.ts +0 -378
  123. package/src/eval-learning.ts +0 -360
  124. package/src/eval-runner.test.ts +0 -223
  125. package/src/eval-runner.ts +0 -402
  126. package/src/export-tools.test.ts +0 -476
  127. package/src/export-tools.ts +0 -257
  128. package/src/hive.integration.test.ts +0 -2241
  129. package/src/hive.ts +0 -1628
  130. package/src/index.ts +0 -940
  131. package/src/learning.integration.test.ts +0 -1815
  132. package/src/learning.ts +0 -1079
  133. package/src/logger.test.ts +0 -189
  134. package/src/logger.ts +0 -135
  135. package/src/mandate-promotion.test.ts +0 -473
  136. package/src/mandate-promotion.ts +0 -239
  137. package/src/mandate-storage.integration.test.ts +0 -601
  138. package/src/mandate-storage.test.ts +0 -578
  139. package/src/mandate-storage.ts +0 -794
  140. package/src/mandates.ts +0 -540
  141. package/src/memory-tools.test.ts +0 -195
  142. package/src/memory-tools.ts +0 -344
  143. package/src/memory.integration.test.ts +0 -334
  144. package/src/memory.test.ts +0 -158
  145. package/src/memory.ts +0 -527
  146. package/src/model-selection.test.ts +0 -188
  147. package/src/model-selection.ts +0 -68
  148. package/src/observability-tools.test.ts +0 -359
  149. package/src/observability-tools.ts +0 -871
  150. package/src/output-guardrails.test.ts +0 -438
  151. package/src/output-guardrails.ts +0 -381
  152. package/src/pattern-maturity.test.ts +0 -1160
  153. package/src/pattern-maturity.ts +0 -525
  154. package/src/planning-guardrails.test.ts +0 -491
  155. package/src/planning-guardrails.ts +0 -438
  156. package/src/plugin.ts +0 -23
  157. package/src/post-compaction-tracker.test.ts +0 -251
  158. package/src/post-compaction-tracker.ts +0 -237
  159. package/src/query-tools.test.ts +0 -636
  160. package/src/query-tools.ts +0 -324
  161. package/src/rate-limiter.integration.test.ts +0 -466
  162. package/src/rate-limiter.ts +0 -774
  163. package/src/replay-tools.test.ts +0 -496
  164. package/src/replay-tools.ts +0 -240
  165. package/src/repo-crawl.integration.test.ts +0 -441
  166. package/src/repo-crawl.ts +0 -610
  167. package/src/schemas/cell-events.test.ts +0 -347
  168. package/src/schemas/cell-events.ts +0 -807
  169. package/src/schemas/cell.ts +0 -257
  170. package/src/schemas/evaluation.ts +0 -166
  171. package/src/schemas/index.test.ts +0 -199
  172. package/src/schemas/index.ts +0 -286
  173. package/src/schemas/mandate.ts +0 -232
  174. package/src/schemas/swarm-context.ts +0 -115
  175. package/src/schemas/task.ts +0 -161
  176. package/src/schemas/worker-handoff.test.ts +0 -302
  177. package/src/schemas/worker-handoff.ts +0 -131
  178. package/src/sessions/agent-discovery.test.ts +0 -137
  179. package/src/sessions/agent-discovery.ts +0 -112
  180. package/src/sessions/index.ts +0 -15
  181. package/src/skills.integration.test.ts +0 -1192
  182. package/src/skills.test.ts +0 -643
  183. package/src/skills.ts +0 -1549
  184. package/src/storage.integration.test.ts +0 -341
  185. package/src/storage.ts +0 -884
  186. package/src/structured.integration.test.ts +0 -817
  187. package/src/structured.test.ts +0 -1046
  188. package/src/structured.ts +0 -762
  189. package/src/swarm-decompose.test.ts +0 -188
  190. package/src/swarm-decompose.ts +0 -1302
  191. package/src/swarm-deferred.integration.test.ts +0 -157
  192. package/src/swarm-deferred.test.ts +0 -38
  193. package/src/swarm-insights.test.ts +0 -214
  194. package/src/swarm-insights.ts +0 -459
  195. package/src/swarm-mail.integration.test.ts +0 -970
  196. package/src/swarm-mail.ts +0 -739
  197. package/src/swarm-orchestrate.integration.test.ts +0 -282
  198. package/src/swarm-orchestrate.test.ts +0 -548
  199. package/src/swarm-orchestrate.ts +0 -3084
  200. package/src/swarm-prompts.test.ts +0 -1270
  201. package/src/swarm-prompts.ts +0 -2077
  202. package/src/swarm-research.integration.test.ts +0 -701
  203. package/src/swarm-research.test.ts +0 -698
  204. package/src/swarm-research.ts +0 -472
  205. package/src/swarm-review.integration.test.ts +0 -285
  206. package/src/swarm-review.test.ts +0 -879
  207. package/src/swarm-review.ts +0 -709
  208. package/src/swarm-strategies.ts +0 -407
  209. package/src/swarm-worktree.test.ts +0 -501
  210. package/src/swarm-worktree.ts +0 -575
  211. package/src/swarm.integration.test.ts +0 -2377
  212. package/src/swarm.ts +0 -38
  213. package/src/tool-adapter.integration.test.ts +0 -1221
  214. package/src/tool-availability.ts +0 -461
  215. package/tsconfig.json +0 -28
package/evals/lib/data-loader.ts DELETED
@@ -1,281 +0,0 @@
1
- /**
2
- * PGlite-backed eval data loader
3
- *
4
- * Loads real decomposition outcomes from the eval_records table
5
- * for use in Evalite evals.
6
- */
7
- import * as fs from "node:fs";
8
- import {
9
- getEvalRecords,
10
- getEvalStats,
11
- type EvalRecord,
12
- } from "swarm-mail";
13
-
/**
 * A single eval case in the shape produced by `loadEvalCases`.
 */
export interface EvalCase {
  /** Task text given to the decomposer, plus optional free-form context. */
  input: { task: string; context?: string };

  /** Bounds and outcome the decomposition is compared against. */
  expected: {
    /** Smallest acceptable number of subtasks. */
    minSubtasks: number;
    /** Largest acceptable number of subtasks. */
    maxSubtasks: number;
    /** Files expected to be touched by the subtasks, when known. */
    requiredFiles?: string[];
    /** Recorded overall outcome, when one was captured. */
    overallSuccess?: boolean;
  };

  /** The stored record this case was derived from, when available. */
  actual?: EvalRecord;
}
24
-
/**
 * Build eval cases from stored decomposition records.
 *
 * Records come from `getEvalRecords`, with `limit` and `strategy` passed
 * through. When `successOnly` is set, only records whose `overall_success`
 * is exactly `true` are kept; that filter runs on whatever the query
 * returned. Each kept record is reshaped into an `EvalCase`.
 *
 * @param projectKey - Project key forwarded to `getEvalRecords`
 * @param options - Optional `limit`, `strategy`, `successOnly` and `projectPath`
 * @returns One `EvalCase` per kept record, with the record attached as `actual`
 */
export async function loadEvalCases(
  projectKey: string,
  options?: {
    limit?: number;
    strategy?: "file-based" | "feature-based" | "risk-based";
    successOnly?: boolean;
    projectPath?: string;
  },
): Promise<EvalCase[]> {
  const storedRecords = await getEvalRecords(
    projectKey,
    { limit: options?.limit, strategy: options?.strategy },
    options?.projectPath,
  );

  const cases: EvalCase[] = [];
  for (const stored of storedRecords) {
    // successOnly drops everything not explicitly marked successful
    if (options?.successOnly && stored.overall_success !== true) {
      continue;
    }

    cases.push({
      input: {
        task: stored.task,
        context: stored.context ?? undefined,
      },
      expected: {
        minSubtasks: 2,
        maxSubtasks: stored.subtasks.length,
        requiredFiles: stored.subtasks.flatMap((subtask) => subtask.files),
        overallSuccess: stored.overall_success ?? undefined,
      },
      actual: stored,
    });
  }

  return cases;
}
70
-
/**
 * Report whether enough eval records exist to run evals on real data.
 *
 * @param projectKey - Project key passed to `getEvalStats`
 * @param minRecords - Record count that must be reached (default: 5)
 * @param projectPath - Optional project path passed to `getEvalStats`
 * @returns `true` when the reported `totalRecords` is at least `minRecords`
 */
export async function hasRealEvalData(
  projectKey: string,
  minRecords: number = 5,
  projectPath?: string,
): Promise<boolean> {
  const { totalRecords } = await getEvalStats(projectKey, projectPath);
  return totalRecords >= minRecords;
}
87
-
/**
 * Get eval data stats for reporting.
 *
 * Reads the stats once via `getEvalStats` and copies `totalRecords`,
 * `successRate` and `byStrategy` into the result, adding a `hasEnoughData`
 * flag.
 *
 * @param projectKey - Project key to query
 * @param projectPath - Optional project path for database lookup
 * @param minRecords - Record count needed for `hasEnoughData` to be true
 *   (default: 5, the same default `hasRealEvalData` uses)
 * @returns Summary of available eval data
 */
export async function getEvalDataSummary(
  projectKey: string,
  projectPath?: string,
  minRecords: number = 5,
): Promise<{
  totalRecords: number;
  successRate: number;
  byStrategy: Record<string, number>;
  hasEnoughData: boolean;
}> {
  const stats = await getEvalStats(projectKey, projectPath);

  return {
    totalRecords: stats.totalRecords,
    successRate: stats.successRate,
    byStrategy: stats.byStrategy,
    hasEnoughData: stats.totalRecords >= minRecords,
  };
}
113
-
/**
 * Decide whether a session passes the quality filters.
 *
 * A session passes when it has at least `minEvents` events and, for each
 * `require*` flag that is set, contains a `DECISION` event of the matching
 * `decision_type` (`worker_spawned` / `review_completed`).
 */
function meetsQualityCriteria(
  session: import("../../src/eval-capture.js").CoordinatorSession,
  criteria: {
    minEvents: number;
    requireWorkerSpawn: boolean;
    requireReview: boolean;
  },
): boolean {
  const { events } = session;

  // True when any DECISION event carries the given decision_type
  const hasDecision = (decisionType: string): boolean =>
    events.some(
      (event) =>
        event.event_type === "DECISION" &&
        event.decision_type === decisionType,
    );

  // Too few events
  if (events.length < criteria.minEvents) {
    return false;
  }

  // Worker spawn demanded but never recorded
  if (criteria.requireWorkerSpawn && !hasDecision("worker_spawned")) {
    return false;
  }

  // Review is the last gate: pass unless it is demanded and missing
  return !criteria.requireReview || hasDecision("review_completed");
}
153
-
/**
 * Load captured coordinator sessions from the session directory.
 *
 * The directory is `options.sessionDir` when given, otherwise the value of
 * `getSessionDir()` from eval-capture (originally documented as
 * `~/.config/swarm-tools/sessions/`). Every `.jsonl` file in it is one
 * session; the session ID is the file name with the `.jsonl` suffix removed.
 *
 * Quality filters are applied to focus on high-signal coordinator sessions:
 * - minEvents: Filter out incomplete/aborted sessions (default: 3)
 * - requireWorkerSpawn: Ensure session delegated to workers (default: true)
 * - requireReview: Ensure coordinator reviewed work (default: true)
 *
 * Filters are applied BEFORE the limit: loading stops once `limit` sessions
 * have passed the filters. Sessions with no events, or whose first event has
 * no `epic_id`, are skipped. Sessions that throw while being read or parsed
 * are reported with `console.warn` and skipped.
 *
 * @param options - Filter options
 * @returns Array of coordinator sessions that meet quality criteria
 */
export async function loadCapturedSessions(options?: {
  sessionIds?: string[];
  limit?: number;
  /** Minimum number of events required (default: 3) */
  minEvents?: number;
  /** Require at least one worker_spawned event (default: true) */
  requireWorkerSpawn?: boolean;
  /** Require at least one review_completed event (default: true) */
  requireReview?: boolean;
  /** Override session directory for testing */
  sessionDir?: string;
}): Promise<
  Array<{ session: import("../../src/eval-capture.js").CoordinatorSession }>
> {
  type CoordinatorEvent = import("../../src/eval-capture.js").CoordinatorEvent;
  type CoordinatorSession =
    import("../../src/eval-capture.js").CoordinatorSession;

  // Import once up front; the schema is only needed on the custom-dir path
  // but comes from the same module, so there is no reason to re-import per file.
  const { getSessionDir, readSessionEvents, CoordinatorEventSchema } =
    await import("../../src/eval-capture.js");
  const sessionDir = options?.sessionDir ?? getSessionDir();

  // Default quality filters
  const qualityCriteria = {
    minEvents: options?.minEvents ?? 3,
    requireWorkerSpawn: options?.requireWorkerSpawn ?? true,
    requireReview: options?.requireReview ?? true,
  };

  // If session dir doesn't exist, return empty
  if (!fs.existsSync(sessionDir)) {
    return [];
  }

  const extension = ".jsonl";
  const requestedIds = options?.sessionIds
    ? new Set(options.sessionIds)
    : undefined;

  // Strip the suffix by length: String.replace with a string pattern removes
  // the FIRST occurrence, which is wrong for names like "a.jsonl.bak.jsonl".
  const sessionIds = fs
    .readdirSync(sessionDir)
    .filter((file) => file.endsWith(extension))
    .map((file) => file.slice(0, -extension.length))
    .filter((id) => requestedIds === undefined || requestedIds.has(id));

  const sessions: Array<{ session: CoordinatorSession }> = [];
  let filteredOutCount = 0;

  for (const sessionId of sessionIds) {
    try {
      let events: CoordinatorEvent[];

      // If custom sessionDir, read directly; otherwise use eval-capture functions
      if (options?.sessionDir) {
        const sessionPath = `${sessionDir}/${sessionId}${extension}`;
        if (!fs.existsSync(sessionPath)) continue;

        events = fs
          .readFileSync(sessionPath, "utf-8")
          .split(/\r?\n/)
          .map((line) => line.trim())
          // Drop blank lines so stray whitespace cannot fail the whole session
          .filter(Boolean)
          .map((line) => CoordinatorEventSchema.parse(JSON.parse(line)));
      } else {
        events = readSessionEvents(sessionId);
      }

      const firstEvent = events[0];
      if (firstEvent === undefined) continue;

      // The epic ID is taken from the first event
      const epicId = firstEvent.epic_id;
      if (!epicId) continue;

      const session: CoordinatorSession = {
        session_id: sessionId,
        epic_id: epicId,
        start_time: firstEvent.timestamp ?? new Date().toISOString(),
        end_time: events[events.length - 1]?.timestamp,
        events,
      };

      // Apply quality filters BEFORE limit
      if (meetsQualityCriteria(session, qualityCriteria)) {
        sessions.push({ session });
      } else {
        filteredOutCount++;
      }
    } catch (error) {
      // Skip invalid sessions
      console.warn(`Failed to load session ${sessionId}:`, error);
    }

    // Apply limit AFTER filtering
    if (options?.limit && sessions.length >= options.limit) {
      break;
    }
  }

  // Log filtering stats for visibility
  if (filteredOutCount > 0) {
    console.log(
      `Filtered out ${filteredOutCount} sessions (minEvents=${qualityCriteria.minEvents}, requireWorkerSpawn=${qualityCriteria.requireWorkerSpawn}, requireReview=${qualityCriteria.requireReview})`,
    );
  }

  return sessions;
}
package/evals/lib/llm.ts DELETED
@@ -1,115 +0,0 @@
1
- /**
2
- * LLM Client for Evalite Evals
3
- *
4
- * Uses AI SDK v6 with Vercel AI Gateway.
5
- * Gateway handles provider routing - just pass "provider/model" string.
6
- *
7
- * @module evals/lib/llm
8
- */
9
- import { generateText, gateway } from "ai";
10
- import type { GatewayModelId } from "ai";
11
-
/**
 * Gateway model ID used for decomposition evals when the caller does not
 * pick one. Claude Sonnet was chosen as a balance of quality and cost.
 */
export const DEFAULT_MODEL: GatewayModelId = "anthropic/claude-sonnet-4-5";
17
-
/**
 * Ask the LLM for a decomposition and return its reply verbatim.
 *
 * The prompt is sent through `generateText` with the model resolved by
 * `gateway(model)` and output capped at 4096 tokens.
 *
 * @param prompt - The full decomposition prompt
 * @param model - Gateway model ID (e.g., "anthropic/claude-sonnet-4-5");
 *   defaults to `DEFAULT_MODEL`
 * @returns The raw text response from the LLM
 */
export async function generateDecomposition(
  prompt: string,
  model: GatewayModelId = DEFAULT_MODEL,
): Promise<string> {
  const response = await generateText({
    prompt,
    model: gateway(model),
    maxOutputTokens: 4096,
  });

  return response.text;
}
37
-
/**
 * Format a decomposition prompt from task and context.
 *
 * Uses the same prompt template as swarm_plan_prompt. The prompt is a
 * sequence of sections separated by one blank line: intro, `## Task`, an
 * optional `## Context`, then requirements, response format and guidelines.
 *
 * @param task - Task description placed under `## Task`
 * @param context - Optional extra context; when missing or empty the
 *   `## Context` section is left out entirely rather than leaving a gap
 * @param maxSubtasks - Upper bound shown in the "2-N" subtask range
 *   (default: 6). Fractions are rounded down, values below 2 are raised to 2,
 *   and non-finite values fall back to 6 so the range always reads sensibly.
 * @returns The complete prompt text
 */
export function formatDecompositionPrompt(
  task: string,
  context?: string,
  maxSubtasks: number = 6,
): string {
  const upperBound = Number.isFinite(maxSubtasks)
    ? Math.max(2, Math.floor(maxSubtasks))
    : 6;

  const sections: string[] = [
    "You are decomposing a task into parallelizable subtasks for a swarm of agents.",
    `## Task\n${task}`,
  ];

  if (context) {
    sections.push(`## Context\n${context}`);
  }

  sections.push(`## Requirements

1. **Break into 2-${upperBound} independent subtasks** that can run in parallel
2. **Assign files** - each subtask must specify which files it will modify
3. **No file overlap** - files cannot appear in multiple subtasks (they get exclusive locks)
4. **Order by dependency** - if subtask B needs subtask A's output, A must come first in the array
5. **Estimate complexity** - 1 (trivial) to 5 (complex)

## Response Format

Respond with ONLY a JSON object matching this schema (no markdown, no explanation):

{
"epic": {
"title": "string",
"description": "string"
},
"subtasks": [
{
"title": "string",
"description": "string",
"files": ["string"],
"dependencies": [0],
"estimated_complexity": 1
}
]
}

## Guidelines

- **Plan aggressively** - when in doubt, split further
- **Prefer smaller, focused subtasks** over large complex ones
- **Include test files** in the same subtask as the code they test
- **Be specific about files** - use actual file paths, not placeholders

Now decompose the task. Respond with JSON only:`);

  return sections.join("\n\n");
}
94
-
/**
 * Extract JSON from LLM response.
 *
 * Handles responses that may have markdown code blocks or extra text.
 * Candidates are tried in this order:
 * 1. The trimmed content of the first fenced code block. An optional
 *    `json` tag is accepted in any letter case; an empty block is ignored.
 * 2. The span from the first `{` to the last `}` in the text.
 * 3. The text itself, unchanged.
 *
 * The result is not validated; callers still need to parse it.
 *
 * @param text - Raw LLM response
 * @returns The best JSON candidate found, or `text` when none was found
 */
export function extractJson(text: string): string {
  // Case-insensitive so a "```JSON" fence does not leak "JSON" into the result
  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text)?.[1]?.trim();
  if (fenced) {
    return fenced;
  }

  // Greedy on purpose: nested objects must stay inside the outermost braces
  const rawObject = /\{[\s\S]*\}/.exec(text)?.[0];
  return rawObject ?? text;
}
package/evals/scorers/compaction-prompt-scorers.ts DELETED
@@ -1,145 +0,0 @@
1
- /**
2
- * Compaction Prompt Quality Scorers - Evalite Wrappers
3
- *
4
- * These wrap the pure scoring functions from src/compaction-prompt-scoring.ts
5
- * for use with evalite's test runner.
6
- *
7
- * Weighted scoring:
8
- * - epicIdSpecificity (0.20) - real IDs not placeholders
9
- * - actionability (0.20) - swarm_status/inbox with real values
10
- * - coordinatorIdentity (0.25) - ASCII header + strong mandates
11
- * - forbiddenToolsPresent (0.15) - lists forbidden tools by name
12
- * - postCompactionDiscipline (0.20) - first tool correct, no edit/write
13
- */
14
-
15
- import { createScorer } from "evalite";
16
- import type { CompactionPrompt } from "../../src/compaction-prompt-scoring.js";
17
- import {
18
- scoreActionability,
19
- scoreCoordinatorIdentity,
20
- scoreEpicIdSpecificity,
21
- scoreForbiddenToolsPresent,
22
- scorePostCompactionDiscipline,
23
- } from "../../src/compaction-prompt-scoring.js";
24
-
25
- // Re-export types for convenience
26
- export type { CompactionPrompt, ScorerResult } from "../../src/compaction-prompt-scoring.js";
27
-
28
- // Re-export pure functions for direct use
29
- export {
30
- scoreActionability,
31
- scoreCoordinatorIdentity,
32
- scoreEpicIdSpecificity,
33
- scoreForbiddenToolsPresent,
34
- scorePostCompactionDiscipline,
35
- } from "../../src/compaction-prompt-scoring.js";
36
-
/**
 * Parse a scorer's raw output as a CompactionPrompt and hand it to `score`.
 *
 * The output is stringified and JSON-parsed. If parsing or the scoring
 * function itself throws, the result is a zero score whose message carries
 * the error text.
 *
 * @param output - Raw task output received by the evalite scorer
 * @param score - Pure scoring function applied to the parsed prompt
 * @returns The scoring function's result, or the zero-score fallback
 */
function scoreParsedPrompt<TResult>(
  output: unknown,
  score: (prompt: CompactionPrompt) => TResult,
): TResult | { score: number; message: string } {
  try {
    const prompt = JSON.parse(String(output)) as CompactionPrompt;
    return score(prompt);
  } catch (error) {
    return {
      score: 0,
      message: `Failed to parse prompt: ${error}`,
    };
  }
}

/**
 * Epic ID Specificity Scorer
 *
 * Validates that epic IDs are REAL, not placeholders.
 * Score: 1.0 if real IDs, 0.0 if placeholders found
 */
export const epicIdSpecificity = createScorer({
  name: "Epic ID Specificity",
  description: "Prompt uses real epic IDs, not placeholders",
  scorer: ({ output }) => scoreParsedPrompt(output, scoreEpicIdSpecificity),
});

/**
 * Actionability Scorer
 *
 * Validates that the prompt includes SPECIFIC actionable tool calls.
 * Score: 1.0 if actionable tool calls with real values, 0.0 otherwise
 */
export const actionability = createScorer({
  name: "Actionability",
  description: "Prompt includes specific tool calls with real values",
  scorer: ({ output }) => scoreParsedPrompt(output, scoreActionability),
});

/**
 * Coordinator Identity Scorer
 *
 * Validates that the prompt has STRONG coordinator identity reinforcement.
 * Score: 1.0 for ASCII header + strong mandates, 0.5 for header only, 0.0 otherwise
 */
export const coordinatorIdentity = createScorer({
  name: "Coordinator Identity",
  description: "Prompt has ASCII header and strong mandates",
  scorer: ({ output }) => scoreParsedPrompt(output, scoreCoordinatorIdentity),
});

/**
 * Forbidden Tools Present Scorer
 *
 * Validates that the prompt LISTS forbidden tools by name.
 * Score: ratio of forbidden tools mentioned (0.0 to 1.0)
 */
export const forbiddenToolsPresent = createScorer({
  name: "Forbidden Tools Present",
  description: "Prompt lists forbidden tools by name",
  scorer: ({ output }) => scoreParsedPrompt(output, scoreForbiddenToolsPresent),
});

/**
 * Post-Compaction Discipline Scorer
 *
 * Validates that the FIRST suggested tool is correct.
 * Score: 1.0 if first tool is swarm_status or inbox, 0.0 otherwise
 */
export const postCompactionDiscipline = createScorer({
  name: "Post-Compaction Discipline",
  description: "First suggested tool is swarm_status or inbox",
  scorer: ({ output }) =>
    scoreParsedPrompt(output, scorePostCompactionDiscipline),
});