opencode-swarm-plugin 0.39.1 → 0.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +331 -0
- package/.hive/analysis/session-data-quality-audit.md +320 -0
- package/.hive/eval-results.json +481 -24
- package/.hive/issues.jsonl +76 -11
- package/.hive/memories.jsonl +159 -1
- package/.opencode/eval-history.jsonl +315 -0
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +207 -0
- package/README.md +2 -0
- package/SCORER-ANALYSIS.md +598 -0
- package/bin/eval-gate.test.ts +158 -0
- package/bin/eval-gate.ts +74 -0
- package/bin/swarm.test.ts +1054 -719
- package/bin/swarm.ts +577 -0
- package/dist/compaction-hook.d.ts +10 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-observability.d.ts +173 -0
- package/dist/compaction-observability.d.ts.map +1 -0
- package/dist/compaction-prompt-scoring.d.ts +1 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -1
- package/dist/eval-capture.d.ts +93 -0
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-runner.d.ts +134 -0
- package/dist/eval-runner.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +65 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +84043 -28070
- package/dist/memory-tools.d.ts +70 -2
- package/dist/memory-tools.d.ts.map +1 -1
- package/dist/memory.d.ts +37 -0
- package/dist/memory.d.ts.map +1 -1
- package/dist/observability-tools.d.ts +64 -0
- package/dist/observability-tools.d.ts.map +1 -1
- package/dist/plugin.js +83570 -27466
- package/dist/schemas/task.d.ts +3 -3
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +32 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +353 -0
- package/evals/ARCHITECTURE.md +1189 -0
- package/evals/README.md +113 -0
- package/evals/example.eval.ts +3 -4
- package/evals/fixtures/compaction-prompt-cases.ts +6 -0
- package/evals/scorers/coordinator-discipline.evalite-test.ts +163 -0
- package/evals/scorers/coordinator-discipline.ts +82 -2
- package/evals/scorers/index.test.ts +146 -0
- package/evals/scorers/index.ts +104 -0
- package/evals/swarm-decomposition.eval.ts +13 -4
- package/examples/commands/swarm.md +291 -21
- package/package.json +4 -3
- package/src/compaction-hook.ts +258 -110
- package/src/compaction-observability.integration.test.ts +139 -0
- package/src/compaction-observability.test.ts +187 -0
- package/src/compaction-observability.ts +324 -0
- package/src/compaction-prompt-scorers.test.ts +10 -9
- package/src/compaction-prompt-scoring.ts +7 -5
- package/src/eval-capture.test.ts +204 -1
- package/src/eval-capture.ts +194 -2
- package/src/eval-runner.test.ts +223 -0
- package/src/eval-runner.ts +402 -0
- package/src/hive.ts +57 -22
- package/src/index.ts +54 -1
- package/src/memory-tools.test.ts +84 -0
- package/src/memory-tools.ts +68 -3
- package/src/memory.test.ts +2 -2
- package/src/memory.ts +122 -49
- package/src/observability-tools.test.ts +13 -0
- package/src/observability-tools.ts +277 -0
- package/src/swarm-orchestrate.test.ts +162 -0
- package/src/swarm-orchestrate.ts +7 -5
- package/src/swarm-prompts.test.ts +168 -4
- package/src/swarm-prompts.ts +228 -7
- package/.env +0 -2
- package/.turbo/turbo-test.log +0 -481
- package/.turbo/turbo-typecheck.log +0 -1
- package/dist/beads.d.ts +0 -386
- package/dist/beads.d.ts.map +0 -1
- package/dist/schemas/bead-events.d.ts +0 -698
- package/dist/schemas/bead-events.d.ts.map +0 -1
- package/dist/schemas/bead.d.ts +0 -255
- package/dist/schemas/bead.d.ts.map +0 -1
package/src/swarm-prompts.ts
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import { tool } from "@opencode-ai/plugin";
|
|
16
16
|
import { generateWorkerHandoff } from "./swarm-orchestrate";
|
|
17
17
|
import { captureCoordinatorEvent } from "./eval-capture.js";
|
|
18
|
+
import { getMemoryAdapter } from "./memory-tools.js";
|
|
18
19
|
|
|
19
20
|
// ============================================================================
|
|
20
21
|
// Prompt Templates
|
|
@@ -1097,6 +1098,209 @@ For each criterion, assess passed/failed and provide brief feedback:
|
|
|
1097
1098
|
If any criterion fails, the overall evaluation fails and retry_suggestion
|
|
1098
1099
|
should describe what needs to be fixed.`;
|
|
1099
1100
|
|
|
1101
|
+
// ============================================================================
|
|
1102
|
+
// Eval Failure Learning Integration
|
|
1103
|
+
// ============================================================================
|
|
1104
|
+
|
|
1105
|
+
/**
|
|
1106
|
+
* Query recent eval failures from semantic memory
|
|
1107
|
+
*
|
|
1108
|
+
* Coordinators call this at session start to learn from recent eval regressions.
|
|
1109
|
+
* Returns formatted string for injection into coordinator prompts.
|
|
1110
|
+
*
|
|
1111
|
+
* @returns Formatted string of recent failures (empty if none or memory unavailable)
|
|
1112
|
+
*/
|
|
1113
|
+
export async function getRecentEvalFailures(): Promise<string> {
|
|
1114
|
+
try {
|
|
1115
|
+
const adapter = await getMemoryAdapter();
|
|
1116
|
+
|
|
1117
|
+
// Query memories for eval failures
|
|
1118
|
+
const result = await adapter.find({
|
|
1119
|
+
query: "eval-failure regression coordinator",
|
|
1120
|
+
limit: 3,
|
|
1121
|
+
});
|
|
1122
|
+
|
|
1123
|
+
if (result.count === 0) {
|
|
1124
|
+
return "";
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
const lines = result.results.map((f) => `- ${f.content.slice(0, 200)}...`);
|
|
1128
|
+
|
|
1129
|
+
return `
|
|
1130
|
+
## ⚠️ Recent Eval Failures (Learn From These)
|
|
1131
|
+
|
|
1132
|
+
The following eval regressions were detected recently. Avoid these patterns:
|
|
1133
|
+
|
|
1134
|
+
${lines.join("\n")}
|
|
1135
|
+
|
|
1136
|
+
**Action:** Review these failures and ensure your coordination avoids similar issues.
|
|
1137
|
+
`;
|
|
1138
|
+
} catch (e) {
|
|
1139
|
+
// Best effort - don't fail if memory unavailable
|
|
1140
|
+
console.warn("Failed to query eval failures:", e);
|
|
1141
|
+
return "";
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
// ============================================================================
|
|
1146
|
+
// Prompt Insights Integration
|
|
1147
|
+
// ============================================================================
|
|
1148
|
+
|
|
1149
|
+
interface PromptInsightsOptions {
|
|
1150
|
+
role: "coordinator" | "worker";
|
|
1151
|
+
project_key?: string;
|
|
1152
|
+
files?: string[];
|
|
1153
|
+
domain?: string;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
/**
|
|
1157
|
+
* Get swarm insights for prompt injection
|
|
1158
|
+
*
|
|
1159
|
+
* Queries recent swarm outcomes and semantic memory to surface:
|
|
1160
|
+
* - Strategy success rates
|
|
1161
|
+
* - Common failure modes
|
|
1162
|
+
* - Anti-patterns
|
|
1163
|
+
* - File/domain-specific learnings
|
|
1164
|
+
*
|
|
1165
|
+
* Returns formatted string for injection into coordinator or worker prompts.
|
|
1166
|
+
*
|
|
1167
|
+
* @param options - Role and filters for insights
|
|
1168
|
+
* @returns Formatted insights string (empty if no data or errors)
|
|
1169
|
+
*/
|
|
1170
|
+
export async function getPromptInsights(
|
|
1171
|
+
options: PromptInsightsOptions,
|
|
1172
|
+
): Promise<string> {
|
|
1173
|
+
try {
|
|
1174
|
+
if (options.role === "coordinator") {
|
|
1175
|
+
return await getCoordinatorInsights(options.project_key);
|
|
1176
|
+
} else {
|
|
1177
|
+
return await getWorkerInsights(options.files, options.domain);
|
|
1178
|
+
}
|
|
1179
|
+
} catch (e) {
|
|
1180
|
+
// Best effort - don't fail if data unavailable
|
|
1181
|
+
console.warn("Failed to query prompt insights:", e);
|
|
1182
|
+
return "";
|
|
1183
|
+
}
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1186
|
+
/**
|
|
1187
|
+
* Get coordinator-specific insights (strategy stats, anti-patterns)
|
|
1188
|
+
*/
|
|
1189
|
+
async function getCoordinatorInsights(project_key?: string): Promise<string> {
|
|
1190
|
+
try {
|
|
1191
|
+
// Import swarm-mail and analytics
|
|
1192
|
+
const { createLibSQLAdapter, createSwarmMailAdapter, strategySuccessRates } = await import("swarm-mail");
|
|
1193
|
+
|
|
1194
|
+
// Create libSQL database adapter
|
|
1195
|
+
const dbAdapter = await createLibSQLAdapter({ url: "file:./.swarm-mail/streams.db" });
|
|
1196
|
+
|
|
1197
|
+
// Create swarm-mail adapter with database
|
|
1198
|
+
const adapter = createSwarmMailAdapter(dbAdapter, project_key || "default");
|
|
1199
|
+
|
|
1200
|
+
// Get database for raw queries
|
|
1201
|
+
const db = await adapter.getDatabase();
|
|
1202
|
+
|
|
1203
|
+
// Query strategy success rates
|
|
1204
|
+
const query = strategySuccessRates({ project_key });
|
|
1205
|
+
const result = await db.query(query.sql, Object.values(query.parameters || {}));
|
|
1206
|
+
|
|
1207
|
+
if (!result || !result.rows || result.rows.length === 0) {
|
|
1208
|
+
return "";
|
|
1209
|
+
}
|
|
1210
|
+
|
|
1211
|
+
// Format as markdown table
|
|
1212
|
+
const rows = result.rows.map((r: any) => {
|
|
1213
|
+
const strategy = r.strategy || "unknown";
|
|
1214
|
+
const total = r.total_attempts || 0;
|
|
1215
|
+
const successRate = r.success_rate || 0;
|
|
1216
|
+
const emoji = successRate >= 80 ? "✅" : successRate >= 60 ? "⚠️" : "❌";
|
|
1217
|
+
|
|
1218
|
+
return `| ${emoji} ${strategy} | ${successRate.toFixed(1)}% | ${total} |`;
|
|
1219
|
+
});
|
|
1220
|
+
|
|
1221
|
+
// Limit to top 5 strategies to prevent context bloat
|
|
1222
|
+
const topRows = rows.slice(0, 5);
|
|
1223
|
+
|
|
1224
|
+
// Add anti-pattern hints for low-success strategies
|
|
1225
|
+
const antiPatterns = result.rows
|
|
1226
|
+
.filter((r: any) => r.success_rate < 60)
|
|
1227
|
+
.map((r: any) => `- AVOID: ${r.strategy} strategy (${r.success_rate.toFixed(1)}% success rate)`)
|
|
1228
|
+
.slice(0, 3);
|
|
1229
|
+
|
|
1230
|
+
const antiPatternsSection = antiPatterns.length > 0
|
|
1231
|
+
? `\n\n**Anti-Patterns:**\n${antiPatterns.join("\n")}`
|
|
1232
|
+
: "";
|
|
1233
|
+
|
|
1234
|
+
return `
|
|
1235
|
+
## 📊 Swarm Insights (Strategy Success Rates)
|
|
1236
|
+
|
|
1237
|
+
| Strategy | Success Rate | Total Attempts |
|
|
1238
|
+
|----------|--------------|----------------|
|
|
1239
|
+
${topRows.join("\n")}
|
|
1240
|
+
|
|
1241
|
+
**Use these insights to select decomposition strategies.**${antiPatternsSection}
|
|
1242
|
+
`;
|
|
1243
|
+
} catch (e) {
|
|
1244
|
+
console.warn("Failed to get coordinator insights:", e);
|
|
1245
|
+
return "";
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
/**
|
|
1250
|
+
* Get worker-specific insights (file/domain learnings, common pitfalls)
|
|
1251
|
+
*/
|
|
1252
|
+
async function getWorkerInsights(
|
|
1253
|
+
files?: string[],
|
|
1254
|
+
domain?: string,
|
|
1255
|
+
): Promise<string> {
|
|
1256
|
+
try {
|
|
1257
|
+
const adapter = await getMemoryAdapter();
|
|
1258
|
+
|
|
1259
|
+
// Build query from files and domain
|
|
1260
|
+
let query = "";
|
|
1261
|
+
if (files && files.length > 0) {
|
|
1262
|
+
// Extract domain keywords from file paths
|
|
1263
|
+
const keywords = files
|
|
1264
|
+
.flatMap((f) => f.split(/[\/\\.]/).filter((part) => part.length > 2))
|
|
1265
|
+
.slice(0, 5);
|
|
1266
|
+
query = keywords.join(" ");
|
|
1267
|
+
} else if (domain) {
|
|
1268
|
+
query = domain;
|
|
1269
|
+
} else {
|
|
1270
|
+
return ""; // No context to query
|
|
1271
|
+
}
|
|
1272
|
+
|
|
1273
|
+
// Query semantic memory for relevant learnings
|
|
1274
|
+
const result = await adapter.find({
|
|
1275
|
+
query: `${query} gotcha pitfall pattern bug`,
|
|
1276
|
+
limit: 3,
|
|
1277
|
+
});
|
|
1278
|
+
|
|
1279
|
+
if (result.count === 0) {
|
|
1280
|
+
return "";
|
|
1281
|
+
}
|
|
1282
|
+
|
|
1283
|
+
// Format as bullet list
|
|
1284
|
+
const learnings = result.results.map((r) => {
|
|
1285
|
+
const content = r.content.length > 150
|
|
1286
|
+
? r.content.slice(0, 150) + "..."
|
|
1287
|
+
: r.content;
|
|
1288
|
+
return `- ${content}`;
|
|
1289
|
+
});
|
|
1290
|
+
|
|
1291
|
+
return `
|
|
1292
|
+
## 💡 Relevant Learnings (from past agents)
|
|
1293
|
+
|
|
1294
|
+
${learnings.join("\n")}
|
|
1295
|
+
|
|
1296
|
+
**Check semantic-memory for full details if needed.**
|
|
1297
|
+
`;
|
|
1298
|
+
} catch (e) {
|
|
1299
|
+
console.warn("Failed to get worker insights:", e);
|
|
1300
|
+
return "";
|
|
1301
|
+
}
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1100
1304
|
// ============================================================================
|
|
1101
1305
|
// Helper Functions
|
|
1102
1306
|
// ============================================================================
|
|
@@ -1140,7 +1344,7 @@ export function formatCoordinatorPrompt(params: {
|
|
|
1140
1344
|
/**
|
|
1141
1345
|
* Format the V2 subtask prompt for a specific agent
|
|
1142
1346
|
*/
|
|
1143
|
-
export function formatSubtaskPromptV2(params: {
|
|
1347
|
+
export async function formatSubtaskPromptV2(params: {
|
|
1144
1348
|
bead_id: string;
|
|
1145
1349
|
epic_id: string;
|
|
1146
1350
|
subtask_title: string;
|
|
@@ -1155,7 +1359,7 @@ export function formatSubtaskPromptV2(params: {
|
|
|
1155
1359
|
skills_to_load?: string[];
|
|
1156
1360
|
coordinator_notes?: string;
|
|
1157
1361
|
};
|
|
1158
|
-
}): string {
|
|
1362
|
+
}): Promise<string> {
|
|
1159
1363
|
const fileList =
|
|
1160
1364
|
params.files.length > 0
|
|
1161
1365
|
? params.files.map((f) => `- \`${f}\``).join("\n")
|
|
@@ -1166,6 +1370,13 @@ export function formatSubtaskPromptV2(params: {
|
|
|
1166
1370
|
: "";
|
|
1167
1371
|
|
|
1168
1372
|
const errorSection = params.error_context ? params.error_context : "";
|
|
1373
|
+
|
|
1374
|
+
// Fetch worker insights (file/domain specific learnings)
|
|
1375
|
+
const insights = await getPromptInsights({
|
|
1376
|
+
role: "worker",
|
|
1377
|
+
files: params.files,
|
|
1378
|
+
domain: params.subtask_title.split(/\s+/).slice(0, 3).join(" ") // Extract domain from title
|
|
1379
|
+
});
|
|
1169
1380
|
|
|
1170
1381
|
// Build recovery context section
|
|
1171
1382
|
let recoverySection = "";
|
|
@@ -1218,6 +1429,11 @@ export function formatSubtaskPromptV2(params: {
|
|
|
1218
1429
|
const handoffJson = JSON.stringify(handoff, null, 2);
|
|
1219
1430
|
const handoffSection = `\n## WorkerHandoff Contract\n\nThis is your machine-readable contract. The contract IS the instruction.\n\n\`\`\`json\n${handoffJson}\n\`\`\`\n`;
|
|
1220
1431
|
|
|
1432
|
+
// Inject insights into shared_context section
|
|
1433
|
+
const sharedContextWithInsights = insights
|
|
1434
|
+
? `${params.shared_context || "(none)"}\n\n${insights}`
|
|
1435
|
+
: params.shared_context || "(none)";
|
|
1436
|
+
|
|
1221
1437
|
return SUBTASK_PROMPT_V2.replace(/{bead_id}/g, params.bead_id)
|
|
1222
1438
|
.replace(/{epic_id}/g, params.epic_id)
|
|
1223
1439
|
.replace(/{project_path}/g, params.project_path || "$PWD")
|
|
@@ -1227,7 +1443,7 @@ export function formatSubtaskPromptV2(params: {
|
|
|
1227
1443
|
params.subtask_description || "(see title)",
|
|
1228
1444
|
)
|
|
1229
1445
|
.replace("{file_list}", fileList)
|
|
1230
|
-
.replace("{shared_context}",
|
|
1446
|
+
.replace("{shared_context}", sharedContextWithInsights)
|
|
1231
1447
|
.replace("{compressed_context}", compressedSection)
|
|
1232
1448
|
.replace("{error_context}", errorSection + recoverySection + handoffSection);
|
|
1233
1449
|
}
|
|
@@ -1359,7 +1575,7 @@ export const swarm_spawn_subtask = tool({
|
|
|
1359
1575
|
.describe("Optional explicit model override (auto-selected if not provided)"),
|
|
1360
1576
|
},
|
|
1361
1577
|
async execute(args, _ctx) {
|
|
1362
|
-
const prompt = formatSubtaskPromptV2({
|
|
1578
|
+
const prompt = await formatSubtaskPromptV2({
|
|
1363
1579
|
bead_id: args.bead_id,
|
|
1364
1580
|
epic_id: args.epic_id,
|
|
1365
1581
|
subtask_title: args.subtask_title,
|
|
@@ -1776,13 +1992,18 @@ export const swarm_plan_prompt = tool({
|
|
|
1776
1992
|
}
|
|
1777
1993
|
}
|
|
1778
1994
|
|
|
1995
|
+
// Fetch swarm insights (strategy success rates, anti-patterns)
|
|
1996
|
+
const insights = await getPromptInsights({ role: "coordinator" });
|
|
1997
|
+
|
|
1779
1998
|
// Format strategy guidelines
|
|
1780
1999
|
const strategyGuidelines = formatStrategyGuidelines(selectedStrategy);
|
|
1781
2000
|
|
|
1782
|
-
// Combine user context
|
|
2001
|
+
// Combine user context and insights
|
|
1783
2002
|
const contextSection = args.context
|
|
1784
|
-
? `## Additional Context\n${args.context}`
|
|
1785
|
-
:
|
|
2003
|
+
? `## Additional Context\n${args.context}\n\n${insights}`
|
|
2004
|
+
: insights
|
|
2005
|
+
? `## Additional Context\n(none provided)\n\n${insights}`
|
|
2006
|
+
: "## Additional Context\n(none provided)";
|
|
1786
2007
|
|
|
1787
2008
|
// Build the prompt (without CASS - we'll let the module handle that)
|
|
1788
2009
|
const prompt = STRATEGY_DECOMPOSITION_PROMPT.replace("{task}", args.task)
|
package/.env
DELETED