opencode-swarm-plugin 0.37.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/.env +2 -0
  2. package/.hive/eval-results.json +26 -0
  3. package/.hive/issues.jsonl +20 -5
  4. package/.hive/memories.jsonl +35 -1
  5. package/.opencode/eval-history.jsonl +12 -0
  6. package/.turbo/turbo-build.log +4 -4
  7. package/.turbo/turbo-test.log +319 -319
  8. package/CHANGELOG.md +258 -0
  9. package/README.md +50 -0
  10. package/bin/swarm.test.ts +475 -0
  11. package/bin/swarm.ts +385 -208
  12. package/dist/compaction-hook.d.ts +1 -1
  13. package/dist/compaction-hook.d.ts.map +1 -1
  14. package/dist/compaction-prompt-scoring.d.ts +124 -0
  15. package/dist/compaction-prompt-scoring.d.ts.map +1 -0
  16. package/dist/eval-capture.d.ts +81 -1
  17. package/dist/eval-capture.d.ts.map +1 -1
  18. package/dist/eval-gates.d.ts +84 -0
  19. package/dist/eval-gates.d.ts.map +1 -0
  20. package/dist/eval-history.d.ts +117 -0
  21. package/dist/eval-history.d.ts.map +1 -0
  22. package/dist/eval-learning.d.ts +216 -0
  23. package/dist/eval-learning.d.ts.map +1 -0
  24. package/dist/hive.d.ts +59 -0
  25. package/dist/hive.d.ts.map +1 -1
  26. package/dist/index.d.ts +87 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +823 -131
  29. package/dist/plugin.js +655 -131
  30. package/dist/post-compaction-tracker.d.ts +133 -0
  31. package/dist/post-compaction-tracker.d.ts.map +1 -0
  32. package/dist/swarm-decompose.d.ts +30 -0
  33. package/dist/swarm-decompose.d.ts.map +1 -1
  34. package/dist/swarm-orchestrate.d.ts +23 -0
  35. package/dist/swarm-orchestrate.d.ts.map +1 -1
  36. package/dist/swarm-prompts.d.ts +25 -1
  37. package/dist/swarm-prompts.d.ts.map +1 -1
  38. package/dist/swarm.d.ts +19 -0
  39. package/dist/swarm.d.ts.map +1 -1
  40. package/evals/README.md +595 -94
  41. package/evals/compaction-prompt.eval.ts +149 -0
  42. package/evals/coordinator-behavior.eval.ts +8 -8
  43. package/evals/fixtures/compaction-prompt-cases.ts +305 -0
  44. package/evals/lib/compaction-loader.test.ts +248 -0
  45. package/evals/lib/compaction-loader.ts +320 -0
  46. package/evals/lib/data-loader.test.ts +345 -0
  47. package/evals/lib/data-loader.ts +107 -6
  48. package/evals/scorers/compaction-prompt-scorers.ts +145 -0
  49. package/evals/scorers/compaction-scorers.ts +13 -13
  50. package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
  51. package/evals/scorers/coordinator-discipline.ts +13 -13
  52. package/examples/plugin-wrapper-template.ts +177 -8
  53. package/package.json +7 -2
  54. package/scripts/migrate-unknown-sessions.ts +349 -0
  55. package/src/compaction-capture.integration.test.ts +257 -0
  56. package/src/compaction-hook.test.ts +139 -2
  57. package/src/compaction-hook.ts +113 -2
  58. package/src/compaction-prompt-scorers.test.ts +299 -0
  59. package/src/compaction-prompt-scoring.ts +298 -0
  60. package/src/eval-capture.test.ts +422 -0
  61. package/src/eval-capture.ts +94 -2
  62. package/src/eval-gates.test.ts +306 -0
  63. package/src/eval-gates.ts +218 -0
  64. package/src/eval-history.test.ts +508 -0
  65. package/src/eval-history.ts +214 -0
  66. package/src/eval-learning.test.ts +378 -0
  67. package/src/eval-learning.ts +360 -0
  68. package/src/index.ts +61 -1
  69. package/src/post-compaction-tracker.test.ts +251 -0
  70. package/src/post-compaction-tracker.ts +237 -0
  71. package/src/swarm-decompose.test.ts +40 -47
  72. package/src/swarm-decompose.ts +2 -2
  73. package/src/swarm-orchestrate.test.ts +270 -7
  74. package/src/swarm-orchestrate.ts +100 -13
  75. package/src/swarm-prompts.test.ts +121 -0
  76. package/src/swarm-prompts.ts +297 -4
  77. package/src/swarm-research.integration.test.ts +157 -0
  78. package/src/swarm-review.ts +3 -3
  79. /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/dist/index.js CHANGED
@@ -22178,6 +22178,383 @@ Codebase context considered: ${args.codebase_context.slice(0, 200)}...`;
22178
22178
  };
22179
22179
  });
22180
22180
 
22181
+ // src/eval-capture.ts
22182
// Export table for the bundled src/eval-capture.ts module.
// Each key is a public name mapped to a zero-argument arrow that returns the
// module-level binding of the same name, so the binding is read only when the
// arrow is invoked (several of these bindings are assigned later, inside
// init_eval_capture). NOTE(review): __export is defined outside this view;
// presumably it installs these arrows as getters on exports_eval_capture — confirm.
+ var exports_eval_capture = {};
22183
+ __export(exports_eval_capture, {
22184
+ updateEvalRecord: () => updateEvalRecord,
22185
+ saveSession: () => saveSession,
22186
+ readSessionEvents: () => readSessionEvents,
22187
+ readPartialRecords: () => readPartialRecords,
22188
+ readEvalRecords: () => readEvalRecords,
22189
+ getSessionPath: () => getSessionPath,
22190
+ getSessionDir: () => getSessionDir,
22191
+ getEvalDataStats: () => getEvalDataStats,
22192
+ getEvalDataPath: () => getEvalDataPath,
22193
+ finalizeEvalRecord: () => finalizeEvalRecord,
22194
+ exportForEvalite: () => exportForEvalite,
22195
+ ensureSessionDir: () => ensureSessionDir,
22196
+ ensureEvalDataDir: () => ensureEvalDataDir,
22197
+ captureSubtaskOutcome: () => captureSubtaskOutcome,
22198
+ captureHumanFeedback: () => captureHumanFeedback,
22199
+ captureDecomposition: () => captureDecomposition,
22200
+ captureCoordinatorEvent: () => captureCoordinatorEvent,
22201
+ captureCompactionEvent: () => captureCompactionEvent,
22202
+ appendEvalRecord: () => appendEvalRecord,
22203
+ SubtaskOutcomeSchema: () => SubtaskOutcomeSchema,
22204
+ EvalRecordSchema: () => EvalRecordSchema,
22205
+ DEFAULT_EVAL_DATA_PATH: () => DEFAULT_EVAL_DATA_PATH,
22206
+ CoordinatorSessionSchema: () => CoordinatorSessionSchema,
22207
+ CoordinatorEventSchema: () => CoordinatorEventSchema
22208
+ });
22209
+ import * as fs from "node:fs";
22210
+ import * as os from "node:os";
22211
+ import * as path from "node:path";
22212
/**
 * Resolve the location of the eval-data JSONL file for a project.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {string} `projectPath` joined with `DEFAULT_EVAL_DATA_PATH`.
 */
function getEvalDataPath(projectPath) {
  const evalFile = path.join(projectPath, DEFAULT_EVAL_DATA_PATH);
  return evalFile;
}
22215
/**
 * Make sure the directory that holds the eval-data file exists.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {void}
 */
function ensureEvalDataDir(projectPath) {
  const parentDir = path.dirname(getEvalDataPath(projectPath));
  if (fs.existsSync(parentDir)) {
    return;
  }
  // recursive: true also creates any missing intermediate directories.
  fs.mkdirSync(parentDir, { recursive: true });
}
22222
/**
 * Append one record to the project's eval-data file as a single JSON line.
 *
 * @param {string} projectPath - Project root directory.
 * @param {object} record2 - Record to serialize; written followed by a newline.
 * @returns {void}
 */
function appendEvalRecord(projectPath, record2) {
  ensureEvalDataDir(projectPath);
  const target = getEvalDataPath(projectPath);
  const serialized = `${JSON.stringify(record2)}\n`;
  fs.appendFileSync(target, serialized, "utf-8");
}
22229
/**
 * Load every record from the project's eval-data file and validate each one
 * against `EvalRecordSchema`.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {Array<object>} Parsed records; empty when the file does not exist.
 * @throws If a line is not valid JSON or fails `EvalRecordSchema.parse`.
 */
function readEvalRecords(projectPath) {
  const evalPath = getEvalDataPath(projectPath);
  if (!fs.existsSync(evalPath)) {
    return [];
  }
  const records = [];
  const rawLines = fs.readFileSync(evalPath, "utf-8").trim().split("\n");
  for (const rawLine of rawLines) {
    // Skip empty lines (an empty file yields a single empty string).
    if (!rawLine) {
      continue;
    }
    records.push(EvalRecordSchema.parse(JSON.parse(rawLine)));
  }
  return records;
}
22242
/**
 * Load every record from the project's eval-data file without schema
 * validation (each line is only JSON-parsed).
 *
 * @param {string} projectPath - Project root directory.
 * @returns {Array<object>} Parsed lines; empty when the file does not exist.
 * @throws {SyntaxError} If a line is not valid JSON.
 */
function readPartialRecords(projectPath) {
  const evalPath = getEvalDataPath(projectPath);
  if (!fs.existsSync(evalPath)) {
    return [];
  }
  const parsed = [];
  const rawLines = fs.readFileSync(evalPath, "utf-8").trim().split("\n");
  for (const rawLine of rawLines) {
    if (rawLine) {
      parsed.push(JSON.parse(rawLine));
    }
  }
  return parsed;
}
22252
/**
 * Merge `updates` into the stored record whose `id` matches and rewrite the
 * whole eval-data file.
 *
 * @param {string} projectPath - Project root directory.
 * @param {string} id - Record id to update.
 * @param {object} updates - Fields shallow-merged over the stored record.
 * @returns {boolean} `true` when a record was updated, `false` when no record has that id.
 */
function updateEvalRecord(projectPath, id, updates) {
  const records = readPartialRecords(projectPath);
  const position = records.findIndex((entry) => entry.id === id);
  if (position === -1) {
    return false;
  }
  records[position] = { ...records[position], ...updates };
  // One JSON document per line, each line newline-terminated.
  const body = records.map((entry) => `${JSON.stringify(entry)}\n`).join("");
  fs.writeFileSync(getEvalDataPath(projectPath), body, "utf-8");
  return true;
}
22266
/**
 * Record a new decomposition: build the eval record, keep it in the
 * in-progress map keyed by epic id, and append it to the eval-data file.
 *
 * @param {object} params
 * @param {string} params.epicId - Used as the record id and map key.
 * @param {string} params.projectPath - Project root directory.
 * @param {string} params.task
 * @param {string} [params.context]
 * @param {string} params.strategy
 * @param {string} params.epicTitle
 * @param {string} [params.epicDescription]
 * @param {Array<object>} params.subtasks
 * @returns {object} The record that was stored and appended.
 */
function captureDecomposition(params) {
  const {
    epicId,
    projectPath,
    task,
    context,
    strategy,
    subtasks,
    epicTitle,
    epicDescription
  } = params;
  const record2 = {
    id: epicId,
    timestamp: new Date().toISOString(),
    project_path: projectPath,
    task,
    context,
    strategy,
    subtask_count: subtasks.length,
    epic_title: epicTitle,
    epic_description: epicDescription,
    subtasks,
    outcomes: []
  };
  inProgressRecords.set(epicId, record2);
  appendEvalRecord(projectPath, record2);
  return record2;
}
22284
/**
 * Record the outcome of one subtask on its epic's eval record.
 *
 * The outcome is appended to the in-memory record when one exists. When the
 * epic is not tracked in memory, the outcomes already persisted on disk are
 * loaded and extended instead, so previously stored outcomes are never
 * overwritten with `undefined`.
 *
 * @param {object} params
 * @param {string} params.epicId - Id of the epic record to update.
 * @param {string} params.projectPath - Project root directory.
 * @param {string} params.beadId
 * @param {string} params.title
 * @param {string[]} params.plannedFiles
 * @param {string[]} params.actualFiles
 * @param {number} params.durationMs
 * @param {number} params.errorCount
 * @param {number} params.retryCount
 * @param {boolean} params.success
 * @param {string} [params.failureMode]
 * @returns {void}
 */
function captureSubtaskOutcome(params) {
  const outcome = {
    bead_id: params.beadId,
    title: params.title,
    planned_files: params.plannedFiles,
    actual_files: params.actualFiles,
    duration_ms: params.durationMs,
    error_count: params.errorCount,
    retry_count: params.retryCount,
    success: params.success,
    failure_mode: params.failureMode
  };
  const record2 = inProgressRecords.get(params.epicId);
  let outcomes;
  if (record2) {
    record2.outcomes = record2.outcomes || [];
    record2.outcomes.push(outcome);
    outcomes = record2.outcomes;
  } else {
    // No in-memory record: extend what is on disk rather than writing
    // `outcomes: undefined`, which would drop the persisted outcomes.
    const persisted = readPartialRecords(params.projectPath).find((r) => r.id === params.epicId);
    if (!persisted) {
      // Unknown epic: nothing to update (updateEvalRecord would be a no-op).
      return;
    }
    outcomes = [...(persisted.outcomes || []), outcome];
  }
  updateEvalRecord(params.projectPath, params.epicId, { outcomes });
}
22305
/**
 * Compute aggregate metrics for an in-progress epic record, persist them,
 * and stop tracking the record in memory.
 *
 * Metrics:
 * - overall_success: every outcome succeeded
 * - total_duration_ms / total_errors: sums over outcomes
 * - file_overlap_count: number of planned files listed by more than one subtask
 * - scope_accuracy: distinct actual files / distinct planned files (1 when
 *   nothing was planned), capped at 2 because EvalRecordSchema rejects larger
 *   values and a rejected record makes readEvalRecords throw
 * - time_balance_ratio: longest / shortest positive duration (1 with fewer
 *   than two positive durations)
 *
 * @param {object} params
 * @param {string} params.epicId - Id of the epic record to finalize.
 * @param {string} params.projectPath - Project root directory.
 * @returns {object|null} The finalized record, or `null` when the epic is not
 *   tracked in memory or has no outcomes.
 */
function finalizeEvalRecord(params) {
  const MAX_SCOPE_ACCURACY = 2;
  const record2 = inProgressRecords.get(params.epicId);
  if (!record2 || !record2.outcomes || record2.outcomes.length === 0) {
    return null;
  }
  const outcomes = record2.outcomes;
  const overallSuccess = outcomes.every((o) => o.success);
  const totalDurationMs = outcomes.reduce((sum, o) => sum + o.duration_ms, 0);
  const totalErrors = outcomes.reduce((sum, o) => sum + o.error_count, 0);
  const allPlannedFiles = record2.subtasks?.flatMap((s) => s.files) || [];
  const fileOccurrences = new Map();
  for (const file2 of allPlannedFiles) {
    fileOccurrences.set(file2, (fileOccurrences.get(file2) || 0) + 1);
  }
  const fileOverlapCount = Array.from(fileOccurrences.values()).filter((count) => count > 1).length;
  const plannedFileSet = new Set(allPlannedFiles);
  const actualFileSet = new Set(outcomes.flatMap((o) => o.actual_files));
  const rawScopeAccuracy = plannedFileSet.size > 0 ? actualFileSet.size / plannedFileSet.size : 1;
  // Keep the stored value inside the schema's accepted range.
  const scopeAccuracy = Math.min(rawScopeAccuracy, MAX_SCOPE_ACCURACY);
  const durations = outcomes.map((o) => o.duration_ms).filter((d) => d > 0);
  const timeBalanceRatio = durations.length > 1 ? Math.max(...durations) / Math.min(...durations) : 1;
  const finalRecord = {
    ...record2,
    overall_success: overallSuccess,
    total_duration_ms: totalDurationMs,
    total_errors: totalErrors,
    file_overlap_count: fileOverlapCount,
    scope_accuracy: scopeAccuracy,
    time_balance_ratio: timeBalanceRatio
  };
  updateEvalRecord(params.projectPath, params.epicId, finalRecord);
  inProgressRecords.delete(params.epicId);
  return finalRecord;
}
22338
/**
 * Store human review feedback on an epic's persisted eval record.
 *
 * @param {object} params
 * @param {string} params.projectPath - Project root directory.
 * @param {string} params.epicId - Id of the record to update.
 * @param {boolean} params.accepted - Written as `human_accepted`.
 * @param {boolean} params.modified - Written as `human_modified`.
 * @param {string} [params.notes] - Written as `human_notes`.
 * @returns {void}
 */
function captureHumanFeedback(params) {
  const feedback = {
    human_accepted: params.accepted,
    human_modified: params.modified,
    human_notes: params.notes
  };
  updateEvalRecord(params.projectPath, params.epicId, feedback);
}
22345
/**
 * Convert stored eval records that have at least one outcome into
 * `{ input, expected, actual }` cases.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {Array<{input: object, expected: object, actual: object}>}
 */
function exportForEvalite(projectPath) {
  const cases = [];
  for (const record2 of readEvalRecords(projectPath)) {
    // Records without outcomes are skipped.
    if (!record2.outcomes || record2.outcomes.length === 0) {
      continue;
    }
    const input = {
      task: record2.task,
      context: record2.context
    };
    const expected = {
      minSubtasks: 2,
      subtaskCount: record2.subtask_count,
      requiredFiles: record2.subtasks.flatMap((s) => s.files),
      overallSuccess: record2.overall_success
    };
    cases.push({ input, expected, actual: record2 });
  }
  return cases;
}
22361
/**
 * Summarize the project's stored eval records.
 *
 * Only records with at least one outcome count as "complete" and contribute
 * to the averages. A missing `scope_accuracy` or `time_balance_ratio`
 * defaults to 1; a stored value of 0 is kept as 0 (it is a valid score, not a
 * missing one).
 *
 * @param {string} projectPath - Project root directory.
 * @returns {{totalRecords: number, completeRecords: number, successRate: number,
 *   avgSubtasks: number, avgDurationMs: number, avgScopeAccuracy: number,
 *   avgTimeBalance: number}} All averages are 0 when there are no complete records.
 */
function getEvalDataStats(projectPath) {
  const records = readEvalRecords(projectPath);
  const complete = records.filter((r) => r.outcomes && r.outcomes.length > 0);
  if (complete.length === 0) {
    return {
      totalRecords: records.length,
      completeRecords: 0,
      successRate: 0,
      avgSubtasks: 0,
      avgDurationMs: 0,
      avgScopeAccuracy: 0,
      avgTimeBalance: 0
    };
  }
  const average = (pick) => complete.reduce((sum, r) => sum + pick(r), 0) / complete.length;
  const successCount = complete.filter((r) => r.overall_success).length;
  const avgSubtasks = average((r) => r.outcomes?.length || 0);
  const avgDurationMs = average((r) => r.total_duration_ms || 0);
  // `??` rather than `||`: only a missing value falls back to 1, never a real 0.
  const avgScopeAccuracy = average((r) => r.scope_accuracy ?? 1);
  const avgTimeBalance = average((r) => r.time_balance_ratio ?? 1);
  return {
    totalRecords: records.length,
    completeRecords: complete.length,
    successRate: successCount / complete.length,
    avgSubtasks,
    avgDurationMs,
    avgScopeAccuracy,
    avgTimeBalance
  };
}
22390
/**
 * Directory where per-session event logs are stored:
 * `<home>/.config/swarm-tools/sessions`.
 *
 * @returns {string} Path under the current user's home directory.
 */
function getSessionDir() {
  const segments = [".config", "swarm-tools", "sessions"];
  return path.join(os.homedir(), ...segments);
}
22393
/**
 * Path of the JSONL event log for one session.
 *
 * @param {string} sessionId - Session identifier; used verbatim as the file stem.
 * @returns {string} `<session dir>/<sessionId>.jsonl`.
 */
function getSessionPath(sessionId) {
  const fileName = `${sessionId}.jsonl`;
  return path.join(getSessionDir(), fileName);
}
22396
/**
 * Create the session log directory if it does not exist yet.
 *
 * @returns {void}
 */
function ensureSessionDir() {
  const sessionDir = getSessionDir();
  if (fs.existsSync(sessionDir)) {
    return;
  }
  fs.mkdirSync(sessionDir, { recursive: true });
}
22402
/**
 * Validate a coordinator event and append it to its session's JSONL log.
 *
 * @param {object} event - Event to record; must satisfy `CoordinatorEventSchema`.
 * @returns {void}
 * @throws If `CoordinatorEventSchema.parse` rejects the event (nothing is written).
 */
function captureCoordinatorEvent(event) {
  // Validation runs first so an invalid event never reaches the disk.
  CoordinatorEventSchema.parse(event);
  ensureSessionDir();
  const logFile = getSessionPath(event.session_id);
  fs.appendFileSync(logFile, `${JSON.stringify(event)}\n`, "utf-8");
}
22410
/**
 * Build a `COMPACTION` coordinator event stamped with the current time and
 * record it through `captureCoordinatorEvent`.
 *
 * @param {object} params
 * @param {string} params.session_id
 * @param {string} params.epic_id
 * @param {string} params.compaction_type
 * @param {*} params.payload
 * @returns {void}
 */
function captureCompactionEvent(params) {
  const { session_id, epic_id, compaction_type, payload } = params;
  captureCoordinatorEvent({
    session_id,
    epic_id,
    timestamp: new Date().toISOString(),
    event_type: "COMPACTION",
    compaction_type,
    payload
  });
}
22421
/**
 * Load and validate every event stored in a session's JSONL log.
 *
 * @param {string} sessionId - Session identifier.
 * @returns {Array<object>} Events in file order; empty when the log does not exist.
 * @throws If a line is not valid JSON or fails `CoordinatorEventSchema.parse`.
 */
function readSessionEvents(sessionId) {
  const sessionPath = getSessionPath(sessionId);
  if (!fs.existsSync(sessionPath)) {
    return [];
  }
  const events = [];
  const rawLines = fs.readFileSync(sessionPath, "utf-8").trim().split("\n");
  for (const rawLine of rawLines) {
    if (!rawLine) {
      continue;
    }
    events.push(CoordinatorEventSchema.parse(JSON.parse(rawLine)));
  }
  return events;
}
22434
/**
 * Assemble a session summary from the events logged for `session_id`.
 * The start and end times are the earliest and latest event timestamps.
 *
 * @param {object} params
 * @param {string} params.session_id
 * @param {string} params.epic_id
 * @returns {object|null} `{ session_id, epic_id, start_time, end_time, events }`,
 *   or `null` when the session has no events.
 */
function saveSession(params) {
  const events = readSessionEvents(params.session_id);
  if (events.length === 0) {
    return null;
  }
  const epochMillis = events.map((e) => new Date(e.timestamp).getTime());
  const earliest = Math.min(...epochMillis);
  const latest = Math.max(...epochMillis);
  return {
    session_id: params.session_id,
    epic_id: params.epic_id,
    start_time: new Date(earliest).toISOString(),
    end_time: new Date(latest).toISOString(),
    events
  };
}
22451
// Module-level bindings for src/eval-capture.ts. The schemas and the
// in-progress map are assigned when init_eval_capture runs.
var SubtaskOutcomeSchema, EvalRecordSchema, CoordinatorEventSchema, CoordinatorSessionSchema, DEFAULT_EVAL_DATA_PATH = ".opencode/eval-data.jsonl", inProgressRecords;
var init_eval_capture = __esm(() => {
  init_zod();
  const z = exports_external;
  // Fresh schema instances on every call, matching the original's
  // one-instance-per-field construction.
  const countField = () => z.number().int().min(0);
  const stringList = () => z.array(z.string());
  // Fields shared by every coordinator event variant, in their original order.
  const eventBase = () => ({
    session_id: z.string(),
    epic_id: z.string(),
    timestamp: z.string()
  });

  SubtaskOutcomeSchema = z.object({
    bead_id: z.string(),
    title: z.string(),
    planned_files: stringList(),
    actual_files: stringList(),
    duration_ms: countField(),
    error_count: countField(),
    retry_count: countField(),
    success: z.boolean(),
    failure_mode: z.string().optional()
  });

  const subtaskShape = z.object({
    title: z.string(),
    description: z.string().optional(),
    files: stringList(),
    dependencies: z.array(z.number()).optional(),
    estimated_complexity: z.number().int().min(1).max(5).optional()
  });

  EvalRecordSchema = z.object({
    id: z.string(),
    timestamp: z.string(),
    project_path: z.string(),
    task: z.string(),
    context: z.string().optional(),
    strategy: z.enum(["file-based", "feature-based", "risk-based", "auto"]),
    subtask_count: z.number().int().min(1),
    epic_title: z.string(),
    epic_description: z.string().optional(),
    subtasks: z.array(subtaskShape),
    outcomes: z.array(SubtaskOutcomeSchema).optional(),
    overall_success: z.boolean().optional(),
    total_duration_ms: countField().optional(),
    total_errors: countField().optional(),
    human_accepted: z.boolean().optional(),
    human_modified: z.boolean().optional(),
    human_notes: z.string().optional(),
    file_overlap_count: countField().optional(),
    scope_accuracy: z.number().min(0).max(2).optional(),
    time_balance_ratio: z.number().min(1).optional()
  });

  const decisionEvent = z.object({
    ...eventBase(),
    event_type: z.literal("DECISION"),
    decision_type: z.enum([
      "strategy_selected",
      "worker_spawned",
      "review_completed",
      "decomposition_complete"
    ]),
    payload: z.any()
  });
  const violationEvent = z.object({
    ...eventBase(),
    event_type: z.literal("VIOLATION"),
    violation_type: z.enum([
      "coordinator_edited_file",
      "coordinator_ran_tests",
      "coordinator_reserved_files",
      "no_worker_spawned"
    ]),
    payload: z.any()
  });
  const outcomeEvent = z.object({
    ...eventBase(),
    event_type: z.literal("OUTCOME"),
    outcome_type: z.enum([
      "subtask_success",
      "subtask_retry",
      "subtask_failed",
      "epic_complete"
    ]),
    payload: z.any()
  });
  const compactionEvent = z.object({
    ...eventBase(),
    event_type: z.literal("COMPACTION"),
    compaction_type: z.enum([
      "detection_complete",
      "prompt_generated",
      "context_injected",
      "resumption_started",
      "tool_call_tracked"
    ]),
    payload: z.any()
  });

  // Variants are told apart by their literal `event_type`.
  CoordinatorEventSchema = z.discriminatedUnion("event_type", [
    decisionEvent,
    violationEvent,
    outcomeEvent,
    compactionEvent
  ]);

  CoordinatorSessionSchema = z.object({
    session_id: z.string(),
    epic_id: z.string(),
    start_time: z.string(),
    end_time: z.string().optional(),
    events: z.array(CoordinatorEventSchema)
  });

  inProgressRecords = new Map;
});
22557
+
22181
22558
  // src/learning.ts
22182
22559
  var exports_learning = {};
22183
22560
  __export(exports_learning, {
@@ -39409,6 +39786,71 @@ var hive_ready = tool({
39409
39786
  }
39410
39787
  }
39411
39788
  });
39789
// Tool: query hive cells by id, readiness, or status/type filters.
// Returns a pretty-printed JSON array of cells formatted by formatCellForOutput.
// Every failure surfaces as a HiveError tagged "hive_cells".
var hive_cells = tool({
  description: `Query cells from the hive database with flexible filtering.

USE THIS TOOL TO:
- List all open cells: hive_cells()
- Find cells by status: hive_cells({ status: "in_progress" })
- Find cells by type: hive_cells({ type: "bug" })
- Get a specific cell by partial ID: hive_cells({ id: "mjkmd" })
- Get the next ready (unblocked) cell: hive_cells({ ready: true })
- Combine filters: hive_cells({ status: "open", type: "task" })

RETURNS: Array of cells with id, title, status, priority, type, parent_id, created_at, updated_at

PREFER THIS OVER hive_query when you need to:
- See what work is available
- Check status of multiple cells
- Find cells matching criteria
- Look up a cell by partial ID`,
  args: {
    id: tool.schema.string().optional().describe("Partial or full cell ID to look up"),
    status: tool.schema.enum(["open", "in_progress", "blocked", "closed"]).optional().describe("Filter by status"),
    type: tool.schema.enum(["task", "bug", "feature", "epic", "chore"]).optional().describe("Filter by type"),
    ready: tool.schema.boolean().optional().describe("If true, return only the next unblocked cell"),
    limit: tool.schema.number().optional().describe("Max cells to return (default 20)")
  },
  async execute(args, ctx) {
    const projectKey = getHiveWorkingDirectory();
    const adapter = await getHiveAdapter(projectKey);
    try {
      // Lookup by id takes precedence over every other argument.
      if (args.id) {
        // Fall back to the given id when the partial id does not resolve.
        const fullId = await resolvePartialId(adapter, projectKey, args.id) || args.id;
        const cell = await adapter.getCell(projectKey, fullId);
        if (!cell) {
          throw new HiveError(`No cell found matching ID '${args.id}'`, "hive_cells");
        }
        const formatted2 = formatCellForOutput(cell);
        return JSON.stringify([formatted2], null, 2);
      }
      if (args.ready) {
        const ready = await adapter.getNextReadyCell(projectKey);
        if (!ready) {
          return JSON.stringify([], null, 2);
        }
        const formatted2 = formatCellForOutput(ready);
        return JSON.stringify([formatted2], null, 2);
      }
      const cells = await adapter.queryCells(projectKey, {
        status: args.status,
        type: args.type,
        limit: args.limit || 20
      });
      const formatted = cells.map((c) => formatCellForOutput(c));
      return JSON.stringify(formatted, null, 2);
    } catch (error45) {
      const message = error45 instanceof Error ? error45.message : String(error45);
      if (message.includes("Ambiguous hash")) {
        throw new HiveError(`Ambiguous ID '${args.id}': multiple cells match. Please provide more characters.`, "hive_cells");
      }
      if (message.includes("Bead not found") || message.includes("Cell not found")) {
        throw new HiveError(`No cell found matching ID '${args.id || "unknown"}'`, "hive_cells");
      }
      // A HiveError raised above (e.g. the "No cell found" case) already has
      // its final message; rethrow it as-is instead of wrapping it in a
      // misleading "Failed to query cells" prefix.
      if (error45 instanceof HiveError) {
        throw error45;
      }
      throw new HiveError(`Failed to query cells: ${message}`, "hive_cells");
    }
  }
});
39412
39854
  var hive_sync = tool({
39413
39855
  description: "Sync hive to git and push (MANDATORY at session end)",
39414
39856
  args: {
@@ -39550,6 +39992,7 @@ var hiveTools = {
39550
39992
  hive_close,
39551
39993
  hive_start,
39552
39994
  hive_ready,
39995
+ hive_cells,
39553
39996
  hive_sync,
39554
39997
  hive_link_thread
39555
39998
  };
@@ -41846,122 +42289,7 @@ init_swarm_strategies();
41846
42289
  init_dist();
41847
42290
  init_zod();
41848
42291
  init_swarm_strategies();
41849
-
41850
- // src/eval-capture.ts
41851
- init_zod();
41852
- import * as fs from "node:fs";
41853
- import * as os from "node:os";
41854
- import * as path from "node:path";
41855
- var SubtaskOutcomeSchema = exports_external.object({
41856
- bead_id: exports_external.string(),
41857
- title: exports_external.string(),
41858
- planned_files: exports_external.array(exports_external.string()),
41859
- actual_files: exports_external.array(exports_external.string()),
41860
- duration_ms: exports_external.number().int().min(0),
41861
- error_count: exports_external.number().int().min(0),
41862
- retry_count: exports_external.number().int().min(0),
41863
- success: exports_external.boolean(),
41864
- failure_mode: exports_external.string().optional()
41865
- });
41866
- var EvalRecordSchema = exports_external.object({
41867
- id: exports_external.string(),
41868
- timestamp: exports_external.string(),
41869
- project_path: exports_external.string(),
41870
- task: exports_external.string(),
41871
- context: exports_external.string().optional(),
41872
- strategy: exports_external.enum(["file-based", "feature-based", "risk-based", "auto"]),
41873
- subtask_count: exports_external.number().int().min(1),
41874
- epic_title: exports_external.string(),
41875
- epic_description: exports_external.string().optional(),
41876
- subtasks: exports_external.array(exports_external.object({
41877
- title: exports_external.string(),
41878
- description: exports_external.string().optional(),
41879
- files: exports_external.array(exports_external.string()),
41880
- dependencies: exports_external.array(exports_external.number()).optional(),
41881
- estimated_complexity: exports_external.number().int().min(1).max(5).optional()
41882
- })),
41883
- outcomes: exports_external.array(SubtaskOutcomeSchema).optional(),
41884
- overall_success: exports_external.boolean().optional(),
41885
- total_duration_ms: exports_external.number().int().min(0).optional(),
41886
- total_errors: exports_external.number().int().min(0).optional(),
41887
- human_accepted: exports_external.boolean().optional(),
41888
- human_modified: exports_external.boolean().optional(),
41889
- human_notes: exports_external.string().optional(),
41890
- file_overlap_count: exports_external.number().int().min(0).optional(),
41891
- scope_accuracy: exports_external.number().min(0).max(2).optional(),
41892
- time_balance_ratio: exports_external.number().min(1).optional()
41893
- });
41894
- var CoordinatorEventSchema = exports_external.discriminatedUnion("event_type", [
41895
- exports_external.object({
41896
- session_id: exports_external.string(),
41897
- epic_id: exports_external.string(),
41898
- timestamp: exports_external.string(),
41899
- event_type: exports_external.literal("DECISION"),
41900
- decision_type: exports_external.enum([
41901
- "strategy_selected",
41902
- "worker_spawned",
41903
- "review_completed",
41904
- "decomposition_complete"
41905
- ]),
41906
- payload: exports_external.any()
41907
- }),
41908
- exports_external.object({
41909
- session_id: exports_external.string(),
41910
- epic_id: exports_external.string(),
41911
- timestamp: exports_external.string(),
41912
- event_type: exports_external.literal("VIOLATION"),
41913
- violation_type: exports_external.enum([
41914
- "coordinator_edited_file",
41915
- "coordinator_ran_tests",
41916
- "coordinator_reserved_files",
41917
- "no_worker_spawned"
41918
- ]),
41919
- payload: exports_external.any()
41920
- }),
41921
- exports_external.object({
41922
- session_id: exports_external.string(),
41923
- epic_id: exports_external.string(),
41924
- timestamp: exports_external.string(),
41925
- event_type: exports_external.literal("OUTCOME"),
41926
- outcome_type: exports_external.enum([
41927
- "subtask_success",
41928
- "subtask_retry",
41929
- "subtask_failed",
41930
- "epic_complete"
41931
- ]),
41932
- payload: exports_external.any()
41933
- })
41934
- ]);
41935
- var CoordinatorSessionSchema = exports_external.object({
41936
- session_id: exports_external.string(),
41937
- epic_id: exports_external.string(),
41938
- start_time: exports_external.string(),
41939
- end_time: exports_external.string().optional(),
41940
- events: exports_external.array(CoordinatorEventSchema)
41941
- });
41942
- var inProgressRecords = new Map;
41943
- function getSessionDir() {
41944
- return path.join(os.homedir(), ".config", "swarm-tools", "sessions");
41945
- }
41946
- function getSessionPath(sessionId) {
41947
- return path.join(getSessionDir(), `${sessionId}.jsonl`);
41948
- }
41949
- function ensureSessionDir() {
41950
- const sessionDir = getSessionDir();
41951
- if (!fs.existsSync(sessionDir)) {
41952
- fs.mkdirSync(sessionDir, { recursive: true });
41953
- }
41954
- }
41955
- function captureCoordinatorEvent(event) {
41956
- CoordinatorEventSchema.parse(event);
41957
- ensureSessionDir();
41958
- const sessionPath = getSessionPath(event.session_id);
41959
- const line = `${JSON.stringify(event)}
41960
- `;
41961
- fs.appendFileSync(sessionPath, line, "utf-8");
41962
- }
41963
-
41964
- // src/swarm-decompose.ts
42292
+ init_eval_capture();
41965
42293
  var DECOMPOSITION_PROMPT = `You are decomposing a task into parallelizable subtasks for a swarm of agents.
41966
42294
 
41967
42295
  ## Task
@@ -42279,9 +42607,14 @@ ${fullContext}` : `## Additional Context
42279
42607
  }
42280
42608
  });
42281
42609
  var swarm_validate_decomposition = tool({
42282
- description: "Validate a decomposition response against CellTreeSchema",
42610
+ description: "Validate a decomposition response against CellTreeSchema and capture for eval",
42283
42611
  args: {
42284
- response: tool.schema.string().describe("JSON response from agent (CellTree format)")
42612
+ response: tool.schema.string().describe("JSON response from agent (CellTree format)"),
42613
+ project_path: tool.schema.string().optional().describe("Project path for eval capture"),
42614
+ task: tool.schema.string().optional().describe("Original task description for eval capture"),
42615
+ context: tool.schema.string().optional().describe("Context provided for decomposition"),
42616
+ strategy: tool.schema.enum(["file-based", "feature-based", "risk-based", "auto"]).optional().describe("Decomposition strategy used"),
42617
+ epic_id: tool.schema.string().optional().describe("Epic ID for eval capture")
42285
42618
  },
42286
42619
  async execute(args) {
42287
42620
  try {
@@ -42315,6 +42648,29 @@ var swarm_validate_decomposition = tool({
42315
42648
  }
42316
42649
  }
42317
42650
  const instructionConflicts = detectInstructionConflicts(validated.subtasks);
42651
+ if (args.project_path && args.task && args.strategy && args.epic_id) {
42652
+ try {
42653
+ const { captureDecomposition: captureDecomposition2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
42654
+ captureDecomposition2({
42655
+ epicId: args.epic_id,
42656
+ projectPath: args.project_path,
42657
+ task: args.task,
42658
+ context: args.context,
42659
+ strategy: args.strategy,
42660
+ epicTitle: validated.epic.title,
42661
+ epicDescription: validated.epic.description,
42662
+ subtasks: validated.subtasks.map((s) => ({
42663
+ title: s.title,
42664
+ description: s.description,
42665
+ files: s.files,
42666
+ dependencies: s.dependencies,
42667
+ estimated_complexity: s.estimated_complexity
42668
+ }))
42669
+ });
42670
+ } catch (error45) {
42671
+ console.warn("[swarm_validate_decomposition] Failed to capture decomposition:", error45);
42672
+ }
42673
+ }
42318
42674
  return JSON.stringify({
42319
42675
  valid: true,
42320
42676
  cell_tree: validated,
@@ -42355,7 +42711,7 @@ var swarm_delegate_planning = tool({
42355
42711
  strategy: tool.schema.enum(["auto", "file-based", "feature-based", "risk-based"]).optional().default("auto").describe("Decomposition strategy (default: auto-detect)"),
42356
42712
  query_cass: tool.schema.boolean().optional().default(true).describe("Query CASS for similar past tasks (default: true)")
42357
42713
  },
42358
- async execute(args) {
42714
+ async execute(args, _ctx) {
42359
42715
  const { selectStrategy: selectStrategy2, formatStrategyGuidelines: formatStrategyGuidelines2 } = await Promise.resolve().then(() => (init_swarm_strategies(), exports_swarm_strategies));
42360
42716
  const { formatMemoryQueryForDecomposition: formatMemoryQueryForDecomposition2 } = await Promise.resolve().then(() => (init_learning(), exports_learning));
42361
42717
  const { listSkills: listSkills2, getSkillsContextForSwarm: getSkillsContextForSwarm2, findRelevantSkills: findRelevantSkills2 } = await Promise.resolve().then(() => (init_skills(), exports_skills));
@@ -42371,7 +42727,7 @@ var swarm_delegate_planning = tool({
42371
42727
  }
42372
42728
  try {
42373
42729
  captureCoordinatorEvent({
42374
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
42730
+ session_id: _ctx.sessionID || "unknown",
42375
42731
  epic_id: "planning",
42376
42732
  timestamp: new Date().toISOString(),
42377
42733
  event_type: "DECISION",
@@ -44481,6 +44837,7 @@ var worktreeTools = {
44481
44837
  init_dist();
44482
44838
  init_zod();
44483
44839
  import { sendSwarmMessage as sendSwarmMessage2 } from "swarm-mail";
44840
+ init_eval_capture();
44484
44841
  var ReviewIssueSchema = exports_external.object({
44485
44842
  file: exports_external.string(),
44486
44843
  line: exports_external.number().optional(),
@@ -44705,7 +45062,7 @@ var swarm_review_feedback = tool({
44705
45062
  summary: exports_external.string().optional().describe("Review summary"),
44706
45063
  issues: exports_external.string().optional().describe("JSON array of ReviewIssue objects (for needs_changes)")
44707
45064
  },
44708
- async execute(args) {
45065
+ async execute(args, _ctx) {
44709
45066
  let parsedIssues = [];
44710
45067
  if (args.issues) {
44711
45068
  try {
@@ -44728,7 +45085,7 @@ var swarm_review_feedback = tool({
44728
45085
  markReviewApproved(args.task_id);
44729
45086
  try {
44730
45087
  captureCoordinatorEvent({
44731
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
45088
+ session_id: _ctx.sessionID || "unknown",
44732
45089
  epic_id: epicId,
44733
45090
  timestamp: new Date().toISOString(),
44734
45091
  event_type: "DECISION",
@@ -44766,7 +45123,7 @@ You may now complete the task with \`swarm_complete\`.`,
44766
45123
  const remaining = MAX_REVIEW_ATTEMPTS - attemptNumber;
44767
45124
  try {
44768
45125
  captureCoordinatorEvent({
44769
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
45126
+ session_id: _ctx.sessionID || "unknown",
44770
45127
  epic_id: epicId,
44771
45128
  timestamp: new Date().toISOString(),
44772
45129
  event_type: "DECISION",
@@ -44837,6 +45194,7 @@ var reviewTools = {
44837
45194
  };
44838
45195
 
44839
45196
  // src/swarm-orchestrate.ts
45197
+ init_eval_capture();
44840
45198
  function generateWorkerHandoff(params) {
44841
45199
  const handoff = {
44842
45200
  contract: {
@@ -45737,10 +46095,29 @@ Files touched: ${args.files_touched?.join(", ") || "none recorded"}`,
45737
46095
  reason: "No files_owned contract found (non-epic subtask or decomposition event missing)"
45738
46096
  }
45739
46097
  };
46098
+ try {
46099
+ const { captureSubtaskOutcome: captureSubtaskOutcome2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
46100
+ const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
46101
+ const evalEpicId = cell.parent_id || epicId2;
46102
+ captureSubtaskOutcome2({
46103
+ epicId: evalEpicId,
46104
+ projectPath: args.project_key,
46105
+ beadId: args.bead_id,
46106
+ title: cell.title,
46107
+ plannedFiles: args.planned_files || [],
46108
+ actualFiles: args.files_touched || [],
46109
+ durationMs: durationMs2,
46110
+ errorCount: args.error_count || 0,
46111
+ retryCount: args.retry_count || 0,
46112
+ success: true
46113
+ });
46114
+ } catch (error45) {
46115
+ console.warn("[swarm_complete] Failed to capture subtask outcome:", error45);
46116
+ }
45740
46117
  try {
45741
46118
  const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
45742
46119
  captureCoordinatorEvent({
45743
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
46120
+ session_id: _ctx.sessionID || "unknown",
45744
46121
  epic_id: epicId2,
45745
46122
  timestamp: new Date().toISOString(),
45746
46123
  event_type: "OUTCOME",
@@ -45822,7 +46199,7 @@ ${errorStack.slice(0, 1000)}
45822
46199
  try {
45823
46200
  const durationMs = args.start_time ? Date.now() - args.start_time : 0;
45824
46201
  captureCoordinatorEvent({
45825
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
46202
+ session_id: _ctx.sessionID || "unknown",
45826
46203
  epic_id: epicId,
45827
46204
  timestamp: new Date().toISOString(),
45828
46205
  event_type: "OUTCOME",
@@ -45889,7 +46266,9 @@ var swarm_record_outcome = tool({
45889
46266
  "user_cancelled",
45890
46267
  "unknown"
45891
46268
  ]).optional().describe("Failure classification (only when success=false). Auto-classified if not provided."),
45892
- failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)")
46269
+ failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)"),
46270
+ project_path: tool.schema.string().optional().describe("Project path (for finalizing eval records when all subtasks complete)"),
46271
+ epic_id: tool.schema.string().optional().describe("Epic ID (for finalizing eval records when all subtasks complete)")
45893
46272
  },
45894
46273
  async execute(args) {
45895
46274
  const signals = {
@@ -45911,6 +46290,18 @@ var swarm_record_outcome = tool({
45911
46290
  const validated = OutcomeSignalsSchema.parse(signals);
45912
46291
  const scored = scoreImplicitFeedback(validated, DEFAULT_LEARNING_CONFIG);
45913
46292
  const errorStats = await globalErrorAccumulator.getErrorStats(args.bead_id);
46293
+ let finalizedRecord = null;
46294
+ if (args.project_path && args.epic_id) {
46295
+ try {
46296
+ const { finalizeEvalRecord: finalizeEvalRecord2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
46297
+ finalizedRecord = finalizeEvalRecord2({
46298
+ epicId: args.epic_id,
46299
+ projectPath: args.project_path
46300
+ });
46301
+ } catch (error45) {
46302
+ console.warn("[swarm_record_outcome] Failed to finalize eval record:", error45);
46303
+ }
46304
+ }
45914
46305
  const criteriaToScore = args.criteria ?? [
45915
46306
  "type_safe",
45916
46307
  "no_bugs",
@@ -45952,6 +46343,7 @@ var swarm_record_outcome = tool({
45952
46343
  accumulated_errors: errorStats.total,
45953
46344
  unresolved_errors: errorStats.unresolved
45954
46345
  },
46346
+ finalized_eval_record: finalizedRecord || undefined,
45955
46347
  note: "Feedback events should be stored for criterion weight calculation. Use learning.ts functions to apply weights."
45956
46348
  }, null, 2);
45957
46349
  }
@@ -45983,12 +46375,31 @@ async function runResearchPhase(task, projectPath, options2) {
45983
46375
  if (techStack.length === 0) {
45984
46376
  return {
45985
46377
  tech_stack: [],
46378
+ spawn_instructions: [],
45986
46379
  summaries: {},
45987
46380
  memory_ids: []
45988
46381
  };
45989
46382
  }
46383
+ const spawnInstructions = [];
46384
+ for (const tech of techStack) {
46385
+ const researchId = `research-${tech}-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
46386
+ const prompt = formatResearcherPrompt({
46387
+ research_id: researchId,
46388
+ epic_id: "standalone-research",
46389
+ tech_stack: [tech],
46390
+ project_path: projectPath,
46391
+ check_upgrades: options2?.checkUpgrades ?? false
46392
+ });
46393
+ spawnInstructions.push({
46394
+ research_id: researchId,
46395
+ tech,
46396
+ prompt,
46397
+ subagent_type: "swarm/researcher"
46398
+ });
46399
+ }
45990
46400
  return {
45991
46401
  tech_stack: techStack,
46402
+ spawn_instructions: spawnInstructions,
45992
46403
  summaries: {},
45993
46404
  memory_ids: []
45994
46405
  };
@@ -46425,6 +46836,7 @@ var orchestrateTools = {
46425
46836
  };
46426
46837
 
46427
46838
  // src/swarm-prompts.ts
46839
+ init_eval_capture();
46428
46840
  var STRATEGY_DECOMPOSITION_PROMPT2 = `You are decomposing a task into parallelizable subtasks for a swarm of agents.
46429
46841
 
46430
46842
  ## Task
@@ -47221,7 +47633,7 @@ var swarm_spawn_subtask = tool({
47221
47633
  }).optional().describe("Recovery context from checkpoint compaction"),
47222
47634
  model: tool.schema.string().optional().describe("Optional explicit model override (auto-selected if not provided)")
47223
47635
  },
47224
- async execute(args) {
47636
+ async execute(args, _ctx) {
47225
47637
  const prompt = formatSubtaskPromptV2({
47226
47638
  bead_id: args.bead_id,
47227
47639
  epic_id: args.epic_id,
@@ -47250,7 +47662,7 @@ var swarm_spawn_subtask = tool({
47250
47662
  const postCompletionInstructions = COORDINATOR_POST_WORKER_CHECKLIST.replace(/{project_key}/g, args.project_path || "$PWD").replace(/{epic_id}/g, args.epic_id).replace(/{task_id}/g, args.bead_id).replace(/{files_touched}/g, filesJoined).replace(/{worker_id}/g, "worker");
47251
47663
  try {
47252
47664
  captureCoordinatorEvent({
47253
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
47665
+ session_id: _ctx.sessionID || "unknown",
47254
47666
  epic_id: args.epic_id,
47255
47667
  timestamp: new Date().toISOString(),
47256
47668
  event_type: "DECISION",
@@ -63171,6 +63583,7 @@ function createMetrics(result, toolName) {
63171
63583
  }
63172
63584
 
63173
63585
  // src/planning-guardrails.ts
63586
+ init_eval_capture();
63174
63587
  var FILE_MODIFICATION_PATTERNS = [
63175
63588
  /\bimplement\b/i,
63176
63589
  /\bcreate\b.*\.(ts|js|tsx|jsx|py|rs|go|java|rb|swift|kt)/i,
@@ -63456,9 +63869,21 @@ function getLog() {
63456
63869
  }
63457
63870
  return _logger;
63458
63871
  }
63459
- var SWARM_COMPACTION_CONTEXT = `## \uD83D\uDC1D SWARM ACTIVE - You Are The COORDINATOR
63872
+ var SWARM_COMPACTION_CONTEXT = `
63873
+ ┌─────────────────────────────────────────────────────────────┐
63874
+ │ │
63875
+ │ \uD83D\uDC1D YOU ARE THE COORDINATOR \uD83D\uDC1D │
63876
+ │ │
63877
+ │ NOT A WORKER. NOT AN IMPLEMENTER. │
63878
+ │ YOU ORCHESTRATE. │
63879
+ │ │
63880
+ └─────────────────────────────────────────────────────────────┘
63881
+
63882
+ ## \uD83C\uDFAF NON-NEGOTIABLE: YOU ARE THE COORDINATOR
63883
+
63884
+ Context was compacted but the swarm is still running. **YOU ARE THE COORDINATOR.**
63460
63885
 
63461
- Context was compacted but the swarm is still running. You are the **COORDINATOR**.
63886
+ Your role is ORCHESTRATION, not implementation. When you catch yourself about to do work directly, STOP.
63462
63887
 
63463
63888
  ### ⛔ NEVER DO THESE (Coordinator Anti-Patterns)
63464
63889
 
@@ -63469,9 +63894,27 @@ Context was compacted but the swarm is still running. You are the **COORDINATOR*
63469
63894
  - ❌ **NEVER** implement features yourself - SPAWN A WORKER
63470
63895
  - ❌ **NEVER** "just do it myself to save time" - NO. SPAWN A WORKER.
63471
63896
  - ❌ **NEVER** reserve files with \`swarmmail_reserve\` - Workers reserve files
63897
+ - ❌ **NEVER** fetch files/docs directly - SPAWN A RESEARCHER
63472
63898
 
63473
63899
  **If you catch yourself about to edit a file, STOP. Use \`swarm_spawn_subtask\` instead.**
63474
63900
 
63901
+ ### \uD83D\uDEAB FORBIDDEN TOOLS (Coordinators MUST delegate these)
63902
+
63903
+ **NEVER use these tools directly. ALWAYS spawn a researcher worker via \`swarm_spawn_researcher\`:**
63904
+
63905
+ **Repository fetching:**
63906
+ - \`repo-crawl_file\`, \`repo-crawl_readme\`, \`repo-crawl_search\`, \`repo-crawl_structure\`, \`repo-crawl_tree\`
63907
+ - \`repo-autopsy_*\` (all repo-autopsy tools)
63908
+
63909
+ **Web/documentation fetching:**
63910
+ - \`webfetch\`, \`fetch_fetch\`
63911
+ - \`context7_resolve-library-id\`, \`context7_get-library-docs\`
63912
+
63913
+ **Knowledge base:**
63914
+ - \`pdf-brain_search\`, \`pdf-brain_read\`
63915
+
63916
+ **If you need external data:** Use \`swarm_spawn_researcher\` with a clear research task. The researcher will fetch, summarize, and return findings.
63917
+
63475
63918
  ### ✅ ALWAYS DO THESE (Coordinator Checklist)
63476
63919
 
63477
63920
  On resume, execute this checklist IN ORDER:
@@ -63521,6 +63964,87 @@ Extract from session context:
63521
63964
  - **Review work** - Use \`swarm_review\` and \`swarm_review_feedback\` for completed work
63522
63965
  - **Close the loop** - When all subtasks done, verify and close the epic
63523
63966
 
63967
+ **You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
63968
+
63969
+ ---
63970
+
63971
+ ## \uD83D\uDCCB FULL COORDINATOR WORKFLOW (Reference)
63972
+
63973
+ You are ALWAYS swarming. Here is the complete workflow for any new work:
63974
+
63975
+ ### Phase 1.5: Research Phase (FOR COMPLEX TASKS)
63976
+
63977
+ **If the task requires understanding unfamiliar technologies, spawn a researcher FIRST:**
63978
+
63979
+ \`\`\`
63980
+ swarm_spawn_researcher(
63981
+ research_id="research-<topic>",
63982
+ epic_id="<epic-id>",
63983
+ tech_stack=["<technology>"],
63984
+ project_path="<path>"
63985
+ )
63986
+ // Then spawn with Task(subagent_type="swarm/researcher", prompt="<from above>")
63987
+ \`\`\`
63988
+
63989
+ ### Phase 2: Knowledge Gathering
63990
+
63991
+ \`\`\`
63992
+ semantic-memory_find(query="<task keywords>", limit=5) # Past learnings
63993
+ cass_search(query="<task description>", limit=5) # Similar past tasks
63994
+ skills_list() # Available skills
63995
+ \`\`\`
63996
+
63997
+ ### Phase 3: Decompose
63998
+
63999
+ \`\`\`
64000
+ swarm_select_strategy(task="<task>")
64001
+ swarm_plan_prompt(task="<task>", context="<synthesized knowledge>")
64002
+ swarm_validate_decomposition(response="<CellTree JSON>")
64003
+ \`\`\`
64004
+
64005
+ ### Phase 4: Create Cells
64006
+
64007
+ \`hive_create_epic(epic_title="<task>", subtasks=[...])\`
64008
+
64009
+ ### Phase 5: DO NOT Reserve Files
64010
+
64011
+ > **⚠️ Coordinator NEVER reserves files.** Workers reserve their own files.
64012
+
64013
+ ### Phase 6: Spawn Workers
64014
+
64015
+ \`\`\`
64016
+ swarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)
64017
+ Task(subagent_type="swarm/worker", prompt="<from above>")
64018
+ \`\`\`
64019
+
64020
+ ### Phase 7: MANDATORY Review Loop
64021
+
64022
+ **AFTER EVERY Task() RETURNS:**
64023
+
64024
+ 1. \`swarmmail_inbox()\` - Check for messages
64025
+ 2. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Generate review
64026
+ 3. Evaluate against epic goals
64027
+ 4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\`
64028
+
64029
+ **If needs_changes:**
64030
+ \`\`\`
64031
+ swarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)
64032
+ // Spawn NEW worker with Task() using retry prompt
64033
+ // Max 3 attempts before marking task blocked
64034
+ \`\`\`
64035
+
64036
+ ### Phase 8: Complete
64037
+
64038
+ \`hive_sync()\` - Sync all cells to git
64039
+
64040
+ ## Strategy Reference
64041
+
64042
+ | Strategy | Best For | Keywords |
64043
+ | -------------- | ------------------------ | -------------------------------------- |
64044
+ | file-based | Refactoring, migrations | refactor, migrate, rename, update all |
64045
+ | feature-based | New features | add, implement, build, create, feature |
64046
+ | risk-based | Bug fixes, security | fix, bug, security, critical, urgent |
64047
+
63524
64048
  **You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
63525
64049
  `;
63526
64050
  var SWARM_DETECTION_FALLBACK = `## \uD83D\uDC1D Swarm Detection - Check Your Context
@@ -64458,6 +64982,161 @@ async function resetStorage() {
64458
64982
 
64459
64983
  // src/index.ts
64460
64984
  init_skills();
64985
+
64986
+ // src/eval-history.ts
64987
+ import * as fs2 from "node:fs";
64988
+ import * as path3 from "node:path";
64989
+ var DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";
64990
+ var VARIANCE_THRESHOLD = 0.1;
64991
+ var BOOTSTRAP_THRESHOLD = 10;
64992
+ var STABILIZATION_THRESHOLD = 50;
64993
+ function getEvalHistoryPath(projectPath) {
64994
+ return path3.join(projectPath, DEFAULT_EVAL_HISTORY_PATH);
64995
+ }
64996
+ function ensureEvalHistoryDir(projectPath) {
64997
+ const historyPath = getEvalHistoryPath(projectPath);
64998
+ const dir = path3.dirname(historyPath);
64999
+ if (!fs2.existsSync(dir)) {
65000
+ fs2.mkdirSync(dir, { recursive: true });
65001
+ }
65002
+ }
65003
+ function recordEvalRun(projectPath, run) {
65004
+ ensureEvalHistoryDir(projectPath);
65005
+ const historyPath = getEvalHistoryPath(projectPath);
65006
+ const line = `${JSON.stringify(run)}
65007
+ `;
65008
+ fs2.appendFileSync(historyPath, line, "utf-8");
65009
+ }
65010
+ function readAllRecords(projectPath) {
65011
+ const historyPath = getEvalHistoryPath(projectPath);
65012
+ if (!fs2.existsSync(historyPath)) {
65013
+ return [];
65014
+ }
65015
+ const content = fs2.readFileSync(historyPath, "utf-8");
65016
+ const lines = content.trim().split(`
65017
+ `).filter(Boolean);
65018
+ return lines.map((line) => JSON.parse(line));
65019
+ }
65020
+ function getScoreHistory(projectPath, evalName) {
65021
+ return readAllRecords(projectPath).filter((run) => run.eval_name === evalName);
65022
+ }
65023
+ function calculateVariance(scores) {
65024
+ if (scores.length <= 1) {
65025
+ return 0;
65026
+ }
65027
+ const mean = scores.reduce((sum2, score) => sum2 + score, 0) / scores.length;
65028
+ const variance5 = scores.reduce((sum2, score) => {
65029
+ const deviation = score - mean;
65030
+ return sum2 + deviation * deviation;
65031
+ }, 0) / scores.length;
65032
+ return variance5;
65033
+ }
65034
+ function getPhase(projectPath, evalName) {
65035
+ const history = getScoreHistory(projectPath, evalName);
65036
+ if (history.length < BOOTSTRAP_THRESHOLD) {
65037
+ return "bootstrap";
65038
+ }
65039
+ if (history.length <= STABILIZATION_THRESHOLD) {
65040
+ return "stabilization";
65041
+ }
65042
+ const scores = history.map((run) => run.score);
65043
+ const variance5 = calculateVariance(scores);
65044
+ if (variance5 < VARIANCE_THRESHOLD) {
65045
+ return "production";
65046
+ }
65047
+ return "stabilization";
65048
+ }
65049
+ // src/eval-gates.ts
65050
+ var DEFAULT_THRESHOLDS = {
65051
+ stabilization: 0.1,
65052
+ production: 0.05
65053
+ };
65054
+ function calculateBaseline(history, currentScore) {
65055
+ if (history.length === 0) {
65056
+ return currentScore;
65057
+ }
65058
+ return history.reduce((sum2, run) => sum2 + run.score, 0) / history.length;
65059
+ }
65060
+ function calculateRegression(baseline, currentScore) {
65061
+ if (baseline === 0) {
65062
+ return 0;
65063
+ }
65064
+ return (baseline - currentScore) / baseline;
65065
+ }
65066
+ function formatRegressionMessage(regressionPercent, baseline, currentScore) {
65067
+ return `${(regressionPercent * 100).toFixed(1)}% regression (baseline: ${baseline.toFixed(2)}, current: ${currentScore.toFixed(2)})`;
65068
+ }
65069
+ function checkGate(projectPath, evalName, currentScore, config2) {
65070
+ const thresholds = {
65071
+ stabilization: config2?.stabilizationThreshold ?? DEFAULT_THRESHOLDS.stabilization,
65072
+ production: config2?.productionThreshold ?? DEFAULT_THRESHOLDS.production
65073
+ };
65074
+ const phase = getPhase(projectPath, evalName);
65075
+ const history = getScoreHistory(projectPath, evalName);
65076
+ if (phase === "bootstrap") {
65077
+ return {
65078
+ passed: true,
65079
+ phase: "bootstrap",
65080
+ message: `Bootstrap phase (${history.length}/10 runs) - collecting data`,
65081
+ currentScore
65082
+ };
65083
+ }
65084
+ const baseline = calculateBaseline(history, currentScore);
65085
+ const regressionPercent = calculateRegression(baseline, currentScore);
65086
+ const regressionMsg = formatRegressionMessage(regressionPercent, baseline, currentScore);
65087
+ if (phase === "stabilization") {
65088
+ if (regressionPercent > thresholds.stabilization) {
65089
+ return {
65090
+ passed: true,
65091
+ phase: "stabilization",
65092
+ message: `Stabilization phase: ${regressionMsg} - exceeds ${(thresholds.stabilization * 100).toFixed(0)}% threshold but still passing`,
65093
+ baseline,
65094
+ currentScore,
65095
+ regressionPercent
65096
+ };
65097
+ }
65098
+ if (history.length > 50) {
65099
+ const scores = history.map((run) => run.score);
65100
+ const variance5 = calculateVariance(scores);
65101
+ return {
65102
+ passed: true,
65103
+ phase: "stabilization",
65104
+ message: `Stabilization phase: ${regressionMsg} - acceptable. High variance (${variance5.toFixed(3)}) prevents production phase.`,
65105
+ baseline,
65106
+ currentScore,
65107
+ regressionPercent
65108
+ };
65109
+ }
65110
+ return {
65111
+ passed: true,
65112
+ phase: "stabilization",
65113
+ message: `Stabilization phase: ${regressionMsg} - acceptable`,
65114
+ baseline,
65115
+ currentScore,
65116
+ regressionPercent
65117
+ };
65118
+ }
65119
+ if (regressionPercent > thresholds.production) {
65120
+ return {
65121
+ passed: false,
65122
+ phase: "production",
65123
+ message: `Production phase FAIL: ${regressionMsg} - exceeds ${(thresholds.production * 100).toFixed(0)}% threshold`,
65124
+ baseline,
65125
+ currentScore,
65126
+ regressionPercent
65127
+ };
65128
+ }
65129
+ return {
65130
+ passed: true,
65131
+ phase: "production",
65132
+ message: `Production phase: ${regressionMsg} - acceptable`,
65133
+ baseline,
65134
+ currentScore,
65135
+ regressionPercent
65136
+ };
65137
+ }
65138
+
65139
+ // src/index.ts
64461
65140
  var SwarmPlugin = async (input) => {
64462
65141
  const { $, directory, client } = input;
64463
65142
  setHiveWorkingDirectory(directory);
@@ -64524,7 +65203,7 @@ var SwarmPlugin = async (input) => {
64524
65203
  if (isInCoordinatorContext()) {
64525
65204
  const ctx = getCoordinatorContext();
64526
65205
  const violation = detectCoordinatorViolation({
64527
- sessionId: ctx.sessionId || "unknown",
65206
+ sessionId: input2.sessionID || "unknown",
64528
65207
  epicId: ctx.epicId || "unknown",
64529
65208
  toolName,
64530
65209
  toolArgs: output.args,
@@ -64638,6 +65317,7 @@ export {
64638
65317
  researchTools,
64639
65318
  requireTool,
64640
65319
  repoCrawlTools,
65320
+ recordEvalRun,
64641
65321
  parseFrontmatter,
64642
65322
  migrateBeadsToHive,
64643
65323
  mergeHistoricBeads,
@@ -64668,6 +65348,7 @@ export {
64668
65348
  hive_create_epic,
64669
65349
  hive_create,
64670
65350
  hive_close,
65351
+ hive_cells,
64671
65352
  hiveTools,
64672
65353
  guardrailOutput,
64673
65354
  groupByTransition,
@@ -64677,12 +65358,15 @@ export {
64677
65358
  getStatusChanges,
64678
65359
  getSkillsContextForSwarm,
64679
65360
  getSkill,
65361
+ getScoreHistory,
64680
65362
  getSchemaByName,
65363
+ getPhase,
64681
65364
  getMandateStorage,
64682
65365
  getLogger,
64683
65366
  getInstalledVersions,
64684
65367
  getHiveWorkingDirectory,
64685
65368
  getHiveAdapter,
65369
+ getEvalHistoryPath,
64686
65370
  getCellIdFromEvent,
64687
65371
  getBeadsWorkingDirectory,
64688
65372
  getBeadsAdapter,
@@ -64700,6 +65384,7 @@ export {
64700
65384
  evaluatePromotion,
64701
65385
  evaluateBatchPromotions,
64702
65386
  ensureHiveDirectory,
65387
+ ensureEvalHistoryDir,
64703
65388
  discoverSkills,
64704
65389
  discoverDocTools,
64705
65390
  src_default as default,
@@ -64715,8 +65400,10 @@ export {
64715
65400
  createAgentMailError,
64716
65401
  clearSessionState,
64717
65402
  checkTool,
65403
+ checkGate,
64718
65404
  checkBeadsMigrationNeeded,
64719
65405
  checkAllTools,
65406
+ calculateVariance,
64720
65407
  beads_update,
64721
65408
  beads_sync,
64722
65409
  beads_start,
@@ -64738,6 +65425,7 @@ export {
64738
65425
  VoteTypeSchema,
64739
65426
  VoteSchema,
64740
65427
  ValidationResultSchema,
65428
+ VARIANCE_THRESHOLD,
64741
65429
  UpdateSwarmContextArgsSchema,
64742
65430
  TaskDecompositionSchema,
64743
65431
  SwarmStrategySchema,
@@ -64757,6 +65445,7 @@ export {
64757
65445
  SWARM_COMPACTION_CONTEXT,
64758
65446
  SUBTASK_PROMPT_V2,
64759
65447
  STRATEGIES,
65448
+ STABILIZATION_THRESHOLD,
64760
65449
  RepoCrawlError,
64761
65450
  QuerySwarmContextsArgsSchema,
64762
65451
  QueryMandatesArgsSchema,
@@ -64779,10 +65468,12 @@ export {
64779
65468
  DecompositionError,
64780
65469
  DecomposedSubtaskSchema,
64781
65470
  DecomposeArgsSchema,
65471
+ DEFAULT_THRESHOLDS,
64782
65472
  DEFAULT_STORAGE_CONFIG,
64783
65473
  DEFAULT_MANDATE_STORAGE_CONFIG,
64784
65474
  DEFAULT_MANDATE_DECAY_CONFIG,
64785
65475
  DEFAULT_GUARDRAIL_CONFIG,
65476
+ DEFAULT_EVAL_HISTORY_PATH,
64786
65477
  DEFAULT_CRITERIA,
64787
65478
  CriterionEvaluationSchema,
64788
65479
  CreateSwarmContextArgsSchema,
@@ -64850,6 +65541,7 @@ export {
64850
65541
  BeadAssignedEventSchema,
64851
65542
  BaseCellEventSchema,
64852
65543
  BaseBeadEventSchema,
65544
+ BOOTSTRAP_THRESHOLD,
64853
65545
  AgentProgressSchema,
64854
65546
  AgentMailNotInitializedError,
64855
65547
  AgentMailError