npm - opencode-swarm-plugin - Versions diffs - 0.37.0 → 0.39.1 - Mend

opencode-swarm-plugin 0.37.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

package/.env +2 -0
package/.hive/eval-results.json +26 -0
package/.hive/issues.jsonl +20 -5
package/.hive/memories.jsonl +35 -1
package/.opencode/eval-history.jsonl +12 -0
package/.turbo/turbo-build.log +4 -4
package/.turbo/turbo-test.log +319 -319
package/CHANGELOG.md +258 -0
package/README.md +50 -0
package/bin/swarm.test.ts +475 -0
package/bin/swarm.ts +385 -208
package/dist/compaction-hook.d.ts +1 -1
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/compaction-prompt-scoring.d.ts +124 -0
package/dist/compaction-prompt-scoring.d.ts.map +1 -0
package/dist/eval-capture.d.ts +81 -1
package/dist/eval-capture.d.ts.map +1 -1
package/dist/eval-gates.d.ts +84 -0
package/dist/eval-gates.d.ts.map +1 -0
package/dist/eval-history.d.ts +117 -0
package/dist/eval-history.d.ts.map +1 -0
package/dist/eval-learning.d.ts +216 -0
package/dist/eval-learning.d.ts.map +1 -0
package/dist/hive.d.ts +59 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +87 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +823 -131
package/dist/plugin.js +655 -131
package/dist/post-compaction-tracker.d.ts +133 -0
package/dist/post-compaction-tracker.d.ts.map +1 -0
package/dist/swarm-decompose.d.ts +30 -0
package/dist/swarm-decompose.d.ts.map +1 -1
package/dist/swarm-orchestrate.d.ts +23 -0
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts +25 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm.d.ts +19 -0
package/dist/swarm.d.ts.map +1 -1
package/evals/README.md +595 -94
package/evals/compaction-prompt.eval.ts +149 -0
package/evals/coordinator-behavior.eval.ts +8 -8
package/evals/fixtures/compaction-prompt-cases.ts +305 -0
package/evals/lib/compaction-loader.test.ts +248 -0
package/evals/lib/compaction-loader.ts +320 -0
package/evals/lib/data-loader.test.ts +345 -0
package/evals/lib/data-loader.ts +107 -6
package/evals/scorers/compaction-prompt-scorers.ts +145 -0
package/evals/scorers/compaction-scorers.ts +13 -13
package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
package/evals/scorers/coordinator-discipline.ts +13 -13
package/examples/plugin-wrapper-template.ts +177 -8
package/package.json +7 -2
package/scripts/migrate-unknown-sessions.ts +349 -0
package/src/compaction-capture.integration.test.ts +257 -0
package/src/compaction-hook.test.ts +139 -2
package/src/compaction-hook.ts +113 -2
package/src/compaction-prompt-scorers.test.ts +299 -0
package/src/compaction-prompt-scoring.ts +298 -0
package/src/eval-capture.test.ts +422 -0
package/src/eval-capture.ts +94 -2
package/src/eval-gates.test.ts +306 -0
package/src/eval-gates.ts +218 -0
package/src/eval-history.test.ts +508 -0
package/src/eval-history.ts +214 -0
package/src/eval-learning.test.ts +378 -0
package/src/eval-learning.ts +360 -0
package/src/index.ts +61 -1
package/src/post-compaction-tracker.test.ts +251 -0
package/src/post-compaction-tracker.ts +237 -0
package/src/swarm-decompose.test.ts +40 -47
package/src/swarm-decompose.ts +2 -2
package/src/swarm-orchestrate.test.ts +270 -7
package/src/swarm-orchestrate.ts +100 -13
package/src/swarm-prompts.test.ts +121 -0
package/src/swarm-prompts.ts +297 -4
package/src/swarm-research.integration.test.ts +157 -0
package/src/swarm-review.ts +3 -3
/package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0

package/dist/index.js CHANGED Viewed

@@ -22178,6 +22178,383 @@ Codebase context considered: ${args.codebase_context.slice(0, 200)}...`;
   };
 });
+// src/eval-capture.ts
+// Export table for src/eval-capture.ts. Each entry is a thunk that returns the
+// module-level binding of the same name, so the value is read at access time.
+// NOTE(review): `__export` is defined elsewhere in the bundle; presumably it
+// installs these thunks as getters on `exports_eval_capture` — confirm.
+var exports_eval_capture = {};
+__export(exports_eval_capture, {
+  updateEvalRecord: () => updateEvalRecord,
+  saveSession: () => saveSession,
+  readSessionEvents: () => readSessionEvents,
+  readPartialRecords: () => readPartialRecords,
+  readEvalRecords: () => readEvalRecords,
+  getSessionPath: () => getSessionPath,
+  getSessionDir: () => getSessionDir,
+  getEvalDataStats: () => getEvalDataStats,
+  getEvalDataPath: () => getEvalDataPath,
+  finalizeEvalRecord: () => finalizeEvalRecord,
+  exportForEvalite: () => exportForEvalite,
+  ensureSessionDir: () => ensureSessionDir,
+  ensureEvalDataDir: () => ensureEvalDataDir,
+  captureSubtaskOutcome: () => captureSubtaskOutcome,
+  captureHumanFeedback: () => captureHumanFeedback,
+  captureDecomposition: () => captureDecomposition,
+  captureCoordinatorEvent: () => captureCoordinatorEvent,
+  captureCompactionEvent: () => captureCompactionEvent,
+  appendEvalRecord: () => appendEvalRecord,
+  SubtaskOutcomeSchema: () => SubtaskOutcomeSchema,
+  EvalRecordSchema: () => EvalRecordSchema,
+  DEFAULT_EVAL_DATA_PATH: () => DEFAULT_EVAL_DATA_PATH,
+  CoordinatorSessionSchema: () => CoordinatorSessionSchema,
+  CoordinatorEventSchema: () => CoordinatorEventSchema
+});
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+/**
+ * Resolve the location of a project's eval-data JSONL file.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @returns {string} `projectPath` joined with `DEFAULT_EVAL_DATA_PATH`.
+ */
+function getEvalDataPath(projectPath) {
+  const evalDataFile = path.join(projectPath, DEFAULT_EVAL_DATA_PATH);
+  return evalDataFile;
+}
+/**
+ * Make sure the directory that holds the project's eval-data file exists.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @returns {void}
+ */
+function ensureEvalDataDir(projectPath) {
+  const dir = path.dirname(getEvalDataPath(projectPath));
+  // With `recursive: true`, mkdirSync succeeds when the directory already
+  // exists, so a separate existsSync pre-check is unnecessary.
+  fs.mkdirSync(dir, { recursive: true });
+}
+/**
+ * Append one record to the project's eval-data file as a single JSON line.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @param {object} record2 - Record to serialise; written verbatim via JSON.stringify.
+ * @returns {void}
+ */
+function appendEvalRecord(projectPath, record2) {
+  ensureEvalDataDir(projectPath);
+  const serialized = JSON.stringify(record2) + "\n";
+  fs.appendFileSync(getEvalDataPath(projectPath), serialized, "utf-8");
+}
+/**
+ * Load every record from the project's eval-data file, validating each one.
+ *
+ * Each non-empty line is parsed as JSON and passed through
+ * `EvalRecordSchema.parse`; a failure on any line propagates to the caller.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @returns {object[]} Parsed records in file order; `[]` when the file does not exist.
+ */
+function readEvalRecords(projectPath) {
+  const evalPath = getEvalDataPath(projectPath);
+  if (!fs.existsSync(evalPath)) {
+    return [];
+  }
+  const records = [];
+  for (const rawLine of fs.readFileSync(evalPath, "utf-8").trim().split("\n")) {
+    if (!rawLine) {
+      continue;
+    }
+    records.push(EvalRecordSchema.parse(JSON.parse(rawLine)));
+  }
+  return records;
+}
+/**
+ * Load every record from the project's eval-data file without schema validation.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @returns {Array} One JSON-parsed value per non-empty line; `[]` when the file does not exist.
+ */
+function readPartialRecords(projectPath) {
+  const evalPath = getEvalDataPath(projectPath);
+  if (!fs.existsSync(evalPath)) {
+    return [];
+  }
+  const nonEmptyLines = fs
+    .readFileSync(evalPath, "utf-8")
+    .trim()
+    .split("\n")
+    .filter((rawLine) => rawLine.length > 0);
+  return nonEmptyLines.map((rawLine) => JSON.parse(rawLine));
+}
+/**
+ * Shallow-merge `updates` into the stored record whose `id` matches, then
+ * rewrite the whole eval-data file.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @param {string} id - Value compared against each record's `id`.
+ * @param {object} updates - Fields to overlay on the existing record.
+ * @returns {boolean} `true` when a record was found and the file rewritten, otherwise `false`.
+ */
+function updateEvalRecord(projectPath, id, updates) {
+  const records = readPartialRecords(projectPath);
+  const position = records.findIndex((entry) => entry.id === id);
+  if (position === -1) {
+    return false;
+  }
+  records[position] = { ...records[position], ...updates };
+  // One JSON document per line, each terminated by a newline.
+  const serialized = records.map((entry) => JSON.stringify(entry) + "\n").join("");
+  fs.writeFileSync(getEvalDataPath(projectPath), serialized, "utf-8");
+  return true;
+}
+/**
+ * Record a freshly produced decomposition: remember it in `inProgressRecords`
+ * under the epic ID and append it to the project's eval-data file.
+ *
+ * @param {object} params - Fields read: epicId, projectPath, task, context,
+ *   strategy, subtasks, epicTitle, epicDescription.
+ * @returns {object} The record that was stored, with an empty `outcomes` array.
+ */
+function captureDecomposition(params) {
+  const {
+    epicId,
+    projectPath,
+    task,
+    context,
+    strategy,
+    subtasks,
+    epicTitle,
+    epicDescription
+  } = params;
+  const record2 = {
+    id: epicId,
+    timestamp: new Date().toISOString(),
+    project_path: projectPath,
+    task,
+    context,
+    strategy,
+    subtask_count: subtasks.length,
+    epic_title: epicTitle,
+    epic_description: epicDescription,
+    subtasks,
+    outcomes: []
+  };
+  inProgressRecords.set(epicId, record2);
+  appendEvalRecord(projectPath, record2);
+  return record2;
+}
+/**
+ * Attach the outcome of one subtask to its epic's eval record.
+ *
+ * When the epic is tracked in `inProgressRecords`, the outcome is pushed onto
+ * that in-memory record and the full list is persisted. When it is not tracked,
+ * the outcome is appended to whatever outcomes are already stored on disk
+ * instead of overwriting them with `undefined`.
+ * NOTE(review): the untracked case presumably arises when the process restarted
+ * after captureDecomposition — confirm.
+ *
+ * @param {object} params - Fields read: epicId, projectPath, beadId, title,
+ *   plannedFiles, actualFiles, durationMs, errorCount, retryCount, success,
+ *   failureMode.
+ * @returns {void}
+ */
+function captureSubtaskOutcome(params) {
+  const outcome = {
+    bead_id: params.beadId,
+    title: params.title,
+    planned_files: params.plannedFiles,
+    actual_files: params.actualFiles,
+    duration_ms: params.durationMs,
+    error_count: params.errorCount,
+    retry_count: params.retryCount,
+    success: params.success,
+    failure_mode: params.failureMode
+  };
+  const record2 = inProgressRecords.get(params.epicId);
+  let outcomes;
+  if (record2) {
+    record2.outcomes = record2.outcomes || [];
+    record2.outcomes.push(outcome);
+    outcomes = record2.outcomes;
+  } else {
+    const persisted = readPartialRecords(params.projectPath).find((r) => r.id === params.epicId);
+    if (!persisted) {
+      // Nothing stored for this epic; there is no record to update.
+      return;
+    }
+    outcomes = [...(persisted.outcomes || []), outcome];
+  }
+  updateEvalRecord(params.projectPath, params.epicId, { outcomes });
+}
+/**
+ * Compute aggregate metrics for a tracked epic, persist them, and stop
+ * tracking the epic in `inProgressRecords`.
+ *
+ * Metrics derived from the recorded outcomes:
+ * - overall_success: every outcome succeeded
+ * - total_duration_ms / total_errors: sums over outcomes
+ * - file_overlap_count: number of planned files listed by more than one subtask
+ * - scope_accuracy: distinct actual files / distinct planned files (1 when
+ *   nothing was planned), capped at 2
+ * - time_balance_ratio: longest / shortest positive duration (1 with fewer than two)
+ *
+ * @param {object} params - Fields read: epicId, projectPath.
+ * @returns {object|null} The finalized record, or `null` when the epic is not
+ *   tracked or has no outcomes yet.
+ */
+function finalizeEvalRecord(params) {
+  const record2 = inProgressRecords.get(params.epicId);
+  if (!record2 || !record2.outcomes || record2.outcomes.length === 0) {
+    return null;
+  }
+  const outcomes = record2.outcomes;
+  const overallSuccess = outcomes.every((o) => o.success);
+  const totalDurationMs = outcomes.reduce((sum, o) => sum + o.duration_ms, 0);
+  const totalErrors = outcomes.reduce((sum, o) => sum + o.error_count, 0);
+  const allPlannedFiles = record2.subtasks?.flatMap((s) => s.files) || [];
+  const fileOccurrences = new Map;
+  for (const file2 of allPlannedFiles) {
+    fileOccurrences.set(file2, (fileOccurrences.get(file2) || 0) + 1);
+  }
+  const fileOverlapCount = Array.from(fileOccurrences.values()).filter((count) => count > 1).length;
+  const plannedFileSet = new Set(allPlannedFiles);
+  const actualFileSet = new Set(outcomes.flatMap((o) => o.actual_files));
+  const rawScopeAccuracy = plannedFileSet.size > 0 ? actualFileSet.size / plannedFileSet.size : 1;
+  // EvalRecordSchema limits scope_accuracy to at most 2. Writing a larger value
+  // would make readEvalRecords fail validation on this line, so cap it here.
+  const scopeAccuracy = Math.min(rawScopeAccuracy, 2);
+  const durations = outcomes.map((o) => o.duration_ms).filter((d) => d > 0);
+  const timeBalanceRatio = durations.length > 1 ? Math.max(...durations) / Math.min(...durations) : 1;
+  const finalRecord = {
+    ...record2,
+    overall_success: overallSuccess,
+    total_duration_ms: totalDurationMs,
+    total_errors: totalErrors,
+    file_overlap_count: fileOverlapCount,
+    scope_accuracy: scopeAccuracy,
+    time_balance_ratio: timeBalanceRatio
+  };
+  updateEvalRecord(params.projectPath, params.epicId, finalRecord);
+  inProgressRecords.delete(params.epicId);
+  return finalRecord;
+}
+/**
+ * Store human review feedback on an epic's persisted eval record.
+ *
+ * @param {object} params - Fields read: projectPath, epicId, accepted, modified, notes.
+ * @returns {void}
+ */
+function captureHumanFeedback(params) {
+  const { projectPath, epicId, accepted, modified, notes } = params;
+  const feedback = {
+    human_accepted: accepted,
+    human_modified: modified,
+    human_notes: notes
+  };
+  updateEvalRecord(projectPath, epicId, feedback);
+}
+/**
+ * Convert stored eval records that have at least one outcome into
+ * `{ input, expected, actual }` cases.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @returns {object[]} One case per record with outcomes, in file order.
+ */
+function exportForEvalite(projectPath) {
+  const cases = [];
+  for (const record2 of readEvalRecords(projectPath)) {
+    if (!record2.outcomes || !(record2.outcomes.length > 0)) {
+      continue;
+    }
+    cases.push({
+      input: {
+        task: record2.task,
+        context: record2.context
+      },
+      expected: {
+        minSubtasks: 2,
+        subtaskCount: record2.subtask_count,
+        requiredFiles: record2.subtasks.flatMap((subtask) => subtask.files),
+        overallSuccess: record2.overall_success
+      },
+      actual: record2
+    });
+  }
+  return cases;
+}
+/**
+ * Summarise the project's stored eval records.
+ *
+ * Only records with at least one outcome count as "complete". Averages are
+ * taken over complete records; a record missing `scope_accuracy` or
+ * `time_balance_ratio` contributes 1, while a stored value of 0 is kept as 0.
+ *
+ * @param {string} projectPath - Root directory of the project.
+ * @returns {{totalRecords: number, completeRecords: number, successRate: number,
+ *   avgSubtasks: number, avgDurationMs: number, avgScopeAccuracy: number,
+ *   avgTimeBalance: number}} All-zero metrics when no record is complete.
+ */
+function getEvalDataStats(projectPath) {
+  const records = readEvalRecords(projectPath);
+  const complete = records.filter((r) => r.outcomes && r.outcomes.length > 0);
+  if (complete.length === 0) {
+    return {
+      totalRecords: records.length,
+      completeRecords: 0,
+      successRate: 0,
+      avgSubtasks: 0,
+      avgDurationMs: 0,
+      avgScopeAccuracy: 0,
+      avgTimeBalance: 0
+    };
+  }
+  const successCount = complete.filter((r) => r.overall_success).length;
+  const avgSubtasks = complete.reduce((sum, r) => sum + (r.outcomes?.length ?? 0), 0) / complete.length;
+  const avgDurationMs = complete.reduce((sum, r) => sum + (r.total_duration_ms ?? 0), 0) / complete.length;
+  // `??` rather than `||`: a scope_accuracy of 0 is a valid value under the
+  // schema and must not be replaced by the default of 1.
+  const avgScopeAccuracy = complete.reduce((sum, r) => sum + (r.scope_accuracy ?? 1), 0) / complete.length;
+  const avgTimeBalance = complete.reduce((sum, r) => sum + (r.time_balance_ratio ?? 1), 0) / complete.length;
+  return {
+    totalRecords: records.length,
+    completeRecords: complete.length,
+    successRate: successCount / complete.length,
+    avgSubtasks,
+    avgDurationMs,
+    avgScopeAccuracy,
+    avgTimeBalance
+  };
+}
+/**
+ * Directory under the current user's home where session logs are kept.
+ *
+ * @returns {string} `<home>/.config/swarm-tools/sessions`.
+ */
+function getSessionDir() {
+  const segments = [os.homedir(), ".config", "swarm-tools", "sessions"];
+  return path.join(...segments);
+}
+/**
+ * Path of the JSONL log file for one session.
+ *
+ * @param {string} sessionId - Session identifier, used as the file's base name.
+ * @returns {string} `<session dir>/<sessionId>.jsonl`.
+ */
+function getSessionPath(sessionId) {
+  const fileName = `${sessionId}.jsonl`;
+  return path.join(getSessionDir(), fileName);
+}
+/**
+ * Make sure the session log directory exists.
+ *
+ * @returns {void}
+ */
+function ensureSessionDir() {
+  // With `recursive: true`, mkdirSync succeeds when the directory already
+  // exists, so a separate existsSync pre-check is unnecessary.
+  fs.mkdirSync(getSessionDir(), { recursive: true });
+}
+/**
+ * Validate a coordinator event and append it to its session's JSONL log.
+ *
+ * The event is checked with `CoordinatorEventSchema.parse` before anything is
+ * written; the original `event` object (not the parse result) is serialised.
+ *
+ * @param {object} event - Event to record; `event.session_id` selects the log file.
+ * @returns {void}
+ */
+function captureCoordinatorEvent(event) {
+  CoordinatorEventSchema.parse(event);
+  ensureSessionDir();
+  const serialized = JSON.stringify(event) + "\n";
+  fs.appendFileSync(getSessionPath(event.session_id), serialized, "utf-8");
+}
+/**
+ * Build a `COMPACTION` coordinator event stamped with the current time and
+ * record it through `captureCoordinatorEvent`.
+ *
+ * @param {object} params - Fields read: session_id, epic_id, compaction_type, payload.
+ * @returns {void}
+ */
+function captureCompactionEvent(params) {
+  const { session_id, epic_id, compaction_type, payload } = params;
+  captureCoordinatorEvent({
+    session_id,
+    epic_id,
+    timestamp: new Date().toISOString(),
+    event_type: "COMPACTION",
+    compaction_type,
+    payload
+  });
+}
+/**
+ * Load and validate every event stored in a session's JSONL log.
+ *
+ * Each non-empty line is parsed as JSON and passed through
+ * `CoordinatorEventSchema.parse`; a failure on any line propagates to the caller.
+ *
+ * @param {string} sessionId - Session whose log should be read.
+ * @returns {object[]} Parsed events in file order; `[]` when the log does not exist.
+ */
+function readSessionEvents(sessionId) {
+  const sessionPath = getSessionPath(sessionId);
+  if (!fs.existsSync(sessionPath)) {
+    return [];
+  }
+  const events = [];
+  for (const rawLine of fs.readFileSync(sessionPath, "utf-8").trim().split("\n")) {
+    if (!rawLine) {
+      continue;
+    }
+    events.push(CoordinatorEventSchema.parse(JSON.parse(rawLine)));
+  }
+  return events;
+}
+/**
+ * Assemble a session summary from the events logged for a session.
+ *
+ * `start_time` and `end_time` are the earliest and latest event timestamps.
+ * Despite the name, nothing visible here writes the session anywhere; the
+ * object is only returned.
+ *
+ * @param {object} params - Fields read: session_id, epic_id.
+ * @returns {object|null} `{ session_id, epic_id, start_time, end_time, events }`,
+ *   or `null` when the session has no events.
+ * @throws {RangeError} If an event timestamp cannot be parsed as a date
+ *   (`toISOString` on an invalid Date).
+ */
+function saveSession(params) {
+  const events = readSessionEvents(params.session_id);
+  if (events.length === 0) {
+    return null;
+  }
+  // Track min/max in a loop rather than spreading the array into
+  // Math.min/Math.max, which can exceed the engine's argument limit on very
+  // long logs. NaN still propagates through Math.min/Math.max as before.
+  let earliest = Infinity;
+  let latest = -Infinity;
+  for (const e of events) {
+    const time = new Date(e.timestamp).getTime();
+    earliest = Math.min(earliest, time);
+    latest = Math.max(latest, time);
+  }
+  const session = {
+    session_id: params.session_id,
+    epic_id: params.epic_id,
+    start_time: new Date(earliest).toISOString(),
+    end_time: new Date(latest).toISOString(),
+    events
+  };
+  return session;
+}
+// Module-level bindings for src/eval-capture.ts. Only DEFAULT_EVAL_DATA_PATH has
+// a value at declaration time; the schemas and `inProgressRecords` are assigned
+// inside init_eval_capture below.
+var SubtaskOutcomeSchema, EvalRecordSchema, CoordinatorEventSchema, CoordinatorSessionSchema, DEFAULT_EVAL_DATA_PATH = ".opencode/eval-data.jsonl", inProgressRecords;
+// Initializer for the eval-capture module: builds the validation schemas and
+// the in-memory map of in-progress records.
+// NOTE(review): `__esm` is defined elsewhere in the bundle; presumably it wraps
+// the callback so it runs once on first call — confirm.
+// NOTE(review): `exports_external` looks like the zod namespace made available
+// by init_zod() — confirm.
+var init_eval_capture = __esm(() => {
+  init_zod();
+  // Result of a single subtask: planned vs. actual files, timing, error and
+  // retry counts, and whether it succeeded.
+  SubtaskOutcomeSchema = exports_external.object({
+    bead_id: exports_external.string(),
+    title: exports_external.string(),
+    planned_files: exports_external.array(exports_external.string()),
+    actual_files: exports_external.array(exports_external.string()),
+    duration_ms: exports_external.number().int().min(0),
+    error_count: exports_external.number().int().min(0),
+    retry_count: exports_external.number().int().min(0),
+    success: exports_external.boolean(),
+    failure_mode: exports_external.string().optional()
+  });
+  // One line of the eval-data file: the decomposition itself plus optional
+  // outcomes, human feedback and computed metrics.
+  EvalRecordSchema = exports_external.object({
+    id: exports_external.string(),
+    timestamp: exports_external.string(),
+    project_path: exports_external.string(),
+    task: exports_external.string(),
+    context: exports_external.string().optional(),
+    strategy: exports_external.enum(["file-based", "feature-based", "risk-based", "auto"]),
+    subtask_count: exports_external.number().int().min(1),
+    epic_title: exports_external.string(),
+    epic_description: exports_external.string().optional(),
+    subtasks: exports_external.array(exports_external.object({
+      title: exports_external.string(),
+      description: exports_external.string().optional(),
+      files: exports_external.array(exports_external.string()),
+      dependencies: exports_external.array(exports_external.number()).optional(),
+      estimated_complexity: exports_external.number().int().min(1).max(5).optional()
+    })),
+    outcomes: exports_external.array(SubtaskOutcomeSchema).optional(),
+    overall_success: exports_external.boolean().optional(),
+    total_duration_ms: exports_external.number().int().min(0).optional(),
+    total_errors: exports_external.number().int().min(0).optional(),
+    human_accepted: exports_external.boolean().optional(),
+    human_modified: exports_external.boolean().optional(),
+    human_notes: exports_external.string().optional(),
+    file_overlap_count: exports_external.number().int().min(0).optional(),
+    scope_accuracy: exports_external.number().min(0).max(2).optional(),
+    time_balance_ratio: exports_external.number().min(1).optional()
+  });
+  // Coordinator events, discriminated on `event_type`. Every variant carries
+  // session_id, epic_id, timestamp and a free-form payload, plus one
+  // variant-specific subtype enum.
+  CoordinatorEventSchema = exports_external.discriminatedUnion("event_type", [
+    exports_external.object({
+      session_id: exports_external.string(),
+      epic_id: exports_external.string(),
+      timestamp: exports_external.string(),
+      event_type: exports_external.literal("DECISION"),
+      decision_type: exports_external.enum([
+        "strategy_selected",
+        "worker_spawned",
+        "review_completed",
+        "decomposition_complete"
+      ]),
+      payload: exports_external.any()
+    }),
+    exports_external.object({
+      session_id: exports_external.string(),
+      epic_id: exports_external.string(),
+      timestamp: exports_external.string(),
+      event_type: exports_external.literal("VIOLATION"),
+      violation_type: exports_external.enum([
+        "coordinator_edited_file",
+        "coordinator_ran_tests",
+        "coordinator_reserved_files",
+        "no_worker_spawned"
+      ]),
+      payload: exports_external.any()
+    }),
+    exports_external.object({
+      session_id: exports_external.string(),
+      epic_id: exports_external.string(),
+      timestamp: exports_external.string(),
+      event_type: exports_external.literal("OUTCOME"),
+      outcome_type: exports_external.enum([
+        "subtask_success",
+        "subtask_retry",
+        "subtask_failed",
+        "epic_complete"
+      ]),
+      payload: exports_external.any()
+    }),
+    exports_external.object({
+      session_id: exports_external.string(),
+      epic_id: exports_external.string(),
+      timestamp: exports_external.string(),
+      event_type: exports_external.literal("COMPACTION"),
+      compaction_type: exports_external.enum([
+        "detection_complete",
+        "prompt_generated",
+        "context_injected",
+        "resumption_started",
+        "tool_call_tracked"
+      ]),
+      payload: exports_external.any()
+    })
+  ]);
+  // A whole session: identifiers, start/end times and its list of events.
+  CoordinatorSessionSchema = exports_external.object({
+    session_id: exports_external.string(),
+    epic_id: exports_external.string(),
+    start_time: exports_external.string(),
+    end_time: exports_external.string().optional(),
+    events: exports_external.array(CoordinatorEventSchema)
+  });
+  // Epic ID -> record being built; filled by captureDecomposition, extended by
+  // captureSubtaskOutcome and removed by finalizeEvalRecord.
+  inProgressRecords = new Map;
+});
 // src/learning.ts
 var exports_learning = {};
 __export(exports_learning, {
@@ -39409,6 +39786,71 @@ var hive_ready = tool({
     }
   }
 });
+// Tool definition for querying hive cells. `execute` handles three modes, in
+// order: lookup by (partial) ID, fetch of the next ready cell, and a filtered
+// list query. Every mode returns a pretty-printed JSON array of formatted cells.
+var hive_cells = tool({
+  description: `Query cells from the hive database with flexible filtering.
+USE THIS TOOL TO:
+- List all open cells: hive_cells()
+- Find cells by status: hive_cells({ status: "in_progress" })
+- Find cells by type: hive_cells({ type: "bug" })
+- Get a specific cell by partial ID: hive_cells({ id: "mjkmd" })
+- Get the next ready (unblocked) cell: hive_cells({ ready: true })
+- Combine filters: hive_cells({ status: "open", type: "task" })
+RETURNS: Array of cells with id, title, status, priority, type, parent_id, created_at, updated_at
+PREFER THIS OVER hive_query when you need to:
+- See what work is available
+- Check status of multiple cells
+- Find cells matching criteria
+- Look up a cell by partial ID`,
+  args: {
+    id: tool.schema.string().optional().describe("Partial or full cell ID to look up"),
+    status: tool.schema.enum(["open", "in_progress", "blocked", "closed"]).optional().describe("Filter by status"),
+    type: tool.schema.enum(["task", "bug", "feature", "epic", "chore"]).optional().describe("Filter by type"),
+    ready: tool.schema.boolean().optional().describe("If true, return only the next unblocked cell"),
+    limit: tool.schema.number().optional().describe("Max cells to return (default 20)")
+  },
+  async execute(args, ctx) {
+    const projectKey = getHiveWorkingDirectory();
+    const adapter = await getHiveAdapter(projectKey);
+    try {
+      if (args.id) {
+        // Fall back to the raw argument when the partial ID does not resolve.
+        const fullId = await resolvePartialId(adapter, projectKey, args.id) || args.id;
+        const cell = await adapter.getCell(projectKey, fullId);
+        if (!cell) {
+          throw new HiveError(`No cell found matching ID '${args.id}'`, "hive_cells");
+        }
+        const formatted2 = formatCellForOutput(cell);
+        return JSON.stringify([formatted2], null, 2);
+      }
+      if (args.ready) {
+        const ready = await adapter.getNextReadyCell(projectKey);
+        if (!ready) {
+          return JSON.stringify([], null, 2);
+        }
+        const formatted2 = formatCellForOutput(ready);
+        return JSON.stringify([formatted2], null, 2);
+      }
+      const cells = await adapter.queryCells(projectKey, {
+        status: args.status,
+        type: args.type,
+        limit: args.limit || 20
+      });
+      const formatted = cells.map((c) => formatCellForOutput(c));
+      return JSON.stringify(formatted, null, 2);
+    } catch (error45) {
+      const message = error45 instanceof Error ? error45.message : String(error45);
+      if (message.includes("Ambiguous hash")) {
+        throw new HiveError(`Ambiguous ID '${args.id}': multiple cells match. Please provide more characters.`, "hive_cells");
+      }
+      if (message.includes("Bead not found") || message.includes("Cell not found")) {
+        throw new HiveError(`No cell found matching ID '${args.id || "unknown"}'`, "hive_cells");
+      }
+      // An error that is already a HiveError (such as the "No cell found" one
+      // thrown above) is passed through unchanged rather than re-wrapped as
+      // "Failed to query cells: ...".
+      if (error45 instanceof HiveError) {
+        throw error45;
+      }
+      throw new HiveError(`Failed to query cells: ${message}`, "hive_cells");
+    }
+  }
+});
 var hive_sync = tool({
   description: "Sync hive to git and push (MANDATORY at session end)",
   args: {
@@ -39550,6 +39992,7 @@ var hiveTools = {
   hive_close,
   hive_start,
   hive_ready,
+  hive_cells,
   hive_sync,
   hive_link_thread
 };
@@ -41846,122 +42289,7 @@ init_swarm_strategies();
 init_dist();
 init_zod();
 init_swarm_strategies();
-// src/eval-capture.ts
-init_zod();
-import * as fs from "node:fs";
-import * as os from "node:os";
-import * as path from "node:path";
-var SubtaskOutcomeSchema = exports_external.object({
-  bead_id: exports_external.string(),
-  title: exports_external.string(),
-  planned_files: exports_external.array(exports_external.string()),
-  actual_files: exports_external.array(exports_external.string()),
-  duration_ms: exports_external.number().int().min(0),
-  error_count: exports_external.number().int().min(0),
-  retry_count: exports_external.number().int().min(0),
-  success: exports_external.boolean(),
-  failure_mode: exports_external.string().optional()
-});
-var EvalRecordSchema = exports_external.object({
-  id: exports_external.string(),
-  timestamp: exports_external.string(),
-  project_path: exports_external.string(),
-  task: exports_external.string(),
-  context: exports_external.string().optional(),
-  strategy: exports_external.enum(["file-based", "feature-based", "risk-based", "auto"]),
-  subtask_count: exports_external.number().int().min(1),
-  epic_title: exports_external.string(),
-  epic_description: exports_external.string().optional(),
-  subtasks: exports_external.array(exports_external.object({
-    title: exports_external.string(),
-    description: exports_external.string().optional(),
-    files: exports_external.array(exports_external.string()),
-    dependencies: exports_external.array(exports_external.number()).optional(),
-    estimated_complexity: exports_external.number().int().min(1).max(5).optional()
-  })),
-  outcomes: exports_external.array(SubtaskOutcomeSchema).optional(),
-  overall_success: exports_external.boolean().optional(),
-  total_duration_ms: exports_external.number().int().min(0).optional(),
-  total_errors: exports_external.number().int().min(0).optional(),
-  human_accepted: exports_external.boolean().optional(),
-  human_modified: exports_external.boolean().optional(),
-  human_notes: exports_external.string().optional(),
-  file_overlap_count: exports_external.number().int().min(0).optional(),
-  scope_accuracy: exports_external.number().min(0).max(2).optional(),
-  time_balance_ratio: exports_external.number().min(1).optional()
-});
-var CoordinatorEventSchema = exports_external.discriminatedUnion("event_type", [
-  exports_external.object({
-    session_id: exports_external.string(),
-    epic_id: exports_external.string(),
-    timestamp: exports_external.string(),
-    event_type: exports_external.literal("DECISION"),
-    decision_type: exports_external.enum([
-      "strategy_selected",
-      "worker_spawned",
-      "review_completed",
-      "decomposition_complete"
-    ]),
-    payload: exports_external.any()
-  }),
-  exports_external.object({
-    session_id: exports_external.string(),
-    epic_id: exports_external.string(),
-    timestamp: exports_external.string(),
-    event_type: exports_external.literal("VIOLATION"),
-    violation_type: exports_external.enum([
-      "coordinator_edited_file",
-      "coordinator_ran_tests",
-      "coordinator_reserved_files",
-      "no_worker_spawned"
-    ]),
-    payload: exports_external.any()
-  }),
-  exports_external.object({
-    session_id: exports_external.string(),
-    epic_id: exports_external.string(),
-    timestamp: exports_external.string(),
-    event_type: exports_external.literal("OUTCOME"),
-    outcome_type: exports_external.enum([
-      "subtask_success",
-      "subtask_retry",
-      "subtask_failed",
-      "epic_complete"
-    ]),
-    payload: exports_external.any()
-  })
-]);
-var CoordinatorSessionSchema = exports_external.object({
-  session_id: exports_external.string(),
-  epic_id: exports_external.string(),
-  start_time: exports_external.string(),
-  end_time: exports_external.string().optional(),
-  events: exports_external.array(CoordinatorEventSchema)
-});
-var inProgressRecords = new Map;
-function getSessionDir() {
-  return path.join(os.homedir(), ".config", "swarm-tools", "sessions");
-}
-function getSessionPath(sessionId) {
-  return path.join(getSessionDir(), `${sessionId}.jsonl`);
-}
-function ensureSessionDir() {
-  const sessionDir = getSessionDir();
-  if (!fs.existsSync(sessionDir)) {
-    fs.mkdirSync(sessionDir, { recursive: true });
-  }
-}
-function captureCoordinatorEvent(event) {
-  CoordinatorEventSchema.parse(event);
-  ensureSessionDir();
-  const sessionPath = getSessionPath(event.session_id);
-  const line = `${JSON.stringify(event)}
-`;
-  fs.appendFileSync(sessionPath, line, "utf-8");
-}
-// src/swarm-decompose.ts
+init_eval_capture();
 var DECOMPOSITION_PROMPT = `You are decomposing a task into parallelizable subtasks for a swarm of agents.
 ## Task
@@ -42279,9 +42607,14 @@ ${fullContext}` : `## Additional Context
   }
 });
 var swarm_validate_decomposition = tool({
-  description: "Validate a decomposition response against CellTreeSchema",
+  description: "Validate a decomposition response against CellTreeSchema and capture for eval",
   args: {
-    response: tool.schema.string().describe("JSON response from agent (CellTree format)")
+    response: tool.schema.string().describe("JSON response from agent (CellTree format)"),
+    project_path: tool.schema.string().optional().describe("Project path for eval capture"),
+    task: tool.schema.string().optional().describe("Original task description for eval capture"),
+    context: tool.schema.string().optional().describe("Context provided for decomposition"),
+    strategy: tool.schema.enum(["file-based", "feature-based", "risk-based", "auto"]).optional().describe("Decomposition strategy used"),
+    epic_id: tool.schema.string().optional().describe("Epic ID for eval capture")
   },
   async execute(args) {
     try {
@@ -42315,6 +42648,29 @@ var swarm_validate_decomposition = tool({
         }
       }
       const instructionConflicts = detectInstructionConflicts(validated.subtasks);
+      if (args.project_path && args.task && args.strategy && args.epic_id) {
+        try {
+          const { captureDecomposition: captureDecomposition2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+          captureDecomposition2({
+            epicId: args.epic_id,
+            projectPath: args.project_path,
+            task: args.task,
+            context: args.context,
+            strategy: args.strategy,
+            epicTitle: validated.epic.title,
+            epicDescription: validated.epic.description,
+            subtasks: validated.subtasks.map((s) => ({
+              title: s.title,
+              description: s.description,
+              files: s.files,
+              dependencies: s.dependencies,
+              estimated_complexity: s.estimated_complexity
+            }))
+          });
+        } catch (error45) {
+          console.warn("[swarm_validate_decomposition] Failed to capture decomposition:", error45);
+        }
+      }
       return JSON.stringify({
         valid: true,
         cell_tree: validated,
@@ -42355,7 +42711,7 @@ var swarm_delegate_planning = tool({
     strategy: tool.schema.enum(["auto", "file-based", "feature-based", "risk-based"]).optional().default("auto").describe("Decomposition strategy (default: auto-detect)"),
     query_cass: tool.schema.boolean().optional().default(true).describe("Query CASS for similar past tasks (default: true)")
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     const { selectStrategy: selectStrategy2, formatStrategyGuidelines: formatStrategyGuidelines2 } = await Promise.resolve().then(() => (init_swarm_strategies(), exports_swarm_strategies));
     const { formatMemoryQueryForDecomposition: formatMemoryQueryForDecomposition2 } = await Promise.resolve().then(() => (init_learning(), exports_learning));
     const { listSkills: listSkills2, getSkillsContextForSwarm: getSkillsContextForSwarm2, findRelevantSkills: findRelevantSkills2 } = await Promise.resolve().then(() => (init_skills(), exports_skills));
@@ -42371,7 +42727,7 @@ var swarm_delegate_planning = tool({
     }
     try {
       captureCoordinatorEvent({
-        session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+        session_id: _ctx.sessionID || "unknown",
         epic_id: "planning",
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
@@ -44481,6 +44837,7 @@ var worktreeTools = {
 init_dist();
 init_zod();
 import { sendSwarmMessage as sendSwarmMessage2 } from "swarm-mail";
+init_eval_capture();
 var ReviewIssueSchema = exports_external.object({
   file: exports_external.string(),
   line: exports_external.number().optional(),
@@ -44705,7 +45062,7 @@ var swarm_review_feedback = tool({
     summary: exports_external.string().optional().describe("Review summary"),
     issues: exports_external.string().optional().describe("JSON array of ReviewIssue objects (for needs_changes)")
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     let parsedIssues = [];
     if (args.issues) {
       try {
@@ -44728,7 +45085,7 @@ var swarm_review_feedback = tool({
       markReviewApproved(args.task_id);
       try {
         captureCoordinatorEvent({
-          session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+          session_id: _ctx.sessionID || "unknown",
           epic_id: epicId,
           timestamp: new Date().toISOString(),
           event_type: "DECISION",
@@ -44766,7 +45123,7 @@ You may now complete the task with \`swarm_complete\`.`,
     const remaining = MAX_REVIEW_ATTEMPTS - attemptNumber;
     try {
       captureCoordinatorEvent({
-        session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+        session_id: _ctx.sessionID || "unknown",
         epic_id: epicId,
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
@@ -44837,6 +45194,7 @@ var reviewTools = {
 };
 // src/swarm-orchestrate.ts
+init_eval_capture();
 function generateWorkerHandoff(params) {
   const handoff = {
     contract: {
@@ -45737,10 +46095,29 @@ Files touched: ${args.files_touched?.join(", ") || "none recorded"}`,
           reason: "No files_owned contract found (non-epic subtask or decomposition event missing)"
         }
       };
+      try {
+        const { captureSubtaskOutcome: captureSubtaskOutcome2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+        const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
+        const evalEpicId = cell.parent_id || epicId2;
+        captureSubtaskOutcome2({
+          epicId: evalEpicId,
+          projectPath: args.project_key,
+          beadId: args.bead_id,
+          title: cell.title,
+          plannedFiles: args.planned_files || [],
+          actualFiles: args.files_touched || [],
+          durationMs: durationMs2,
+          errorCount: args.error_count || 0,
+          retryCount: args.retry_count || 0,
+          success: true
+        });
+      } catch (error45) {
+        console.warn("[swarm_complete] Failed to capture subtask outcome:", error45);
+      }
       try {
         const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
         captureCoordinatorEvent({
-          session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+          session_id: _ctx.sessionID || "unknown",
           epic_id: epicId2,
           timestamp: new Date().toISOString(),
           event_type: "OUTCOME",
@@ -45822,7 +46199,7 @@ ${errorStack.slice(0, 1000)}
       try {
         const durationMs = args.start_time ? Date.now() - args.start_time : 0;
         captureCoordinatorEvent({
-          session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+          session_id: _ctx.sessionID || "unknown",
           epic_id: epicId,
           timestamp: new Date().toISOString(),
           event_type: "OUTCOME",
@@ -45889,7 +46266,9 @@ var swarm_record_outcome = tool({
       "user_cancelled",
       "unknown"
     ]).optional().describe("Failure classification (only when success=false). Auto-classified if not provided."),
-    failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)")
+    failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)"),
+    project_path: tool.schema.string().optional().describe("Project path (for finalizing eval records when all subtasks complete)"),
+    epic_id: tool.schema.string().optional().describe("Epic ID (for finalizing eval records when all subtasks complete)")
   },
   async execute(args) {
     const signals = {
@@ -45911,6 +46290,18 @@ var swarm_record_outcome = tool({
     const validated = OutcomeSignalsSchema.parse(signals);
     const scored = scoreImplicitFeedback(validated, DEFAULT_LEARNING_CONFIG);
     const errorStats = await globalErrorAccumulator.getErrorStats(args.bead_id);
+    let finalizedRecord = null;
+    if (args.project_path && args.epic_id) {
+      try {
+        const { finalizeEvalRecord: finalizeEvalRecord2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+        finalizedRecord = finalizeEvalRecord2({
+          epicId: args.epic_id,
+          projectPath: args.project_path
+        });
+      } catch (error45) {
+        console.warn("[swarm_record_outcome] Failed to finalize eval record:", error45);
+      }
+    }
     const criteriaToScore = args.criteria ?? [
       "type_safe",
       "no_bugs",
@@ -45952,6 +46343,7 @@ var swarm_record_outcome = tool({
         accumulated_errors: errorStats.total,
         unresolved_errors: errorStats.unresolved
       },
+      finalized_eval_record: finalizedRecord || undefined,
       note: "Feedback events should be stored for criterion weight calculation. Use learning.ts functions to apply weights."
     }, null, 2);
   }
@@ -45983,12 +46375,31 @@ async function runResearchPhase(task, projectPath, options2) {
   if (techStack.length === 0) {
     return {
       tech_stack: [],
+      spawn_instructions: [],
       summaries: {},
       memory_ids: []
     };
   }
+  const spawnInstructions = [];
+  for (const tech of techStack) {
+    const researchId = `research-${tech}-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
+    const prompt = formatResearcherPrompt({
+      research_id: researchId,
+      epic_id: "standalone-research",
+      tech_stack: [tech],
+      project_path: projectPath,
+      check_upgrades: options2?.checkUpgrades ?? false
+    });
+    spawnInstructions.push({
+      research_id: researchId,
+      tech,
+      prompt,
+      subagent_type: "swarm/researcher"
+    });
+  }
   return {
     tech_stack: techStack,
+    spawn_instructions: spawnInstructions,
     summaries: {},
     memory_ids: []
   };
@@ -46425,6 +46836,7 @@ var orchestrateTools = {
 };
 // src/swarm-prompts.ts
+init_eval_capture();
 var STRATEGY_DECOMPOSITION_PROMPT2 = `You are decomposing a task into parallelizable subtasks for a swarm of agents.
 ## Task
@@ -47221,7 +47633,7 @@ var swarm_spawn_subtask = tool({
     }).optional().describe("Recovery context from checkpoint compaction"),
     model: tool.schema.string().optional().describe("Optional explicit model override (auto-selected if not provided)")
   },
-  async execute(args) {
+  async execute(args, _ctx) {
     const prompt = formatSubtaskPromptV2({
       bead_id: args.bead_id,
       epic_id: args.epic_id,
@@ -47250,7 +47662,7 @@ var swarm_spawn_subtask = tool({
     const postCompletionInstructions = COORDINATOR_POST_WORKER_CHECKLIST.replace(/{project_key}/g, args.project_path || "$PWD").replace(/{epic_id}/g, args.epic_id).replace(/{task_id}/g, args.bead_id).replace(/{files_touched}/g, filesJoined).replace(/{worker_id}/g, "worker");
     try {
       captureCoordinatorEvent({
-        session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+        session_id: _ctx.sessionID || "unknown",
         epic_id: args.epic_id,
         timestamp: new Date().toISOString(),
         event_type: "DECISION",
@@ -63171,6 +63583,7 @@ function createMetrics(result, toolName) {
 }
 // src/planning-guardrails.ts
+init_eval_capture();
 var FILE_MODIFICATION_PATTERNS = [
   /\bimplement\b/i,
   /\bcreate\b.*\.(ts|js|tsx|jsx|py|rs|go|java|rb|swift|kt)/i,
@@ -63456,9 +63869,21 @@ function getLog() {
   }
   return _logger;
 }
-var SWARM_COMPACTION_CONTEXT = `## \uD83D\uDC1D SWARM ACTIVE - You Are The COORDINATOR
+var SWARM_COMPACTION_CONTEXT = `
+┌─────────────────────────────────────────────────────────────┐
+│                                                             │
+│             \uD83D\uDC1D  YOU ARE THE COORDINATOR  \uD83D\uDC1D                 │
+│                                                             │
+│             NOT A WORKER. NOT AN IMPLEMENTER.               │
+│                  YOU ORCHESTRATE.                           │
+│                                                             │
+└─────────────────────────────────────────────────────────────┘
+## \uD83C\uDFAF NON-NEGOTIABLE: YOU ARE THE COORDINATOR
+Context was compacted but the swarm is still running. **YOU ARE THE COORDINATOR.**
-Context was compacted but the swarm is still running. You are the **COORDINATOR**.
+Your role is ORCHESTRATION, not implementation. When you catch yourself about to do work directly, STOP.
 ### ⛔ NEVER DO THESE (Coordinator Anti-Patterns)
@@ -63469,9 +63894,27 @@ Context was compacted but the swarm is still running. You are the **COORDINATOR*
 - ❌ **NEVER** implement features yourself - SPAWN A WORKER
 - ❌ **NEVER** "just do it myself to save time" - NO. SPAWN A WORKER.
 - ❌ **NEVER** reserve files with \`swarmmail_reserve\` - Workers reserve files
+- ❌ **NEVER** fetch files/docs directly - SPAWN A RESEARCHER
 **If you catch yourself about to edit a file, STOP. Use \`swarm_spawn_subtask\` instead.**
+### \uD83D\uDEAB FORBIDDEN TOOLS (Coordinators MUST delegate these)
+**NEVER use these tools directly. ALWAYS spawn a researcher worker via \`swarm_spawn_researcher\`:**
+**Repository fetching:**
+- \`repo-crawl_file\`, \`repo-crawl_readme\`, \`repo-crawl_search\`, \`repo-crawl_structure\`, \`repo-crawl_tree\`
+- \`repo-autopsy_*\` (all repo-autopsy tools)
+**Web/documentation fetching:**
+- \`webfetch\`, \`fetch_fetch\`
+- \`context7_resolve-library-id\`, \`context7_get-library-docs\`
+**Knowledge base:**
+- \`pdf-brain_search\`, \`pdf-brain_read\`
+**If you need external data:** Use \`swarm_spawn_researcher\` with a clear research task. The researcher will fetch, summarize, and return findings.
 ### ✅ ALWAYS DO THESE (Coordinator Checklist)
 On resume, execute this checklist IN ORDER:
@@ -63521,6 +63964,87 @@ Extract from session context:
 - **Review work** - Use \`swarm_review\` and \`swarm_review_feedback\` for completed work
 - **Close the loop** - When all subtasks done, verify and close the epic
+**You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
+---
+## \uD83D\uDCCB FULL COORDINATOR WORKFLOW (Reference)
+You are ALWAYS swarming. Here is the complete workflow for any new work:
+### Phase 1.5: Research Phase (FOR COMPLEX TASKS)
+**If the task requires understanding unfamiliar technologies, spawn a researcher FIRST:**
+\`\`\`
+swarm_spawn_researcher(
+  research_id="research-<topic>",
+  epic_id="<epic-id>",
+  tech_stack=["<technology>"],
+  project_path="<path>"
+)
+// Then spawn with Task(subagent_type="swarm/researcher", prompt="<from above>")
+\`\`\`
+### Phase 2: Knowledge Gathering
+\`\`\`
+semantic-memory_find(query="<task keywords>", limit=5)   # Past learnings
+cass_search(query="<task description>", limit=5)         # Similar past tasks
+skills_list()                                            # Available skills
+\`\`\`
+### Phase 3: Decompose
+\`\`\`
+swarm_select_strategy(task="<task>")
+swarm_plan_prompt(task="<task>", context="<synthesized knowledge>")
+swarm_validate_decomposition(response="<CellTree JSON>")
+\`\`\`
+### Phase 4: Create Cells
+\`hive_create_epic(epic_title="<task>", subtasks=[...])\`
+### Phase 5: DO NOT Reserve Files
+> **⚠️ Coordinator NEVER reserves files.** Workers reserve their own files.
+### Phase 6: Spawn Workers
+\`\`\`
+swarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)
+Task(subagent_type="swarm/worker", prompt="<from above>")
+\`\`\`
+### Phase 7: MANDATORY Review Loop
+**AFTER EVERY Task() RETURNS:**
+1. \`swarmmail_inbox()\` - Check for messages
+2. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Generate review
+3. Evaluate against epic goals
+4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\`
+**If needs_changes:**
+\`\`\`
+swarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)
+// Spawn NEW worker with Task() using retry prompt
+// Max 3 attempts before marking task blocked
+\`\`\`
+### Phase 8: Complete
+\`hive_sync()\` - Sync all cells to git
+## Strategy Reference
+| Strategy       | Best For                 | Keywords                               |
+| -------------- | ------------------------ | -------------------------------------- |
+| file-based     | Refactoring, migrations  | refactor, migrate, rename, update all  |
+| feature-based  | New features             | add, implement, build, create, feature |
+| risk-based     | Bug fixes, security      | fix, bug, security, critical, urgent   |
 **You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
 `;
 var SWARM_DETECTION_FALLBACK = `## \uD83D\uDC1D Swarm Detection - Check Your Context
@@ -64458,6 +64982,161 @@ async function resetStorage() {
 // src/index.ts
 init_skills();
+// src/eval-history.ts
+import * as fs2 from "node:fs";
+import * as path3 from "node:path";
+// Location of the eval history log, relative to the project root (joined onto it in getEvalHistoryPath).
+var DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";
+// getPhase only reports "production" when the score variance is below this value.
+var VARIANCE_THRESHOLD = 0.1;
+// getPhase reports "bootstrap" while an eval has fewer recorded runs than this.
+var BOOTSTRAP_THRESHOLD = 10;
+// Past bootstrap, getPhase reports "stabilization" for run counts up to and including this value.
+var STABILIZATION_THRESHOLD = 50;
+/**
+ * Build the location of the eval history file for a project.
+ *
+ * @param {string} projectPath - Directory the history path is joined onto.
+ * @returns {string} `projectPath` joined with DEFAULT_EVAL_HISTORY_PATH.
+ */
+function getEvalHistoryPath(projectPath) {
+  const historyFile = path3.join(projectPath, DEFAULT_EVAL_HISTORY_PATH);
+  return historyFile;
+}
+/**
+ * Make sure the directory that holds the eval history file exists.
+ *
+ * Relies on `mkdirSync(..., { recursive: true })`, which creates missing
+ * parent directories and does nothing when the directory is already there,
+ * so no separate existence check (and no check-then-create race) is needed.
+ *
+ * @param {string} projectPath - Project root handed to getEvalHistoryPath.
+ * @returns {void}
+ */
+function ensureEvalHistoryDir(projectPath) {
+  const historyDir = path3.dirname(getEvalHistoryPath(projectPath));
+  fs2.mkdirSync(historyDir, { recursive: true });
+}
+/**
+ * Append one eval run to the project's history file as a single JSON line.
+ *
+ * The history directory is created first if it is missing.
+ *
+ * @param {string} projectPath - Project root used to locate the history file.
+ * @param {*} run - Value serialized with JSON.stringify and written as one line.
+ * @returns {void}
+ */
+function recordEvalRun(projectPath, run) {
+  ensureEvalHistoryDir(projectPath);
+  const serialized = JSON.stringify(run);
+  fs2.appendFileSync(getEvalHistoryPath(projectPath), `${serialized}\n`, "utf-8");
+}
+/**
+ * Read every record stored in the project's eval history file.
+ *
+ * The file is treated as JSON Lines: each non-blank line is parsed on its own.
+ * Lines that are empty or contain only whitespace (for example the stray "\r"
+ * left by a blank line in a CRLF file) are skipped instead of being handed to
+ * JSON.parse, where they would abort the whole read.
+ *
+ * @param {string} projectPath - Project root used to locate the history file.
+ * @returns {Array<*>} Parsed records in file order; empty when the file does not exist.
+ * @throws {SyntaxError} If a non-blank line is not valid JSON.
+ */
+function readAllRecords(projectPath) {
+  const historyPath = getEvalHistoryPath(projectPath);
+  if (!fs2.existsSync(historyPath)) {
+    return [];
+  }
+  const content = fs2.readFileSync(historyPath, "utf-8");
+  return content
+    .split(/\r?\n/)
+    .filter((line) => line.trim() !== "")
+    .map((line) => JSON.parse(line));
+}
+/**
+ * Collect the recorded runs that belong to one eval.
+ *
+ * @param {string} projectPath - Project root used to locate the history file.
+ * @param {string} evalName - Value compared (strictly) against each record's `eval_name`.
+ * @returns {Array<*>} Matching records, in the order they appear in the history file.
+ */
+function getScoreHistory(projectPath, evalName) {
+  const matching = [];
+  for (const record of readAllRecords(projectPath)) {
+    if (record.eval_name === evalName) {
+      matching.push(record);
+    }
+  }
+  return matching;
+}
+/**
+ * Compute the variance of a list of scores, dividing by the number of scores.
+ *
+ * @param {number[]} scores - Values to measure.
+ * @returns {number} Mean squared deviation from the mean; 0 for fewer than two scores.
+ */
+function calculateVariance(scores) {
+  const count = scores.length;
+  if (count < 2) {
+    return 0;
+  }
+  let total = 0;
+  scores.forEach((score) => {
+    total += score;
+  });
+  const mean = total / count;
+  let squaredDeviationTotal = 0;
+  scores.forEach((score) => {
+    const deviation = score - mean;
+    squaredDeviationTotal += deviation * deviation;
+  });
+  return squaredDeviationTotal / count;
+}
+/**
+ * Classify an eval into a phase from its recorded history.
+ *
+ * - fewer than BOOTSTRAP_THRESHOLD runs: "bootstrap"
+ * - up to and including STABILIZATION_THRESHOLD runs: "stabilization"
+ * - more runs than that: "production" when the variance of all recorded
+ *   scores is below VARIANCE_THRESHOLD, otherwise "stabilization"
+ *
+ * @param {string} projectPath - Project root used to locate the history file.
+ * @param {string} evalName - Eval whose history is inspected.
+ * @returns {"bootstrap"|"stabilization"|"production"} The phase for this eval.
+ */
+function getPhase(projectPath, evalName) {
+  const runs = getScoreHistory(projectPath, evalName);
+  const runCount = runs.length;
+  if (runCount < BOOTSTRAP_THRESHOLD) {
+    return "bootstrap";
+  }
+  if (runCount <= STABILIZATION_THRESHOLD) {
+    return "stabilization";
+  }
+  const scoreVariance = calculateVariance(runs.map((run) => run.score));
+  return scoreVariance < VARIANCE_THRESHOLD ? "production" : "stabilization";
+}
+// src/eval-gates.ts
+// Default regression tolerances used by checkGate when the caller supplies no override.
+// Each value is a fraction of the baseline score: in the stabilization phase exceeding it
+// only changes the reported message, in the production phase exceeding it fails the gate.
+var DEFAULT_THRESHOLDS = {
+  stabilization: 0.1,
+  production: 0.05
+};
+/**
+ * Compute the baseline score as the mean of the recorded run scores.
+ *
+ * @param {Array<{score: number}>} history - Previously recorded runs.
+ * @param {number} currentScore - Returned as the baseline when there is no history.
+ * @returns {number} Mean of `history[i].score`, or `currentScore` for an empty history.
+ */
+function calculateBaseline(history, currentScore) {
+  const runCount = history.length;
+  if (runCount === 0) {
+    return currentScore;
+  }
+  let scoreTotal = 0;
+  history.forEach((run) => {
+    scoreTotal += run.score;
+  });
+  return scoreTotal / runCount;
+}
+/**
+ * Express how far the current score fell below the baseline, as a fraction of the baseline.
+ *
+ * @param {number} baseline - Reference score.
+ * @param {number} currentScore - Score being compared against the baseline.
+ * @returns {number} `(baseline - currentScore) / baseline`; positive means a drop,
+ *   negative means an improvement, and 0 is returned when the baseline is 0.
+ */
+function calculateRegression(baseline, currentScore) {
+  // A zero baseline would divide by zero, so it is reported as "no regression".
+  return baseline === 0 ? 0 : (baseline - currentScore) / baseline;
+}
+/**
+ * Render a regression figure and the two scores behind it as a short message.
+ *
+ * @param {number} regressionPercent - Regression as a fraction (0.1 is shown as "10.0%").
+ * @param {number} baseline - Baseline score, shown with two decimals.
+ * @param {number} currentScore - Current score, shown with two decimals.
+ * @returns {string} Text of the form "N.N% regression (baseline: B.BB, current: C.CC)".
+ */
+function formatRegressionMessage(regressionPercent, baseline, currentScore) {
+  const percentText = (regressionPercent * 100).toFixed(1);
+  const baselineText = baseline.toFixed(2);
+  const currentText = currentScore.toFixed(2);
+  return `${percentText}% regression (baseline: ${baselineText}, current: ${currentText})`;
+}
+/**
+ * Decide whether an eval score passes the regression gate for its current phase.
+ *
+ * The phase comes from getPhase and the baseline is the mean of the recorded
+ * scores. Only the production phase can fail the gate: bootstrap always passes
+ * without computing a baseline, and stabilization passes even when the
+ * regression exceeds its threshold (the message says so).
+ *
+ * @param {string} projectPath - Project root used to locate the history file.
+ * @param {string} evalName - Eval whose history is used for phase and baseline.
+ * @param {number} currentScore - Score of the run being gated.
+ * @param {{stabilizationThreshold?: number, productionThreshold?: number}} [config2]
+ *   Optional overrides for the DEFAULT_THRESHOLDS regression tolerances.
+ * @returns {{passed: boolean, phase: string, message: string, currentScore: number,
+ *   baseline?: number, regressionPercent?: number}} Gate result; `baseline` and
+ *   `regressionPercent` are present for every phase except bootstrap.
+ */
+function checkGate(projectPath, evalName, currentScore, config2) {
+  const thresholds = {
+    stabilization: config2?.stabilizationThreshold ?? DEFAULT_THRESHOLDS.stabilization,
+    production: config2?.productionThreshold ?? DEFAULT_THRESHOLDS.production
+  };
+  const phase = getPhase(projectPath, evalName);
+  const history = getScoreHistory(projectPath, evalName);
+  if (phase === "bootstrap") {
+    return {
+      passed: true,
+      phase: "bootstrap",
+      // Report progress against the same constant getPhase uses, so the two cannot drift apart.
+      message: `Bootstrap phase (${history.length}/${BOOTSTRAP_THRESHOLD} runs) - collecting data`,
+      currentScore
+    };
+  }
+  const baseline = calculateBaseline(history, currentScore);
+  const regressionPercent = calculateRegression(baseline, currentScore);
+  const regressionMsg = formatRegressionMessage(regressionPercent, baseline, currentScore);
+  if (phase === "stabilization") {
+    if (regressionPercent > thresholds.stabilization) {
+      // Stabilization is advisory: the regression is reported but the gate still passes.
+      return {
+        passed: true,
+        phase: "stabilization",
+        message: `Stabilization phase: ${regressionMsg} - exceeds ${(thresholds.stabilization * 100).toFixed(0)}% threshold but still passing`,
+        baseline,
+        currentScore,
+        regressionPercent
+      };
+    }
+    // Past the stabilization run count, getPhase only stays in "stabilization"
+    // because the variance is too high, so surface that variance in the message.
+    if (history.length > STABILIZATION_THRESHOLD) {
+      const scores = history.map((run) => run.score);
+      const variance5 = calculateVariance(scores);
+      return {
+        passed: true,
+        phase: "stabilization",
+        message: `Stabilization phase: ${regressionMsg} - acceptable. High variance (${variance5.toFixed(3)}) prevents production phase.`,
+        baseline,
+        currentScore,
+        regressionPercent
+      };
+    }
+    return {
+      passed: true,
+      phase: "stabilization",
+      message: `Stabilization phase: ${regressionMsg} - acceptable`,
+      baseline,
+      currentScore,
+      regressionPercent
+    };
+  }
+  if (regressionPercent > thresholds.production) {
+    return {
+      passed: false,
+      phase: "production",
+      message: `Production phase FAIL: ${regressionMsg} - exceeds ${(thresholds.production * 100).toFixed(0)}% threshold`,
+      baseline,
+      currentScore,
+      regressionPercent
+    };
+  }
+  return {
+    passed: true,
+    phase: "production",
+    message: `Production phase: ${regressionMsg} - acceptable`,
+    baseline,
+    currentScore,
+    regressionPercent
+  };
+}
+// src/index.ts
 var SwarmPlugin = async (input) => {
   const { $, directory, client } = input;
   setHiveWorkingDirectory(directory);
@@ -64524,7 +65203,7 @@ var SwarmPlugin = async (input) => {
       if (isInCoordinatorContext()) {
         const ctx = getCoordinatorContext();
         const violation = detectCoordinatorViolation({
-          sessionId: ctx.sessionId || "unknown",
+          sessionId: input2.sessionID || "unknown",
           epicId: ctx.epicId || "unknown",
           toolName,
           toolArgs: output.args,
@@ -64638,6 +65317,7 @@ export {
   researchTools,
   requireTool,
   repoCrawlTools,
+  recordEvalRun,
   parseFrontmatter,
   migrateBeadsToHive,
   mergeHistoricBeads,
@@ -64668,6 +65348,7 @@ export {
   hive_create_epic,
   hive_create,
   hive_close,
+  hive_cells,
   hiveTools,
   guardrailOutput,
   groupByTransition,
@@ -64677,12 +65358,15 @@ export {
   getStatusChanges,
   getSkillsContextForSwarm,
   getSkill,
+  getScoreHistory,
   getSchemaByName,
+  getPhase,
   getMandateStorage,
   getLogger,
   getInstalledVersions,
   getHiveWorkingDirectory,
   getHiveAdapter,
+  getEvalHistoryPath,
   getCellIdFromEvent,
   getBeadsWorkingDirectory,
   getBeadsAdapter,
@@ -64700,6 +65384,7 @@ export {
   evaluatePromotion,
   evaluateBatchPromotions,
   ensureHiveDirectory,
+  ensureEvalHistoryDir,
   discoverSkills,
   discoverDocTools,
   src_default as default,
@@ -64715,8 +65400,10 @@ export {
   createAgentMailError,
   clearSessionState,
   checkTool,
+  checkGate,
   checkBeadsMigrationNeeded,
   checkAllTools,
+  calculateVariance,
   beads_update,
   beads_sync,
   beads_start,
@@ -64738,6 +65425,7 @@ export {
   VoteTypeSchema,
   VoteSchema,
   ValidationResultSchema,
+  VARIANCE_THRESHOLD,
   UpdateSwarmContextArgsSchema,
   TaskDecompositionSchema,
   SwarmStrategySchema,
@@ -64757,6 +65445,7 @@ export {
   SWARM_COMPACTION_CONTEXT,
   SUBTASK_PROMPT_V2,
   STRATEGIES,
+  STABILIZATION_THRESHOLD,
   RepoCrawlError,
   QuerySwarmContextsArgsSchema,
   QueryMandatesArgsSchema,
@@ -64779,10 +65468,12 @@ export {
   DecompositionError,
   DecomposedSubtaskSchema,
   DecomposeArgsSchema,
+  DEFAULT_THRESHOLDS,
   DEFAULT_STORAGE_CONFIG,
   DEFAULT_MANDATE_STORAGE_CONFIG,
   DEFAULT_MANDATE_DECAY_CONFIG,
   DEFAULT_GUARDRAIL_CONFIG,
+  DEFAULT_EVAL_HISTORY_PATH,
   DEFAULT_CRITERIA,
   CriterionEvaluationSchema,
   CreateSwarmContextArgsSchema,
@@ -64850,6 +65541,7 @@ export {
   BeadAssignedEventSchema,
   BaseCellEventSchema,
   BaseBeadEventSchema,
+  BOOTSTRAP_THRESHOLD,
   AgentProgressSchema,
   AgentMailNotInitializedError,
   AgentMailError