opencode-swarm-plugin 0.38.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/.env +2 -0
  2. package/.hive/eval-results.json +26 -0
  3. package/.hive/issues.jsonl +11 -0
  4. package/.hive/memories.jsonl +23 -1
  5. package/.opencode/eval-history.jsonl +12 -0
  6. package/CHANGELOG.md +130 -0
  7. package/README.md +29 -12
  8. package/bin/swarm.test.ts +475 -0
  9. package/bin/swarm.ts +383 -0
  10. package/dist/compaction-hook.d.ts +1 -1
  11. package/dist/compaction-hook.d.ts.map +1 -1
  12. package/dist/compaction-prompt-scoring.d.ts +124 -0
  13. package/dist/compaction-prompt-scoring.d.ts.map +1 -0
  14. package/dist/eval-capture.d.ts +81 -1
  15. package/dist/eval-capture.d.ts.map +1 -1
  16. package/dist/eval-gates.d.ts +84 -0
  17. package/dist/eval-gates.d.ts.map +1 -0
  18. package/dist/eval-history.d.ts +117 -0
  19. package/dist/eval-history.d.ts.map +1 -0
  20. package/dist/eval-learning.d.ts +216 -0
  21. package/dist/eval-learning.d.ts.map +1 -0
  22. package/dist/index.d.ts +44 -0
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +370 -13
  25. package/dist/plugin.js +203 -13
  26. package/dist/post-compaction-tracker.d.ts +133 -0
  27. package/dist/post-compaction-tracker.d.ts.map +1 -0
  28. package/dist/swarm-orchestrate.d.ts +23 -0
  29. package/dist/swarm-orchestrate.d.ts.map +1 -1
  30. package/dist/swarm-prompts.d.ts +25 -1
  31. package/dist/swarm-prompts.d.ts.map +1 -1
  32. package/dist/swarm.d.ts +4 -0
  33. package/dist/swarm.d.ts.map +1 -1
  34. package/evals/README.md +589 -105
  35. package/evals/compaction-prompt.eval.ts +149 -0
  36. package/evals/coordinator-behavior.eval.ts +8 -8
  37. package/evals/fixtures/compaction-prompt-cases.ts +305 -0
  38. package/evals/lib/compaction-loader.test.ts +248 -0
  39. package/evals/lib/compaction-loader.ts +320 -0
  40. package/evals/lib/data-loader.test.ts +345 -0
  41. package/evals/lib/data-loader.ts +107 -6
  42. package/evals/scorers/compaction-prompt-scorers.ts +145 -0
  43. package/evals/scorers/compaction-scorers.ts +13 -13
  44. package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
  45. package/evals/scorers/coordinator-discipline.ts +13 -13
  46. package/examples/plugin-wrapper-template.ts +117 -0
  47. package/package.json +7 -5
  48. package/scripts/migrate-unknown-sessions.ts +349 -0
  49. package/src/compaction-capture.integration.test.ts +257 -0
  50. package/src/compaction-hook.test.ts +42 -0
  51. package/src/compaction-hook.ts +81 -0
  52. package/src/compaction-prompt-scorers.test.ts +299 -0
  53. package/src/compaction-prompt-scoring.ts +298 -0
  54. package/src/eval-capture.test.ts +422 -0
  55. package/src/eval-capture.ts +94 -2
  56. package/src/eval-gates.test.ts +306 -0
  57. package/src/eval-gates.ts +218 -0
  58. package/src/eval-history.test.ts +508 -0
  59. package/src/eval-history.ts +214 -0
  60. package/src/eval-learning.test.ts +378 -0
  61. package/src/eval-learning.ts +360 -0
  62. package/src/index.ts +61 -1
  63. package/src/post-compaction-tracker.test.ts +251 -0
  64. package/src/post-compaction-tracker.ts +237 -0
  65. package/src/swarm-decompose.ts +2 -2
  66. package/src/swarm-orchestrate.ts +2 -2
  67. package/src/swarm-prompts.ts +2 -2
  68. package/src/swarm-review.ts +3 -3
  69. package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
package/dist/index.js CHANGED
@@ -22198,6 +22198,7 @@ __export(exports_eval_capture, {
  captureHumanFeedback: () => captureHumanFeedback,
  captureDecomposition: () => captureDecomposition,
  captureCoordinatorEvent: () => captureCoordinatorEvent,
+ captureCompactionEvent: () => captureCompactionEvent,
  appendEvalRecord: () => appendEvalRecord,
  SubtaskOutcomeSchema: () => SubtaskOutcomeSchema,
  EvalRecordSchema: () => EvalRecordSchema,
@@ -22406,6 +22407,17 @@ function captureCoordinatorEvent(event) {
  `;
  fs.appendFileSync(sessionPath, line, "utf-8");
  }
+ function captureCompactionEvent(params) {
+ const event = {
+ session_id: params.session_id,
+ epic_id: params.epic_id,
+ timestamp: new Date().toISOString(),
+ event_type: "COMPACTION",
+ compaction_type: params.compaction_type,
+ payload: params.payload
+ };
+ captureCoordinatorEvent(event);
+ }
  function readSessionEvents(sessionId) {
  const sessionPath = getSessionPath(sessionId);
  if (!fs.existsSync(sessionPath)) {
@@ -22517,6 +22529,20 @@ var init_eval_capture = __esm(() => {
  "epic_complete"
  ]),
  payload: exports_external.any()
+ }),
+ exports_external.object({
+ session_id: exports_external.string(),
+ epic_id: exports_external.string(),
+ timestamp: exports_external.string(),
+ event_type: exports_external.literal("COMPACTION"),
+ compaction_type: exports_external.enum([
+ "detection_complete",
+ "prompt_generated",
+ "context_injected",
+ "resumption_started",
+ "tool_call_tracked"
+ ]),
+ payload: exports_external.any()
  })
  ]);
  CoordinatorSessionSchema = exports_external.object({
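The new `captureCompactionEvent` wraps `captureCoordinatorEvent` with a fixed `event_type: "COMPACTION"` and one of five allowed `compaction_type` values. A minimal sketch of recording such an event; the declaration mirrors the call site in the bundle rather than a confirmed public import, and the concrete IDs are hypothetical:

```ts
// Sketch only: captureCompactionEvent lives in the eval-capture module added in this release.
declare function captureCompactionEvent(params: {
  session_id: string;
  epic_id: string;
  compaction_type:
    | "detection_complete"
    | "prompt_generated"
    | "context_injected"
    | "resumption_started"
    | "tool_call_tracked";
  payload: unknown;
}): void;

// Record that post-compaction context was injected for a coordinator session.
captureCompactionEvent({
  session_id: "ses_abc123",            // hypothetical session id
  epic_id: "epic-42",                  // hypothetical epic id
  compaction_type: "context_injected",
  payload: { injected_chars: 2048 },   // free-form; the schema validates it as z.any()
});
```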
@@ -42685,7 +42711,7 @@ var swarm_delegate_planning = tool({
  strategy: tool.schema.enum(["auto", "file-based", "feature-based", "risk-based"]).optional().default("auto").describe("Decomposition strategy (default: auto-detect)"),
  query_cass: tool.schema.boolean().optional().default(true).describe("Query CASS for similar past tasks (default: true)")
  },
- async execute(args) {
+ async execute(args, _ctx) {
  const { selectStrategy: selectStrategy2, formatStrategyGuidelines: formatStrategyGuidelines2 } = await Promise.resolve().then(() => (init_swarm_strategies(), exports_swarm_strategies));
  const { formatMemoryQueryForDecomposition: formatMemoryQueryForDecomposition2 } = await Promise.resolve().then(() => (init_learning(), exports_learning));
  const { listSkills: listSkills2, getSkillsContextForSwarm: getSkillsContextForSwarm2, findRelevantSkills: findRelevantSkills2 } = await Promise.resolve().then(() => (init_skills(), exports_skills));
@@ -42701,7 +42727,7 @@ var swarm_delegate_planning = tool({
  }
  try {
  captureCoordinatorEvent({
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+ session_id: _ctx.sessionID || "unknown",
  epic_id: "planning",
  timestamp: new Date().toISOString(),
  event_type: "DECISION",
@@ -45036,7 +45062,7 @@ var swarm_review_feedback = tool({
  summary: exports_external.string().optional().describe("Review summary"),
  issues: exports_external.string().optional().describe("JSON array of ReviewIssue objects (for needs_changes)")
  },
- async execute(args) {
+ async execute(args, _ctx) {
  let parsedIssues = [];
  if (args.issues) {
  try {
@@ -45059,7 +45085,7 @@ var swarm_review_feedback = tool({
  markReviewApproved(args.task_id);
  try {
  captureCoordinatorEvent({
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+ session_id: _ctx.sessionID || "unknown",
  epic_id: epicId,
  timestamp: new Date().toISOString(),
  event_type: "DECISION",
@@ -45097,7 +45123,7 @@ You may now complete the task with \`swarm_complete\`.`,
  const remaining = MAX_REVIEW_ATTEMPTS - attemptNumber;
  try {
  captureCoordinatorEvent({
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+ session_id: _ctx.sessionID || "unknown",
  epic_id: epicId,
  timestamp: new Date().toISOString(),
  event_type: "DECISION",
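The recurring change in these hunks swaps the `OPENCODE_SESSION_ID` environment variable for the session ID that opencode passes into the tool's execute context. A minimal sketch of the pattern; the tool name, argument schema, and import path are illustrative assumptions, while `ctx.sessionID` and the `tool.schema` helpers come from the diff itself:

```ts
// Sketch only: an illustrative tool that tags captured events with the live session ID.
import { tool } from "@opencode-ai/plugin"; // import path assumed

declare function captureCoordinatorEvent(event: unknown): void; // internal helper shown in this diff

const example_tool = tool({
  description: "Capture a coordinator decision against the live session",
  args: {
    epic_id: tool.schema.string().describe("Epic this decision belongs to"),
  },
  async execute(args, ctx) {
    // ctx.sessionID identifies the calling session; fall back to "unknown" as the bundle does.
    captureCoordinatorEvent({
      session_id: ctx.sessionID || "unknown",
      epic_id: args.epic_id,
      timestamp: new Date().toISOString(),
      event_type: "DECISION",
      payload: { note: "example" },
    });
    return "recorded";
  },
});
```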
@@ -46069,10 +46095,29 @@ Files touched: ${args.files_touched?.join(", ") || "none recorded"}`,
  reason: "No files_owned contract found (non-epic subtask or decomposition event missing)"
  }
  };
+ try {
+ const { captureSubtaskOutcome: captureSubtaskOutcome2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+ const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
+ const evalEpicId = cell.parent_id || epicId2;
+ captureSubtaskOutcome2({
+ epicId: evalEpicId,
+ projectPath: args.project_key,
+ beadId: args.bead_id,
+ title: cell.title,
+ plannedFiles: args.planned_files || [],
+ actualFiles: args.files_touched || [],
+ durationMs: durationMs2,
+ errorCount: args.error_count || 0,
+ retryCount: args.retry_count || 0,
+ success: true
+ });
+ } catch (error45) {
+ console.warn("[swarm_complete] Failed to capture subtask outcome:", error45);
+ }
  try {
  const durationMs2 = args.start_time ? Date.now() - args.start_time : 0;
  captureCoordinatorEvent({
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+ session_id: _ctx.sessionID || "unknown",
  epic_id: epicId2,
  timestamp: new Date().toISOString(),
  event_type: "OUTCOME",
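`swarm_complete` now records a per-subtask outcome for the eval pipeline before logging the coordinator OUTCOME event. The field names below are taken from the call site above; the type annotations and concrete values are inferred for illustration:

```ts
// Sketch only: the outcome shape captureSubtaskOutcome receives in swarm_complete.
declare function captureSubtaskOutcome(outcome: {
  epicId: string;
  projectPath: string;
  beadId: string;
  title: string;
  plannedFiles: string[];
  actualFiles: string[];
  durationMs: number;
  errorCount: number;
  retryCount: number;
  success: boolean;
}): void;

captureSubtaskOutcome({
  epicId: "epic-42",               // falls back to the cell's parent_id when present
  projectPath: "/path/to/project",
  beadId: "bead-7",
  title: "Add eval gates",
  plannedFiles: ["src/eval-gates.ts"],
  actualFiles: ["src/eval-gates.ts", "src/eval-gates.test.ts"],
  durationMs: 93_000,
  errorCount: 0,
  retryCount: 0,
  success: true,
});
```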
@@ -46154,7 +46199,7 @@ ${errorStack.slice(0, 1000)}
  try {
  const durationMs = args.start_time ? Date.now() - args.start_time : 0;
  captureCoordinatorEvent({
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+ session_id: _ctx.sessionID || "unknown",
  epic_id: epicId,
  timestamp: new Date().toISOString(),
  event_type: "OUTCOME",
@@ -46221,7 +46266,9 @@ var swarm_record_outcome = tool({
  "user_cancelled",
  "unknown"
  ]).optional().describe("Failure classification (only when success=false). Auto-classified if not provided."),
- failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)")
+ failure_details: tool.schema.string().optional().describe("Detailed failure context (error message, stack trace, etc.)"),
+ project_path: tool.schema.string().optional().describe("Project path (for finalizing eval records when all subtasks complete)"),
+ epic_id: tool.schema.string().optional().describe("Epic ID (for finalizing eval records when all subtasks complete)")
  },
  async execute(args) {
  const signals = {
@@ -46243,6 +46290,18 @@ var swarm_record_outcome = tool({
  const validated = OutcomeSignalsSchema.parse(signals);
  const scored = scoreImplicitFeedback(validated, DEFAULT_LEARNING_CONFIG);
  const errorStats = await globalErrorAccumulator.getErrorStats(args.bead_id);
+ let finalizedRecord = null;
+ if (args.project_path && args.epic_id) {
+ try {
+ const { finalizeEvalRecord: finalizeEvalRecord2 } = await Promise.resolve().then(() => (init_eval_capture(), exports_eval_capture));
+ finalizedRecord = finalizeEvalRecord2({
+ epicId: args.epic_id,
+ projectPath: args.project_path
+ });
+ } catch (error45) {
+ console.warn("[swarm_record_outcome] Failed to finalize eval record:", error45);
+ }
+ }
  const criteriaToScore = args.criteria ?? [
  "type_safe",
  "no_bugs",
@@ -46284,6 +46343,7 @@ var swarm_record_outcome = tool({
  accumulated_errors: errorStats.total,
  unresolved_errors: errorStats.unresolved
  },
+ finalized_eval_record: finalizedRecord || undefined,
  note: "Feedback events should be stored for criterion weight calculation. Use learning.ts functions to apply weights."
  }, null, 2);
  }
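With the two new optional arguments, `swarm_record_outcome` can finalize an epic's eval record once all subtasks are done and surface the result in its JSON response. A hedged sketch of the argument shape a coordinator would pass; the values and the `success` field name are illustrative, inferred from the surrounding schema:

```ts
// Sketch only: arguments for swarm_record_outcome with the new eval-finalization fields.
const recordOutcomeArgs = {
  bead_id: "bead-7",
  success: true,
  project_path: "/path/to/project", // new: lets the tool call finalizeEvalRecord(...)
  epic_id: "epic-42",               // new: identifies which epic's eval record to finalize
};
// When both new fields are present, the tool's JSON response gains `finalized_eval_record`.
```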
@@ -46315,12 +46375,31 @@ async function runResearchPhase(task, projectPath, options2) {
  if (techStack.length === 0) {
  return {
  tech_stack: [],
+ spawn_instructions: [],
  summaries: {},
  memory_ids: []
  };
  }
+ const spawnInstructions = [];
+ for (const tech of techStack) {
+ const researchId = `research-${tech}-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
+ const prompt = formatResearcherPrompt({
+ research_id: researchId,
+ epic_id: "standalone-research",
+ tech_stack: [tech],
+ project_path: projectPath,
+ check_upgrades: options2?.checkUpgrades ?? false
+ });
+ spawnInstructions.push({
+ research_id: researchId,
+ tech,
+ prompt,
+ subagent_type: "swarm/researcher"
+ });
+ }
  return {
  tech_stack: techStack,
+ spawn_instructions: spawnInstructions,
  summaries: {},
  memory_ids: []
  };
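`runResearchPhase` now returns one ready-to-spawn instruction per detected technology instead of performing the research inline. A sketch of how a coordinator might consume the result; `runResearchPhase` is internal to the bundle and `Task` is the agent-side primitive referenced by the workflow prompt, so both are declared here purely for illustration:

```ts
// Sketch only: spawning one researcher subagent per instruction returned by runResearchPhase.
declare function runResearchPhase(
  task: string,
  projectPath: string,
  options?: { checkUpgrades?: boolean },
): Promise<{
  spawn_instructions: { research_id: string; tech: string; prompt: string; subagent_type: string }[];
}>;
declare function Task(input: { subagent_type: string; prompt: string }): Promise<unknown>;

const research = await runResearchPhase("Upgrade auth flow", "/path/to/project", { checkUpgrades: false });
for (const instruction of research.spawn_instructions) {
  // One researcher subagent per detected technology, using the pre-formatted prompt.
  await Task({ subagent_type: instruction.subagent_type, prompt: instruction.prompt });
}
```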
@@ -47554,7 +47633,7 @@ var swarm_spawn_subtask = tool({
  }).optional().describe("Recovery context from checkpoint compaction"),
  model: tool.schema.string().optional().describe("Optional explicit model override (auto-selected if not provided)")
  },
- async execute(args) {
+ async execute(args, _ctx) {
  const prompt = formatSubtaskPromptV2({
  bead_id: args.bead_id,
  epic_id: args.epic_id,
@@ -47583,7 +47662,7 @@ var swarm_spawn_subtask = tool({
  const postCompletionInstructions = COORDINATOR_POST_WORKER_CHECKLIST.replace(/{project_key}/g, args.project_path || "$PWD").replace(/{epic_id}/g, args.epic_id).replace(/{task_id}/g, args.bead_id).replace(/{files_touched}/g, filesJoined).replace(/{worker_id}/g, "worker");
  try {
  captureCoordinatorEvent({
- session_id: process.env.OPENCODE_SESSION_ID || "unknown",
+ session_id: _ctx.sessionID || "unknown",
  epic_id: args.epic_id,
  timestamp: new Date().toISOString(),
  event_type: "DECISION",
@@ -63790,9 +63869,21 @@ function getLog() {
  }
  return _logger;
  }
- var SWARM_COMPACTION_CONTEXT = `## \uD83D\uDC1D SWARM ACTIVE - You Are The COORDINATOR
+ var SWARM_COMPACTION_CONTEXT = `
+ ┌─────────────────────────────────────────────────────────────┐
+ │ │
+ │ \uD83D\uDC1D YOU ARE THE COORDINATOR \uD83D\uDC1D │
+ │ │
+ │ NOT A WORKER. NOT AN IMPLEMENTER. │
+ │ YOU ORCHESTRATE. │
+ │ │
+ └─────────────────────────────────────────────────────────────┘
+
+ ## \uD83C\uDFAF NON-NEGOTIABLE: YOU ARE THE COORDINATOR

- Context was compacted but the swarm is still running. You are the **COORDINATOR**.
+ Context was compacted but the swarm is still running. **YOU ARE THE COORDINATOR.**
+
+ Your role is ORCHESTRATION, not implementation. When you catch yourself about to do work directly, STOP.

  ### ⛔ NEVER DO THESE (Coordinator Anti-Patterns)

@@ -63803,9 +63894,27 @@ Context was compacted but the swarm is still running. You are the **COORDINATOR*
  - ❌ **NEVER** implement features yourself - SPAWN A WORKER
  - ❌ **NEVER** "just do it myself to save time" - NO. SPAWN A WORKER.
  - ❌ **NEVER** reserve files with \`swarmmail_reserve\` - Workers reserve files
+ - ❌ **NEVER** fetch files/docs directly - SPAWN A RESEARCHER

  **If you catch yourself about to edit a file, STOP. Use \`swarm_spawn_subtask\` instead.**

+ ### \uD83D\uDEAB FORBIDDEN TOOLS (Coordinators MUST delegate these)
+
+ **NEVER use these tools directly. ALWAYS spawn a researcher worker via \`swarm_spawn_researcher\`:**
+
+ **Repository fetching:**
+ - \`repo-crawl_file\`, \`repo-crawl_readme\`, \`repo-crawl_search\`, \`repo-crawl_structure\`, \`repo-crawl_tree\`
+ - \`repo-autopsy_*\` (all repo-autopsy tools)
+
+ **Web/documentation fetching:**
+ - \`webfetch\`, \`fetch_fetch\`
+ - \`context7_resolve-library-id\`, \`context7_get-library-docs\`
+
+ **Knowledge base:**
+ - \`pdf-brain_search\`, \`pdf-brain_read\`
+
+ **If you need external data:** Use \`swarm_spawn_researcher\` with a clear research task. The researcher will fetch, summarize, and return findings.
+
  ### ✅ ALWAYS DO THESE (Coordinator Checklist)

  On resume, execute this checklist IN ORDER:
@@ -63855,6 +63964,87 @@ Extract from session context:
  - **Review work** - Use \`swarm_review\` and \`swarm_review_feedback\` for completed work
  - **Close the loop** - When all subtasks done, verify and close the epic

+ **You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
+
+ ---
+
+ ## \uD83D\uDCCB FULL COORDINATOR WORKFLOW (Reference)
+
+ You are ALWAYS swarming. Here is the complete workflow for any new work:
+
+ ### Phase 1.5: Research Phase (FOR COMPLEX TASKS)
+
+ **If the task requires understanding unfamiliar technologies, spawn a researcher FIRST:**
+
+ \`\`\`
+ swarm_spawn_researcher(
+ research_id="research-<topic>",
+ epic_id="<epic-id>",
+ tech_stack=["<technology>"],
+ project_path="<path>"
+ )
+ // Then spawn with Task(subagent_type="swarm/researcher", prompt="<from above>")
+ \`\`\`
+
+ ### Phase 2: Knowledge Gathering
+
+ \`\`\`
+ semantic-memory_find(query="<task keywords>", limit=5) # Past learnings
+ cass_search(query="<task description>", limit=5) # Similar past tasks
+ skills_list() # Available skills
+ \`\`\`
+
+ ### Phase 3: Decompose
+
+ \`\`\`
+ swarm_select_strategy(task="<task>")
+ swarm_plan_prompt(task="<task>", context="<synthesized knowledge>")
+ swarm_validate_decomposition(response="<CellTree JSON>")
+ \`\`\`
+
+ ### Phase 4: Create Cells
+
+ \`hive_create_epic(epic_title="<task>", subtasks=[...])\`
+
+ ### Phase 5: DO NOT Reserve Files
+
+ > **⚠️ Coordinator NEVER reserves files.** Workers reserve their own files.
+
+ ### Phase 6: Spawn Workers
+
+ \`\`\`
+ swarm_spawn_subtask(bead_id, epic_id, title, files, shared_context, project_path)
+ Task(subagent_type="swarm/worker", prompt="<from above>")
+ \`\`\`
+
+ ### Phase 7: MANDATORY Review Loop
+
+ **AFTER EVERY Task() RETURNS:**
+
+ 1. \`swarmmail_inbox()\` - Check for messages
+ 2. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Generate review
+ 3. Evaluate against epic goals
+ 4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\`
+
+ **If needs_changes:**
+ \`\`\`
+ swarm_spawn_retry(bead_id, epic_id, original_prompt, attempt, issues, diff, files, project_path)
+ // Spawn NEW worker with Task() using retry prompt
+ // Max 3 attempts before marking task blocked
+ \`\`\`
+
+ ### Phase 8: Complete
+
+ \`hive_sync()\` - Sync all cells to git
+
+ ## Strategy Reference
+
+ | Strategy | Best For | Keywords |
+ | -------------- | ------------------------ | -------------------------------------- |
+ | file-based | Refactoring, migrations | refactor, migrate, rename, update all |
+ | feature-based | New features | add, implement, build, create, feature |
+ | risk-based | Bug fixes, security | fix, bug, security, critical, urgent |
+
  **You are the COORDINATOR. You orchestrate. You do NOT implement. Spawn workers.**
  `;
  var SWARM_DETECTION_FALLBACK = `## \uD83D\uDC1D Swarm Detection - Check Your Context
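The expanded `SWARM_COMPACTION_CONTEXT` string is a package export (see the export list below) and is what the compaction hook injects so a resumed coordinator does not drift into doing worker tasks itself. A minimal sketch of prepending it to a compaction summary; the `buildPostCompactionPrompt` helper and the `summary` argument are placeholders, not the plugin's actual hook API:

```ts
// Sketch only: SWARM_COMPACTION_CONTEXT is a real export; the surrounding helper is illustrative.
import { SWARM_COMPACTION_CONTEXT } from "opencode-swarm-plugin";

function buildPostCompactionPrompt(summary: string): string {
  // Put the coordinator-role reminder ahead of the compacted summary so it survives truncation.
  return `${SWARM_COMPACTION_CONTEXT}\n\n${summary}`;
}
```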
@@ -64792,6 +64982,161 @@ async function resetStorage() {

  // src/index.ts
  init_skills();
+
+ // src/eval-history.ts
+ import * as fs2 from "node:fs";
+ import * as path3 from "node:path";
+ var DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";
+ var VARIANCE_THRESHOLD = 0.1;
+ var BOOTSTRAP_THRESHOLD = 10;
+ var STABILIZATION_THRESHOLD = 50;
+ function getEvalHistoryPath(projectPath) {
+ return path3.join(projectPath, DEFAULT_EVAL_HISTORY_PATH);
+ }
+ function ensureEvalHistoryDir(projectPath) {
+ const historyPath = getEvalHistoryPath(projectPath);
+ const dir = path3.dirname(historyPath);
+ if (!fs2.existsSync(dir)) {
+ fs2.mkdirSync(dir, { recursive: true });
+ }
+ }
+ function recordEvalRun(projectPath, run) {
+ ensureEvalHistoryDir(projectPath);
+ const historyPath = getEvalHistoryPath(projectPath);
+ const line = `${JSON.stringify(run)}
+ `;
+ fs2.appendFileSync(historyPath, line, "utf-8");
+ }
+ function readAllRecords(projectPath) {
+ const historyPath = getEvalHistoryPath(projectPath);
+ if (!fs2.existsSync(historyPath)) {
+ return [];
+ }
+ const content = fs2.readFileSync(historyPath, "utf-8");
+ const lines = content.trim().split(`
+ `).filter(Boolean);
+ return lines.map((line) => JSON.parse(line));
+ }
+ function getScoreHistory(projectPath, evalName) {
+ return readAllRecords(projectPath).filter((run) => run.eval_name === evalName);
+ }
+ function calculateVariance(scores) {
+ if (scores.length <= 1) {
+ return 0;
+ }
+ const mean = scores.reduce((sum2, score) => sum2 + score, 0) / scores.length;
+ const variance5 = scores.reduce((sum2, score) => {
+ const deviation = score - mean;
+ return sum2 + deviation * deviation;
+ }, 0) / scores.length;
+ return variance5;
+ }
+ function getPhase(projectPath, evalName) {
+ const history = getScoreHistory(projectPath, evalName);
+ if (history.length < BOOTSTRAP_THRESHOLD) {
+ return "bootstrap";
+ }
+ if (history.length <= STABILIZATION_THRESHOLD) {
+ return "stabilization";
+ }
+ const scores = history.map((run) => run.score);
+ const variance5 = calculateVariance(scores);
+ if (variance5 < VARIANCE_THRESHOLD) {
+ return "production";
+ }
+ return "stabilization";
+ }
+ // src/eval-gates.ts
+ var DEFAULT_THRESHOLDS = {
+ stabilization: 0.1,
+ production: 0.05
+ };
+ function calculateBaseline(history, currentScore) {
+ if (history.length === 0) {
+ return currentScore;
+ }
+ return history.reduce((sum2, run) => sum2 + run.score, 0) / history.length;
+ }
+ function calculateRegression(baseline, currentScore) {
+ if (baseline === 0) {
+ return 0;
+ }
+ return (baseline - currentScore) / baseline;
+ }
+ function formatRegressionMessage(regressionPercent, baseline, currentScore) {
+ return `${(regressionPercent * 100).toFixed(1)}% regression (baseline: ${baseline.toFixed(2)}, current: ${currentScore.toFixed(2)})`;
+ }
+ function checkGate(projectPath, evalName, currentScore, config2) {
+ const thresholds = {
+ stabilization: config2?.stabilizationThreshold ?? DEFAULT_THRESHOLDS.stabilization,
+ production: config2?.productionThreshold ?? DEFAULT_THRESHOLDS.production
+ };
+ const phase = getPhase(projectPath, evalName);
+ const history = getScoreHistory(projectPath, evalName);
+ if (phase === "bootstrap") {
+ return {
+ passed: true,
+ phase: "bootstrap",
+ message: `Bootstrap phase (${history.length}/10 runs) - collecting data`,
+ currentScore
+ };
+ }
+ const baseline = calculateBaseline(history, currentScore);
+ const regressionPercent = calculateRegression(baseline, currentScore);
+ const regressionMsg = formatRegressionMessage(regressionPercent, baseline, currentScore);
+ if (phase === "stabilization") {
+ if (regressionPercent > thresholds.stabilization) {
+ return {
+ passed: true,
+ phase: "stabilization",
+ message: `Stabilization phase: ${regressionMsg} - exceeds ${(thresholds.stabilization * 100).toFixed(0)}% threshold but still passing`,
+ baseline,
+ currentScore,
+ regressionPercent
+ };
+ }
+ if (history.length > 50) {
+ const scores = history.map((run) => run.score);
+ const variance5 = calculateVariance(scores);
+ return {
+ passed: true,
+ phase: "stabilization",
+ message: `Stabilization phase: ${regressionMsg} - acceptable. High variance (${variance5.toFixed(3)}) prevents production phase.`,
+ baseline,
+ currentScore,
+ regressionPercent
+ };
+ }
+ return {
+ passed: true,
+ phase: "stabilization",
+ message: `Stabilization phase: ${regressionMsg} - acceptable`,
+ baseline,
+ currentScore,
+ regressionPercent
+ };
+ }
+ if (regressionPercent > thresholds.production) {
+ return {
+ passed: false,
+ phase: "production",
+ message: `Production phase FAIL: ${regressionMsg} - exceeds ${(thresholds.production * 100).toFixed(0)}% threshold`,
+ baseline,
+ currentScore,
+ regressionPercent
+ };
+ }
+ return {
+ passed: true,
+ phase: "production",
+ message: `Production phase: ${regressionMsg} - acceptable`,
+ baseline,
+ currentScore,
+ regressionPercent
+ };
+ }
+
+ // src/index.ts
  var SwarmPlugin = async (input) => {
  const { $, directory, client } = input;
  setHiveWorkingDirectory(directory);
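The new eval-history/eval-gates pair implements a three-phase regression gate: fewer than 10 recorded runs is `bootstrap` (always passes), up to 50 runs, or more while score variance stays high, is `stabilization` (regressions beyond 10% are flagged but still pass), and a low-variance history past 50 runs is `production` (regressions beyond 5% fail the gate). A hedged usage sketch against the exported functions; the run record is assumed to carry at least `eval_name` and `score`, since those are the fields the bundled code reads back, and the extra `timestamp` field is an assumption:

```ts
// Sketch only: gating an eval run against history stored in .opencode/eval-history.jsonl.
import { checkGate, getPhase, recordEvalRun } from "opencode-swarm-plugin";

const projectPath = "/path/to/project";
const score = 0.87; // aggregate score from the latest eval run

// Check the gate against the existing history first...
const gate = checkGate(projectPath, "compaction-prompt", score);
console.log(getPhase(projectPath, "compaction-prompt"), gate.message);

// ...then append this run so it becomes part of the future baseline.
recordEvalRun(projectPath, {
  eval_name: "compaction-prompt",
  score,
  timestamp: new Date().toISOString(), // assumed extra field; the gate only needs eval_name + score
});

if (!gate.passed) process.exit(1); // only the production phase can fail the gate
```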
@@ -64858,7 +65203,7 @@ var SwarmPlugin = async (input) => {
  if (isInCoordinatorContext()) {
  const ctx = getCoordinatorContext();
  const violation = detectCoordinatorViolation({
- sessionId: ctx.sessionId || "unknown",
+ sessionId: input2.sessionID || "unknown",
  epicId: ctx.epicId || "unknown",
  toolName,
  toolArgs: output.args,
@@ -64972,6 +65317,7 @@ export {
  researchTools,
  requireTool,
  repoCrawlTools,
+ recordEvalRun,
  parseFrontmatter,
  migrateBeadsToHive,
  mergeHistoricBeads,
@@ -65012,12 +65358,15 @@ export {
  getStatusChanges,
  getSkillsContextForSwarm,
  getSkill,
+ getScoreHistory,
  getSchemaByName,
+ getPhase,
  getMandateStorage,
  getLogger,
  getInstalledVersions,
  getHiveWorkingDirectory,
  getHiveAdapter,
+ getEvalHistoryPath,
  getCellIdFromEvent,
  getBeadsWorkingDirectory,
  getBeadsAdapter,
@@ -65035,6 +65384,7 @@ export {
  evaluatePromotion,
  evaluateBatchPromotions,
  ensureHiveDirectory,
+ ensureEvalHistoryDir,
  discoverSkills,
  discoverDocTools,
  src_default as default,
@@ -65050,8 +65400,10 export {
  createAgentMailError,
  clearSessionState,
  checkTool,
+ checkGate,
  checkBeadsMigrationNeeded,
  checkAllTools,
+ calculateVariance,
  beads_update,
  beads_sync,
  beads_start,
@@ -65073,6 +65425,7 @@ export {
  VoteTypeSchema,
  VoteSchema,
  ValidationResultSchema,
+ VARIANCE_THRESHOLD,
  UpdateSwarmContextArgsSchema,
  TaskDecompositionSchema,
  SwarmStrategySchema,
@@ -65092,6 +65445,7 @@ export {
  SWARM_COMPACTION_CONTEXT,
  SUBTASK_PROMPT_V2,
  STRATEGIES,
+ STABILIZATION_THRESHOLD,
  RepoCrawlError,
  QuerySwarmContextsArgsSchema,
  QueryMandatesArgsSchema,
@@ -65114,10 +65468,12 @@ export {
  DecompositionError,
  DecomposedSubtaskSchema,
  DecomposeArgsSchema,
+ DEFAULT_THRESHOLDS,
  DEFAULT_STORAGE_CONFIG,
  DEFAULT_MANDATE_STORAGE_CONFIG,
  DEFAULT_MANDATE_DECAY_CONFIG,
  DEFAULT_GUARDRAIL_CONFIG,
+ DEFAULT_EVAL_HISTORY_PATH,
  DEFAULT_CRITERIA,
  CriterionEvaluationSchema,
  CreateSwarmContextArgsSchema,
@@ -65185,6 +65541,7 @@ export {
  BeadAssignedEventSchema,
  BaseCellEventSchema,
  BaseBeadEventSchema,
+ BOOTSTRAP_THRESHOLD,
  AgentProgressSchema,
  AgentMailNotInitializedError,
  AgentMailError
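Taken together, the names added to the export list above form the new eval-history/eval-gates surface of this release. A hedged import sketch, assuming the package's default entry point:

```ts
// Sketch only: the names below are the exports added in 0.39.x per the diff above.
import {
  BOOTSTRAP_THRESHOLD,
  DEFAULT_EVAL_HISTORY_PATH,
  DEFAULT_THRESHOLDS,
  STABILIZATION_THRESHOLD,
  VARIANCE_THRESHOLD,
  calculateVariance,
  checkGate,
  ensureEvalHistoryDir,
  getEvalHistoryPath,
  getPhase,
  getScoreHistory,
  recordEvalRun,
} from "opencode-swarm-plugin";
```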