npm - opencode-swarm-plugin - Versions diffs - 0.44.0 → 0.44.1 - Mend

opencode-swarm-plugin 0.44.0 → 0.44.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205) hide show

package/bin/swarm.serve.test.ts +6 -4
package/bin/swarm.ts +16 -10
package/dist/compaction-prompt-scoring.js +139 -0
package/dist/eval-capture.js +12811 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.js +7644 -62599
package/dist/plugin.js +23766 -78721
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-review.d.ts.map +1 -1
package/package.json +17 -5
package/.changeset/swarm-insights-data-layer.md +0 -63
package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
package/.hive/analysis/session-data-quality-audit.md +0 -320
package/.hive/eval-results.json +0 -483
package/.hive/issues.jsonl +0 -138
package/.hive/memories.jsonl +0 -729
package/.opencode/eval-history.jsonl +0 -327
package/.turbo/turbo-build.log +0 -9
package/CHANGELOG.md +0 -2286
package/SCORER-ANALYSIS.md +0 -598
package/docs/analysis/subagent-coordination-patterns.md +0 -902
package/docs/analysis-socratic-planner-pattern.md +0 -504
package/docs/planning/ADR-001-monorepo-structure.md +0 -171
package/docs/planning/ADR-002-package-extraction.md +0 -393
package/docs/planning/ADR-003-performance-improvements.md +0 -451
package/docs/planning/ADR-004-message-queue-features.md +0 -187
package/docs/planning/ADR-005-devtools-observability.md +0 -202
package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
package/docs/planning/ROADMAP.md +0 -368
package/docs/semantic-memory-cli-syntax.md +0 -123
package/docs/swarm-mail-architecture.md +0 -1147
package/docs/testing/context-recovery-test.md +0 -470
package/evals/ARCHITECTURE.md +0 -1189
package/evals/README.md +0 -768
package/evals/compaction-prompt.eval.ts +0 -149
package/evals/compaction-resumption.eval.ts +0 -289
package/evals/coordinator-behavior.eval.ts +0 -307
package/evals/coordinator-session.eval.ts +0 -154
package/evals/evalite.config.ts.bak +0 -15
package/evals/example.eval.ts +0 -31
package/evals/fixtures/cass-baseline.ts +0 -217
package/evals/fixtures/compaction-cases.ts +0 -350
package/evals/fixtures/compaction-prompt-cases.ts +0 -311
package/evals/fixtures/coordinator-sessions.ts +0 -328
package/evals/fixtures/decomposition-cases.ts +0 -105
package/evals/lib/compaction-loader.test.ts +0 -248
package/evals/lib/compaction-loader.ts +0 -320
package/evals/lib/data-loader.evalite-test.ts +0 -289
package/evals/lib/data-loader.test.ts +0 -345
package/evals/lib/data-loader.ts +0 -281
package/evals/lib/llm.ts +0 -115
package/evals/scorers/compaction-prompt-scorers.ts +0 -145
package/evals/scorers/compaction-scorers.ts +0 -305
package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
package/evals/scorers/coordinator-discipline.ts +0 -325
package/evals/scorers/index.test.ts +0 -146
package/evals/scorers/index.ts +0 -328
package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
package/evals/scorers/outcome-scorers.ts +0 -349
package/evals/swarm-decomposition.eval.ts +0 -121
package/examples/commands/swarm.md +0 -745
package/examples/plugin-wrapper-template.ts +0 -2515
package/examples/skills/hive-workflow/SKILL.md +0 -212
package/examples/skills/skill-creator/SKILL.md +0 -223
package/examples/skills/swarm-coordination/SKILL.md +0 -292
package/global-skills/cli-builder/SKILL.md +0 -344
package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
package/global-skills/learning-systems/SKILL.md +0 -644
package/global-skills/skill-creator/LICENSE.txt +0 -202
package/global-skills/skill-creator/SKILL.md +0 -352
package/global-skills/skill-creator/references/output-patterns.md +0 -82
package/global-skills/skill-creator/references/workflows.md +0 -28
package/global-skills/swarm-coordination/SKILL.md +0 -995
package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
package/global-skills/swarm-coordination/references/strategies.md +0 -138
package/global-skills/system-design/SKILL.md +0 -213
package/global-skills/testing-patterns/SKILL.md +0 -430
package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
package/opencode-swarm-plugin-0.30.7.tgz +0 -0
package/opencode-swarm-plugin-0.31.0.tgz +0 -0
package/scripts/cleanup-test-memories.ts +0 -346
package/scripts/init-skill.ts +0 -222
package/scripts/migrate-unknown-sessions.ts +0 -349
package/scripts/validate-skill.ts +0 -204
package/src/agent-mail.ts +0 -1724
package/src/anti-patterns.test.ts +0 -1167
package/src/anti-patterns.ts +0 -448
package/src/compaction-capture.integration.test.ts +0 -257
package/src/compaction-hook.test.ts +0 -838
package/src/compaction-hook.ts +0 -1204
package/src/compaction-observability.integration.test.ts +0 -139
package/src/compaction-observability.test.ts +0 -187
package/src/compaction-observability.ts +0 -324
package/src/compaction-prompt-scorers.test.ts +0 -475
package/src/compaction-prompt-scoring.ts +0 -300
package/src/contributor-tools.test.ts +0 -133
package/src/contributor-tools.ts +0 -201
package/src/dashboard.test.ts +0 -611
package/src/dashboard.ts +0 -462
package/src/error-enrichment.test.ts +0 -403
package/src/error-enrichment.ts +0 -219
package/src/eval-capture.test.ts +0 -1015
package/src/eval-capture.ts +0 -929
package/src/eval-gates.test.ts +0 -306
package/src/eval-gates.ts +0 -218
package/src/eval-history.test.ts +0 -508
package/src/eval-history.ts +0 -214
package/src/eval-learning.test.ts +0 -378
package/src/eval-learning.ts +0 -360
package/src/eval-runner.test.ts +0 -223
package/src/eval-runner.ts +0 -402
package/src/export-tools.test.ts +0 -476
package/src/export-tools.ts +0 -257
package/src/hive.integration.test.ts +0 -2241
package/src/hive.ts +0 -1628
package/src/index.ts +0 -940
package/src/learning.integration.test.ts +0 -1815
package/src/learning.ts +0 -1079
package/src/logger.test.ts +0 -189
package/src/logger.ts +0 -135
package/src/mandate-promotion.test.ts +0 -473
package/src/mandate-promotion.ts +0 -239
package/src/mandate-storage.integration.test.ts +0 -601
package/src/mandate-storage.test.ts +0 -578
package/src/mandate-storage.ts +0 -794
package/src/mandates.ts +0 -540
package/src/memory-tools.test.ts +0 -195
package/src/memory-tools.ts +0 -344
package/src/memory.integration.test.ts +0 -334
package/src/memory.test.ts +0 -158
package/src/memory.ts +0 -527
package/src/model-selection.test.ts +0 -188
package/src/model-selection.ts +0 -68
package/src/observability-tools.test.ts +0 -359
package/src/observability-tools.ts +0 -871
package/src/output-guardrails.test.ts +0 -438
package/src/output-guardrails.ts +0 -381
package/src/pattern-maturity.test.ts +0 -1160
package/src/pattern-maturity.ts +0 -525
package/src/planning-guardrails.test.ts +0 -491
package/src/planning-guardrails.ts +0 -438
package/src/plugin.ts +0 -23
package/src/post-compaction-tracker.test.ts +0 -251
package/src/post-compaction-tracker.ts +0 -237
package/src/query-tools.test.ts +0 -636
package/src/query-tools.ts +0 -324
package/src/rate-limiter.integration.test.ts +0 -466
package/src/rate-limiter.ts +0 -774
package/src/replay-tools.test.ts +0 -496
package/src/replay-tools.ts +0 -240
package/src/repo-crawl.integration.test.ts +0 -441
package/src/repo-crawl.ts +0 -610
package/src/schemas/cell-events.test.ts +0 -347
package/src/schemas/cell-events.ts +0 -807
package/src/schemas/cell.ts +0 -257
package/src/schemas/evaluation.ts +0 -166
package/src/schemas/index.test.ts +0 -199
package/src/schemas/index.ts +0 -286
package/src/schemas/mandate.ts +0 -232
package/src/schemas/swarm-context.ts +0 -115
package/src/schemas/task.ts +0 -161
package/src/schemas/worker-handoff.test.ts +0 -302
package/src/schemas/worker-handoff.ts +0 -131
package/src/sessions/agent-discovery.test.ts +0 -137
package/src/sessions/agent-discovery.ts +0 -112
package/src/sessions/index.ts +0 -15
package/src/skills.integration.test.ts +0 -1192
package/src/skills.test.ts +0 -643
package/src/skills.ts +0 -1549
package/src/storage.integration.test.ts +0 -341
package/src/storage.ts +0 -884
package/src/structured.integration.test.ts +0 -817
package/src/structured.test.ts +0 -1046
package/src/structured.ts +0 -762
package/src/swarm-decompose.test.ts +0 -188
package/src/swarm-decompose.ts +0 -1302
package/src/swarm-deferred.integration.test.ts +0 -157
package/src/swarm-deferred.test.ts +0 -38
package/src/swarm-insights.test.ts +0 -214
package/src/swarm-insights.ts +0 -459
package/src/swarm-mail.integration.test.ts +0 -970
package/src/swarm-mail.ts +0 -739
package/src/swarm-orchestrate.integration.test.ts +0 -282
package/src/swarm-orchestrate.test.ts +0 -548
package/src/swarm-orchestrate.ts +0 -3084
package/src/swarm-prompts.test.ts +0 -1270
package/src/swarm-prompts.ts +0 -2077
package/src/swarm-research.integration.test.ts +0 -701
package/src/swarm-research.test.ts +0 -698
package/src/swarm-research.ts +0 -472
package/src/swarm-review.integration.test.ts +0 -285
package/src/swarm-review.test.ts +0 -879
package/src/swarm-review.ts +0 -709
package/src/swarm-strategies.ts +0 -407
package/src/swarm-worktree.test.ts +0 -501
package/src/swarm-worktree.ts +0 -575
package/src/swarm.integration.test.ts +0 -2377
package/src/swarm.ts +0 -38
package/src/tool-adapter.integration.test.ts +0 -1221
package/src/tool-availability.ts +0 -461
package/tsconfig.json +0 -28

package/src/eval-history.ts DELETED Viewed

@@ -1,214 +0,0 @@
-/**
- * Eval History Tracker - Progressive gates based on run history
- *
- * Tracks eval run scores over time and calculates the current phase:
- * - Bootstrap (<10 runs): No gates, just collect data
- * - Stabilization (10-50 runs): Warn on >10% regression
- * - Production (>50 runs + variance <0.1): Fail on >5% regression
- *
- * @module eval-history
- */
-import * as fs from "node:fs";
-import * as path from "node:path";
-/**
- * Maturity phase of an eval, as classified by `getPhase` from the number of
- * recorded runs and the variance of their scores. An eval is always in exactly
- * one of these phases.
- */
-export type Phase = "bootstrap" | "stabilization" | "production";
-/**
- * One eval run, stored as a single JSON line in the history file.
- */
-export interface EvalRunRecord {
-  /** ISO-8601 timestamp of the run */
-  timestamp: string;
-  /** Name of the eval (e.g., "swarm-decomposition"); used to filter history per eval */
-  eval_name: string;
-  /** Score (0-1 range typically; the range is not validated in this module) */
-  score: number;
-  /** Caller-supplied run counter; expected to increase per eval, but not checked here (phase logic counts records instead) */
-  run_count: number;
-}
-/**
- * Default location of the eval history file, relative to the project root
- * (joined onto the project path by `getEvalHistoryPath`).
- */
-export const DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";
-/**
- * Variance threshold for production phase: `getPhase` only reports
- * "production" when the score variance is strictly below this value.
- */
-export const VARIANCE_THRESHOLD = 0.1;
-/**
- * Run count thresholds for phase transitions, as applied by `getPhase`:
- * fewer than BOOTSTRAP_THRESHOLD runs is "bootstrap"; from there up to and
- * including STABILIZATION_THRESHOLD runs is "stabilization"; more than
- * STABILIZATION_THRESHOLD runs is eligible for "production".
- */
-export const BOOTSTRAP_THRESHOLD = 10;
-export const STABILIZATION_THRESHOLD = 50;
-/**
- * Resolve the location of a project's eval history file.
- *
- * @param projectPath - Project root directory
- * @returns `projectPath` joined with `DEFAULT_EVAL_HISTORY_PATH`
- */
-export function getEvalHistoryPath(projectPath: string): string {
-  const historyFile = path.join(projectPath, DEFAULT_EVAL_HISTORY_PATH);
-  return historyFile;
-}
-/**
- * Ensure the directory that holds the eval history file exists.
- *
- * Creates the parent directory of the history path, including any missing
- * ancestors. Safe to call repeatedly.
- *
- * @param projectPath - Project root directory
- */
-export function ensureEvalHistoryDir(projectPath: string): void {
-  const historyPath = getEvalHistoryPath(projectPath);
-  // No existsSync() pre-check: with `recursive: true`, mkdirSync does not fail
-  // when the directory already exists, and skipping the check removes the
-  // check-then-create window between concurrent eval runs.
-  fs.mkdirSync(path.dirname(historyPath), { recursive: true });
-}
-/**
- * Record an eval run to JSONL history
- *
- * Serializes `run` with `JSON.stringify`, adds a trailing newline, and appends
- * it to the history file (`.opencode/eval-history.jsonl` under `projectPath`)
- * with a single `appendFileSync` call. Each line is therefore one complete
- * JSON object representing one eval run (timestamp, eval name, score, run count).
- *
- * **Auto-creates directory** if `.opencode/` doesn't exist (via `ensureEvalHistoryDir`).
- *
- * **Concurrency**: no locking is performed here. Whether lines from concurrent
- * writers can interleave depends on the operating system's append semantics, so
- * this should not be relied on as a thread-safety or atomicity guarantee.
- *
- * **Integration**: intended to be called after each eval completes (the calling
- * runner is not part of this file); also callable manually for custom eval tracking.
- *
- * @param projectPath - Absolute path to project root
- * @param run - Eval run record with timestamp, eval_name, score, run_count
- *
- * @example
- * ```typescript
- * import { recordEvalRun } from "./eval-history.js";
- *
- * recordEvalRun("/path/to/project", {
- *   timestamp: new Date().toISOString(),
- *   eval_name: "swarm-decomposition",
- *   score: 0.92,
- *   run_count: 15,
- * });
- * ```
- */
-export function recordEvalRun(
-  projectPath: string,
-  run: EvalRunRecord,
-): void {
-  ensureEvalHistoryDir(projectPath);
-  const historyPath = getEvalHistoryPath(projectPath);
-  const line = `${JSON.stringify(run)}\n`;
-  fs.appendFileSync(historyPath, line, "utf-8");
-}
-/**
- * Read all eval run records from the JSONL history file.
- *
- * Internal helper for parsing the history file. Records are returned in file
- * order. Blank and whitespace-only lines (including the `\r` left behind by
- * CRLF line endings) are skipped instead of being handed to `JSON.parse`.
- *
- * @param projectPath - Project root directory
- * @returns Parsed records, or an empty array when the history file does not exist
- * @throws SyntaxError when a non-blank line is not valid JSON; the message names
- *   the file and the 1-based line number so the bad entry can be found
- */
-function readAllRecords(projectPath: string): EvalRunRecord[] {
-  const historyPath = getEvalHistoryPath(projectPath);
-  if (!fs.existsSync(historyPath)) {
-    return [];
-  }
-  const records: EvalRunRecord[] = [];
-  const rawLines = fs.readFileSync(historyPath, "utf-8").split(/\r?\n/);
-  rawLines.forEach((rawLine, index) => {
-    const line = rawLine.trim();
-    if (line === "") {
-      return;
-    }
-    try {
-      // Shape is trusted here: the cast does not validate the parsed object.
-      records.push(JSON.parse(line) as EvalRunRecord);
-    } catch (error) {
-      const reason = error instanceof Error ? error.message : String(error);
-      // Same exception type JSON.parse raises, with the location added.
-      throw new SyntaxError(
-        `Invalid JSON in ${historyPath} at line ${index + 1}: ${reason}`,
-      );
-    }
-  });
-  return records;
-}
-/**
- * Collect the recorded runs that belong to one eval.
- *
- * Runs come back in the order they appear in the history file, which is the
- * order in which they were appended (oldest first).
- *
- * @param projectPath - Project root directory
- * @param evalName - Eval whose runs should be returned (exact match on `eval_name`)
- * @returns Matching run records; empty when there are none
- */
-export function getScoreHistory(
-  projectPath: string,
-  evalName: string,
-): EvalRunRecord[] {
-  const allRuns = readAllRecords(projectPath);
-  const belongsToEval = (record: EvalRunRecord): boolean =>
-    record.eval_name === evalName;
-  return allRuns.filter(belongsToEval);
-}
-/**
- * Population variance of a list of scores.
- *
- * Computed as the mean of the squared deviations from the mean,
- * i.e. Σ((x - μ)²) / n. Lists with zero or one element have variance 0.
- *
- * @param scores - Scores to measure
- * @returns Population variance of `scores`
- */
-export function calculateVariance(scores: number[]): number {
-  const count = scores.length;
-  if (count <= 1) {
-    return 0;
-  }
-  let total = 0;
-  for (const value of scores) {
-    total = total + value;
-  }
-  const mean = total / count;
-  let squaredDeviationSum = 0;
-  for (const value of scores) {
-    const delta = value - mean;
-    squaredDeviationSum = squaredDeviationSum + delta * delta;
-  }
-  return squaredDeviationSum / count;
-}
-/**
- * Classify an eval into a phase from its recorded history.
- *
- * The decision uses the number of recorded runs for `evalName` and, once there
- * are enough runs, the variance of their scores:
- *
- * - fewer than `BOOTSTRAP_THRESHOLD` runs → "bootstrap"
- * - `BOOTSTRAP_THRESHOLD` up to and including `STABILIZATION_THRESHOLD` runs → "stabilization"
- * - more than `STABILIZATION_THRESHOLD` runs with variance below
- *   `VARIANCE_THRESHOLD` → "production"
- * - more than `STABILIZATION_THRESHOLD` runs with variance at or above
- *   `VARIANCE_THRESHOLD` → "stabilization" (scores are not yet stable)
- *
- * This function only classifies; whatever gates act on the phase (warnings,
- * failures on regression) are applied by callers outside this file.
- *
- * @param projectPath - Absolute path to project root (contains `.opencode/eval-history.jsonl`)
- * @param evalName - Name of the eval (e.g., "swarm-decomposition")
- * @returns Current phase: "bootstrap" | "stabilization" | "production"
- *
- * @example
- * ```typescript
- * import { getPhase } from "./eval-history.js";
- *
- * const phase = getPhase("/path/to/project", "swarm-decomposition");
- * if (phase === "production") {
- *   // strict gates apply
- * }
- * ```
- */
-export function getPhase(projectPath: string, evalName: string): Phase {
-  const runs = getScoreHistory(projectPath, evalName);
-  const runCount = runs.length;
-  if (runCount < BOOTSTRAP_THRESHOLD) {
-    return "bootstrap";
-  }
-  // Variance is only consulted once the run count exceeds the stabilization window.
-  const pastStabilizationWindow = runCount > STABILIZATION_THRESHOLD;
-  const scoresAreStable =
-    pastStabilizationWindow &&
-    calculateVariance(runs.map((run) => run.score)) < VARIANCE_THRESHOLD;
-  return scoresAreStable ? "production" : "stabilization";
-}

package/src/eval-learning.test.ts DELETED Viewed

@@ -1,378 +0,0 @@
-/**
- * Tests for eval-learning.ts - Eval-to-Learning Feedback Loop
- *
- * TDD RED phase: Write failing tests first, then implement.
- *
- * Core behavior:
- * - Detect significant eval score drops (>15% from rolling average)
- * - Store failure context to semantic-memory with structured tags
- * - Ignore minor fluctuations (drops smaller than 15% of the rolling average)
- * - Configurable threshold for sensitivity tuning
- */
-import { describe, test, expect, beforeEach, mock } from "bun:test";
-import {
-	learnFromEvalFailure,
-	type EvalLearningConfig,
-	calculateRollingAverage,
-	isSignificantDrop,
-	formatFailureContext,
-	createLearningConfig,
-	DEFAULT_EVAL_LEARNING_CONFIG,
-} from "./eval-learning";
-import type { EvalRunRecord } from "./eval-history";
-import type { MemoryAdapter } from "./memory-tools";
-// ============================================================================
-// Mock Memory Adapter
-// ============================================================================
-/**
- * Build an in-memory stand-in for the memory adapter.
- *
- * Every method is a `mock(...)` returning a canned response. `store()` also
- * appends its arguments to a private list, exposed through the extra
- * `getStoredMemories()` accessor so tests can inspect what was written.
- */
-function createMockMemoryAdapter(): MemoryAdapter {
-	type StoredArgs = {
-		information: string;
-		tags?: string;
-		metadata?: string;
-	};
-	const captured: StoredArgs[] = [];
-	const store = mock(async (args) => {
-		captured.push(args);
-		return {
-			id: `mem_${Date.now()}`,
-			message: "Stored successfully",
-		};
-	});
-	const stats = mock(async () => ({
-		total_memories: 0,
-		total_embeddings: 0,
-		collections: {},
-	}));
-	const adapter = {
-		store,
-		find: mock(async () => ({ results: [], total: 0 })),
-		get: mock(async () => null),
-		remove: mock(async () => ({ success: true, message: "Removed" })),
-		validate: mock(async () => ({ success: true, message: "Validated" })),
-		list: mock(async () => []),
-		stats,
-		checkHealth: mock(async () => ({ ready: true, message: "OK" })),
-		getStoredMemories: () => captured,
-	};
-	// Cast needed: getStoredMemories is a test-only extra on top of the adapter interface.
-	return adapter as any;
-}
-// ============================================================================
-// Tests: Rolling Average Calculation
-// ============================================================================
-describe("calculateRollingAverage", () => {
-	// Builds one record per score: run i (1-based) is dated 2024-12-0i.
-	const historyOf = (scores: number[]): EvalRunRecord[] =>
-		scores.map((score, index) => ({
-			eval_name: "test",
-			score,
-			timestamp: `2024-12-0${index + 1}`,
-			run_count: index + 1,
-		}));
-	test("returns 0 for empty history", () => {
-		expect(calculateRollingAverage([])).toBe(0);
-	});
-	test("returns single score for history of 1", () => {
-		const onlyRun: EvalRunRecord = {
-			eval_name: "test",
-			score: 0.85,
-			timestamp: "2024-12-01T00:00:00Z",
-			run_count: 1,
-		};
-		expect(calculateRollingAverage([onlyRun])).toBe(0.85);
-	});
-	test("calculates average of last N runs (default 5)", () => {
-		const history = historyOf([0.8, 0.82, 0.84, 0.86, 0.88, 0.9]);
-		// The oldest run (0.8) falls outside the default window of 5,
-		// leaving 0.82, 0.84, 0.86, 0.88, 0.9 with a mean of 0.86.
-		expect(calculateRollingAverage(history)).toBeCloseTo(0.86, 2);
-	});
-	test("uses custom window size", () => {
-		const history = historyOf([0.8, 0.85, 0.9]);
-		// Window of 2 keeps 0.85 and 0.9, mean 0.875.
-		expect(calculateRollingAverage(history, 2)).toBeCloseTo(0.875, 3);
-	});
-	test("handles window larger than history", () => {
-		const history = historyOf([0.8, 0.9]);
-		// Only two runs exist, so both are averaged: (0.8 + 0.9) / 2 = 0.85.
-		expect(calculateRollingAverage(history, 10)).toBeCloseTo(0.85, 2);
-	});
-});
-// ============================================================================
-// Tests: Significant Drop Detection
-// ============================================================================
-describe("isSignificantDrop", () => {
-	// All non-zero cases compare against the same baseline score.
-	const baseline = 0.85;
-	test("returns false when current equals baseline", () => {
-		const dropped = isSignificantDrop(0.85, baseline);
-		expect(dropped).toBe(false);
-	});
-	test("returns false when current is higher than baseline", () => {
-		const dropped = isSignificantDrop(0.9, baseline);
-		expect(dropped).toBe(false);
-	});
-	test("returns false for drop below threshold (default 15%)", () => {
-		// 0.765 is 90% of 0.85, i.e. a 10% drop.
-		const dropped = isSignificantDrop(0.765, baseline);
-		expect(dropped).toBe(false);
-	});
-	test("returns true for drop at threshold (15%)", () => {
-		// An exact 15% drop would be 0.7225; 0.722 sits just past it so the
-		// result does not hinge on floating point rounding.
-		const dropped = isSignificantDrop(0.722, baseline);
-		expect(dropped).toBe(true);
-	});
-	test("returns true for drop above threshold (20%)", () => {
-		// 0.68 is 80% of 0.85, i.e. a 20% drop.
-		const dropped = isSignificantDrop(0.68, baseline);
-		expect(dropped).toBe(true);
-	});
-	test("uses custom threshold", () => {
-		// 0.782 is 92% of 0.85, i.e. an 8% drop: under the default 15%
-		// threshold, over a custom 5% threshold.
-		const withDefault = isSignificantDrop(0.782, baseline);
-		expect(withDefault).toBe(false);
-		const withFivePercent = isSignificantDrop(0.782, baseline, 0.05);
-		expect(withFivePercent).toBe(true);
-	});
-	test("returns false when baseline is 0 (avoid division by zero)", () => {
-		const bothZero = isSignificantDrop(0, 0);
-		expect(bothZero).toBe(false);
-		const zeroBaselineOnly = isSignificantDrop(0.5, 0);
-		expect(zeroBaselineOnly).toBe(false);
-	});
-});
-// ============================================================================
-// Tests: Failure Context Formatting
-// ============================================================================
-describe("formatFailureContext", () => {
-	test("includes eval name, scores, and drop percentage", () => {
-		const context = formatFailureContext("compaction-test", 0.68, 0.85);
-		// "20.0%" is the relative drop: (0.85 - 0.68) / 0.85.
-		const expectedFragments = ["compaction-test", "0.68", "0.85", "20.0%"];
-		for (const fragment of expectedFragments) {
-			expect(context).toContain(fragment);
-		}
-	});
-	test("includes optional scorer context", () => {
-		const scorerContext = "violationCount scorer failed: 5 violations detected";
-		const evalName = "coordinator-behavior";
-		const context = formatFailureContext(evalName, 0.5, 0.8, scorerContext);
-		expect(context).toContain(evalName);
-		expect(context).toContain(scorerContext);
-	});
-	test("handles baseline of 0 gracefully", () => {
-		const context = formatFailureContext("test", 0.5, 0);
-		// A zero baseline must not leak a division artifact into the text.
-		for (const artifact of ["NaN", "Infinity"]) {
-			expect(context).not.toContain(artifact);
-		}
-	});
-});
-// ============================================================================
-// Tests: Main learnFromEvalFailure Function
-// ============================================================================
-describe("learnFromEvalFailure", () => {
-	let mockAdapter: MemoryAdapter;
-	// Builds one record per score: run i (1-based) is dated 2024-12-0i.
-	const historyOf = (scores: number[]): EvalRunRecord[] =>
-		scores.map((score, index) => ({
-			eval_name: "test",
-			score,
-			timestamp: `2024-12-0${index + 1}`,
-			run_count: index + 1,
-		}));
-	// First set of arguments the mock adapter's store() received.
-	const firstStored = () => (mockAdapter as any).getStoredMemories()[0];
-	beforeEach(() => {
-		mockAdapter = createMockMemoryAdapter();
-	});
-	test("stores memory when score drops significantly", async () => {
-		const history = historyOf([0.85, 0.84, 0.86, 0.85, 0.84]);
-		// 0.68 is roughly 20% below the 0.848 average of the history.
-		const result = await learnFromEvalFailure(
-			"test-eval",
-			0.68,
-			history,
-			mockAdapter,
-		);
-		expect(result.triggered).toBe(true);
-		expect(result.baseline).toBeCloseTo(0.848, 2);
-		expect(result.drop_percentage).toBeCloseTo(0.198, 2);
-		// Exactly one memory is written, describing and tagging the failure.
-		expect(mockAdapter.store).toHaveBeenCalledTimes(1);
-		const { information, tags } = firstStored();
-		expect(information).toContain("test-eval");
-		expect(information).toContain("0.68");
-		expect(tags).toContain("eval-failure");
-		expect(tags).toContain("test-eval");
-	});
-	test("does not store memory for minor fluctuations", async () => {
-		const history = historyOf([0.85, 0.84]);
-		// 0.8 is only about 5% down, well inside the default 15% threshold.
-		const result = await learnFromEvalFailure(
-			"test-eval",
-			0.8,
-			history,
-			mockAdapter,
-		);
-		expect(result.triggered).toBe(false);
-		expect(mockAdapter.store).not.toHaveBeenCalled();
-	});
-	test("includes scorer context in memory if provided", async () => {
-		const scorerContext = "violationCount: 8 protocol violations";
-		// 0.9 -> 0.7 is a drop of about 22%.
-		await learnFromEvalFailure(
-			"coordinator-behavior",
-			0.7,
-			historyOf([0.9]),
-			mockAdapter,
-			{ scorerContext },
-		);
-		expect(firstStored().information).toContain(scorerContext);
-	});
-	test("uses custom threshold when provided", async () => {
-		// 0.9 -> 0.85 is a drop of about 5.5%: only a 5% threshold catches it.
-		const config: EvalLearningConfig = {
-			...DEFAULT_EVAL_LEARNING_CONFIG,
-			dropThreshold: 0.05,
-		};
-		const result = await learnFromEvalFailure(
-			"test-eval",
-			0.85,
-			historyOf([0.9]),
-			mockAdapter,
-			{ config },
-		);
-		expect(result.triggered).toBe(true);
-		expect(mockAdapter.store).toHaveBeenCalledTimes(1);
-	});
-	test("handles empty history gracefully", async () => {
-		const noHistory: EvalRunRecord[] = [];
-		const result = await learnFromEvalFailure(
-			"test-eval",
-			0.5,
-			noHistory,
-			mockAdapter,
-		);
-		expect(result.triggered).toBe(false);
-		expect(result.baseline).toBe(0);
-		expect(mockAdapter.store).not.toHaveBeenCalled();
-	});
-	test("generates structured tags for semantic search", async () => {
-		// 0.9 -> 0.7 is a significant drop, so a memory is stored.
-		await learnFromEvalFailure(
-			"compaction-test",
-			0.7,
-			historyOf([0.9]),
-			mockAdapter,
-		);
-		const { tags } = firstStored();
-		for (const expectedTag of ["eval-failure", "compaction-test", "regression"]) {
-			expect(tags).toContain(expectedTag);
-		}
-	});
-	test("stores metadata for future prompt generation", async () => {
-		await learnFromEvalFailure("test-eval", 0.7, historyOf([0.9]), mockAdapter);
-		const rawMetadata = firstStored().metadata;
-		expect(rawMetadata).toBeDefined();
-		const metadata = JSON.parse(rawMetadata!);
-		expect(metadata.eval_name).toBe("test-eval");
-		expect(metadata.baseline_score).toBeCloseTo(0.9, 2);
-		expect(metadata.current_score).toBe(0.7);
-		expect(metadata.drop_percentage).toBeCloseTo(0.222, 2);
-	});
-});
-// ============================================================================
-// Tests: Convenience Helpers
-// ============================================================================
-describe("createLearningConfig", () => {
-	test("creates config with custom threshold", () => {
-		const { dropThreshold, windowSize } = createLearningConfig(0.1);
-		expect(dropThreshold).toBe(0.1);
-		// Window size falls back to the default when it is not supplied.
-		expect(windowSize).toBe(DEFAULT_EVAL_LEARNING_CONFIG.windowSize);
-	});
-	test("accepts custom window size", () => {
-		const { dropThreshold, windowSize } = createLearningConfig(0.2, 10);
-		expect(dropThreshold).toBe(0.2);
-		expect(windowSize).toBe(10);
-	});
-});