npm - opencode-swarm-plugin - Versions diffs - 0.43.0 → 0.44.1 - Mend

opencode-swarm-plugin 0.43.0 → 0.44.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

package/bin/cass.characterization.test.ts +422 -0
package/bin/swarm.serve.test.ts +6 -4
package/bin/swarm.test.ts +68 -0
package/bin/swarm.ts +81 -8
package/dist/compaction-prompt-scoring.js +139 -0
package/dist/contributor-tools.d.ts +42 -0
package/dist/contributor-tools.d.ts.map +1 -0
package/dist/eval-capture.js +12811 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +12 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +7728 -62590
package/dist/plugin.js +23833 -78695
package/dist/sessions/agent-discovery.d.ts +59 -0
package/dist/sessions/agent-discovery.d.ts.map +1 -0
package/dist/sessions/index.d.ts +10 -0
package/dist/sessions/index.d.ts.map +1 -0
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-review.d.ts.map +1 -1
package/package.json +17 -5
package/.changeset/swarm-insights-data-layer.md +0 -63
package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
package/.hive/analysis/session-data-quality-audit.md +0 -320
package/.hive/eval-results.json +0 -483
package/.hive/issues.jsonl +0 -138
package/.hive/memories.jsonl +0 -729
package/.opencode/eval-history.jsonl +0 -327
package/.turbo/turbo-build.log +0 -9
package/CHANGELOG.md +0 -2255
package/SCORER-ANALYSIS.md +0 -598
package/docs/analysis/subagent-coordination-patterns.md +0 -902
package/docs/analysis-socratic-planner-pattern.md +0 -504
package/docs/planning/ADR-001-monorepo-structure.md +0 -171
package/docs/planning/ADR-002-package-extraction.md +0 -393
package/docs/planning/ADR-003-performance-improvements.md +0 -451
package/docs/planning/ADR-004-message-queue-features.md +0 -187
package/docs/planning/ADR-005-devtools-observability.md +0 -202
package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
package/docs/planning/ROADMAP.md +0 -368
package/docs/semantic-memory-cli-syntax.md +0 -123
package/docs/swarm-mail-architecture.md +0 -1147
package/docs/testing/context-recovery-test.md +0 -470
package/evals/ARCHITECTURE.md +0 -1189
package/evals/README.md +0 -768
package/evals/compaction-prompt.eval.ts +0 -149
package/evals/compaction-resumption.eval.ts +0 -289
package/evals/coordinator-behavior.eval.ts +0 -307
package/evals/coordinator-session.eval.ts +0 -154
package/evals/evalite.config.ts.bak +0 -15
package/evals/example.eval.ts +0 -31
package/evals/fixtures/compaction-cases.ts +0 -350
package/evals/fixtures/compaction-prompt-cases.ts +0 -311
package/evals/fixtures/coordinator-sessions.ts +0 -328
package/evals/fixtures/decomposition-cases.ts +0 -105
package/evals/lib/compaction-loader.test.ts +0 -248
package/evals/lib/compaction-loader.ts +0 -320
package/evals/lib/data-loader.evalite-test.ts +0 -289
package/evals/lib/data-loader.test.ts +0 -345
package/evals/lib/data-loader.ts +0 -281
package/evals/lib/llm.ts +0 -115
package/evals/scorers/compaction-prompt-scorers.ts +0 -145
package/evals/scorers/compaction-scorers.ts +0 -305
package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
package/evals/scorers/coordinator-discipline.ts +0 -325
package/evals/scorers/index.test.ts +0 -146
package/evals/scorers/index.ts +0 -328
package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
package/evals/scorers/outcome-scorers.ts +0 -349
package/evals/swarm-decomposition.eval.ts +0 -121
package/examples/commands/swarm.md +0 -745
package/examples/plugin-wrapper-template.ts +0 -2426
package/examples/skills/hive-workflow/SKILL.md +0 -212
package/examples/skills/skill-creator/SKILL.md +0 -223
package/examples/skills/swarm-coordination/SKILL.md +0 -292
package/global-skills/cli-builder/SKILL.md +0 -344
package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
package/global-skills/learning-systems/SKILL.md +0 -644
package/global-skills/skill-creator/LICENSE.txt +0 -202
package/global-skills/skill-creator/SKILL.md +0 -352
package/global-skills/skill-creator/references/output-patterns.md +0 -82
package/global-skills/skill-creator/references/workflows.md +0 -28
package/global-skills/swarm-coordination/SKILL.md +0 -995
package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
package/global-skills/swarm-coordination/references/strategies.md +0 -138
package/global-skills/system-design/SKILL.md +0 -213
package/global-skills/testing-patterns/SKILL.md +0 -430
package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
package/opencode-swarm-plugin-0.30.7.tgz +0 -0
package/opencode-swarm-plugin-0.31.0.tgz +0 -0
package/scripts/cleanup-test-memories.ts +0 -346
package/scripts/init-skill.ts +0 -222
package/scripts/migrate-unknown-sessions.ts +0 -349
package/scripts/validate-skill.ts +0 -204
package/src/agent-mail.ts +0 -1724
package/src/anti-patterns.test.ts +0 -1167
package/src/anti-patterns.ts +0 -448
package/src/compaction-capture.integration.test.ts +0 -257
package/src/compaction-hook.test.ts +0 -838
package/src/compaction-hook.ts +0 -1204
package/src/compaction-observability.integration.test.ts +0 -139
package/src/compaction-observability.test.ts +0 -187
package/src/compaction-observability.ts +0 -324
package/src/compaction-prompt-scorers.test.ts +0 -475
package/src/compaction-prompt-scoring.ts +0 -300
package/src/dashboard.test.ts +0 -611
package/src/dashboard.ts +0 -462
package/src/error-enrichment.test.ts +0 -403
package/src/error-enrichment.ts +0 -219
package/src/eval-capture.test.ts +0 -1015
package/src/eval-capture.ts +0 -929
package/src/eval-gates.test.ts +0 -306
package/src/eval-gates.ts +0 -218
package/src/eval-history.test.ts +0 -508
package/src/eval-history.ts +0 -214
package/src/eval-learning.test.ts +0 -378
package/src/eval-learning.ts +0 -360
package/src/eval-runner.test.ts +0 -223
package/src/eval-runner.ts +0 -402
package/src/export-tools.test.ts +0 -476
package/src/export-tools.ts +0 -257
package/src/hive.integration.test.ts +0 -2241
package/src/hive.ts +0 -1628
package/src/index.ts +0 -935
package/src/learning.integration.test.ts +0 -1815
package/src/learning.ts +0 -1079
package/src/logger.test.ts +0 -189
package/src/logger.ts +0 -135
package/src/mandate-promotion.test.ts +0 -473
package/src/mandate-promotion.ts +0 -239
package/src/mandate-storage.integration.test.ts +0 -601
package/src/mandate-storage.test.ts +0 -578
package/src/mandate-storage.ts +0 -794
package/src/mandates.ts +0 -540
package/src/memory-tools.test.ts +0 -195
package/src/memory-tools.ts +0 -344
package/src/memory.integration.test.ts +0 -334
package/src/memory.test.ts +0 -158
package/src/memory.ts +0 -527
package/src/model-selection.test.ts +0 -188
package/src/model-selection.ts +0 -68
package/src/observability-tools.test.ts +0 -359
package/src/observability-tools.ts +0 -871
package/src/output-guardrails.test.ts +0 -438
package/src/output-guardrails.ts +0 -381
package/src/pattern-maturity.test.ts +0 -1160
package/src/pattern-maturity.ts +0 -525
package/src/planning-guardrails.test.ts +0 -491
package/src/planning-guardrails.ts +0 -438
package/src/plugin.ts +0 -23
package/src/post-compaction-tracker.test.ts +0 -251
package/src/post-compaction-tracker.ts +0 -237
package/src/query-tools.test.ts +0 -636
package/src/query-tools.ts +0 -324
package/src/rate-limiter.integration.test.ts +0 -466
package/src/rate-limiter.ts +0 -774
package/src/replay-tools.test.ts +0 -496
package/src/replay-tools.ts +0 -240
package/src/repo-crawl.integration.test.ts +0 -441
package/src/repo-crawl.ts +0 -610
package/src/schemas/cell-events.test.ts +0 -347
package/src/schemas/cell-events.ts +0 -807
package/src/schemas/cell.ts +0 -257
package/src/schemas/evaluation.ts +0 -166
package/src/schemas/index.test.ts +0 -199
package/src/schemas/index.ts +0 -286
package/src/schemas/mandate.ts +0 -232
package/src/schemas/swarm-context.ts +0 -115
package/src/schemas/task.ts +0 -161
package/src/schemas/worker-handoff.test.ts +0 -302
package/src/schemas/worker-handoff.ts +0 -131
package/src/skills.integration.test.ts +0 -1192
package/src/skills.test.ts +0 -643
package/src/skills.ts +0 -1549
package/src/storage.integration.test.ts +0 -341
package/src/storage.ts +0 -884
package/src/structured.integration.test.ts +0 -817
package/src/structured.test.ts +0 -1046
package/src/structured.ts +0 -762
package/src/swarm-decompose.test.ts +0 -188
package/src/swarm-decompose.ts +0 -1302
package/src/swarm-deferred.integration.test.ts +0 -157
package/src/swarm-deferred.test.ts +0 -38
package/src/swarm-insights.test.ts +0 -214
package/src/swarm-insights.ts +0 -459
package/src/swarm-mail.integration.test.ts +0 -970
package/src/swarm-mail.ts +0 -739
package/src/swarm-orchestrate.integration.test.ts +0 -282
package/src/swarm-orchestrate.test.ts +0 -548
package/src/swarm-orchestrate.ts +0 -3084
package/src/swarm-prompts.test.ts +0 -1270
package/src/swarm-prompts.ts +0 -2077
package/src/swarm-research.integration.test.ts +0 -701
package/src/swarm-research.test.ts +0 -698
package/src/swarm-research.ts +0 -472
package/src/swarm-review.integration.test.ts +0 -285
package/src/swarm-review.test.ts +0 -879
package/src/swarm-review.ts +0 -709
package/src/swarm-strategies.ts +0 -407
package/src/swarm-worktree.test.ts +0 -501
package/src/swarm-worktree.ts +0 -575
package/src/swarm.integration.test.ts +0 -2377
package/src/swarm.ts +0 -38
package/src/tool-adapter.integration.test.ts +0 -1221
package/src/tool-availability.ts +0 -461
package/tsconfig.json +0 -28

package/src/eval-history.test.ts DELETED Viewed

@@ -1,508 +0,0 @@
-/**
- * Tests for eval-history - tracks eval run scores and calculates progressive phases
- *
- * TDD: RED phase - all tests should fail initially
- */
-import { afterEach, beforeEach, describe, expect, test } from "bun:test";
-import * as fs from "node:fs";
-import * as path from "node:path";
-import {
-  type EvalRunRecord,
-  type Phase,
-  calculateVariance,
-  getPhase,
-  getScoreHistory,
-  recordEvalRun,
-} from "./eval-history.js";
-describe("eval-history", () => {
-  const testDir = path.join(import.meta.dir, ".test-eval-history");
-  const testProjectPath = path.join(testDir, "test-project");
-  beforeEach(() => {
-    // Clean slate for each test
-    if (fs.existsSync(testDir)) {
-      fs.rmSync(testDir, { recursive: true });
-    }
-    fs.mkdirSync(testProjectPath, { recursive: true });
-  });
-  afterEach(() => {
-    // Cleanup
-    if (fs.existsSync(testDir)) {
-      fs.rmSync(testDir, { recursive: true });
-    }
-  });
-  describe("recordEvalRun", () => {
-    test("appends eval run to JSONL file", () => {
-      const run: EvalRunRecord = {
-        timestamp: new Date().toISOString(),
-        eval_name: "swarm-decomposition",
-        score: 0.85,
-        run_count: 1,
-      };
-      recordEvalRun(testProjectPath, run);
-      const historyPath = path.join(testProjectPath, ".opencode/eval-history.jsonl");
-      expect(fs.existsSync(historyPath)).toBe(true);
-      const content = fs.readFileSync(historyPath, "utf-8");
-      const lines = content.trim().split("\n");
-      expect(lines).toHaveLength(1);
-      const parsed = JSON.parse(lines[0]);
-      expect(parsed.eval_name).toBe("swarm-decomposition");
-      expect(parsed.score).toBe(0.85);
-      expect(parsed.run_count).toBe(1);
-    });
-    test("appends multiple runs sequentially", () => {
-      const run1: EvalRunRecord = {
-        timestamp: new Date().toISOString(),
-        eval_name: "swarm-decomposition",
-        score: 0.80,
-        run_count: 1,
-      };
-      const run2: EvalRunRecord = {
-        timestamp: new Date().toISOString(),
-        eval_name: "swarm-decomposition",
-        score: 0.85,
-        run_count: 2,
-      };
-      recordEvalRun(testProjectPath, run1);
-      recordEvalRun(testProjectPath, run2);
-      const historyPath = path.join(testProjectPath, ".opencode/eval-history.jsonl");
-      const content = fs.readFileSync(historyPath, "utf-8");
-      const lines = content.trim().split("\n");
-      expect(lines).toHaveLength(2);
-      const parsed1 = JSON.parse(lines[0]);
-      const parsed2 = JSON.parse(lines[1]);
-      expect(parsed1.score).toBe(0.80);
-      expect(parsed2.score).toBe(0.85);
-    });
-    test("creates directory if it doesn't exist", () => {
-      const run: EvalRunRecord = {
-        timestamp: new Date().toISOString(),
-        eval_name: "test-eval",
-        score: 0.90,
-        run_count: 1,
-      };
-      // Directory doesn't exist yet
-      const opencodePath = path.join(testProjectPath, ".opencode");
-      expect(fs.existsSync(opencodePath)).toBe(false);
-      recordEvalRun(testProjectPath, run);
-      // Directory should be created
-      expect(fs.existsSync(opencodePath)).toBe(true);
-    });
-    test("supports different eval names in same history", () => {
-      const run1: EvalRunRecord = {
-        timestamp: new Date().toISOString(),
-        eval_name: "swarm-decomposition",
-        score: 0.85,
-        run_count: 1,
-      };
-      const run2: EvalRunRecord = {
-        timestamp: new Date().toISOString(),
-        eval_name: "coordinator-session",
-        score: 0.75,
-        run_count: 1,
-      };
-      recordEvalRun(testProjectPath, run1);
-      recordEvalRun(testProjectPath, run2);
-      const historyPath = path.join(testProjectPath, ".opencode/eval-history.jsonl");
-      const content = fs.readFileSync(historyPath, "utf-8");
-      const lines = content.trim().split("\n");
-      expect(lines).toHaveLength(2);
-      const parsed1 = JSON.parse(lines[0]);
-      const parsed2 = JSON.parse(lines[1]);
-      expect(parsed1.eval_name).toBe("swarm-decomposition");
-      expect(parsed2.eval_name).toBe("coordinator-session");
-    });
-  });
-  describe("getScoreHistory", () => {
-    test("returns empty array when no history exists", () => {
-      const history = getScoreHistory(testProjectPath, "swarm-decomposition");
-      expect(history).toEqual([]);
-    });
-    test("returns all runs for a specific eval", () => {
-      const runs: EvalRunRecord[] = [
-        {
-          timestamp: new Date().toISOString(),
-          eval_name: "swarm-decomposition",
-          score: 0.80,
-          run_count: 1,
-        },
-        {
-          timestamp: new Date().toISOString(),
-          eval_name: "swarm-decomposition",
-          score: 0.85,
-          run_count: 2,
-        },
-        {
-          timestamp: new Date().toISOString(),
-          eval_name: "coordinator-session",
-          score: 0.70,
-          run_count: 1,
-        },
-      ];
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const history = getScoreHistory(testProjectPath, "swarm-decomposition");
-      expect(history).toHaveLength(2);
-      expect(history[0].score).toBe(0.80);
-      expect(history[1].score).toBe(0.85);
-    });
-    test("filters by eval_name correctly", () => {
-      const runs: EvalRunRecord[] = [
-        {
-          timestamp: new Date().toISOString(),
-          eval_name: "swarm-decomposition",
-          score: 0.80,
-          run_count: 1,
-        },
-        {
-          timestamp: new Date().toISOString(),
-          eval_name: "coordinator-session",
-          score: 0.70,
-          run_count: 1,
-        },
-        {
-          timestamp: new Date().toISOString(),
-          eval_name: "swarm-decomposition",
-          score: 0.85,
-          run_count: 2,
-        },
-      ];
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const decompositionHistory = getScoreHistory(testProjectPath, "swarm-decomposition");
-      const coordinatorHistory = getScoreHistory(testProjectPath, "coordinator-session");
-      expect(decompositionHistory).toHaveLength(2);
-      expect(coordinatorHistory).toHaveLength(1);
-      expect(coordinatorHistory[0].score).toBe(0.70);
-    });
-    test("returns runs in chronological order", () => {
-      const baseTime = Date.now();
-      const runs: EvalRunRecord[] = [
-        {
-          timestamp: new Date(baseTime).toISOString(),
-          eval_name: "test-eval",
-          score: 0.80,
-          run_count: 1,
-        },
-        {
-          timestamp: new Date(baseTime + 1000).toISOString(),
-          eval_name: "test-eval",
-          score: 0.85,
-          run_count: 2,
-        },
-        {
-          timestamp: new Date(baseTime + 2000).toISOString(),
-          eval_name: "test-eval",
-          score: 0.90,
-          run_count: 3,
-        },
-      ];
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const history = getScoreHistory(testProjectPath, "test-eval");
-      expect(history).toHaveLength(3);
-      expect(history[0].score).toBe(0.80);
-      expect(history[1].score).toBe(0.85);
-      expect(history[2].score).toBe(0.90);
-    });
-  });
-  describe("calculateVariance", () => {
-    test("returns 0 for single score", () => {
-      const variance = calculateVariance([0.85]);
-      expect(variance).toBe(0);
-    });
-    test("returns 0 for identical scores", () => {
-      const variance = calculateVariance([0.80, 0.80, 0.80, 0.80]);
-      expect(variance).toBe(0);
-    });
-    test("calculates variance for varying scores", () => {
-      // Scores: 0.70, 0.80, 0.90
-      // Mean: 0.80
-      // Deviations: -0.10, 0, 0.10
-      // Squared deviations: 0.01, 0, 0.01
-      // Variance: 0.02 / 3 = 0.00666...
-      const variance = calculateVariance([0.70, 0.80, 0.90]);
-      expect(variance).toBeCloseTo(0.00667, 5);
-    });
-    test("calculates variance for larger dataset", () => {
-      // 10 scores with controlled variance
-      const scores = [0.75, 0.76, 0.77, 0.78, 0.79, 0.80, 0.81, 0.82, 0.83, 0.84];
-      const variance = calculateVariance(scores);
-      expect(variance).toBeGreaterThan(0);
-      expect(variance).toBeLessThan(0.01); // Should be small but not zero
-    });
-    test("handles empty array", () => {
-      const variance = calculateVariance([]);
-      expect(variance).toBe(0);
-    });
-    test("handles high variance scores", () => {
-      const scores = [0.10, 0.50, 0.90];
-      const variance = calculateVariance(scores);
-      expect(variance).toBeGreaterThan(0.05);
-    });
-  });
-  describe("getPhase", () => {
-    test("returns bootstrap phase for <10 runs", () => {
-      const runs: EvalRunRecord[] = [];
-      for (let i = 1; i <= 9; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: 0.80 + Math.random() * 0.1,
-          run_count: i,
-        });
-      }
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const phase = getPhase(testProjectPath, "test-eval");
-      expect(phase).toBe("bootstrap");
-    });
-    test("returns stabilization phase for 10-50 runs", () => {
-      const runs: EvalRunRecord[] = [];
-      for (let i = 1; i <= 25; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: 0.80 + Math.random() * 0.1,
-          run_count: i,
-        });
-      }
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const phase = getPhase(testProjectPath, "test-eval");
-      expect(phase).toBe("stabilization");
-    });
-    test("returns production phase for >50 runs with low variance", () => {
-      const runs: EvalRunRecord[] = [];
-      // Create 60 runs with very low variance (0.80 ± 0.01)
-      for (let i = 1; i <= 60; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: 0.80 + (Math.random() * 0.02 - 0.01), // Variance ~0.00003
-          run_count: i,
-        });
-      }
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const phase = getPhase(testProjectPath, "test-eval");
-      expect(phase).toBe("production");
-    });
-    test("returns stabilization phase for >50 runs with high variance", () => {
-      const runs: EvalRunRecord[] = [];
-      // Create 60 runs with high variance
-      // Variance = 0.1225 for alternating 0.1 and 0.8
-      for (let i = 1; i <= 60; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: i % 2 === 0 ? 0.1 : 0.8,
-          run_count: i,
-        });
-      }
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const phase = getPhase(testProjectPath, "test-eval");
-      expect(phase).toBe("stabilization");
-    });
-    test("returns bootstrap phase when no history exists", () => {
-      const phase = getPhase(testProjectPath, "nonexistent-eval");
-      expect(phase).toBe("bootstrap");
-    });
-    test("phase transitions at exactly 10 runs", () => {
-      const runs: EvalRunRecord[] = [];
-      for (let i = 1; i <= 10; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: 0.80,
-          run_count: i,
-        });
-      }
-      // Record 9 runs - should be bootstrap
-      for (let i = 0; i < 9; i++) {
-        recordEvalRun(testProjectPath, runs[i]);
-      }
-      expect(getPhase(testProjectPath, "test-eval")).toBe("bootstrap");
-      // Add 10th run - should be stabilization
-      recordEvalRun(testProjectPath, runs[9]);
-      expect(getPhase(testProjectPath, "test-eval")).toBe("stabilization");
-    });
-    test("phase transitions at exactly 50 runs with low variance", () => {
-      const runs: EvalRunRecord[] = [];
-      for (let i = 1; i <= 51; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: 0.80 + (Math.random() * 0.02 - 0.01),
-          run_count: i,
-        });
-      }
-      // Record 50 runs - should be stabilization
-      for (let i = 0; i < 50; i++) {
-        recordEvalRun(testProjectPath, runs[i]);
-      }
-      expect(getPhase(testProjectPath, "test-eval")).toBe("stabilization");
-      // Add 51st run - should be production (if variance is low)
-      recordEvalRun(testProjectPath, runs[50]);
-      const phase = getPhase(testProjectPath, "test-eval");
-      expect(phase).toBe("production");
-    });
-    test("variance threshold is 0.1", () => {
-      const runs: EvalRunRecord[] = [];
-      // Create 60 runs with variance well below the 0.1 threshold
-      // Mean = 0.80, stdev = 0.15, variance = 0.0225
-      for (let i = 1; i <= 60; i++) {
-        runs.push({
-          timestamp: new Date().toISOString(),
-          eval_name: "test-eval",
-          score: 0.80 + (i % 2 === 0 ? 0.15 : -0.15), // Produces variance ~0.0225
-          run_count: i,
-        });
-      }
-      for (const run of runs) {
-        recordEvalRun(testProjectPath, run);
-      }
-      const phase = getPhase(testProjectPath, "test-eval");
-      expect(phase).toBe("production");
-    });
-  });
-  describe("phase progression integration", () => {
-    test("complete lifecycle: bootstrap -> stabilization -> production", () => {
-      const evalName = "lifecycle-test";
-      // Phase 1: Bootstrap (0-9 runs)
-      for (let i = 1; i <= 5; i++) {
-        recordEvalRun(testProjectPath, {
-          timestamp: new Date().toISOString(),
-          eval_name: evalName,
-          score: 0.75 + Math.random() * 0.2,
-          run_count: i,
-        });
-      }
-      expect(getPhase(testProjectPath, evalName)).toBe("bootstrap");
-      // Phase 2: Stabilization (10-50 runs)
-      for (let i = 6; i <= 30; i++) {
-        recordEvalRun(testProjectPath, {
-          timestamp: new Date().toISOString(),
-          eval_name: evalName,
-          score: 0.78 + Math.random() * 0.1,
-          run_count: i,
-        });
-      }
-      expect(getPhase(testProjectPath, evalName)).toBe("stabilization");
-      // Phase 3: Production (>50 runs, low variance)
-      for (let i = 31; i <= 60; i++) {
-        recordEvalRun(testProjectPath, {
-          timestamp: new Date().toISOString(),
-          eval_name: evalName,
-          score: 0.82 + (Math.random() * 0.02 - 0.01), // Very stable
-          run_count: i,
-        });
-      }
-      expect(getPhase(testProjectPath, evalName)).toBe("production");
-      // Verify history
-      const history = getScoreHistory(testProjectPath, evalName);
-      expect(history).toHaveLength(60);
-    });
-    test("regression in production keeps phase as stabilization if variance increases", () => {
-      const evalName = "regression-test";
-      // Build stable production phase
-      for (let i = 1; i <= 60; i++) {
-        recordEvalRun(testProjectPath, {
-          timestamp: new Date().toISOString(),
-          eval_name: evalName,
-          score: 0.85 + (Math.random() * 0.01 - 0.005),
-          run_count: i,
-        });
-      }
-      expect(getPhase(testProjectPath, evalName)).toBe("production");
-      // Introduce regression (high variance) - need 50 wild runs to push variance > 0.1
-      // 60 stable @ 0.85 + 50 wild @ 0.1/0.9 = variance ~0.103
-      for (let i = 61; i <= 110; i++) {
-        recordEvalRun(testProjectPath, {
-          timestamp: new Date().toISOString(),
-          eval_name: evalName,
-          score: i % 2 === 0 ? 0.1 : 0.9,
-          run_count: i,
-        });
-      }
-      // Should drop back to stabilization due to high variance
-      expect(getPhase(testProjectPath, evalName)).toBe("stabilization");
-    });
-  });
-});