npm - opencode-swarm-plugin - Versions diffs - 0.44.0 → 0.44.2 - Mend

opencode-swarm-plugin 0.44.0 → 0.44.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (215) hide show

package/bin/swarm.serve.test.ts +6 -4
package/bin/swarm.ts +18 -12
package/dist/compaction-prompt-scoring.js +139 -0
package/dist/eval-capture.js +12811 -0
package/dist/hive.d.ts.map +1 -1
package/dist/hive.js +14834 -0
package/dist/index.d.ts +18 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +7743 -62593
package/dist/plugin.js +24052 -78907
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-prompts.js +39407 -0
package/dist/swarm-review.d.ts.map +1 -1
package/dist/swarm-validation.d.ts +127 -0
package/dist/swarm-validation.d.ts.map +1 -0
package/dist/validators/index.d.ts +7 -0
package/dist/validators/index.d.ts.map +1 -0
package/dist/validators/schema-validator.d.ts +58 -0
package/dist/validators/schema-validator.d.ts.map +1 -0
package/package.json +17 -5
package/.changeset/swarm-insights-data-layer.md +0 -63
package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
package/.hive/analysis/session-data-quality-audit.md +0 -320
package/.hive/eval-results.json +0 -483
package/.hive/issues.jsonl +0 -138
package/.hive/memories.jsonl +0 -729
package/.opencode/eval-history.jsonl +0 -327
package/.turbo/turbo-build.log +0 -9
package/CHANGELOG.md +0 -2286
package/SCORER-ANALYSIS.md +0 -598
package/docs/analysis/subagent-coordination-patterns.md +0 -902
package/docs/analysis-socratic-planner-pattern.md +0 -504
package/docs/planning/ADR-001-monorepo-structure.md +0 -171
package/docs/planning/ADR-002-package-extraction.md +0 -393
package/docs/planning/ADR-003-performance-improvements.md +0 -451
package/docs/planning/ADR-004-message-queue-features.md +0 -187
package/docs/planning/ADR-005-devtools-observability.md +0 -202
package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
package/docs/planning/ROADMAP.md +0 -368
package/docs/semantic-memory-cli-syntax.md +0 -123
package/docs/swarm-mail-architecture.md +0 -1147
package/docs/testing/context-recovery-test.md +0 -470
package/evals/ARCHITECTURE.md +0 -1189
package/evals/README.md +0 -768
package/evals/compaction-prompt.eval.ts +0 -149
package/evals/compaction-resumption.eval.ts +0 -289
package/evals/coordinator-behavior.eval.ts +0 -307
package/evals/coordinator-session.eval.ts +0 -154
package/evals/evalite.config.ts.bak +0 -15
package/evals/example.eval.ts +0 -31
package/evals/fixtures/cass-baseline.ts +0 -217
package/evals/fixtures/compaction-cases.ts +0 -350
package/evals/fixtures/compaction-prompt-cases.ts +0 -311
package/evals/fixtures/coordinator-sessions.ts +0 -328
package/evals/fixtures/decomposition-cases.ts +0 -105
package/evals/lib/compaction-loader.test.ts +0 -248
package/evals/lib/compaction-loader.ts +0 -320
package/evals/lib/data-loader.evalite-test.ts +0 -289
package/evals/lib/data-loader.test.ts +0 -345
package/evals/lib/data-loader.ts +0 -281
package/evals/lib/llm.ts +0 -115
package/evals/scorers/compaction-prompt-scorers.ts +0 -145
package/evals/scorers/compaction-scorers.ts +0 -305
package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
package/evals/scorers/coordinator-discipline.ts +0 -325
package/evals/scorers/index.test.ts +0 -146
package/evals/scorers/index.ts +0 -328
package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
package/evals/scorers/outcome-scorers.ts +0 -349
package/evals/swarm-decomposition.eval.ts +0 -121
package/examples/commands/swarm.md +0 -745
package/examples/plugin-wrapper-template.ts +0 -2515
package/examples/skills/hive-workflow/SKILL.md +0 -212
package/examples/skills/skill-creator/SKILL.md +0 -223
package/examples/skills/swarm-coordination/SKILL.md +0 -292
package/global-skills/cli-builder/SKILL.md +0 -344
package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
package/global-skills/learning-systems/SKILL.md +0 -644
package/global-skills/skill-creator/LICENSE.txt +0 -202
package/global-skills/skill-creator/SKILL.md +0 -352
package/global-skills/skill-creator/references/output-patterns.md +0 -82
package/global-skills/skill-creator/references/workflows.md +0 -28
package/global-skills/swarm-coordination/SKILL.md +0 -995
package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
package/global-skills/swarm-coordination/references/strategies.md +0 -138
package/global-skills/system-design/SKILL.md +0 -213
package/global-skills/testing-patterns/SKILL.md +0 -430
package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
package/opencode-swarm-plugin-0.30.7.tgz +0 -0
package/opencode-swarm-plugin-0.31.0.tgz +0 -0
package/scripts/cleanup-test-memories.ts +0 -346
package/scripts/init-skill.ts +0 -222
package/scripts/migrate-unknown-sessions.ts +0 -349
package/scripts/validate-skill.ts +0 -204
package/src/agent-mail.ts +0 -1724
package/src/anti-patterns.test.ts +0 -1167
package/src/anti-patterns.ts +0 -448
package/src/compaction-capture.integration.test.ts +0 -257
package/src/compaction-hook.test.ts +0 -838
package/src/compaction-hook.ts +0 -1204
package/src/compaction-observability.integration.test.ts +0 -139
package/src/compaction-observability.test.ts +0 -187
package/src/compaction-observability.ts +0 -324
package/src/compaction-prompt-scorers.test.ts +0 -475
package/src/compaction-prompt-scoring.ts +0 -300
package/src/contributor-tools.test.ts +0 -133
package/src/contributor-tools.ts +0 -201
package/src/dashboard.test.ts +0 -611
package/src/dashboard.ts +0 -462
package/src/error-enrichment.test.ts +0 -403
package/src/error-enrichment.ts +0 -219
package/src/eval-capture.test.ts +0 -1015
package/src/eval-capture.ts +0 -929
package/src/eval-gates.test.ts +0 -306
package/src/eval-gates.ts +0 -218
package/src/eval-history.test.ts +0 -508
package/src/eval-history.ts +0 -214
package/src/eval-learning.test.ts +0 -378
package/src/eval-learning.ts +0 -360
package/src/eval-runner.test.ts +0 -223
package/src/eval-runner.ts +0 -402
package/src/export-tools.test.ts +0 -476
package/src/export-tools.ts +0 -257
package/src/hive.integration.test.ts +0 -2241
package/src/hive.ts +0 -1628
package/src/index.ts +0 -940
package/src/learning.integration.test.ts +0 -1815
package/src/learning.ts +0 -1079
package/src/logger.test.ts +0 -189
package/src/logger.ts +0 -135
package/src/mandate-promotion.test.ts +0 -473
package/src/mandate-promotion.ts +0 -239
package/src/mandate-storage.integration.test.ts +0 -601
package/src/mandate-storage.test.ts +0 -578
package/src/mandate-storage.ts +0 -794
package/src/mandates.ts +0 -540
package/src/memory-tools.test.ts +0 -195
package/src/memory-tools.ts +0 -344
package/src/memory.integration.test.ts +0 -334
package/src/memory.test.ts +0 -158
package/src/memory.ts +0 -527
package/src/model-selection.test.ts +0 -188
package/src/model-selection.ts +0 -68
package/src/observability-tools.test.ts +0 -359
package/src/observability-tools.ts +0 -871
package/src/output-guardrails.test.ts +0 -438
package/src/output-guardrails.ts +0 -381
package/src/pattern-maturity.test.ts +0 -1160
package/src/pattern-maturity.ts +0 -525
package/src/planning-guardrails.test.ts +0 -491
package/src/planning-guardrails.ts +0 -438
package/src/plugin.ts +0 -23
package/src/post-compaction-tracker.test.ts +0 -251
package/src/post-compaction-tracker.ts +0 -237
package/src/query-tools.test.ts +0 -636
package/src/query-tools.ts +0 -324
package/src/rate-limiter.integration.test.ts +0 -466
package/src/rate-limiter.ts +0 -774
package/src/replay-tools.test.ts +0 -496
package/src/replay-tools.ts +0 -240
package/src/repo-crawl.integration.test.ts +0 -441
package/src/repo-crawl.ts +0 -610
package/src/schemas/cell-events.test.ts +0 -347
package/src/schemas/cell-events.ts +0 -807
package/src/schemas/cell.ts +0 -257
package/src/schemas/evaluation.ts +0 -166
package/src/schemas/index.test.ts +0 -199
package/src/schemas/index.ts +0 -286
package/src/schemas/mandate.ts +0 -232
package/src/schemas/swarm-context.ts +0 -115
package/src/schemas/task.ts +0 -161
package/src/schemas/worker-handoff.test.ts +0 -302
package/src/schemas/worker-handoff.ts +0 -131
package/src/sessions/agent-discovery.test.ts +0 -137
package/src/sessions/agent-discovery.ts +0 -112
package/src/sessions/index.ts +0 -15
package/src/skills.integration.test.ts +0 -1192
package/src/skills.test.ts +0 -643
package/src/skills.ts +0 -1549
package/src/storage.integration.test.ts +0 -341
package/src/storage.ts +0 -884
package/src/structured.integration.test.ts +0 -817
package/src/structured.test.ts +0 -1046
package/src/structured.ts +0 -762
package/src/swarm-decompose.test.ts +0 -188
package/src/swarm-decompose.ts +0 -1302
package/src/swarm-deferred.integration.test.ts +0 -157
package/src/swarm-deferred.test.ts +0 -38
package/src/swarm-insights.test.ts +0 -214
package/src/swarm-insights.ts +0 -459
package/src/swarm-mail.integration.test.ts +0 -970
package/src/swarm-mail.ts +0 -739
package/src/swarm-orchestrate.integration.test.ts +0 -282
package/src/swarm-orchestrate.test.ts +0 -548
package/src/swarm-orchestrate.ts +0 -3084
package/src/swarm-prompts.test.ts +0 -1270
package/src/swarm-prompts.ts +0 -2077
package/src/swarm-research.integration.test.ts +0 -701
package/src/swarm-research.test.ts +0 -698
package/src/swarm-research.ts +0 -472
package/src/swarm-review.integration.test.ts +0 -285
package/src/swarm-review.test.ts +0 -879
package/src/swarm-review.ts +0 -709
package/src/swarm-strategies.ts +0 -407
package/src/swarm-worktree.test.ts +0 -501
package/src/swarm-worktree.ts +0 -575
package/src/swarm.integration.test.ts +0 -2377
package/src/swarm.ts +0 -38
package/src/tool-adapter.integration.test.ts +0 -1221
package/src/tool-availability.ts +0 -461
package/tsconfig.json +0 -28

package/evals/scorers/coordinator-discipline.ts DELETED Viewed

@@ -1,325 +0,0 @@
-/**
- * Coordinator Discipline Scorers - Evaluate coordinator behavior
- *
- * These scorers measure whether a coordinator follows the protocol:
- * 1. Don't edit files directly (spawn workers)
- * 2. Don't run tests directly (workers do verification)
- * 3. Spawn workers for all subtasks
- * 4. Review worker output before accepting
- * 5. Minimize time to first spawn (don't overthink)
- *
- * Inputs: CoordinatorSession from eval-capture
- */
-import { createScorer } from "evalite";
-import type { CoordinatorSession } from "../../src/eval-capture.js";
-/**
- * Violation Count Scorer
- *
- * Tallies every session event whose `event_type` is "VIOLATION" and
- * subtracts a fixed 0.2 penalty per violation from a perfect 1.0.
- *
- * Violations tracked:
- * - coordinator_edited_file (should spawn worker instead)
- * - coordinator_ran_tests (workers do verification)
- * - coordinator_reserved_files (only workers reserve)
- * - no_worker_spawned (subtask exists but no worker)
- *
- * Score: 1.0 - (0.2 * violation_count), floored at 0.0.
- * Output that cannot be parsed, or that has no filterable `events`, scores 0.
- */
-export const violationCount = createScorer({
-  name: "Violation Count",
-  description: "Coordinator followed protocol (no direct edits, tests, or reservations)",
-  scorer: ({ output }) => {
-    const PENALTY_PER_VIOLATION = 0.2;
-    try {
-      const session = JSON.parse(String(output)) as CoordinatorSession;
-      const total = session.events.filter(
-        (event) => event.event_type === "VIOLATION"
-      ).length;
-      // A clean session gets its own message; otherwise apply the penalty.
-      return total === 0
-        ? {
-            score: 1.0,
-            message: "Perfect - 0 violations",
-          }
-        : {
-            score: Math.max(0, 1.0 - total * PENALTY_PER_VIOLATION),
-            message: `${total} violations detected`,
-          };
-    } catch (error) {
-      // Covers both JSON syntax errors and a missing/non-array `events`.
-      return {
-        score: 0,
-        message: `Failed to parse CoordinatorSession: ${error}`,
-      };
-    }
-  },
-});
-/**
- * Spawn Efficiency Scorer
- *
- * Measures whether workers were spawned for all subtasks.
- * Coordinators should delegate work, not do it themselves.
- *
- * Score: min(1, workers_spawned / subtasks_planned)
- *
- * The ratio is capped at 1.0 so that a session with more worker_spawned
- * events than planned subtasks still yields a score inside the 0-1 range.
- *
- * If no decomposition_complete event exists, falls back to counting spawns
- * and returns 1.0 if any workers were spawned (better than nothing).
- * A missing, non-numeric, zero, or negative subtask_count scores 0.
- * Output that cannot be parsed scores 0.
- */
-export const spawnEfficiency = createScorer({
-  name: "Spawn Efficiency",
-  description: "Workers spawned for all subtasks (delegation ratio)",
-  scorer: ({ output }) => {
-    try {
-      const session = JSON.parse(String(output)) as CoordinatorSession;
-      // Find decomposition_complete event (has subtask count)
-      const decomp = session.events.find(
-        (e) =>
-          e.event_type === "DECISION" &&
-          e.decision_type === "decomposition_complete"
-      );
-      // Count worker_spawned events
-      const spawned = session.events.filter(
-        (e) =>
-          e.event_type === "DECISION" && e.decision_type === "worker_spawned"
-      ).length;
-      if (!decomp) {
-        // Fallback: if workers were spawned but no decomp event, assume they're doing work
-        if (spawned > 0) {
-          return {
-            score: 1.0,
-            message: `${spawned} workers spawned (no decomposition event)`,
-          };
-        }
-        return {
-          score: 0,
-          message: "No decomposition event found",
-        };
-      }
-      // Coerce defensively: a missing or non-numeric subtask_count becomes 0
-      // instead of leaking NaN into the score.
-      const subtaskCount =
-        Number((decomp.payload as { subtask_count?: number })?.subtask_count) || 0;
-      if (subtaskCount <= 0) {
-        return {
-          score: 0,
-          message: "No subtasks planned",
-        };
-      }
-      // Cap at 1.0: extra spawn events must not push the score out of range.
-      const score = Math.min(1, spawned / subtaskCount);
-      return {
-        score,
-        message: `${spawned}/${subtaskCount} workers spawned (${(score * 100).toFixed(0)}%)`,
-      };
-    } catch (error) {
-      return {
-        score: 0,
-        message: `Failed to parse CoordinatorSession: ${error}`,
-      };
-    }
-  },
-});
-/**
- * Review Thoroughness Scorer
- *
- * Measures whether coordinator reviewed worker output.
- * Should have review_completed events for all finished subtasks.
- *
- * Score: min(1, reviews_completed / workers_finished)
- *
- * The ratio is capped at 1.0 so that a session containing more
- * review_completed events than finished outcomes stays inside the 0-1 range.
- * A session with no finished workers scores 1.0 (nothing to review).
- * Output that cannot be parsed scores 0.
- */
-export const reviewThoroughness = createScorer({
-  name: "Review Thoroughness",
-  description: "Coordinator reviewed all worker output",
-  scorer: ({ output }) => {
-    try {
-      const session = JSON.parse(String(output)) as CoordinatorSession;
-      // Count finished workers (subtask_success or subtask_failed)
-      const finished = session.events.filter(
-        (e) =>
-          e.event_type === "OUTCOME" &&
-          (e.outcome_type === "subtask_success" ||
-            e.outcome_type === "subtask_failed")
-      ).length;
-      if (finished === 0) {
-        return {
-          score: 1.0,
-          message: "No finished workers to review",
-        };
-      }
-      // Count review_completed events
-      const reviewed = session.events.filter(
-        (e) =>
-          e.event_type === "DECISION" && e.decision_type === "review_completed"
-      ).length;
-      // Cap at 1.0: surplus review events must not push the score out of range.
-      const score = Math.min(1, reviewed / finished);
-      return {
-        score,
-        message: `${reviewed}/${finished} workers reviewed (${(score * 100).toFixed(0)}%)`,
-      };
-    } catch (error) {
-      return {
-        score: 0,
-        message: `Failed to parse CoordinatorSession: ${error}`,
-      };
-    }
-  },
-});
-/**
- * Time to First Spawn Scorer
- *
- * Measures how fast the coordinator spawned the first worker, as the time
- * between the decomposition_complete event and the first worker_spawned event.
- * Overthinking and perfectionism delays workers and blocks progress.
- *
- * Normalization:
- * - < 60s: 1.0 (excellent)
- * - 60-300s: linear decay from 1.0 to 0.0
- * - > 300s: 0.0 (way too slow)
- *
- * Scores 0 when either event is missing, when either timestamp cannot be
- * parsed as a date, or when the output cannot be parsed.
- *
- * Score: normalized to 0-1 (faster is better)
- */
-export const timeToFirstSpawn = createScorer({
-  name: "Time to First Spawn",
-  description: "Coordinator spawned workers quickly (no overthinking)",
-  scorer: ({ output }) => {
-    try {
-      const session = JSON.parse(String(output)) as CoordinatorSession;
-      // Find decomposition_complete event
-      const decomp = session.events.find(
-        (e) =>
-          e.event_type === "DECISION" &&
-          e.decision_type === "decomposition_complete"
-      );
-      if (!decomp) {
-        return {
-          score: 0,
-          message: "No decomposition event found",
-        };
-      }
-      // Find first worker_spawned event
-      const firstSpawn = session.events.find(
-        (e) =>
-          e.event_type === "DECISION" && e.decision_type === "worker_spawned"
-      );
-      if (!firstSpawn) {
-        return {
-          score: 0,
-          message: "No worker spawned",
-        };
-      }
-      // Calculate time delta
-      const decompTime = new Date(decomp.timestamp).getTime();
-      const spawnTime = new Date(firstSpawn.timestamp).getTime();
-      const deltaMs = spawnTime - decompTime;
-      // An unparseable timestamp yields NaN, which would slip past both
-      // threshold comparisons below and produce a NaN score.
-      if (Number.isNaN(deltaMs)) {
-        return {
-          score: 0,
-          message: "Invalid event timestamp",
-        };
-      }
-      // Normalize: < 60s = 1.0, > 300s = 0.0, linear in between
-      const EXCELLENT_MS = 60_000;
-      const POOR_MS = 300_000;
-      let score: number;
-      if (deltaMs < EXCELLENT_MS) {
-        score = 1.0;
-      } else if (deltaMs > POOR_MS) {
-        score = 0.0;
-      } else {
-        // Linear decay from 1.0 to 0.0
-        score = 1.0 - (deltaMs - EXCELLENT_MS) / (POOR_MS - EXCELLENT_MS);
-      }
-      const seconds = Math.round(deltaMs / 1000);
-      return {
-        score,
-        message: `First spawn after ${deltaMs}ms (${seconds}s)`,
-      };
-    } catch (error) {
-      return {
-        score: 0,
-        message: `Failed to parse CoordinatorSession: ${error}`,
-      };
-    }
-  },
-});
-/**
- * Overall Discipline Scorer
- *
- * Runs the four discipline scorers one after another on the same
- * output/expected/input and combines their scores into a weighted sum.
- *
- * Weights:
- * - Violations: 30% (most critical - breaking protocol)
- * - Spawn efficiency: 25% (delegation is key)
- * - Review thoroughness: 25% (quality gate)
- * - Time to first spawn: 20% (bias toward action)
- *
- * A component whose score is null/undefined contributes 0.
- * Any error thrown while computing the composite yields a score of 0.
- */
-export const overallDiscipline = createScorer({
-  name: "Overall Coordinator Discipline",
-  description: "Composite score for coordinator protocol adherence",
-  scorer: async ({ output, expected, input }) => {
-    try {
-      // Order matters: it fixes both the summation order and the message order.
-      const components = [
-        { label: "Violations", weight: 0.3, run: violationCount },
-        { label: "Spawn", weight: 0.25, run: spawnEfficiency },
-        { label: "Review", weight: 0.25, run: reviewThoroughness },
-        { label: "Speed", weight: 0.2, run: timeToFirstSpawn },
-      ];
-      // Run every scorer first, so a failure in any of them aborts before summing.
-      const values: number[] = [];
-      for (const { run } of components) {
-        const result = await run({ output, expected, input });
-        values.push(result.score ?? 0);
-      }
-      let totalScore = 0;
-      const parts: string[] = [];
-      components.forEach(({ label, weight }, index) => {
-        const value = values[index];
-        totalScore += value * weight;
-        parts.push(`${label}: ${(value * 100).toFixed(0)}%`);
-      });
-      const details = parts.join(", ");
-      return {
-        score: totalScore,
-        message: `Overall: ${(totalScore * 100).toFixed(0)}% (${details})`,
-      };
-    } catch (error) {
-      return {
-        score: 0,
-        message: `Failed to compute composite score: ${error}`,
-      };
-    }
-  },
-});

package/evals/scorers/index.test.ts DELETED Viewed

@@ -1,146 +0,0 @@
-/**
- * Tests for decomposition scorers
- *
- * Uses Vitest (evalite's test runner), not Bun's test runner.
- *
- * Note: evalite's Score type only exposes `score`, not `message`.
- * We test scores only - message testing requires accessing internal scorer.
- */
-import { describe, expect, test } from "vitest";
-import {
-  coverageCompleteness,
-  decompositionCoherence,
-  instructionClarity,
-  subtaskIndependence,
-} from "./index.js";
-// Heuristic (non-LLM) scorers: each test feeds a JSON-encoded decomposition
-// to one scorer and checks only the numeric score.
-describe("Heuristic Scorers", () => {
-  // Epic shared by both fixtures.
-  const authEpic = { title: "Add auth", description: "Add authentication" };
-  // Three subtasks, each touching its own file.
-  const goodDecomposition = JSON.stringify({
-    epic: authEpic,
-    subtasks: [
-      {
-        title: "Add login form component",
-        description: "Create React component for login with email/password",
-        files: ["src/components/LoginForm.tsx"],
-      },
-      {
-        title: "Add auth API routes",
-        description: "Create API endpoints for login/logout/session",
-        files: ["src/api/auth.ts"],
-      },
-      {
-        title: "Add auth middleware",
-        description: "Create middleware to protect routes",
-        files: ["src/middleware/auth.ts"],
-      },
-    ],
-  });
-  // Two subtasks that both claim src/auth.ts - a deliberate file conflict.
-  const conflictingDecomposition = JSON.stringify({
-    epic: authEpic,
-    subtasks: [
-      { title: "Add login", files: ["src/auth.ts"] },
-      { title: "Add logout", files: ["src/auth.ts"] },
-    ],
-  });
-  test("subtaskIndependence scores 1.0 for no conflicts", async () => {
-    const { score } = await subtaskIndependence({
-      output: goodDecomposition,
-      expected: undefined,
-      input: {},
-    });
-    expect(score).toBe(1);
-  });
-  test("subtaskIndependence scores 0 for file conflicts", async () => {
-    const { score } = await subtaskIndependence({
-      output: conflictingDecomposition,
-      expected: undefined,
-      input: {},
-    });
-    expect(score).toBe(0);
-  });
-  test("instructionClarity scores higher for detailed subtasks", async () => {
-    const { score } = await instructionClarity({
-      output: goodDecomposition,
-      expected: undefined,
-      input: {},
-    });
-    expect(score).toBeGreaterThan(0.7);
-  });
-  test("coverageCompleteness checks subtask count", async () => {
-    const { score } = await coverageCompleteness({
-      output: goodDecomposition,
-      expected: { minSubtasks: 2, maxSubtasks: 5 },
-      input: {},
-    });
-    expect(score).toBe(1);
-  });
-});
-// LLM-backed scorer tests. They need AI_GATEWAY_API_KEY; without it they are
-// reported as skipped rather than silently passing via an early return.
-describe("LLM-as-Judge Scorer", () => {
-  // Single source of truth for the API-key check used by every test below.
-  const hasApiKey = Boolean(process.env.AI_GATEWAY_API_KEY);
-  // Per-test timeout for the LLM round trip.
-  const LLM_TIMEOUT_MS = 30000;
-  test.skipIf(!hasApiKey)(
-    "decompositionCoherence returns valid score",
-    async () => {
-      const decomposition = JSON.stringify({
-        epic: { title: "Add auth", description: "Add authentication" },
-        subtasks: [
-          {
-            title: "Add login form",
-            description: "Create login UI",
-            files: ["src/LoginForm.tsx"],
-          },
-          {
-            title: "Add auth API",
-            description: "Create auth endpoints",
-            files: ["src/api/auth.ts"],
-          },
-        ],
-      });
-      const result = await decompositionCoherence({
-        output: decomposition,
-        expected: undefined,
-        input: { task: "Add user authentication with login/logout" },
-      });
-      expect(result.score).toBeGreaterThanOrEqual(0);
-      expect(result.score).toBeLessThanOrEqual(1);
-    },
-    LLM_TIMEOUT_MS,
-  );
-  test.skipIf(!hasApiKey)(
-    "decompositionCoherence scores invalid decomposition low",
-    async () => {
-      const result = await decompositionCoherence({
-        output: "not valid json at all {{{",
-        expected: undefined,
-        input: {},
-      });
-      // LLM should recognize garbage input and score it very low
-      // (0 or close to 0, not 0.5 fallback)
-      expect(result.score).toBeLessThanOrEqual(0.2);
-    },
-    LLM_TIMEOUT_MS,
-  );
-});