npm - opencode-swarm-plugin - Versions diffs - 0.44.0 → 0.44.1 - Mend

opencode-swarm-plugin 0.44.0 → 0.44.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205)

package/bin/swarm.serve.test.ts +6 -4
package/bin/swarm.ts +16 -10
package/dist/compaction-prompt-scoring.js +139 -0
package/dist/eval-capture.js +12811 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.js +7644 -62599
package/dist/plugin.js +23766 -78721
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-review.d.ts.map +1 -1
package/package.json +17 -5
package/.changeset/swarm-insights-data-layer.md +0 -63
package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
package/.hive/analysis/session-data-quality-audit.md +0 -320
package/.hive/eval-results.json +0 -483
package/.hive/issues.jsonl +0 -138
package/.hive/memories.jsonl +0 -729
package/.opencode/eval-history.jsonl +0 -327
package/.turbo/turbo-build.log +0 -9
package/CHANGELOG.md +0 -2286
package/SCORER-ANALYSIS.md +0 -598
package/docs/analysis/subagent-coordination-patterns.md +0 -902
package/docs/analysis-socratic-planner-pattern.md +0 -504
package/docs/planning/ADR-001-monorepo-structure.md +0 -171
package/docs/planning/ADR-002-package-extraction.md +0 -393
package/docs/planning/ADR-003-performance-improvements.md +0 -451
package/docs/planning/ADR-004-message-queue-features.md +0 -187
package/docs/planning/ADR-005-devtools-observability.md +0 -202
package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
package/docs/planning/ADR-010-cass-inhousing.md +0 -1215
package/docs/planning/ROADMAP.md +0 -368
package/docs/semantic-memory-cli-syntax.md +0 -123
package/docs/swarm-mail-architecture.md +0 -1147
package/docs/testing/context-recovery-test.md +0 -470
package/evals/ARCHITECTURE.md +0 -1189
package/evals/README.md +0 -768
package/evals/compaction-prompt.eval.ts +0 -149
package/evals/compaction-resumption.eval.ts +0 -289
package/evals/coordinator-behavior.eval.ts +0 -307
package/evals/coordinator-session.eval.ts +0 -154
package/evals/evalite.config.ts.bak +0 -15
package/evals/example.eval.ts +0 -31
package/evals/fixtures/cass-baseline.ts +0 -217
package/evals/fixtures/compaction-cases.ts +0 -350
package/evals/fixtures/compaction-prompt-cases.ts +0 -311
package/evals/fixtures/coordinator-sessions.ts +0 -328
package/evals/fixtures/decomposition-cases.ts +0 -105
package/evals/lib/compaction-loader.test.ts +0 -248
package/evals/lib/compaction-loader.ts +0 -320
package/evals/lib/data-loader.evalite-test.ts +0 -289
package/evals/lib/data-loader.test.ts +0 -345
package/evals/lib/data-loader.ts +0 -281
package/evals/lib/llm.ts +0 -115
package/evals/scorers/compaction-prompt-scorers.ts +0 -145
package/evals/scorers/compaction-scorers.ts +0 -305
package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
package/evals/scorers/coordinator-discipline.ts +0 -325
package/evals/scorers/index.test.ts +0 -146
package/evals/scorers/index.ts +0 -328
package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
package/evals/scorers/outcome-scorers.ts +0 -349
package/evals/swarm-decomposition.eval.ts +0 -121
package/examples/commands/swarm.md +0 -745
package/examples/plugin-wrapper-template.ts +0 -2515
package/examples/skills/hive-workflow/SKILL.md +0 -212
package/examples/skills/skill-creator/SKILL.md +0 -223
package/examples/skills/swarm-coordination/SKILL.md +0 -292
package/global-skills/cli-builder/SKILL.md +0 -344
package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
package/global-skills/learning-systems/SKILL.md +0 -644
package/global-skills/skill-creator/LICENSE.txt +0 -202
package/global-skills/skill-creator/SKILL.md +0 -352
package/global-skills/skill-creator/references/output-patterns.md +0 -82
package/global-skills/skill-creator/references/workflows.md +0 -28
package/global-skills/swarm-coordination/SKILL.md +0 -995
package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
package/global-skills/swarm-coordination/references/strategies.md +0 -138
package/global-skills/system-design/SKILL.md +0 -213
package/global-skills/testing-patterns/SKILL.md +0 -430
package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
package/opencode-swarm-plugin-0.30.7.tgz +0 -0
package/opencode-swarm-plugin-0.31.0.tgz +0 -0
package/scripts/cleanup-test-memories.ts +0 -346
package/scripts/init-skill.ts +0 -222
package/scripts/migrate-unknown-sessions.ts +0 -349
package/scripts/validate-skill.ts +0 -204
package/src/agent-mail.ts +0 -1724
package/src/anti-patterns.test.ts +0 -1167
package/src/anti-patterns.ts +0 -448
package/src/compaction-capture.integration.test.ts +0 -257
package/src/compaction-hook.test.ts +0 -838
package/src/compaction-hook.ts +0 -1204
package/src/compaction-observability.integration.test.ts +0 -139
package/src/compaction-observability.test.ts +0 -187
package/src/compaction-observability.ts +0 -324
package/src/compaction-prompt-scorers.test.ts +0 -475
package/src/compaction-prompt-scoring.ts +0 -300
package/src/contributor-tools.test.ts +0 -133
package/src/contributor-tools.ts +0 -201
package/src/dashboard.test.ts +0 -611
package/src/dashboard.ts +0 -462
package/src/error-enrichment.test.ts +0 -403
package/src/error-enrichment.ts +0 -219
package/src/eval-capture.test.ts +0 -1015
package/src/eval-capture.ts +0 -929
package/src/eval-gates.test.ts +0 -306
package/src/eval-gates.ts +0 -218
package/src/eval-history.test.ts +0 -508
package/src/eval-history.ts +0 -214
package/src/eval-learning.test.ts +0 -378
package/src/eval-learning.ts +0 -360
package/src/eval-runner.test.ts +0 -223
package/src/eval-runner.ts +0 -402
package/src/export-tools.test.ts +0 -476
package/src/export-tools.ts +0 -257
package/src/hive.integration.test.ts +0 -2241
package/src/hive.ts +0 -1628
package/src/index.ts +0 -940
package/src/learning.integration.test.ts +0 -1815
package/src/learning.ts +0 -1079
package/src/logger.test.ts +0 -189
package/src/logger.ts +0 -135
package/src/mandate-promotion.test.ts +0 -473
package/src/mandate-promotion.ts +0 -239
package/src/mandate-storage.integration.test.ts +0 -601
package/src/mandate-storage.test.ts +0 -578
package/src/mandate-storage.ts +0 -794
package/src/mandates.ts +0 -540
package/src/memory-tools.test.ts +0 -195
package/src/memory-tools.ts +0 -344
package/src/memory.integration.test.ts +0 -334
package/src/memory.test.ts +0 -158
package/src/memory.ts +0 -527
package/src/model-selection.test.ts +0 -188
package/src/model-selection.ts +0 -68
package/src/observability-tools.test.ts +0 -359
package/src/observability-tools.ts +0 -871
package/src/output-guardrails.test.ts +0 -438
package/src/output-guardrails.ts +0 -381
package/src/pattern-maturity.test.ts +0 -1160
package/src/pattern-maturity.ts +0 -525
package/src/planning-guardrails.test.ts +0 -491
package/src/planning-guardrails.ts +0 -438
package/src/plugin.ts +0 -23
package/src/post-compaction-tracker.test.ts +0 -251
package/src/post-compaction-tracker.ts +0 -237
package/src/query-tools.test.ts +0 -636
package/src/query-tools.ts +0 -324
package/src/rate-limiter.integration.test.ts +0 -466
package/src/rate-limiter.ts +0 -774
package/src/replay-tools.test.ts +0 -496
package/src/replay-tools.ts +0 -240
package/src/repo-crawl.integration.test.ts +0 -441
package/src/repo-crawl.ts +0 -610
package/src/schemas/cell-events.test.ts +0 -347
package/src/schemas/cell-events.ts +0 -807
package/src/schemas/cell.ts +0 -257
package/src/schemas/evaluation.ts +0 -166
package/src/schemas/index.test.ts +0 -199
package/src/schemas/index.ts +0 -286
package/src/schemas/mandate.ts +0 -232
package/src/schemas/swarm-context.ts +0 -115
package/src/schemas/task.ts +0 -161
package/src/schemas/worker-handoff.test.ts +0 -302
package/src/schemas/worker-handoff.ts +0 -131
package/src/sessions/agent-discovery.test.ts +0 -137
package/src/sessions/agent-discovery.ts +0 -112
package/src/sessions/index.ts +0 -15
package/src/skills.integration.test.ts +0 -1192
package/src/skills.test.ts +0 -643
package/src/skills.ts +0 -1549
package/src/storage.integration.test.ts +0 -341
package/src/storage.ts +0 -884
package/src/structured.integration.test.ts +0 -817
package/src/structured.test.ts +0 -1046
package/src/structured.ts +0 -762
package/src/swarm-decompose.test.ts +0 -188
package/src/swarm-decompose.ts +0 -1302
package/src/swarm-deferred.integration.test.ts +0 -157
package/src/swarm-deferred.test.ts +0 -38
package/src/swarm-insights.test.ts +0 -214
package/src/swarm-insights.ts +0 -459
package/src/swarm-mail.integration.test.ts +0 -970
package/src/swarm-mail.ts +0 -739
package/src/swarm-orchestrate.integration.test.ts +0 -282
package/src/swarm-orchestrate.test.ts +0 -548
package/src/swarm-orchestrate.ts +0 -3084
package/src/swarm-prompts.test.ts +0 -1270
package/src/swarm-prompts.ts +0 -2077
package/src/swarm-research.integration.test.ts +0 -701
package/src/swarm-research.test.ts +0 -698
package/src/swarm-research.ts +0 -472
package/src/swarm-review.integration.test.ts +0 -285
package/src/swarm-review.test.ts +0 -879
package/src/swarm-review.ts +0 -709
package/src/swarm-strategies.ts +0 -407
package/src/swarm-worktree.test.ts +0 -501
package/src/swarm-worktree.ts +0 -575
package/src/swarm.integration.test.ts +0 -2377
package/src/swarm.ts +0 -38
package/src/tool-adapter.integration.test.ts +0 -1221
package/src/tool-availability.ts +0 -461
package/tsconfig.json +0 -28

package/src/eval-runner.ts (deleted)

@@ -1,402 +0,0 @@
-/**
- * Programmatic Evalite Runner
- *
- * Provides a type-safe API for running evalite evals programmatically.
- * Wraps evalite's runEvalite function with structured result parsing.
- *
- * @module eval-runner
- */
-import { tool } from "@opencode-ai/plugin";
-import { runEvalite } from "evalite/runner";
-import { createInMemoryStorage } from "evalite/in-memory-storage";
-import type { Evalite } from "evalite/types";
-import fs from "node:fs/promises";
-import path from "node:path";
-import { recordEvalRun, getScoreHistory } from "./eval-history.js";
-import { checkGate } from "./eval-gates.js";
-import { learnFromEvalFailure } from "./eval-learning.js";
-import { getMemoryAdapter } from "./memory-tools.js";
-/**
- * Options for running evals programmatically.
- *
- * Every field is optional; `runEvals()` may be called with no argument.
- */
-export interface RunEvalsOptions {
-  /**
-   * Working directory containing eval files (defaults to process.cwd()).
-   * Evals are looked up in an `evals/` directory under the project root; a
-   * `src` directory passed here is resolved to its parent first.
-   */
-  cwd?: string;
-  /**
-   * Optional filter to run specific eval suites (e.g., "coordinator", "compaction").
-   * Compared case-insensitively, as a substring, against the entry names found
-   * directly inside the `evals/` directory. Only the first matching entry is
-   * handed to evalite; when nothing matches, an empty successful result is
-   * returned without running anything.
-   */
-  suiteFilter?: string;
-  /**
-   * Minimum average score threshold (0-100).
-   * The mean of the per-suite average scores (0-1) is multiplied by 100 and
-   * compared with this value; if it falls below, result.success will be false.
-   * The value is also forwarded to evalite itself.
-   */
-  scoreThreshold?: number;
-  /**
-   * Optional path to write raw evalite JSON output.
-   * When omitted, a `.evalite-results-<timestamp>.json` file is created in the
-   * project root and deleted again once it has been parsed.
-   */
-  outputPath?: string;
-}
-/**
- * Structured suite result with scores.
- *
- * One entry is produced per suite found in evalite's exported JSON output;
- * the fields are copied from that output by `runEvals`.
- */
-export interface SuiteResult {
-  /** Suite name from evalite() call */
-  name: string;
-  /** Absolute path to eval file */
-  filepath: string;
-  /** Suite status: success, fail, or running */
-  status: "success" | "fail" | "running";
-  /** Total duration in milliseconds */
-  duration: number;
-  /** Average score across all evals in suite (0-1 scale) */
-  averageScore: number;
-  /** Number of evals in this suite (length of the suite's eval list) */
-  evalCount: number;
-  /**
-   * Individual eval results (optional, can be large).
-   * `runEvals` always fills this in; the `eval_run` tool deletes it from each
-   * suite unless detailed results were explicitly requested.
-   */
-  evals?: Array<{
-    input: unknown;
-    output: unknown;
-    expected?: unknown;
-    scores: Array<{
-      name: string;
-      score: number;
-      description?: string;
-    }>;
-  }>;
-}
-/**
- * Structured result from running evals.
- *
- * Returned by `runEvals` on every path, including failures: errors are
- * reported through `success: false` plus `error` rather than by throwing.
- */
-export interface RunEvalsResult {
-  /**
-   * Whether the run succeeded.
-   * - `false` whenever `error` is set (unreadable evals directory, missing
-   *   results file, or any other exception during the run).
-   * - `true` when a suite filter matched no files (nothing was run).
-   * - Otherwise `true` when no score threshold was given, or when the overall
-   *   `averageScore` (scaled to 0-100) is at or above the threshold. This is a
-   *   check on the average, not on each individual eval.
-   */
-  success: boolean;
-  /** Total number of suites executed */
-  totalSuites: number;
-  /** Total number of individual evals executed (sum of each suite's evalCount) */
-  totalEvals: number;
-  /** Unweighted mean of the per-suite average scores (0-1 scale); 0 when no suites ran */
-  averageScore: number;
-  /** Individual suite results */
-  suites: SuiteResult[];
-  /** Error message if run failed */
-  error?: string;
-  /**
-   * Gate check results per suite.
-   * Only present when the run completed normally; each entry is the suite
-   * name combined with whatever the gate check returned for that suite.
-   */
-  gateResults?: Array<{
-    suite: string;
-    passed: boolean;
-    phase: string;
-    message: string;
-    baseline?: number;
-    currentScore: number;
-    regressionPercent?: number;
-  }>;
-}
-/**
- * Build the result returned when a run could not produce any suite data.
- */
-function failedRunResult(error: string): RunEvalsResult {
-  return {
-    success: false,
-    totalSuites: 0,
-    totalEvals: 0,
-    averageScore: 0,
-    suites: [],
-    error,
-  };
-}
-/**
- * Turn an unknown thrown value into a printable message.
- */
-function errorMessage(err: unknown): string {
-  return err instanceof Error ? err.message : String(err);
-}
-/**
- * Run evalite evals programmatically
- *
- * Steps, in order: resolve the project root and the eval path, run evalite
- * once with an in-memory storage, parse the JSON file evalite wrote, record
- * each suite's score to the eval history, run the gate check per suite
- * (triggering best-effort learning on gate failure), then aggregate.
- *
- * This function does not throw: every failure is reported as a result with
- * `success: false` and an `error` message.
- *
- * @param options - Configuration for eval run
- * @returns Structured results with scores per suite
- *
- * @example
- * ```typescript
- * // Run all evals
- * const result = await runEvals({ cwd: "/path/to/project" });
- * console.log(`Average score: ${result.averageScore}`);
- *
- * // Run specific suite
- * const coordResult = await runEvals({
- *   cwd: "/path/to/project",
- *   suiteFilter: "coordinator"
- * });
- *
- * // Enforce score threshold
- * const gatedResult = await runEvals({
- *   cwd: "/path/to/project",
- *   scoreThreshold: 80
- * });
- * if (!gatedResult.success) {
- *   throw new Error(`Evals failed threshold: ${gatedResult.averageScore}`);
- * }
- * ```
- */
-export async function runEvals(
-  options: RunEvalsOptions = {}
-): Promise<RunEvalsResult> {
-  const {
-    cwd = process.cwd(),
-    suiteFilter,
-    scoreThreshold,
-    outputPath: userOutputPath,
-  } = options;
-  try {
-    // Resolve to project root (evals are in evals/ relative to project root).
-    // Compare the last path segment rather than the string suffix so that a
-    // directory such as "mysrc" is not mistaken for src/, and "src/" with a
-    // trailing separator is still recognised.
-    const projectRoot =
-      path.basename(cwd) === "src" ? path.dirname(cwd) : cwd;
-    const evalsDir = path.join(projectRoot, "evals");
-    // No filter - run all evals in evals/
-    let evalPath = evalsDir;
-    if (suiteFilter) {
-      let entries: string[];
-      try {
-        entries = await fs.readdir(evalsDir);
-      } catch (err) {
-        // Directory doesn't exist or can't be read
-        return failedRunResult(
-          `Failed to read evals directory: ${errorMessage(err)}`
-        );
-      }
-      const needle = suiteFilter.toLowerCase();
-      // readdir order is not specified; sort so the chosen file is stable.
-      const [firstMatch] = entries
-        .filter((entry) => entry.toLowerCase().includes(needle))
-        .sort();
-      if (firstMatch === undefined) {
-        // No matches - return empty result (not an error)
-        return {
-          success: true,
-          totalSuites: 0,
-          totalEvals: 0,
-          averageScore: 0,
-          suites: [],
-        };
-      }
-      // NOTE(review): only the first match is passed to evalite; whether
-      // evalite picks up further matching files on its own is not visible
-      // from this module - confirm.
-      evalPath = path.join(evalsDir, firstMatch);
-    }
-    // Use temporary output path if user didn't provide one
-    const outputPath =
-      userOutputPath || path.join(projectRoot, `.evalite-results-${Date.now()}.json`);
-    const isTemporaryOutput = !userOutputPath;
-    let output: Evalite.Exported.Output;
-    try {
-      // Run evalite programmatically
-      const storage = createInMemoryStorage();
-      await runEvalite({
-        path: evalPath,
-        cwd: projectRoot, // Use project root as working directory
-        mode: "run-once",
-        scoreThreshold,
-        outputPath,
-        hideTable: true, // Suppress terminal output
-        storage,
-        disableServer: true, // No UI server needed
-      });
-      // Parse output file for structured results
-      let outputJson: string;
-      try {
-        outputJson = await fs.readFile(outputPath, "utf-8");
-      } catch (err) {
-        // Output file wasn't written - evalite crashed or no tests ran
-        return failedRunResult(
-          `No results file generated: ${errorMessage(err)}`
-        );
-      }
-      output = JSON.parse(outputJson);
-    } finally {
-      // Clean up the temporary output file on every exit path, including a
-      // throwing runEvalite or malformed JSON, so it is never left behind.
-      if (isTemporaryOutput) {
-        await fs.unlink(outputPath).catch(() => {
-          /* ignore cleanup errors */
-        });
-      }
-    }
-    // Transform to structured result
-    const suites: SuiteResult[] = output.suites.map((suite) => ({
-      name: suite.name,
-      filepath: suite.filepath,
-      status: suite.status,
-      duration: suite.duration,
-      averageScore: suite.averageScore,
-      evalCount: suite.evals.length,
-      // Always included here; callers may strip them to keep results small
-      evals: suite.evals.map((e) => ({
-        input: e.input,
-        output: e.output,
-        expected: e.expected,
-        scores: e.scores.map((s) => ({
-          name: s.name,
-          score: s.score,
-          description: s.description,
-        })),
-      })),
-    }));
-    // Record eval runs to history
-    for (const suite of suites) {
-      const history = getScoreHistory(projectRoot, suite.name);
-      recordEvalRun(projectRoot, {
-        timestamp: new Date().toISOString(),
-        eval_name: suite.name,
-        score: suite.averageScore,
-        run_count: history.length + 1,
-      });
-    }
-    // Check gates for each suite
-    const gateResults = [];
-    for (const suite of suites) {
-      // Fetched after recording above, so this history is read once the
-      // current run has been recorded.
-      const history = getScoreHistory(projectRoot, suite.name);
-      const gate = checkGate(projectRoot, suite.name, suite.averageScore);
-      gateResults.push({ suite: suite.name, ...gate });
-      // If gate failed, trigger learning
-      if (!gate.passed) {
-        try {
-          const memoryAdapter = await getMemoryAdapter();
-          await learnFromEvalFailure(suite.name, suite.averageScore, history, memoryAdapter);
-        } catch (e) {
-          // Learning is best-effort, don't fail the eval run
-          console.warn(`Failed to store learning for ${suite.name}:`, e);
-        }
-      }
-    }
-    // Calculate overall metrics
-    const totalEvals = suites.reduce((sum, s) => sum + s.evalCount, 0);
-    const averageScore =
-      suites.length > 0
-        ? suites.reduce((sum, s) => sum + s.averageScore, 0) / suites.length
-        : 0;
-    // Determine success based on threshold (threshold is 0-100, score is 0-1)
-    const thresholdPassed =
-      scoreThreshold === undefined || averageScore * 100 >= scoreThreshold;
-    return {
-      success: thresholdPassed,
-      totalSuites: suites.length,
-      totalEvals,
-      averageScore,
-      suites,
-      gateResults,
-    };
-  } catch (error) {
-    // Anything unexpected (evalite failure, malformed results JSON, history
-    // or gate errors) is reported as a failed run instead of being thrown.
-    return failedRunResult(errorMessage(error));
-  }
-}
-// ============================================================================
-// Plugin Tool
-// ============================================================================
-/**
- * `eval_run` plugin tool.
- *
- * Thin wrapper around `runEvals` that always uses the current process
- * working directory and returns the result as pretty-printed JSON. Per-eval
- * details are dropped from every suite unless `includeDetailedResults` is
- * exactly `true`.
- */
-const eval_run = tool({
-  description: `Run evalite evals programmatically and get structured results with scores.
-Use this to:
-- Run all evals in evals/ directory
-- Filter by specific eval suite (e.g., "coordinator", "compaction")
-- Enforce score thresholds for quality gates
-- Get per-suite and per-eval scores
-Returns structured JSON with:
-- success: boolean (true if all tests passed threshold)
-- totalSuites: number of eval suites run
-- totalEvals: number of individual test cases
-- averageScore: 0-1 score across all suites
-- suites: array of suite results with scores
-Example usage:
-- Run all evals: eval_run()
-- Run coordinator evals: eval_run({ suiteFilter: "coordinator" })
-- Enforce 80% threshold: eval_run({ scoreThreshold: 80 })`,
-  args: {
-    suiteFilter: tool.schema.string().optional().describe(
-      'Optional filter to run specific eval suite (e.g., "coordinator", "compaction"). Matches against eval file paths using substring matching.'
-    ),
-    scoreThreshold: tool.schema.number().optional().describe(
-      "Optional minimum average score threshold (0-100). If average score falls below this, result.success will be false. Useful for CI quality gates."
-    ),
-    includeDetailedResults: tool.schema.boolean().optional().describe(
-      "Include individual eval results with input/output/scores in response. Set to false (default) for summary only to save token usage."
-    ),
-  },
-  execute: async (args) => {
-    const { suiteFilter, scoreThreshold, includeDetailedResults } = args;
-    const result = await runEvals({
-      cwd: process.cwd(),
-      suiteFilter: suiteFilter as string | undefined,
-      scoreThreshold: scoreThreshold as number | undefined,
-    });
-    // Summary mode is the default: strip the bulky per-eval payload (saves tokens)
-    if (includeDetailedResults !== true) {
-      result.suites.forEach((suite) => {
-        delete suite.evals;
-      });
-    }
-    return JSON.stringify(result, null, 2);
-  },
-});
-/**
- * Eval tools keyed by tool name, exported so the plugin can register them.
- */
-export const evalTools = {
-  eval_run: eval_run,
-} as const;