npm - opencode-swarm-plugin - Versions diffs - 0.19.0 → 0.21.0 - Mend

opencode-swarm-plugin 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/.beads/issues.jsonl +205 -0
package/INTEGRATION_EXAMPLE.md +66 -0
package/README.md +127 -562
package/dist/index.js +3842 -2917
package/dist/plugin.js +3824 -2918
package/docs/analysis/subagent-coordination-patterns.md +2 -0
package/evals/README.md +116 -0
package/evals/evalite.config.ts +15 -0
package/evals/example.eval.ts +32 -0
package/evals/fixtures/decomposition-cases.ts +105 -0
package/evals/lib/data-loader.test.ts +288 -0
package/evals/lib/data-loader.ts +111 -0
package/evals/lib/llm.ts +115 -0
package/evals/scorers/index.ts +200 -0
package/evals/scorers/outcome-scorers.test.ts +27 -0
package/evals/scorers/outcome-scorers.ts +349 -0
package/evals/swarm-decomposition.eval.ts +112 -0
package/package.json +8 -1
package/src/agent-mail.ts +7 -7
package/src/beads.ts +49 -0
package/src/eval-capture.ts +487 -0
package/src/index.ts +53 -3
package/src/output-guardrails.test.ts +438 -0
package/src/output-guardrails.ts +381 -0
package/src/pattern-maturity.test.ts +1160 -0
package/src/schemas/index.ts +18 -0
package/src/schemas/swarm-context.ts +115 -0
package/src/streams/events.test.ts +296 -0
package/src/streams/events.ts +115 -0
package/src/streams/migrations.test.ts +24 -20
package/src/streams/migrations.ts +51 -0
package/src/streams/projections.ts +187 -0
package/src/streams/store.ts +275 -0
package/src/swarm-mail.ts +7 -7
package/src/swarm-orchestrate.ts +430 -1
package/src/swarm-prompts.ts +84 -12

package/src/streams/migrations.test.ts CHANGED Viewed

@@ -34,11 +34,11 @@ describe("Schema Migrations", () => {
     it("should run all migrations on fresh database", async () => {
       const result = await runMigrations(db);
-      expect(result.applied).toEqual([1, 2]);
-      expect(result.current).toBe(2);
+      expect(result.applied).toEqual([1, 2, 3, 4]);
+      expect(result.current).toBe(4);
       const version = await getCurrentVersion(db);
-      expect(version).toBe(2);
+      expect(version).toBe(4);
     });
     it("should create cursors table with correct schema", async () => {
@@ -105,16 +105,16 @@ describe("Schema Migrations", () => {
     it("should be safe to run migrations multiple times", async () => {
       // First run
       const result1 = await runMigrations(db);
-      expect(result1.applied).toEqual([1, 2]);
+      expect(result1.applied).toEqual([1, 2, 3, 4]);
       // Second run - should apply nothing
       const result2 = await runMigrations(db);
       expect(result2.applied).toEqual([]);
-      expect(result2.current).toBe(2);
+      expect(result2.current).toBe(4);
-      // Version should still be 2
+      // Version should still be 4
       const version = await getCurrentVersion(db);
-      expect(version).toBe(2);
+      expect(version).toBe(4);
     });
   });
@@ -137,8 +137,8 @@ describe("Schema Migrations", () => {
-      // Now run migrations - should only apply 2
+      // Now run migrations - should apply the remaining migrations (2, 3, 4)
       const result = await runMigrations(db);
-      expect(result.applied).toEqual([2]);
-      expect(result.current).toBe(2);
+      expect(result.applied).toEqual([2, 3, 4]);
+      expect(result.current).toBe(4);
     });
   });
@@ -146,11 +146,11 @@ describe("Schema Migrations", () => {
     it("should rollback to target version", async () => {
       // Apply all migrations
       await runMigrations(db);
-      expect(await getCurrentVersion(db)).toBe(2);
+      expect(await getCurrentVersion(db)).toBe(4);
       // Rollback to version 1
       const result = await rollbackTo(db, 1);
-      expect(result.rolledBack).toEqual([2]);
+      expect(result.rolledBack).toEqual([4, 3, 2]);
       expect(result.current).toBe(1);
       // Version should be 1
@@ -180,7 +180,7 @@ describe("Schema Migrations", () => {
       await runMigrations(db);
       const result = await rollbackTo(db, 0);
-      expect(result.rolledBack).toEqual([2, 1]);
+      expect(result.rolledBack).toEqual([4, 3, 2, 1]);
       expect(result.current).toBe(0);
       // All tables should be gone
@@ -196,9 +196,9 @@ describe("Schema Migrations", () => {
     it("should do nothing if target version >= current", async () => {
       await runMigrations(db);
-      const result = await rollbackTo(db, 2);
+      const result = await rollbackTo(db, 4);
       expect(result.rolledBack).toEqual([]);
-      expect(result.current).toBe(2);
+      expect(result.current).toBe(4);
     });
   });
@@ -210,12 +210,16 @@ describe("Schema Migrations", () => {
       expect(await isMigrationApplied(db, 1)).toBe(true);
       expect(await isMigrationApplied(db, 2)).toBe(true);
+      expect(await isMigrationApplied(db, 3)).toBe(true);
+      expect(await isMigrationApplied(db, 4)).toBe(true);
+      expect(await isMigrationApplied(db, 3)).toBe(true);
+      expect(await isMigrationApplied(db, 4)).toBe(true);
     });
     it("should list pending migrations", async () => {
       const pending1 = await getPendingMigrations(db);
-      expect(pending1).toHaveLength(2);
-      expect(pending1.map((m) => m.version)).toEqual([1, 2]);
+      expect(pending1).toHaveLength(4);
+      expect(pending1.map((m) => m.version)).toEqual([1, 2, 3, 4]);
       // Apply migration 1
       const migration = migrations[0];
@@ -236,8 +240,8 @@ describe("Schema Migrations", () => {
       );
       const pending2 = await getPendingMigrations(db);
-      expect(pending2).toHaveLength(1);
-      expect(pending2.map((m) => m.version)).toEqual([2]);
+      expect(pending2).toHaveLength(3);
+      expect(pending2.map((m) => m.version)).toEqual([2, 3, 4]);
     });
     it("should list applied migrations", async () => {
@@ -247,8 +251,8 @@ describe("Schema Migrations", () => {
       await runMigrations(db);
       const applied2 = await getAppliedMigrations(db);
-      expect(applied2).toHaveLength(2);
-      expect(applied2.map((m) => m.version)).toEqual([1, 2]);
+      expect(applied2).toHaveLength(4);
+      expect(applied2.map((m) => m.version)).toEqual([1, 2, 3, 4]);
       expect(applied2[0]?.description).toBe(
         "Add cursors table for DurableCursor",
       );
@@ -340,7 +344,7 @@ describe("Schema Migrations", () => {
         `SELECT version, applied_at, description FROM schema_version ORDER BY version`,
       );
-      expect(result.rows).toHaveLength(2);
+      expect(result.rows).toHaveLength(4);
       expect(result.rows[0]?.version).toBe(1);
       expect(result.rows[0]?.description).toBe(
         "Add cursors table for DurableCursor",

package/src/streams/migrations.ts CHANGED Viewed

@@ -107,6 +107,57 @@ export const migrations: Migration[] = [
     `,
     down: `DROP TABLE IF EXISTS deferred;`,
   },
+  {
+    version: 3,
+    description: "Add eval_records table for learning system",
+    up: `
+      CREATE TABLE IF NOT EXISTS eval_records (
+        id TEXT PRIMARY KEY,
+        project_key TEXT NOT NULL,
+        task TEXT NOT NULL,
+        context TEXT,
+        strategy TEXT NOT NULL,
+        epic_title TEXT NOT NULL,
+        subtasks JSONB NOT NULL,
+        outcomes JSONB,
+        overall_success BOOLEAN,
+        total_duration_ms INTEGER,
+        total_errors INTEGER,
+        human_accepted BOOLEAN,
+        human_modified BOOLEAN,
+        human_notes TEXT,
+        file_overlap_count INTEGER,
+        scope_accuracy REAL,
+        time_balance_ratio REAL,
+        created_at BIGINT NOT NULL,
+        updated_at BIGINT NOT NULL
+      );
+      CREATE INDEX IF NOT EXISTS idx_eval_records_project ON eval_records(project_key);
+      CREATE INDEX IF NOT EXISTS idx_eval_records_strategy ON eval_records(strategy);
+    `,
+    down: `DROP TABLE IF EXISTS eval_records;`,
+  },
+  {
+    version: 4,
+    description: "Add swarm_contexts table for context recovery",
+    up: `
+      CREATE TABLE IF NOT EXISTS swarm_contexts (
+        id TEXT PRIMARY KEY,
+        epic_id TEXT NOT NULL,
+        bead_id TEXT NOT NULL,
+        strategy TEXT NOT NULL,
+        files JSONB NOT NULL,
+        dependencies JSONB NOT NULL,
+        directives JSONB NOT NULL,
+        recovery JSONB NOT NULL,
+        created_at BIGINT NOT NULL,
+        updated_at BIGINT NOT NULL
+      );
+      CREATE INDEX IF NOT EXISTS idx_swarm_contexts_epic ON swarm_contexts(epic_id);
+      CREATE INDEX IF NOT EXISTS idx_swarm_contexts_bead ON swarm_contexts(bead_id);
+    `,
+    down: `DROP TABLE IF EXISTS swarm_contexts;`,
+  },
 ];
 // ============================================================================

package/src/streams/projections.ts CHANGED Viewed

@@ -315,3 +315,190 @@ function pathMatches(path: string, pattern: string): boolean {
   // Glob match using minimatch
   return minimatch(path, pattern);
 }
+// ============================================================================
+// Eval Records Projections
+// ============================================================================
+export interface EvalRecord {
+  id: string;
+  project_key: string;
+  task: string;
+  context: string | null;
+  strategy: string;
+  epic_title: string;
+  subtasks: Array<{
+    title: string;
+    files: string[];
+    priority?: number;
+  }>;
+  outcomes?: Array<{
+    bead_id: string;
+    planned_files: string[];
+    actual_files: string[];
+    duration_ms: number;
+    error_count: number;
+    retry_count: number;
+    success: boolean;
+  }>;
+  overall_success: boolean | null;
+  total_duration_ms: number | null;
+  total_errors: number | null;
+  human_accepted: boolean | null;
+  human_modified: boolean | null;
+  human_notes: string | null;
+  file_overlap_count: number | null;
+  scope_accuracy: number | null;
+  time_balance_ratio: number | null;
+  created_at: number;
+  updated_at: number;
+}
+export interface EvalStats {
+  totalRecords: number;
+  successRate: number;
+  avgDurationMs: number;
+  byStrategy: Record<string, number>;
+}
+/**
+ * Get eval records with optional filters
+ */
+export async function getEvalRecords(
+  projectKey: string,
+  options?: { limit?: number; strategy?: string },
+  projectPath?: string,
+): Promise<EvalRecord[]> {
+  const db = await getDatabase(projectPath);
+  const conditions = ["project_key = $1"];
+  const params: (string | number)[] = [projectKey];
+  let paramIndex = 2;
+  if (options?.strategy) {
+    conditions.push(`strategy = $${paramIndex++}`);
+    params.push(options.strategy);
+  }
+  const whereClause = conditions.join(" AND ");
+  let query = `
+    SELECT id, project_key, task, context, strategy, epic_title, subtasks,
+           outcomes, overall_success, total_duration_ms, total_errors,
+           human_accepted, human_modified, human_notes,
+           file_overlap_count, scope_accuracy, time_balance_ratio,
+           created_at, updated_at
+    FROM eval_records
+    WHERE ${whereClause}
+    ORDER BY created_at DESC
+  `;
+  if (options?.limit) {
+    query += ` LIMIT $${paramIndex}`;
+    params.push(options.limit);
+  }
+  const result = await db.query<{
+    id: string;
+    project_key: string;
+    task: string;
+    context: string | null;
+    strategy: string;
+    epic_title: string;
+    subtasks: string;
+    outcomes: string | null;
+    overall_success: boolean | null;
+    total_duration_ms: number | null;
+    total_errors: number | null;
+    human_accepted: boolean | null;
+    human_modified: boolean | null;
+    human_notes: string | null;
+    file_overlap_count: number | null;
+    scope_accuracy: number | null;
+    time_balance_ratio: number | null;
+    created_at: string;
+    updated_at: string;
+  }>(query, params);
+  return result.rows.map((row) => ({
+    id: row.id,
+    project_key: row.project_key,
+    task: row.task,
+    context: row.context,
+    strategy: row.strategy,
+    epic_title: row.epic_title,
+    // PGlite returns JSONB columns as already-parsed objects
+    subtasks:
+      typeof row.subtasks === "string"
+        ? JSON.parse(row.subtasks)
+        : row.subtasks,
+    outcomes: row.outcomes
+      ? typeof row.outcomes === "string"
+        ? JSON.parse(row.outcomes)
+        : row.outcomes
+      : undefined,
+    overall_success: row.overall_success,
+    total_duration_ms: row.total_duration_ms,
+    total_errors: row.total_errors,
+    human_accepted: row.human_accepted,
+    human_modified: row.human_modified,
+    human_notes: row.human_notes,
+    file_overlap_count: row.file_overlap_count,
+    scope_accuracy: row.scope_accuracy,
+    time_balance_ratio: row.time_balance_ratio,
+    created_at: parseInt(row.created_at as string),
+    updated_at: parseInt(row.updated_at as string),
+  }));
+}
+/**
+ * Get eval statistics for a project
+ */
+export async function getEvalStats(
+  projectKey: string,
+  projectPath?: string,
+): Promise<EvalStats> {
+  const db = await getDatabase(projectPath);
+  // Get overall stats
+  const overallResult = await db.query<{
+    total_records: string;
+    success_count: string;
+    avg_duration: string;
+  }>(
+    `SELECT
+      COUNT(*) as total_records,
+      COUNT(*) FILTER (WHERE overall_success = true) as success_count,
+      AVG(total_duration_ms) as avg_duration
+    FROM eval_records
+    WHERE project_key = $1`,
+    [projectKey],
+  );
+  const totalRecords = parseInt(overallResult.rows[0]?.total_records || "0");
+  const successCount = parseInt(overallResult.rows[0]?.success_count || "0");
+  const avgDurationMs = parseFloat(overallResult.rows[0]?.avg_duration || "0");
+  // Get by-strategy breakdown
+  const strategyResult = await db.query<{
+    strategy: string;
+    count: string;
+  }>(
+    `SELECT strategy, COUNT(*) as count
+    FROM eval_records
+    WHERE project_key = $1
+    GROUP BY strategy`,
+    [projectKey],
+  );
+  const byStrategy: Record<string, number> = {};
+  for (const row of strategyResult.rows) {
+    byStrategy[row.strategy] = parseInt(row.count);
+  }
+  return {
+    totalRecords,
+    successRate: totalRecords > 0 ? successCount / totalRecords : 0,
+    avgDurationMs,
+    byStrategy,
+  };
+}

package/src/streams/store.ts CHANGED Viewed

@@ -531,6 +531,28 @@ async function updateMaterializedViews(
       case "task_blocked":
         // No-op for now - could add task tracking table later
         break;
+      // Eval capture events - update eval_records projection
+      case "decomposition_generated":
+        await handleDecompositionGenerated(db, event);
+        break;
+      case "subtask_outcome":
+        await handleSubtaskOutcome(db, event);
+        break;
+      case "human_feedback":
+        await handleHumanFeedback(db, event);
+        break;
+      // Swarm checkpoint events - update swarm_contexts table
+      case "swarm_checkpointed":
+        await handleSwarmCheckpointed(db, event);
+        break;
+      case "swarm_recovered":
+        await handleSwarmRecovered(db, event);
+        break;
     }
   } catch (error) {
     console.error("[SwarmMail] Failed to update materialized views", {
@@ -707,6 +729,259 @@ async function handleFileReleased(
   }
 }
+async function handleDecompositionGenerated(
+  db: Awaited<ReturnType<typeof getDatabase>>,
+  event: AgentEvent & { id: number; sequence: number },
+): Promise<void> {
+  if (event.type !== "decomposition_generated") return;
+  await db.query(
+    `INSERT INTO eval_records (
+      id, project_key, task, context, strategy, epic_title, subtasks,
+      created_at, updated_at
+    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $8)
+    ON CONFLICT (id) DO NOTHING`,
+    [
+      event.epic_id,
+      event.project_key,
+      event.task,
+      event.context || null,
+      event.strategy,
+      event.epic_title,
+      JSON.stringify(event.subtasks),
+      event.timestamp,
+    ],
+  );
+}
+async function handleSubtaskOutcome(
+  db: Awaited<ReturnType<typeof getDatabase>>,
+  event: AgentEvent & { id: number; sequence: number },
+): Promise<void> {
+  if (event.type !== "subtask_outcome") return;
+  // Fetch current record to compute metrics
+  const result = await db.query<{
+    outcomes: string | null;
+    subtasks: string;
+  }>(`SELECT outcomes, subtasks FROM eval_records WHERE id = $1`, [
+    event.epic_id,
+  ]);
+  if (!result.rows[0]) {
+    console.warn(
+      `[SwarmMail] No eval_record found for epic_id ${event.epic_id}`,
+    );
+    return;
+  }
+  const row = result.rows[0];
+  // PGlite returns JSONB columns as already-parsed objects
+  const subtasks = (
+    typeof row.subtasks === "string" ? JSON.parse(row.subtasks) : row.subtasks
+  ) as Array<{
+    title: string;
+    files: string[];
+  }>;
+  const outcomes = row.outcomes
+    ? ((typeof row.outcomes === "string"
+        ? JSON.parse(row.outcomes)
+        : row.outcomes) as Array<{
+        bead_id: string;
+        planned_files: string[];
+        actual_files: string[];
+        duration_ms: number;
+        error_count: number;
+        retry_count: number;
+        success: boolean;
+      }>)
+    : [];
+  // Create new outcome
+  const newOutcome = {
+    bead_id: event.bead_id,
+    planned_files: event.planned_files,
+    actual_files: event.actual_files,
+    duration_ms: event.duration_ms,
+    error_count: event.error_count,
+    retry_count: event.retry_count,
+    success: event.success,
+  };
+  // Append to outcomes array
+  const updatedOutcomes = [...outcomes, newOutcome];
+  // Compute metrics
+  const fileOverlapCount = computeFileOverlap(subtasks);
+  const scopeAccuracy = computeScopeAccuracy(
+    event.planned_files,
+    event.actual_files,
+  );
+  const timeBalanceRatio = computeTimeBalanceRatio(updatedOutcomes);
+  const overallSuccess = updatedOutcomes.every((o) => o.success);
+  const totalDurationMs = updatedOutcomes.reduce(
+    (sum, o) => sum + o.duration_ms,
+    0,
+  );
+  const totalErrors = updatedOutcomes.reduce(
+    (sum, o) => sum + o.error_count,
+    0,
+  );
+  // Update record
+  await db.query(
+    `UPDATE eval_records SET
+      outcomes = $1,
+      file_overlap_count = $2,
+      scope_accuracy = $3,
+      time_balance_ratio = $4,
+      overall_success = $5,
+      total_duration_ms = $6,
+      total_errors = $7,
+      updated_at = $8
+    WHERE id = $9`,
+    [
+      JSON.stringify(updatedOutcomes),
+      fileOverlapCount,
+      scopeAccuracy,
+      timeBalanceRatio,
+      overallSuccess,
+      totalDurationMs,
+      totalErrors,
+      event.timestamp,
+      event.epic_id,
+    ],
+  );
+}
+async function handleHumanFeedback(
+  db: Awaited<ReturnType<typeof getDatabase>>,
+  event: AgentEvent & { id: number; sequence: number },
+): Promise<void> {
+  if (event.type !== "human_feedback") return;
+  await db.query(
+    `UPDATE eval_records SET
+      human_accepted = $1,
+      human_modified = $2,
+      human_notes = $3,
+      updated_at = $4
+    WHERE id = $5`,
+    [
+      event.accepted,
+      event.modified,
+      event.notes || null,
+      event.timestamp,
+      event.epic_id,
+    ],
+  );
+}
+async function handleSwarmCheckpointed(
+  db: Awaited<ReturnType<typeof getDatabase>>,
+  event: AgentEvent & { id: number; sequence: number },
+): Promise<void> {
+  if (event.type !== "swarm_checkpointed") return;
+  await db.query(
+    `INSERT INTO swarm_contexts (
+      project_key, epic_id, bead_id, strategy, files, dependencies,
+      directives, recovery, checkpointed_at, updated_at
+    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $9)
+    ON CONFLICT (project_key, epic_id, bead_id) DO UPDATE SET
+      strategy = EXCLUDED.strategy,
+      files = EXCLUDED.files,
+      dependencies = EXCLUDED.dependencies,
+      directives = EXCLUDED.directives,
+      recovery = EXCLUDED.recovery,
+      checkpointed_at = EXCLUDED.checkpointed_at,
+      updated_at = EXCLUDED.updated_at`,
+    [
+      event.project_key,
+      event.epic_id,
+      event.bead_id,
+      event.strategy,
+      JSON.stringify(event.files),
+      JSON.stringify(event.dependencies),
+      JSON.stringify(event.directives),
+      JSON.stringify(event.recovery),
+      event.timestamp,
+    ],
+  );
+}
+async function handleSwarmRecovered(
+  db: Awaited<ReturnType<typeof getDatabase>>,
+  event: AgentEvent & { id: number; sequence: number },
+): Promise<void> {
+  if (event.type !== "swarm_recovered") return;
+  // Update swarm_contexts to mark as recovered
+  await db.query(
+    `UPDATE swarm_contexts SET
+      recovered_at = $1,
+      recovered_from_checkpoint = $2,
+      updated_at = $1
+    WHERE project_key = $3 AND epic_id = $4 AND bead_id = $5`,
+    [
+      event.timestamp,
+      event.recovered_from_checkpoint,
+      event.project_key,
+      event.epic_id,
+      event.bead_id,
+    ],
+  );
+}
+// ============================================================================
+// Metric Computation Helpers
+// ============================================================================
+/**
+ * Count files that appear in multiple subtasks
+ */
+function computeFileOverlap(subtasks: Array<{ files: string[] }>): number {
+  const fileCount = new Map<string, number>();
+  for (const subtask of subtasks) {
+    for (const file of subtask.files) {
+      fileCount.set(file, (fileCount.get(file) || 0) + 1);
+    }
+  }
+  return Array.from(fileCount.values()).filter((count) => count > 1).length;
+}
+/**
+ * Compute scope accuracy: intersection(actual, planned) / planned.length
+ */
+function computeScopeAccuracy(planned: string[], actual: string[]): number {
+  if (planned.length === 0) return 1.0;
+  const plannedSet = new Set(planned);
+  const intersection = actual.filter((file) => plannedSet.has(file));
+  return intersection.length / planned.length;
+}
+/**
+ * Compute time balance ratio: max(duration) / min(duration)
+ * Lower is better (more balanced)
+ */
+function computeTimeBalanceRatio(
+  outcomes: Array<{ duration_ms: number }>,
+): number | null {
+  if (outcomes.length === 0) return null;
+  const durations = outcomes.map((o) => o.duration_ms);
+  const max = Math.max(...durations);
+  const min = Math.min(...durations);
+  if (min === 0) return null;
+  return max / min;
+}
 // ============================================================================
 // Convenience Functions
 // ============================================================================

package/src/swarm-mail.ts CHANGED Viewed

@@ -28,6 +28,7 @@ import {
   checkSwarmHealth,
 } from "./streams/swarm-mail";
 import { getActiveReservations } from "./streams/projections";
+import type { MailSessionState } from "./streams/events";
 import {
   existsSync,
   mkdirSync,
@@ -47,13 +48,12 @@ interface ToolContext {
   sessionID: string;
 }
-/** Swarm Mail session state */
-export interface SwarmMailState {
-  projectKey: string;
-  agentName: string;
-  reservations: number[];
-  startedAt: string;
-}
+/**
+ * Swarm Mail session state
+ * @deprecated Use MailSessionState from streams/events.ts instead
+ * This is kept for backward compatibility and re-exported as an alias
+ */
+export type SwarmMailState = MailSessionState;
 /** Init tool arguments */
 interface InitArgs {