npm - opencode-swarm-plugin - Versions diffs - 0.20.0 → 0.21.0 - Mend

opencode-swarm-plugin 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/.beads/issues.jsonl +202 -0
package/INTEGRATION_EXAMPLE.md +66 -0
package/README.md +127 -562
package/dist/index.js +3842 -2917
package/dist/plugin.js +3824 -2918
package/docs/analysis/subagent-coordination-patterns.md +2 -0
package/evals/README.md +116 -0
package/evals/evalite.config.ts +15 -0
package/evals/example.eval.ts +32 -0
package/evals/fixtures/decomposition-cases.ts +105 -0
package/evals/lib/data-loader.test.ts +288 -0
package/evals/lib/data-loader.ts +111 -0
package/evals/lib/llm.ts +115 -0
package/evals/scorers/index.ts +200 -0
package/evals/scorers/outcome-scorers.test.ts +27 -0
package/evals/scorers/outcome-scorers.ts +349 -0
package/evals/swarm-decomposition.eval.ts +112 -0
package/package.json +8 -1
package/src/beads.ts +49 -0
package/src/eval-capture.ts +487 -0
package/src/index.ts +45 -3
package/src/output-guardrails.test.ts +438 -0
package/src/output-guardrails.ts +381 -0
package/src/schemas/index.ts +18 -0
package/src/schemas/swarm-context.ts +115 -0
package/src/streams/events.test.ts +296 -0
package/src/streams/events.ts +93 -0
package/src/streams/migrations.test.ts +24 -20
package/src/streams/migrations.ts +51 -0
package/src/streams/projections.ts +187 -0
package/src/streams/store.ts +275 -0
package/src/swarm-orchestrate.ts +430 -1
package/src/swarm-prompts.ts +84 -12

package/src/streams/projections.ts CHANGED Viewed

@@ -315,3 +315,190 @@ function pathMatches(path: string, pattern: string): boolean {
   // Glob match using minimatch
   return minimatch(path, pattern);
 }
+// ============================================================================
+// Eval Records Projections
+// ============================================================================
/**
 * One row of the `eval_records` projection, in the shape that
 * `getEvalRecords` hands back to callers.
 *
 * Nullable scalar fields are passed through from the database row unchanged.
 */
export interface EvalRecord {
  id: string;
  project_key: string;
  task: string;
  context: string | null;
  strategy: string;
  epic_title: string;
  /** Decoded from the row's `subtasks` JSON column. */
  subtasks: {
    title: string;
    files: string[];
    priority?: number;
  }[];
  /** Decoded from the row's `outcomes` JSON column; absent when that column is empty. */
  outcomes?: {
    bead_id: string;
    planned_files: string[];
    actual_files: string[];
    duration_ms: number;
    error_count: number;
    retry_count: number;
    success: boolean;
  }[];
  overall_success: boolean | null;
  total_duration_ms: number | null;
  total_errors: number | null;
  human_accepted: boolean | null;
  human_modified: boolean | null;
  human_notes: string | null;
  file_overlap_count: number | null;
  scope_accuracy: number | null;
  time_balance_ratio: number | null;
  /** Integer parsed from the row's `created_at` value. */
  created_at: number;
  /** Integer parsed from the row's `updated_at` value. */
  updated_at: number;
}
/**
 * Aggregate figures over a project's eval records, as produced by
 * `getEvalStats`.
 */
export interface EvalStats {
  /** Number of eval records for the project. */
  totalRecords: number;
  /** Successful records divided by `totalRecords`; 0 when there are no records. */
  successRate: number;
  /** Average of `total_duration_ms` across the project's records; 0 when unavailable. */
  avgDurationMs: number;
  /** Record count keyed by strategy name. */
  byStrategy: { [strategy: string]: number };
}
/**
 * Fetch eval records for a project, newest first (`ORDER BY created_at DESC`).
 *
 * @param projectKey - Project whose records are returned.
 * @param options - Optional filters. `strategy` restricts the result to one
 *   strategy; `limit` caps the number of rows. An explicit `limit: 0` is
 *   honored and yields no rows.
 * @param projectPath - Forwarded to `getDatabase`.
 * @returns The matching records with JSON columns decoded and timestamps
 *   converted to integers.
 */
export async function getEvalRecords(
  projectKey: string,
  options?: { limit?: number; strategy?: string },
  projectPath?: string,
): Promise<EvalRecord[]> {
  type SubtaskList = EvalRecord["subtasks"];
  type OutcomeList = NonNullable<EvalRecord["outcomes"]>;

  // PGlite returns JSONB columns as already-parsed objects; a text value is
  // parsed here so both representations end up in the same shape.
  const fromJson = <T>(value: string | T): T =>
    typeof value === "string" ? (JSON.parse(value) as T) : value;

  const db = await getDatabase(projectPath);

  // Placeholders are numbered from the params array length, so the SQL and
  // the bound values cannot drift apart.
  const conditions = ["project_key = $1"];
  const params: (string | number)[] = [projectKey];
  if (options?.strategy) {
    params.push(options.strategy);
    conditions.push(`strategy = $${params.length}`);
  }
  const whereClause = conditions.join(" AND ");

  let query = `
    SELECT id, project_key, task, context, strategy, epic_title, subtasks,
           outcomes, overall_success, total_duration_ms, total_errors,
           human_accepted, human_modified, human_notes,
           file_overlap_count, scope_accuracy, time_balance_ratio,
           created_at, updated_at
    FROM eval_records
    WHERE ${whereClause}
    ORDER BY created_at DESC
  `;

  // Compare against null/undefined instead of truthiness: a truthiness check
  // would silently drop `limit: 0` and return every row. NaN is still ignored.
  const limit = options?.limit;
  if (limit != null && !Number.isNaN(limit)) {
    params.push(limit);
    query += ` LIMIT $${params.length}`;
  }

  const result = await db.query<{
    id: string;
    project_key: string;
    task: string;
    context: string | null;
    strategy: string;
    epic_title: string;
    subtasks: string | SubtaskList;
    outcomes: string | OutcomeList | null;
    overall_success: boolean | null;
    total_duration_ms: number | null;
    total_errors: number | null;
    human_accepted: boolean | null;
    human_modified: boolean | null;
    human_notes: string | null;
    file_overlap_count: number | null;
    scope_accuracy: number | null;
    time_balance_ratio: number | null;
    created_at: string | number;
    updated_at: string | number;
  }>(query, params);

  return result.rows.map((row) => ({
    id: row.id,
    project_key: row.project_key,
    task: row.task,
    context: row.context,
    strategy: row.strategy,
    epic_title: row.epic_title,
    subtasks: fromJson<SubtaskList>(row.subtasks),
    // An empty outcomes column is surfaced as `undefined`, not `null`.
    outcomes: row.outcomes ? fromJson<OutcomeList>(row.outcomes) : undefined,
    overall_success: row.overall_success,
    total_duration_ms: row.total_duration_ms,
    total_errors: row.total_errors,
    human_accepted: row.human_accepted,
    human_modified: row.human_modified,
    human_notes: row.human_notes,
    file_overlap_count: row.file_overlap_count,
    scope_accuracy: row.scope_accuracy,
    time_balance_ratio: row.time_balance_ratio,
    created_at: parseInt(String(row.created_at), 10),
    updated_at: parseInt(String(row.updated_at), 10),
  }));
}
/**
 * Summarize a project's eval records: total count, share of successful
 * records, average duration, and a per-strategy record count.
 *
 * @param projectKey - Project whose records are aggregated.
 * @param projectPath - Forwarded to `getDatabase`.
 * @returns Aggregates; every numeric field falls back to 0 when the
 *   corresponding query value is missing.
 */
export async function getEvalStats(
  projectKey: string,
  projectPath?: string,
): Promise<EvalStats> {
  const db = await getDatabase(projectPath);

  // Project-wide totals come back as a single aggregate row.
  const totalsQuery = await db.query<{
    total_records: string;
    success_count: string;
    avg_duration: string;
  }>(
    `SELECT
      COUNT(*) as total_records,
      COUNT(*) FILTER (WHERE overall_success = true) as success_count,
      AVG(total_duration_ms) as avg_duration
    FROM eval_records
    WHERE project_key = $1`,
    [projectKey],
  );
  const totals = totalsQuery.rows[0];
  const totalRecords = parseInt(totals?.total_records || "0");
  const successCount = parseInt(totals?.success_count || "0");
  const avgDurationMs = parseFloat(totals?.avg_duration || "0");

  // One row per strategy, folded into a plain lookup object.
  const perStrategy = await db.query<{
    strategy: string;
    count: string;
  }>(
    `SELECT strategy, COUNT(*) as count
    FROM eval_records
    WHERE project_key = $1
    GROUP BY strategy`,
    [projectKey],
  );
  const byStrategy: Record<string, number> = {};
  perStrategy.rows.forEach((entry) => {
    byStrategy[entry.strategy] = parseInt(entry.count);
  });

  // Guard the division so a project without records reports a rate of 0.
  let successRate = 0;
  if (totalRecords > 0) {
    successRate = successCount / totalRecords;
  }

  return { totalRecords, successRate, avgDurationMs, byStrategy };
}

package/src/streams/store.ts CHANGED Viewed

@@ -531,6 +531,28 @@ async function updateMaterializedViews(
       case "task_blocked":
         // No-op for now - could add task tracking table later
         break;
+      // Eval capture events - update eval_records projection
+      case "decomposition_generated":
+        await handleDecompositionGenerated(db, event);
+        break;
+      case "subtask_outcome":
+        await handleSubtaskOutcome(db, event);
+        break;
+      case "human_feedback":
+        await handleHumanFeedback(db, event);
+        break;
+      // Swarm checkpoint events - update swarm_contexts table
+      case "swarm_checkpointed":
+        await handleSwarmCheckpointed(db, event);
+        break;
+      case "swarm_recovered":
+        await handleSwarmRecovered(db, event);
+        break;
     }
   } catch (error) {
     console.error("[SwarmMail] Failed to update materialized views", {
@@ -707,6 +729,259 @@ async function handleFileReleased(
   }
 }
/**
 * Projection handler for `decomposition_generated` events: inserts the
 * initial `eval_records` row, keyed by the event's epic id.
 *
 * The insert uses `ON CONFLICT (id) DO NOTHING`, so an existing row for the
 * same epic is left untouched. `created_at` and `updated_at` are both set to
 * the event timestamp.
 */
async function handleDecompositionGenerated(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "decomposition_generated") return;

  const insertSql = `INSERT INTO eval_records (
      id, project_key, task, context, strategy, epic_title, subtasks,
      created_at, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $8)
    ON CONFLICT (id) DO NOTHING`;

  // Positional values for $1..$8; $8 is reused for both timestamp columns.
  const values = [
    event.epic_id,
    event.project_key,
    event.task,
    event.context || null,
    event.strategy,
    event.epic_title,
    JSON.stringify(event.subtasks),
    event.timestamp,
  ];

  await db.query(insertSql, values);
}
/**
 * Projection handler for `subtask_outcome` events.
 *
 * Loads the epic's `eval_records` row, appends this event's outcome to the
 * stored outcomes list, recomputes the derived metrics and writes everything
 * back. If no row exists for the epic, a warning is logged and nothing is
 * written.
 *
 * Note: `scope_accuracy` is computed from this event's planned/actual files
 * only, whereas the totals and `time_balance_ratio` cover all outcomes
 * recorded so far.
 */
async function handleSubtaskOutcome(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "subtask_outcome") return;

  type StoredSubtask = { title: string; files: string[] };
  type StoredOutcome = {
    bead_id: string;
    planned_files: string[];
    actual_files: string[];
    duration_ms: number;
    error_count: number;
    retry_count: number;
    success: boolean;
  };

  // PGlite returns JSONB columns as already-parsed objects; text is parsed.
  const decode = (value: unknown): unknown =>
    typeof value === "string" ? JSON.parse(value) : value;

  // Current state of the record this outcome belongs to.
  const existing = await db.query<{
    outcomes: string | null;
    subtasks: string;
  }>(`SELECT outcomes, subtasks FROM eval_records WHERE id = $1`, [
    event.epic_id,
  ]);
  const record = existing.rows[0];
  if (!record) {
    console.warn(
      `[SwarmMail] No eval_record found for epic_id ${event.epic_id}`,
    );
    return;
  }

  const subtasks = decode(record.subtasks) as StoredSubtask[];
  const previousOutcomes = record.outcomes
    ? (decode(record.outcomes) as StoredOutcome[])
    : [];

  // The stored list is never mutated; a new list with this outcome is built.
  const latest = {
    bead_id: event.bead_id,
    planned_files: event.planned_files,
    actual_files: event.actual_files,
    duration_ms: event.duration_ms,
    error_count: event.error_count,
    retry_count: event.retry_count,
    success: event.success,
  };
  const allOutcomes = [...previousOutcomes, latest];

  // Derived metrics.
  const fileOverlapCount = computeFileOverlap(subtasks);
  const scopeAccuracy = computeScopeAccuracy(
    event.planned_files,
    event.actual_files,
  );
  const timeBalanceRatio = computeTimeBalanceRatio(allOutcomes);

  // Totals across every outcome recorded so far, gathered in one pass.
  let overallSuccess = true;
  let totalDurationMs = 0;
  let totalErrors = 0;
  for (const outcome of allOutcomes) {
    if (!outcome.success) overallSuccess = false;
    totalDurationMs += outcome.duration_ms;
    totalErrors += outcome.error_count;
  }

  const updateSql = `UPDATE eval_records SET
      outcomes = $1,
      file_overlap_count = $2,
      scope_accuracy = $3,
      time_balance_ratio = $4,
      overall_success = $5,
      total_duration_ms = $6,
      total_errors = $7,
      updated_at = $8
    WHERE id = $9`;

  await db.query(updateSql, [
    JSON.stringify(allOutcomes),
    fileOverlapCount,
    scopeAccuracy,
    timeBalanceRatio,
    overallSuccess,
    totalDurationMs,
    totalErrors,
    event.timestamp,
    event.epic_id,
  ]);
}
/**
 * Projection handler for `human_feedback` events: stores the reviewer's
 * accepted/modified flags and notes on the epic's `eval_records` row and
 * bumps `updated_at` to the event timestamp.
 *
 * Falsy notes (including an empty string) are stored as NULL.
 */
async function handleHumanFeedback(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "human_feedback") return;

  const updateSql = `UPDATE eval_records SET
      human_accepted = $1,
      human_modified = $2,
      human_notes = $3,
      updated_at = $4
    WHERE id = $5`;

  const notes = event.notes || null;

  await db.query(updateSql, [
    event.accepted,
    event.modified,
    notes,
    event.timestamp,
    event.epic_id,
  ]);
}
/**
 * Projection handler for `swarm_checkpointed` events: upserts the checkpoint
 * into `swarm_contexts`.
 *
 * Rows are keyed by (project_key, epic_id, bead_id). On conflict, every
 * non-key column is overwritten with the values from this event. The
 * structured fields are stored as JSON text.
 */
async function handleSwarmCheckpointed(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "swarm_checkpointed") return;

  const upsertSql = `INSERT INTO swarm_contexts (
      project_key, epic_id, bead_id, strategy, files, dependencies,
      directives, recovery, checkpointed_at, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $9)
    ON CONFLICT (project_key, epic_id, bead_id) DO UPDATE SET
      strategy = EXCLUDED.strategy,
      files = EXCLUDED.files,
      dependencies = EXCLUDED.dependencies,
      directives = EXCLUDED.directives,
      recovery = EXCLUDED.recovery,
      checkpointed_at = EXCLUDED.checkpointed_at,
      updated_at = EXCLUDED.updated_at`;

  // Serialize the structured fields in column order before binding.
  const filesJson = JSON.stringify(event.files);
  const dependenciesJson = JSON.stringify(event.dependencies);
  const directivesJson = JSON.stringify(event.directives);
  const recoveryJson = JSON.stringify(event.recovery);

  await db.query(upsertSql, [
    event.project_key,
    event.epic_id,
    event.bead_id,
    event.strategy,
    filesJson,
    dependenciesJson,
    directivesJson,
    recoveryJson,
    event.timestamp,
  ]);
}
/**
 * Projection handler for `swarm_recovered` events: stamps the matching
 * `swarm_contexts` row (project_key, epic_id, bead_id) with the recovery time
 * and the checkpoint it was recovered from. `updated_at` is set to the same
 * event timestamp as `recovered_at`.
 */
async function handleSwarmRecovered(
  db: Awaited<ReturnType<typeof getDatabase>>,
  event: AgentEvent & { id: number; sequence: number },
): Promise<void> {
  if (event.type !== "swarm_recovered") return;

  const markRecoveredSql = `UPDATE swarm_contexts SET
      recovered_at = $1,
      recovered_from_checkpoint = $2,
      updated_at = $1
    WHERE project_key = $3 AND epic_id = $4 AND bead_id = $5`;

  const values = [
    event.timestamp,
    event.recovered_from_checkpoint,
    event.project_key,
    event.epic_id,
    event.bead_id,
  ];

  await db.query(markRecoveredSql, values);
}
+// ============================================================================
+// Metric Computation Helpers
+// ============================================================================
/**
 * Count the distinct files that are listed by more than one subtask.
 *
 * A file repeated inside a single subtask's `files` list is counted once for
 * that subtask, so it is only reported when at least two different subtasks
 * list it.
 *
 * @param subtasks - Planned subtasks with the files each one intends to touch.
 * @returns Number of distinct files shared by two or more subtasks.
 */
function computeFileOverlap(subtasks: Array<{ files: string[] }>): number {
  const subtasksPerFile = new Map<string, number>();
  for (const subtask of subtasks) {
    // Dedupe within the subtask: repeating a path in one list is not overlap.
    for (const file of new Set(subtask.files)) {
      subtasksPerFile.set(file, (subtasksPerFile.get(file) ?? 0) + 1);
    }
  }

  let overlapping = 0;
  for (const count of subtasksPerFile.values()) {
    if (count > 1) overlapping += 1;
  }
  return overlapping;
}
/**
 * Compute scope accuracy: the fraction of distinct planned files that were
 * actually touched, i.e. |planned ∩ actual| / |planned|.
 *
 * Both lists are treated as sets, so duplicate paths in either one cannot
 * skew the ratio and the result always lies in [0, 1].
 *
 * @param planned - Files the subtask was expected to touch.
 * @param actual - Files the subtask really touched.
 * @returns 1.0 when nothing was planned, otherwise the covered fraction.
 */
function computeScopeAccuracy(planned: string[], actual: string[]): number {
  const plannedFiles = new Set(planned);
  // With nothing planned there is nothing to miss.
  if (plannedFiles.size === 0) return 1.0;

  const touchedFiles = new Set(actual);
  let covered = 0;
  for (const file of plannedFiles) {
    if (touchedFiles.has(file)) covered += 1;
  }
  return covered / plannedFiles.size;
}
/**
 * Compute time balance ratio: max(duration) / min(duration).
 * Lower is better (more balanced); 1 means all durations are equal.
 *
 * @param outcomes - Recorded outcomes carrying their duration in milliseconds.
 * @returns The ratio, or `null` when it is undefined: no outcomes, or any
 *   duration that is zero, negative, or not a finite number.
 */
function computeTimeBalanceRatio(
  outcomes: Array<{ duration_ms: number }>,
): number | null {
  if (outcomes.length === 0) return null;

  // Single pass instead of Math.max(...)/Math.min(...), so there is no
  // argument-spread size limit and bad samples are rejected up front.
  let longest = -Infinity;
  let shortest = Infinity;
  for (const { duration_ms: duration } of outcomes) {
    // A non-positive or non-finite duration makes the ratio meaningless
    // (division by zero, negative ratio, NaN), so report "unknown".
    if (!Number.isFinite(duration) || duration <= 0) return null;
    if (duration > longest) longest = duration;
    if (duration < shortest) shortest = duration;
  }
  return longest / shortest;
}
 // ============================================================================
 // Convenience Functions
 // ============================================================================