npm - opencode-swarm-plugin - Versions diffs - 0.35.0 → 0.36.1 - Mend

opencode-swarm-plugin 0.35.0 → 0.36.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/.hive/issues.jsonl +4 -4
package/.hive/memories.jsonl +274 -1
package/.turbo/turbo-build.log +4 -4
package/.turbo/turbo-test.log +307 -307
package/CHANGELOG.md +133 -0
package/bin/swarm.ts +234 -179
package/dist/compaction-hook.d.ts +54 -4
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/eval-capture.d.ts +122 -17
package/dist/eval-capture.d.ts.map +1 -1
package/dist/index.d.ts +1 -7
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1278 -619
package/dist/planning-guardrails.d.ts +121 -0
package/dist/planning-guardrails.d.ts.map +1 -1
package/dist/plugin.d.ts +9 -9
package/dist/plugin.d.ts.map +1 -1
package/dist/plugin.js +1283 -329
package/dist/schemas/task.d.ts +0 -1
package/dist/schemas/task.d.ts.map +1 -1
package/dist/swarm-decompose.d.ts +0 -8
package/dist/swarm-decompose.d.ts.map +1 -1
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts +0 -4
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-review.d.ts.map +1 -1
package/dist/swarm.d.ts +0 -6
package/dist/swarm.d.ts.map +1 -1
package/evals/README.md +38 -0
package/evals/coordinator-session.eval.ts +154 -0
package/evals/fixtures/coordinator-sessions.ts +328 -0
package/evals/lib/data-loader.ts +69 -0
package/evals/scorers/coordinator-discipline.evalite-test.ts +536 -0
package/evals/scorers/coordinator-discipline.ts +315 -0
package/evals/scorers/index.ts +12 -0
package/examples/plugin-wrapper-template.ts +747 -34
package/package.json +2 -2
package/src/compaction-hook.test.ts +234 -281
package/src/compaction-hook.ts +221 -63
package/src/eval-capture.test.ts +390 -0
package/src/eval-capture.ts +168 -10
package/src/index.ts +89 -2
package/src/learning.integration.test.ts +0 -2
package/src/planning-guardrails.test.ts +387 -2
package/src/planning-guardrails.ts +289 -0
package/src/plugin.ts +10 -10
package/src/schemas/task.ts +0 -1
package/src/swarm-decompose.ts +21 -8
package/src/swarm-orchestrate.ts +44 -0
package/src/swarm-prompts.ts +20 -0
package/src/swarm-review.ts +41 -0
package/src/swarm.integration.test.ts +0 -40

package/evals/fixtures/coordinator-sessions.ts ADDED Viewed

@@ -0,0 +1,328 @@
+/**
+ * Coordinator Session Test Fixtures
+ *
+ * Synthetic coordinator sessions for testing coordinator-discipline scorers.
+ * Each fixture demonstrates good or bad coordinator behavior.
+ */
+import type { CoordinatorSession } from "../../src/eval-capture.js";
+/**
+ * PERFECT COORDINATOR
+ *
+ * Synthetic session showing ideal coordinator discipline.
+ *
+ * - No violations: the event list contains no VIOLATION events
+ *   (no direct edits, tests, or reservations)
+ * - 100% spawn efficiency: subtask_count is 3 and 3 workers are spawned
+ * - 100% review thoroughness: every subtask_success is followed by a
+ *   review_completed for the same bead_id
+ * - Fast time to first spawn: 30s between decomposition_complete and the
+ *   first worker_spawned
+ * - Closes with an epic_complete outcome stamped at the session's end_time
+ */
+export const perfectCoordinator: CoordinatorSession = {
+  session_id: "test-session-perfect",
+  epic_id: "test-epic-perfect",
+  start_time: "2025-01-01T10:00:00.000Z",
+  end_time: "2025-01-01T10:30:00.000Z",
+  events: [
+    // 1. Decomposition complete (3 subtasks) at session start
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:00:00.000Z",
+      event_type: "DECISION",
+      decision_type: "decomposition_complete",
+      payload: { subtask_count: 3 },
+    },
+    // 2. First spawn (30s after decomposition)
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:00:30.000Z",
+      event_type: "DECISION",
+      decision_type: "worker_spawned",
+      payload: { worker: "BlueLake", bead_id: "test-epic-perfect.1" },
+    },
+    // 3. Second spawn (subtask 2)
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:01:00.000Z",
+      event_type: "DECISION",
+      decision_type: "worker_spawned",
+      payload: { worker: "GreenMountain", bead_id: "test-epic-perfect.2" },
+    },
+    // 4. Third spawn (subtask 3) - all subtasks now have a worker
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:01:30.000Z",
+      event_type: "DECISION",
+      decision_type: "worker_spawned",
+      payload: { worker: "RedForest", bead_id: "test-epic-perfect.3" },
+    },
+    // 5. First worker completes
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:10:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "subtask_success",
+      payload: { bead_id: "test-epic-perfect.1", worker: "BlueLake" },
+    },
+    // 6. First review (approved, no issues)
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:11:00.000Z",
+      event_type: "DECISION",
+      decision_type: "review_completed",
+      payload: {
+        bead_id: "test-epic-perfect.1",
+        approved: true,
+        issues: [],
+      },
+    },
+    // 7. Second worker completes
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:15:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "subtask_success",
+      payload: { bead_id: "test-epic-perfect.2", worker: "GreenMountain" },
+    },
+    // 8. Second review (approved, no issues)
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:16:00.000Z",
+      event_type: "DECISION",
+      decision_type: "review_completed",
+      payload: {
+        bead_id: "test-epic-perfect.2",
+        approved: true,
+        issues: [],
+      },
+    },
+    // 9. Third worker completes
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:20:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "subtask_success",
+      payload: { bead_id: "test-epic-perfect.3", worker: "RedForest" },
+    },
+    // 10. Third review (approved, no issues)
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:21:00.000Z",
+      event_type: "DECISION",
+      decision_type: "review_completed",
+      payload: {
+        bead_id: "test-epic-perfect.3",
+        approved: true,
+        issues: [],
+      },
+    },
+    // 11. Epic complete (timestamp equals end_time)
+    {
+      session_id: "test-session-perfect",
+      epic_id: "test-epic-perfect",
+      timestamp: "2025-01-01T10:30:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "epic_complete",
+      payload: { epic_id: "test-epic-perfect", total_subtasks: 3 },
+    },
+  ],
+};
+/**
+ * BAD COORDINATOR - Multiple Violations
+ *
+ * - 5 VIOLATION events in total:
+ *   3 direct-action violations (edited file, ran tests, reserved files)
+ *   plus 2 no_worker_spawned violations (subtasks 2 and 3)
+ * - 33% spawn efficiency (only 1/3 workers spawned)
+ * - 0% review thoroughness (no review_completed events at all)
+ * - Slow time to first spawn (10 minutes after decomposition_complete)
+ * - No epic_complete outcome: the last event (10:40) precedes end_time (11:00)
+ */
+export const badCoordinator: CoordinatorSession = {
+  session_id: "test-session-bad",
+  epic_id: "test-epic-bad",
+  start_time: "2025-01-01T10:00:00.000Z",
+  end_time: "2025-01-01T11:00:00.000Z",
+  events: [
+    // 1. Decomposition complete (3 subtasks) at session start
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:00:00.000Z",
+      event_type: "DECISION",
+      decision_type: "decomposition_complete",
+      payload: { subtask_count: 3 },
+    },
+    // 2. VIOLATION: Coordinator edited file directly
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:01:00.000Z",
+      event_type: "VIOLATION",
+      violation_type: "coordinator_edited_file",
+      payload: { file: "src/auth.ts", reason: "should spawn worker instead" },
+    },
+    // 3. VIOLATION: Coordinator ran tests
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:02:00.000Z",
+      event_type: "VIOLATION",
+      violation_type: "coordinator_ran_tests",
+      payload: { command: "bun test", reason: "workers do verification" },
+    },
+    // 4. VIOLATION: Coordinator reserved files
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:03:00.000Z",
+      event_type: "VIOLATION",
+      violation_type: "coordinator_reserved_files",
+      payload: { paths: ["src/**"], reason: "only workers reserve" },
+    },
+    // 5. First (and only) spawn, 10 minutes after decomposition - way too slow
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:10:00.000Z",
+      event_type: "DECISION",
+      decision_type: "worker_spawned",
+      payload: { worker: "BlueLake", bead_id: "test-epic-bad.1" },
+    },
+    // 6. Worker completes (but no review_completed event follows!)
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:20:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "subtask_success",
+      payload: { bead_id: "test-epic-bad.1", worker: "BlueLake" },
+    },
+    // 7. VIOLATION: No worker spawned for subtask 2
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:30:00.000Z",
+      event_type: "VIOLATION",
+      violation_type: "no_worker_spawned",
+      payload: { bead_id: "test-epic-bad.2", reason: "coordinator did work directly" },
+    },
+    // 8. VIOLATION: No worker spawned for subtask 3 (final event; no epic_complete)
+    {
+      session_id: "test-session-bad",
+      epic_id: "test-epic-bad",
+      timestamp: "2025-01-01T10:40:00.000Z",
+      event_type: "VIOLATION",
+      violation_type: "no_worker_spawned",
+      payload: { bead_id: "test-epic-bad.3", reason: "coordinator did work directly" },
+    },
+  ],
+};
+/**
+ * DECENT COORDINATOR - Some Issues
+ *
+ * Synthetic session that is mostly disciplined but not flawless.
+ *
+ * - 1 violation (coordinator_ran_tests, once)
+ * - 100% spawn efficiency: subtask_count is 2 and 2 workers are spawned
+ * - 50% review thoroughness: only subtask 1 has a review_completed event
+ * - Good time to first spawn: 45s after decomposition_complete
+ * - Closes with an epic_complete outcome stamped at the session's end_time
+ */
+export const decentCoordinator: CoordinatorSession = {
+  session_id: "test-session-decent",
+  epic_id: "test-epic-decent",
+  start_time: "2025-01-01T10:00:00.000Z",
+  end_time: "2025-01-01T10:25:00.000Z",
+  events: [
+    // 1. Decomposition complete (2 subtasks) at session start
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:00:00.000Z",
+      event_type: "DECISION",
+      decision_type: "decomposition_complete",
+      payload: { subtask_count: 2 },
+    },
+    // 2. First spawn (45s after decomposition - acceptable)
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:00:45.000Z",
+      event_type: "DECISION",
+      decision_type: "worker_spawned",
+      payload: { worker: "BlueLake", bead_id: "test-epic-decent.1" },
+    },
+    // 3. Second spawn - both subtasks now have a worker
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:01:00.000Z",
+      event_type: "DECISION",
+      decision_type: "worker_spawned",
+      payload: { worker: "GreenMountain", bead_id: "test-epic-decent.2" },
+    },
+    // 4. First worker completes
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:10:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "subtask_success",
+      payload: { bead_id: "test-epic-decent.1", worker: "BlueLake" },
+    },
+    // 5. First review (approved, no issues) - the only review in this session
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:11:00.000Z",
+      event_type: "DECISION",
+      decision_type: "review_completed",
+      payload: {
+        bead_id: "test-epic-decent.1",
+        approved: true,
+        issues: [],
+      },
+    },
+    // 6. VIOLATION: Ran tests (one slip-up)
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:15:00.000Z",
+      event_type: "VIOLATION",
+      violation_type: "coordinator_ran_tests",
+      payload: { command: "bun test", reason: "should let worker verify" },
+    },
+    // 7. Second worker completes
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:20:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "subtask_success",
+      payload: { bead_id: "test-epic-decent.2", worker: "GreenMountain" },
+    },
+    // 8. Deliberately no review event for the second worker (50% review rate)
+    // 9. Epic complete (timestamp equals end_time)
+    {
+      session_id: "test-session-decent",
+      epic_id: "test-epic-decent",
+      timestamp: "2025-01-01T10:25:00.000Z",
+      event_type: "OUTCOME",
+      outcome_type: "epic_complete",
+      payload: { epic_id: "test-epic-decent", total_subtasks: 2 },
+    },
+  ],
+};
+/**
+ * Every synthetic coordinator session exported by this module,
+ * in declaration order: perfect, bad, decent.
+ */
+export const coordinatorSessionFixtures = [perfectCoordinator, badCoordinator, decentCoordinator];

package/evals/lib/data-loader.ts CHANGED Viewed

@@ -4,6 +4,7 @@
  * Loads real decomposition outcomes from the eval_records table
  * for use in Evalite evals.
  */
+import * as fs from "node:fs";
 import {
   getEvalRecords,
   getEvalStats,
@@ -109,3 +110,71 @@ export async function getEvalDataSummary(
     hasEnoughData: stats.totalRecords >= 5,
   };
 }
+/**
+ * Load captured coordinator sessions from the session directory
+ * (the path returned by getSessionDir(); ~/.config/swarm-tools/sessions/
+ * per the original documentation).
+ *
+ * Every `*.jsonl` file in that directory is treated as one session whose id
+ * is the file name without the `.jsonl` suffix. Sessions with no events, or
+ * whose first event carries no epic_id, are skipped. A session that throws
+ * while loading is logged with console.warn and skipped, so one corrupt file
+ * does not abort the whole load.
+ *
+ * @param options - Filter options
+ * @param options.sessionIds - When provided, only sessions whose id is in
+ *   this list are loaded.
+ * @param options.limit - Maximum number of sessions to return. Omit for no
+ *   limit; zero or a negative value yields an empty array.
+ * @returns Array of coordinator sessions, each wrapped as `{ session }`.
+ *   Empty when the session directory does not exist.
+ */
+export async function loadCapturedSessions(options?: {
+  sessionIds?: string[];
+  limit?: number;
+}): Promise<
+  Array<{ session: import("../../src/eval-capture.js").CoordinatorSession }>
+> {
+  const SESSION_FILE_SUFFIX = ".jsonl";
+  const limit = options?.limit;
+  // An explicit non-positive limit asks for nothing. The previous truthiness
+  // check treated 0 as "no limit" and returned every session.
+  if (limit !== undefined && limit <= 0) {
+    return [];
+  }
+  const { getSessionDir, readSessionEvents, saveSession } = await import(
+    "../../src/eval-capture.js"
+  );
+  const sessionDir = getSessionDir();
+  // If session dir doesn't exist, there is nothing captured yet
+  if (!fs.existsSync(sessionDir)) {
+    return [];
+  }
+  // Set lookup keeps the id filter linear in the number of files
+  const wantedIds = options?.sessionIds
+    ? new Set(options.sessionIds)
+    : undefined;
+  // Derive session ids by stripping the suffix only. String.replace would
+  // remove the FIRST ".jsonl" and mangle ids that contain that text.
+  const sessionIds = fs
+    .readdirSync(sessionDir)
+    .filter((f) => f.endsWith(SESSION_FILE_SUFFIX))
+    .map((f) => f.slice(0, -SESSION_FILE_SUFFIX.length))
+    .filter((id) => wantedIds === undefined || wantedIds.has(id));
+  // Load each session
+  const sessions: Array<{
+    session: import("../../src/eval-capture.js").CoordinatorSession;
+  }> = [];
+  for (const sessionId of sessionIds) {
+    // Stop as soon as the requested number of sessions has been collected
+    if (limit !== undefined && sessions.length >= limit) {
+      break;
+    }
+    try {
+      const events = readSessionEvents(sessionId);
+      if (events.length === 0) continue;
+      // The epic id is taken from the first recorded event
+      const epicId = events[0]?.epic_id;
+      if (!epicId) continue;
+      // NOTE(review): saveSession is used here to obtain the session object;
+      // its name suggests it may also persist to disk - confirm that calling
+      // it from a loader is intended.
+      const session = saveSession({ session_id: sessionId, epic_id: epicId });
+      if (session) {
+        sessions.push({ session });
+      }
+    } catch (error) {
+      // Skip invalid sessions
+      console.warn(`Failed to load session ${sessionId}:`, error);
+    }
+  }
+  return sessions;
+}