npm - opencode-swarm-plugin - Versions diffs - 0.36.0 → 0.37.0 - Mend

opencode-swarm-plugin 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/.hive/issues.jsonl +16 -4
package/.hive/memories.jsonl +274 -1
package/.turbo/turbo-build.log +4 -4
package/.turbo/turbo-test.log +318 -318
package/CHANGELOG.md +113 -0
package/bin/swarm.test.ts +106 -0
package/bin/swarm.ts +413 -179
package/dist/compaction-hook.d.ts +54 -4
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/eval-capture.d.ts +122 -17
package/dist/eval-capture.d.ts.map +1 -1
package/dist/index.d.ts +1 -7
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1278 -619
package/dist/planning-guardrails.d.ts +121 -0
package/dist/planning-guardrails.d.ts.map +1 -1
package/dist/plugin.d.ts +9 -9
package/dist/plugin.d.ts.map +1 -1
package/dist/plugin.js +1283 -329
package/dist/schemas/task.d.ts +0 -1
package/dist/schemas/task.d.ts.map +1 -1
package/dist/swarm-decompose.d.ts +0 -8
package/dist/swarm-decompose.d.ts.map +1 -1
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts +0 -4
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm-review.d.ts.map +1 -1
package/dist/swarm.d.ts +0 -6
package/dist/swarm.d.ts.map +1 -1
package/evals/README.md +38 -0
package/evals/coordinator-session.eval.ts +154 -0
package/evals/fixtures/coordinator-sessions.ts +328 -0
package/evals/lib/data-loader.ts +69 -0
package/evals/scorers/coordinator-discipline.evalite-test.ts +536 -0
package/evals/scorers/coordinator-discipline.ts +315 -0
package/evals/scorers/index.ts +12 -0
package/examples/plugin-wrapper-template.ts +303 -4
package/package.json +2 -2
package/src/compaction-hook.test.ts +8 -1
package/src/compaction-hook.ts +31 -21
package/src/eval-capture.test.ts +390 -0
package/src/eval-capture.ts +163 -4
package/src/hive.integration.test.ts +148 -0
package/src/hive.ts +89 -0
package/src/index.ts +68 -1
package/src/planning-guardrails.test.ts +387 -2
package/src/planning-guardrails.ts +289 -0
package/src/plugin.ts +10 -10
package/src/swarm-decompose.test.ts +195 -0
package/src/swarm-decompose.ts +72 -1
package/src/swarm-orchestrate.ts +44 -0
package/src/swarm-prompts.ts +20 -0
package/src/swarm-review.integration.test.ts +24 -29
package/src/swarm-review.ts +41 -0

package/evals/scorers/coordinator-discipline.evalite-test.ts (added)

@@ -0,0 +1,536 @@
+/**
+ * Tests for coordinator-discipline scorers
+ */
+import { describe, expect, it } from "bun:test";
+import type { CoordinatorSession } from "../../src/eval-capture.js";
+import {
+	overallDiscipline,
+	reviewThoroughness,
+	spawnEfficiency,
+	timeToFirstSpawn,
+	violationCount,
+} from "./coordinator-discipline.js";
+describe("violationCount", () => { // Cases for the violationCount scorer: zero, two, and ten VIOLATION events.
+	it("scores 1.0 for zero violations", () => { // Fixture: one DECISION event and no VIOLATION events.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			end_time: "2025-01-01T01:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "strategy_selected",
+					payload: { strategy: "file-based" },
+				},
+			],
+		};
+		const result = violationCount.scorer({ // The session reaches the scorer as a JSON string in `output`.
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("0 violations");
+	});
+	it("decreases score by 0.2 per violation", () => { // Fixture: exactly two VIOLATION events of different violation_type.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			end_time: "2025-01-01T01:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "VIOLATION",
+					violation_type: "coordinator_edited_file",
+					payload: { file: "test.ts" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "VIOLATION",
+					violation_type: "coordinator_ran_tests",
+					payload: { command: "bun test" },
+				},
+			],
+		};
+		const result = violationCount.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0.6); // 1.0 - 0.2 * 2. NOTE(review): exact float comparison; toBeCloseTo may be safer if the scorer's arithmetic ever changes — confirm.
+		expect(result.message).toContain("2 violations");
+	});
+	it("floors score at 0.0 for many violations", () => { // Fixture: ten VIOLATION events; the expected score is exactly 0.0.
+		const session: CoordinatorSession = { // No end_time is set on this session.
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: Array.from({ length: 10 }, (_, i) => ({ // Generates one event per index i = 0..9.
+				session_id: "test-session",
+				epic_id: "test-epic",
+				timestamp: `2025-01-01T00:00:${String(i).padStart(2, "0")}Z`, // Seconds field is zero-padded: 00..09.
+				event_type: "VIOLATION" as const,
+				violation_type: "coordinator_edited_file" as const,
+				payload: { file: `test${i}.ts` },
+			})),
+		};
+		const result = violationCount.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0.0);
+		expect(result.message).toContain("10 violations");
+	});
+});
+describe("spawnEfficiency", () => { // Cases for the spawnEfficiency scorer: worker_spawned events versus the subtask_count in decomposition_complete.
+	it("scores 1.0 when all subtasks have workers spawned", () => { // Fixture: subtask_count 3 and three worker_spawned events.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "decomposition_complete",
+					payload: { subtask_count: 3 },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-2" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:30Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-3" },
+				},
+			],
+		};
+		const result = spawnEfficiency.scorer({ // The session reaches the scorer as a JSON string in `output`.
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("3/3"); // Message is expected to report spawned/total.
+	});
+	it("scores less than 1.0 when some workers not spawned", () => { // Fixture: subtask_count 4 but only two worker_spawned events.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "decomposition_complete",
+					payload: { subtask_count: 4 },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-2" },
+				},
+			],
+		};
+		const result = spawnEfficiency.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0.5); // 2/4
+		expect(result.message).toContain("2/4");
+	});
+	it("returns 0 when no decomposition event found", () => { // Fixture: a worker_spawned event but no decomposition_complete event.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+			],
+		};
+		const result = spawnEfficiency.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0); // The spawned worker does not earn any credit without a decomposition event.
+		expect(result.message).toContain("No decomposition");
+	});
+});
+describe("reviewThoroughness", () => { // Cases for the reviewThoroughness scorer: review_completed events versus subtask_success outcomes.
+	it("scores 1.0 when all workers have reviews", () => { // Fixture: bd-1 and bd-2 each have a subtask_success outcome and a review_completed decision.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "OUTCOME",
+					outcome_type: "subtask_success",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "OUTCOME",
+					outcome_type: "subtask_success",
+					payload: { bead_id: "bd-2" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:30Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-2" },
+				},
+			],
+		};
+		const result = reviewThoroughness.scorer({ // The session reaches the scorer as a JSON string in `output`.
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("2/2"); // Message is expected to report reviewed/finished.
+	});
+	it("scores less than 1.0 when some workers missing reviews", () => { // Fixture: two subtask_success outcomes but a review_completed only for bd-1.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "OUTCOME",
+					outcome_type: "subtask_success",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "OUTCOME",
+					outcome_type: "subtask_success",
+					payload: { bead_id: "bd-2" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-1" },
+				},
+			],
+		};
+		const result = reviewThoroughness.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0.5); // 1/2
+		expect(result.message).toContain("1/2");
+	});
+	it("returns 1.0 when no workers finished", () => { // Fixture: no OUTCOME events at all; the expected score is still 1.0.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "strategy_selected",
+					payload: { strategy: "file-based" },
+				},
+			],
+		};
+		const result = reviewThoroughness.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(1.0);
+		expect(result.message).toContain("No finished workers");
+	});
+});
+describe("timeToFirstSpawn", () => { // Cases for the timeToFirstSpawn scorer: a 30-second spawn delay, no spawn, and no decomposition.
+	it("normalizes time to 0-1 range (faster is better)", () => {
+		// 30 seconds between decomposition_complete (00:00:00) and the first worker_spawned (00:00:30).
+		const session: CoordinatorSession = { // NOTE(review): start_time equals the decomposition timestamp, so this fixture does not show which one the scorer measures from — confirm.
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "decomposition_complete",
+					payload: { subtask_count: 3 },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:30Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+			],
+		};
+		const result = timeToFirstSpawn.scorer({ // The session reaches the scorer as a JSON string in `output`.
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		// A 30s delay is estimated to score around 0.95; the assertion only requires a score above 0.9.
+		expect(result.score).toBeGreaterThan(0.9);
+		expect(result.message).toContain("30000ms"); // The message is expected to report the delay in milliseconds.
+	});
+	it("returns 0 when no worker spawned", () => { // Fixture: a decomposition_complete event but no worker_spawned event.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "decomposition_complete",
+					payload: { subtask_count: 3 },
+				},
+			],
+		};
+		const result = timeToFirstSpawn.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0);
+		expect(result.message).toContain("No worker spawned");
+	});
+	it("returns 0 when no decomposition event", () => { // Fixture: a worker_spawned event but no decomposition_complete event.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+			],
+		};
+		const result = timeToFirstSpawn.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.score).toBe(0);
+		expect(result.message).toContain("No decomposition");
+	});
+});
+describe("overallDiscipline", () => { // Cases for the overallDiscipline composite scorer: a high score on a clean session, and the labels present in its message.
+	it("computes weighted composite score", () => {
+		// Perfect session: no VIOLATION events, both subtasks spawned (first spawn 10s after decomposition), both finished and both reviewed.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "decomposition_complete",
+					payload: { subtask_count: 2 },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:10Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:20Z",
+					event_type: "DECISION",
+					decision_type: "worker_spawned",
+					payload: { bead_id: "bd-2" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:10:00Z",
+					event_type: "OUTCOME",
+					outcome_type: "subtask_success",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:10:10Z",
+					event_type: "OUTCOME",
+					outcome_type: "subtask_success",
+					payload: { bead_id: "bd-2" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:10:20Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-1" },
+				},
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:10:30Z",
+					event_type: "DECISION",
+					decision_type: "review_completed",
+					payload: { bead_id: "bd-2" },
+				},
+			],
+		};
+		const result = overallDiscipline.scorer({ // The session reaches the scorer as a JSON string in `output`.
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		// A perfect session must score above 0.95; the exact weights are not pinned by this test.
+		expect(result.score).toBeGreaterThan(0.95);
+		expect(result.message).toContain("Overall");
+	});
+	it("includes breakdown in message", () => { // Only the message labels are checked here; the score is not asserted.
+		const session: CoordinatorSession = {
+			session_id: "test-session",
+			epic_id: "test-epic",
+			start_time: "2025-01-01T00:00:00Z",
+			events: [
+				{
+					session_id: "test-session",
+					epic_id: "test-epic",
+					timestamp: "2025-01-01T00:00:00Z",
+					event_type: "DECISION",
+					decision_type: "strategy_selected",
+					payload: { strategy: "file-based" },
+				},
+			],
+		};
+		const result = overallDiscipline.scorer({
+			output: JSON.stringify(session),
+			expected: {},
+		});
+		expect(result.message).toContain("Violations:"); // One label per component named in the message.
+		expect(result.message).toContain("Spawn:");
+		expect(result.message).toContain("Review:");
+		expect(result.message).toContain("Speed:");
+	});
+});