npm - opencode-swarm-plugin - Versions diffs - 0.37.0 → 0.39.1 - Mend

opencode-swarm-plugin 0.37.0 → 0.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

package/.env +2 -0
package/.hive/eval-results.json +26 -0
package/.hive/issues.jsonl +20 -5
package/.hive/memories.jsonl +35 -1
package/.opencode/eval-history.jsonl +12 -0
package/.turbo/turbo-build.log +4 -4
package/.turbo/turbo-test.log +319 -319
package/CHANGELOG.md +258 -0
package/README.md +50 -0
package/bin/swarm.test.ts +475 -0
package/bin/swarm.ts +385 -208
package/dist/compaction-hook.d.ts +1 -1
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/compaction-prompt-scoring.d.ts +124 -0
package/dist/compaction-prompt-scoring.d.ts.map +1 -0
package/dist/eval-capture.d.ts +81 -1
package/dist/eval-capture.d.ts.map +1 -1
package/dist/eval-gates.d.ts +84 -0
package/dist/eval-gates.d.ts.map +1 -0
package/dist/eval-history.d.ts +117 -0
package/dist/eval-history.d.ts.map +1 -0
package/dist/eval-learning.d.ts +216 -0
package/dist/eval-learning.d.ts.map +1 -0
package/dist/hive.d.ts +59 -0
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +87 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +823 -131
package/dist/plugin.js +655 -131
package/dist/post-compaction-tracker.d.ts +133 -0
package/dist/post-compaction-tracker.d.ts.map +1 -0
package/dist/swarm-decompose.d.ts +30 -0
package/dist/swarm-decompose.d.ts.map +1 -1
package/dist/swarm-orchestrate.d.ts +23 -0
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts +25 -1
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm.d.ts +19 -0
package/dist/swarm.d.ts.map +1 -1
package/evals/README.md +595 -94
package/evals/compaction-prompt.eval.ts +149 -0
package/evals/coordinator-behavior.eval.ts +8 -8
package/evals/fixtures/compaction-prompt-cases.ts +305 -0
package/evals/lib/compaction-loader.test.ts +248 -0
package/evals/lib/compaction-loader.ts +320 -0
package/evals/lib/data-loader.test.ts +345 -0
package/evals/lib/data-loader.ts +107 -6
package/evals/scorers/compaction-prompt-scorers.ts +145 -0
package/evals/scorers/compaction-scorers.ts +13 -13
package/evals/scorers/coordinator-discipline.evalite-test.ts +3 -2
package/evals/scorers/coordinator-discipline.ts +13 -13
package/examples/plugin-wrapper-template.ts +177 -8
package/package.json +7 -2
package/scripts/migrate-unknown-sessions.ts +349 -0
package/src/compaction-capture.integration.test.ts +257 -0
package/src/compaction-hook.test.ts +139 -2
package/src/compaction-hook.ts +113 -2
package/src/compaction-prompt-scorers.test.ts +299 -0
package/src/compaction-prompt-scoring.ts +298 -0
package/src/eval-capture.test.ts +422 -0
package/src/eval-capture.ts +94 -2
package/src/eval-gates.test.ts +306 -0
package/src/eval-gates.ts +218 -0
package/src/eval-history.test.ts +508 -0
package/src/eval-history.ts +214 -0
package/src/eval-learning.test.ts +378 -0
package/src/eval-learning.ts +360 -0
package/src/index.ts +61 -1
package/src/post-compaction-tracker.test.ts +251 -0
package/src/post-compaction-tracker.ts +237 -0
package/src/swarm-decompose.test.ts +40 -47
package/src/swarm-decompose.ts +2 -2
package/src/swarm-orchestrate.test.ts +270 -7
package/src/swarm-orchestrate.ts +100 -13
package/src/swarm-prompts.test.ts +121 -0
package/src/swarm-prompts.ts +297 -4
package/src/swarm-research.integration.test.ts +157 -0
package/src/swarm-review.ts +3 -3
/package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0

package/evals/compaction-prompt.eval.ts ADDED Viewed

@@ -0,0 +1,149 @@
+/**
+ * Compaction Prompt Quality Evaluation
+ *
+ * Tests that continuation prompts generated after context compaction meet
+ * quality criteria for coordinator resumption:
+ *
+ * 1. Epic ID Specificity (20%) - Real IDs not placeholders
+ * 2. Actionability (20%) - Specific tool calls with real values
+ * 3. Coordinator Identity (25%) - ASCII header + strong mandates
+ * 4. Forbidden Tools (15%) - Lists forbidden tools by name
+ * 5. Post-Compaction Discipline (20%) - First tool is correct
+ *
+ * ## Why This Matters
+ *
+ * After compaction, coordinators lose context. The continuation prompt is
+ * their ONLY guide to resume. Bad prompts cause:
+ * - Coordinators editing files (should delegate to workers)
+ * - Generic "check status" instead of actual tool calls
+ * - Lost epic IDs (can't resume coordination)
+ *
+ * ## Test Strategy
+ *
+ * - 7 synthetic fixtures covering perfect, bad, and edge-case prompts
+ * - Each fixture tests specific failure modes
+ * - Composite scorer validates overall quality
+ *
+ * Run with: bun run eval:compaction
+ */
+import { evalite } from "evalite";
+import { compactionPromptCases } from "./fixtures/compaction-prompt-cases.js";
+import {
+	actionability,
+	coordinatorIdentity,
+	epicIdSpecificity,
+	forbiddenToolsPresent,
+	postCompactionDiscipline,
+} from "./scorers/compaction-prompt-scorers.js";
+/**
+ * Main eval: Compaction Prompt Quality
+ *
+ * Tests all cases from fixtures/compaction-prompt-cases.ts against all five
+ * scorers. Each case's `prompt` is used as the eval input and its `expected`
+ * flags are used as the expected value.
+ */
+evalite("Compaction Prompt Quality", {
+	data: async () =>
+		compactionPromptCases.map((testCase) => ({
+			input: testCase.prompt,
+			expected: testCase.expected,
+		})),
+	task: async (input) => {
+		// Pass-through task - fixture already has the prompt; it is only serialized to a JSON string
+		// In real usage, this would call the LLM to generate the prompt
+		return JSON.stringify(input);
+	},
+	scorers: [
+		epicIdSpecificity,
+		actionability,
+		coordinatorIdentity,
+		forbiddenToolsPresent,
+		postCompactionDiscipline,
+	],
+});
+/**
+ * Perfect Prompt Verification
+ *
+ * Intended to ensure our "perfect" fixture actually scores 100%.
+ *
+ * Runs only the fixture at index 0 of compactionPromptCases through all five
+ * scorers, with every expected flag set to true. The fixture is selected by
+ * position, so this eval relies on the "perfect" case staying first.
+ */
+evalite("Perfect Prompt Scores 100%", {
+	data: async () => [
+		{
+			input: compactionPromptCases[0].prompt, // First case is "perfect"
+			expected: {
+				hasRealEpicId: true,
+				isActionable: true,
+				hasCoordinatorIdentity: true,
+				listsForbiddenTools: true,
+				hasCorrectFirstTool: true,
+			},
+		},
+	],
+	task: async (input) => JSON.stringify(input),
+	scorers: [
+		epicIdSpecificity,
+		actionability,
+		coordinatorIdentity,
+		forbiddenToolsPresent,
+		postCompactionDiscipline,
+	],
+});
+/**
+ * Placeholder Detection
+ *
+ * Intended to ensure we catch common placeholder patterns.
+ *
+ * Runs only the fixture at index 1 of compactionPromptCases (selected by
+ * position) through the epicIdSpecificity scorer, expecting hasRealEpicId
+ * to be false.
+ */
+evalite("Placeholder Detection", {
+	data: async () => [
+		{
+			input: compactionPromptCases[1].prompt, // Placeholder case
+			expected: { hasRealEpicId: false },
+		},
+	],
+	task: async (input) => JSON.stringify(input),
+	scorers: [epicIdSpecificity],
+});
+/**
+ * Generic Instructions Detection
+ *
+ * Intended to ensure we fail prompts with vague language instead of tool calls.
+ *
+ * Runs only the fixture at index 2 of compactionPromptCases (selected by
+ * position) through the actionability scorer, expecting isActionable to be
+ * false.
+ */
+evalite("Generic Instructions Fail", {
+	data: async () => [
+		{
+			input: compactionPromptCases[2].prompt, // Generic case
+			expected: { isActionable: false },
+		},
+	],
+	task: async (input) => JSON.stringify(input),
+	scorers: [actionability],
+});
+/**
+ * First Tool Discipline
+ *
+ * Intended to ensure the first suggested tool is correct (swarm_status/inbox,
+ * not edit).
+ *
+ * Runs only the fixture at index 5 of compactionPromptCases (selected by
+ * position) through the postCompactionDiscipline scorer, expecting
+ * hasCorrectFirstTool to be false.
+ */
+evalite("First Tool Discipline", {
+	data: async () => [
+		{
+			input: compactionPromptCases[5].prompt, // Wrong first tool
+			expected: { hasCorrectFirstTool: false },
+		},
+	],
+	task: async (input) => JSON.stringify(input),
+	scorers: [postCompactionDiscipline],
+});

package/evals/coordinator-behavior.eval.ts CHANGED Viewed

@@ -187,20 +187,20 @@ export const coordinatorMindset = createScorer({
 export const overallCoordinatorBehavior = createScorer({
   name: "Overall Coordinator Behavior",
   description: "Composite score: does the LLM behave like a coordinator?",
-  scorer: ({ output }) => {
-    const toolsResult = mentionsCoordinatorTools.scorer({ output, expected: undefined });
-    const avoidsResult = avoidsWorkerBehaviors.scorer({ output, expected: undefined });
-    const mindsetResult = coordinatorMindset.scorer({ output, expected: undefined });
+  scorer: async ({ output, expected, input }) => {
+    const toolsResult = await mentionsCoordinatorTools({ output, expected, input });
+    const avoidsResult = await avoidsWorkerBehaviors({ output, expected, input });
+    const mindsetResult = await coordinatorMindset({ output, expected, input });
     // Weighted average (tools 0.3, avoids-worker 0.4, mindset 0.3): avoiding worker behavior is most important
     const score =
-      toolsResult.score * 0.3 +
-      avoidsResult.score * 0.4 +
-      mindsetResult.score * 0.3;
+      (toolsResult.score ?? 0) * 0.3 +
+      (avoidsResult.score ?? 0) * 0.4 +
+      (mindsetResult.score ?? 0) * 0.3;
     return {
       score,
-      message: `Tools: ${(toolsResult.score * 100).toFixed(0)}%, Avoids Worker: ${(avoidsResult.score * 100).toFixed(0)}%, Mindset: ${(mindsetResult.score * 100).toFixed(0)}%`,
+      message: `Tools: ${((toolsResult.score ?? 0) * 100).toFixed(0)}%, Avoids Worker: ${((avoidsResult.score ?? 0) * 100).toFixed(0)}%, Mindset: ${((mindsetResult.score ?? 0) * 100).toFixed(0)}%`,
     };
   },
 });

package/evals/fixtures/compaction-prompt-cases.ts ADDED Viewed

@@ -0,0 +1,305 @@
+/**
+ * Test cases for compaction prompt quality evaluation
+ *
+ * Each case represents a continuation prompt that could be generated
+ * after context compaction, whether good or bad. Tests score whether prompts have:
+ * - Real epic IDs (not placeholders)
+ * - Actionable tool calls with specific values
+ * - Strong coordinator identity
+ * - Explicit forbidden tools list
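+ * - Correct first tool suggestion
+ *
+ * NOTE: Array order is significant. compaction-prompt.eval.ts selects cases
+ * by position ([0] perfect, [1] placeholder, [2] generic, [5] wrong first
+ * tool), so append new cases at the end rather than inserting or reordering.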
+ */
+import type { CompactionPrompt } from "../../src/compaction-prompt-scoring.js";
+/**
+ * Compaction prompt test case structure
+ *
+ * Pairs one continuation prompt with the outcome expected for each of the
+ * five quality criteria.
+ */
+export interface CompactionPromptTestCase {
+	name: string;
+	description: string;
+	/**
+	 * The continuation prompt under test
+	 */
+	prompt: CompactionPrompt;
+	/**
+	 * Expected scoring outcomes (a flag is false when the prompt is expected
+	 * to fail that criterion)
+	 */
+	expected: {
+		/**
+		 * True when the prompt is expected to contain real epic IDs (not placeholders)
+		 */
+		hasRealEpicId: boolean;
+		/**
+		 * True when the prompt is expected to contain actionable tool calls
+		 */
+		isActionable: boolean;
+		/**
+		 * True when the prompt is expected to show strong coordinator identity
+		 */
+		hasCoordinatorIdentity: boolean;
+		/**
+		 * True when the prompt is expected to list forbidden tools by name
+		 */
+		listsForbiddenTools: boolean;
+		/**
+		 * True when the first suggested tool is expected to be correct
+		 */
+		hasCorrectFirstTool: boolean;
+	};
+}
+export const compactionPromptCases: CompactionPromptTestCase[] = [
+	// ============================================================================
+	// PERFECT PROMPT: All criteria met
+	// ============================================================================
+	{
+		name: "Perfect coordinator resumption prompt",
+		description:
+			"Ideal continuation prompt with all quality criteria met: real IDs, actionable tools, strong identity, forbidden list, correct first tool",
+		prompt: {
+			content: `
+┌─────────────────────────────────────────────────────────────┐
+│                 🐝 COORDINATOR RESUMPTION                   │
+│                   Context Compacted                         │
+└─────────────────────────────────────────────────────────────┘
+You are the COORDINATOR of swarm epic mjkweh2p4u5.
+## IMMEDIATE ACTIONS (Do These FIRST)
+1. swarm_status(epic_id="mjkweh2p4u5", project_key="/Users/joel/Code/myapp")
+2. swarmmail_inbox(limit=5)
+3. Review any completed work
+## FORBIDDEN TOOLS (NEVER Use These)
+Coordinators do NOT edit code directly. These tools are FORBIDDEN:
+- edit
+- write
+- bash (for file modifications)
+Use swarm_spawn_subtask to delegate work to workers.
+## Your Role
+You orchestrate. You do NOT implement. Spawn workers, monitor progress, unblock, ship.
+ALWAYS spawn workers for file modifications.
+NEVER edit files yourself.
+NON-NEGOTIABLE: Check status and inbox before making decisions.
+`,
+		},
+		expected: {
+			hasRealEpicId: true,
+			isActionable: true,
+			hasCoordinatorIdentity: true,
+			listsForbiddenTools: true,
+			hasCorrectFirstTool: true,
+		},
+	},
+	// ============================================================================
+	// BAD PROMPT: Placeholder epic ID
+	// ============================================================================
+	{
+		name: "Prompt with placeholder epic ID",
+		description:
+			"Contains placeholder <epic-id> instead of real ID - fails specificity check",
+		prompt: {
+			content: `
+## Coordinator Resumption
+You are coordinating epic <epic-id>.
+Check the status with:
+1. swarm_status(epic_id="<epic-id>", project_key="<path>")
+2. swarmmail_inbox()
+Continue orchestrating the swarm.
+`,
+		},
+		expected: {
+			hasRealEpicId: false, // <epic-id> is a placeholder
+			isActionable: false, // Has placeholders in tool calls
+			hasCoordinatorIdentity: false, // No ASCII header or strong language
+			listsForbiddenTools: false, // Doesn't list forbidden tools
+			hasCorrectFirstTool: true, // First tool is swarm_status (correct)
+		},
+	},
+	// ============================================================================
+	// BAD PROMPT: Generic instructions, no actionable tools
+	// ============================================================================
+	{
+		name: "Generic instructions without specific tools",
+		description:
+			"Vague language like 'check status' without actual tool calls - fails actionability",
+		prompt: {
+			content: `
+You were coordinating a swarm before compaction.
+To resume:
+- Check the status of workers
+- Read your messages
+- Continue where you left off
+Remember, you're the coordinator. Keep the work moving forward.
+`,
+		},
+		expected: {
+			hasRealEpicId: false, // No epic ID at all
+			isActionable: false, // No specific tool calls
+			hasCoordinatorIdentity: false, // No strong identity reinforcement
+			listsForbiddenTools: false, // No forbidden tools list
+			hasCorrectFirstTool: false, // No first tool specified
+		},
+	},
+	// ============================================================================
+	// BAD PROMPT: Weak coordinator identity
+	// ============================================================================
+	{
+		name: "Weak coordinator identity",
+		description:
+			"Has real ID and tools but lacks strong identity reinforcement - fails coordinator identity check",
+		prompt: {
+			content: `
+## Swarm Resumption
+Epic ID: mjkweh9x2a1
+Project: /Users/joel/Code/myapp
+You can check status with:
+swarm_status(epic_id="mjkweh9x2a1", project_key="/Users/joel/Code/myapp")
+And read messages:
+swarmmail_inbox(limit=5)
+Please continue coordinating.
+`,
+		},
+		expected: {
+			hasRealEpicId: true, // Has real ID
+			isActionable: true, // Has specific tool calls
+			hasCoordinatorIdentity: false, // No ASCII header, no NEVER/ALWAYS/NON-NEGOTIABLE
+			listsForbiddenTools: false, // No forbidden tools list
+			hasCorrectFirstTool: true, // First tool is swarm_status
+		},
+	},
+	// ============================================================================
+	// BAD PROMPT: Missing forbidden tools list
+	// ============================================================================
+	{
+		name: "Missing forbidden tools list",
+		description:
+			"Good prompt but doesn't explicitly list forbidden tools - coordinators need this reminder",
+		prompt: {
+			content: `
+┌─────────────────────────────────────────────────────────────┐
+│                 🐝 COORDINATOR RESUMPTION                   │
+└─────────────────────────────────────────────────────────────┘
+You are the COORDINATOR of epic mjkweh3k8p2.
+## IMMEDIATE ACTIONS
+1. swarm_status(epic_id="mjkweh3k8p2", project_key="/Users/joel/Code/myapp")
+2. swarmmail_inbox(limit=5)
+## Your Role
+ALWAYS delegate to workers.
+NEVER edit files directly.
+Coordinators orchestrate, workers implement.
+`,
+		},
+		expected: {
+			hasRealEpicId: true,
+			isActionable: true,
+			hasCoordinatorIdentity: true, // Has ASCII + NEVER/ALWAYS
+			listsForbiddenTools: false, // Doesn't list "edit", "write", "bash" by name
+			hasCorrectFirstTool: true,
+		},
+	},
+	// ============================================================================
+	// BAD PROMPT: Wrong first tool (edit instead of swarm_status)
+	// ============================================================================
+	{
+		name: "Wrong first tool suggestion",
+		description:
+			"Suggests edit/write as first action - coordinator discipline failure",
+		prompt: {
+			content: `
+┌─────────────────────────────────────────────────────────────┐
+│                 🐝 COORDINATOR RESUMPTION                   │
+└─────────────────────────────────────────────────────────────┘
+You are the COORDINATOR of epic mjkweh7q9n4.
+## IMMEDIATE ACTIONS
+1. edit(filePath="/src/app.ts", oldString="...", newString="...")
+2. swarm_status(epic_id="mjkweh7q9n4", project_key="/Users/joel/Code/myapp")
+## FORBIDDEN TOOLS
+- edit
+- write
+- bash (for file mods)
+NEVER edit files yourself.
+ALWAYS delegate to workers.
+`,
+		},
+		expected: {
+			hasRealEpicId: true,
+			isActionable: true,
+			hasCoordinatorIdentity: true,
+			listsForbiddenTools: true,
+			hasCorrectFirstTool: false, // First tool is edit, should be swarm_status/inbox
+		},
+	},
+	// ============================================================================
+	// EDGE CASE: Multiple epics mentioned
+	// ============================================================================
+	{
+		name: "Multiple epic IDs in prompt",
+		description:
+			"Prompt references multiple epics - should still pass if at least one is real",
+		prompt: {
+			content: `
+┌─────────────────────────────────────────────────────────────┐
+│                 🐝 COORDINATOR RESUMPTION                   │
+└─────────────────────────────────────────────────────────────┘
+You are coordinating epics:
+- mjkweh5t2x8 (in progress)
+- mjkweh6u3y9 (blocked)
+## IMMEDIATE ACTIONS
+1. swarm_status(epic_id="mjkweh5t2x8", project_key="/Users/joel/Code/myapp")
+2. swarmmail_inbox(limit=5)
+## FORBIDDEN TOOLS
+- edit
+- write
+- bash
+ALWAYS check status first.
+NEVER edit files directly.
+`,
+		},
+		expected: {
+			hasRealEpicId: true, // Has real IDs
+			isActionable: true,
+			hasCoordinatorIdentity: true,
+			listsForbiddenTools: true,
+			hasCorrectFirstTool: true,
+		},
+	},
+];