npm - opencode-swarm-plugin - Versions diffs - 0.31.7 → 0.33.0 - Mend

opencode-swarm-plugin 0.31.7 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

package/.turbo/turbo-build.log +4 -4
package/.turbo/turbo-test.log +324 -316
package/CHANGELOG.md +394 -0
package/README.md +129 -181
package/bin/swarm.test.ts +31 -0
package/bin/swarm.ts +635 -140
package/dist/compaction-hook.d.ts +1 -1
package/dist/compaction-hook.d.ts.map +1 -1
package/dist/hive.d.ts.map +1 -1
package/dist/index.d.ts +17 -2
package/dist/index.d.ts.map +1 -1
package/dist/index.js +653 -139
package/dist/memory-tools.d.ts.map +1 -1
package/dist/memory.d.ts +5 -4
package/dist/memory.d.ts.map +1 -1
package/dist/observability-tools.d.ts +116 -0
package/dist/observability-tools.d.ts.map +1 -0
package/dist/plugin.js +648 -136
package/dist/skills.d.ts.map +1 -1
package/dist/swarm-orchestrate.d.ts +29 -5
package/dist/swarm-orchestrate.d.ts.map +1 -1
package/dist/swarm-prompts.d.ts +66 -0
package/dist/swarm-prompts.d.ts.map +1 -1
package/dist/swarm.d.ts +17 -2
package/dist/swarm.d.ts.map +1 -1
package/evals/lib/{data-loader.test.ts → data-loader.evalite-test.ts} +7 -6
package/evals/lib/data-loader.ts +1 -1
package/evals/scorers/{outcome-scorers.test.ts → outcome-scorers.evalite-test.ts} +1 -1
package/examples/plugin-wrapper-template.ts +316 -12
package/global-skills/swarm-coordination/SKILL.md +118 -8
package/package.json +3 -2
package/src/compaction-hook.ts +5 -3
package/src/hive.integration.test.ts +83 -1
package/src/hive.ts +37 -12
package/src/index.ts +25 -1
package/src/mandate-storage.integration.test.ts +601 -0
package/src/memory-tools.ts +6 -4
package/src/memory.integration.test.ts +117 -49
package/src/memory.test.ts +41 -217
package/src/memory.ts +12 -8
package/src/observability-tools.test.ts +346 -0
package/src/observability-tools.ts +594 -0
package/src/repo-crawl.integration.test.ts +441 -0
package/src/skills.integration.test.ts +1192 -0
package/src/skills.test.ts +42 -1
package/src/skills.ts +8 -4
package/src/structured.integration.test.ts +817 -0
package/src/swarm-deferred.integration.test.ts +157 -0
package/src/swarm-deferred.test.ts +38 -0
package/src/swarm-mail.integration.test.ts +15 -19
package/src/swarm-orchestrate.integration.test.ts +282 -0
package/src/swarm-orchestrate.test.ts +123 -0
package/src/swarm-orchestrate.ts +279 -201
package/src/swarm-prompts.test.ts +481 -0
package/src/swarm-prompts.ts +297 -0
package/src/swarm-research.integration.test.ts +544 -0
package/src/swarm-research.test.ts +698 -0
package/src/swarm-research.ts +472 -0
package/src/swarm-review.integration.test.ts +290 -0
package/src/swarm.integration.test.ts +23 -20
package/src/swarm.ts +6 -3
package/src/tool-adapter.integration.test.ts +1221 -0

package/examples/plugin-wrapper-template.ts CHANGED Viewed

@@ -11,6 +11,7 @@
  * - OPENCODE_SESSION_ID: Passed to CLI for session state persistence
  * - OPENCODE_MESSAGE_ID: Passed to CLI for context
  * - OPENCODE_AGENT: Passed to CLI for context
+ * - SWARM_PROJECT_DIR: Project directory (critical for database path)
  */
 import type { Plugin, PluginInput, Hooks } from "@opencode-ai/plugin";
 import { tool } from "@opencode-ai/plugin";
@@ -18,6 +19,10 @@ import { spawn } from "child_process";
 const SWARM_CLI = "swarm";
+// Module-level project directory - set during plugin initialization
+// This is CRITICAL: without it, the CLI uses process.cwd() which may be wrong
+let projectDirectory: string = process.cwd();
 // =============================================================================
 // CLI Execution Helper
 // =============================================================================
@@ -27,6 +32,8 @@ const SWARM_CLI = "swarm";
  *
  * Spawns `swarm tool <name> --json '<args>'` and returns the result.
  * Passes session context via environment variables.
+ *
+ * IMPORTANT: Runs in projectDirectory (set by OpenCode) not process.cwd()
  */
 async function execTool(
   name: string,
@@ -40,12 +47,14 @@ async function execTool(
       : ["tool", name];
     const proc = spawn(SWARM_CLI, cliArgs, {
+      cwd: projectDirectory, // Run in project directory, not plugin directory
       stdio: ["ignore", "pipe", "pipe"],
       env: {
         ...process.env,
         OPENCODE_SESSION_ID: ctx.sessionID,
         OPENCODE_MESSAGE_ID: ctx.messageID,
         OPENCODE_AGENT: ctx.agent,
+        SWARM_PROJECT_DIR: projectDirectory, // Also pass as env var
       },
     });
@@ -896,6 +905,252 @@ interface SwarmDetection {
   reasons: string[];
 }
+/**
+ * Structured state snapshot for LLM-powered compaction
+ *
+ * This is passed to the lite model to generate a continuation prompt
+ * with concrete data instead of just instructions.
+ *
+ * Produced by querySwarmState() and embedded into the prompt text via
+ * JSON.stringify() in generateCompactionPrompt(), so every field ends up
+ * verbatim in the text sent to the model.
+ */
+interface SwarmStateSnapshot {
+  sessionID: string; // Session ID handed to querySwarmState() by the compaction hook
+  detection: { // Copied from the detectSwarm() result
+    confidence: "high" | "medium" | "low" | "none";
+    reasons: string[];
+  };
+  epic?: { // First non-closed cell of type "epic"; absent when none is found or the query fails
+    id: string;
+    title: string;
+    status: string;
+    subtasks: Array<{ // Cells whose parent_id equals the epic's id
+      id: string;
+      title: string;
+      status: "open" | "in_progress" | "blocked" | "closed"; // Cast from the raw cell status without validation
+      files: string[]; // Empty array when the cell carries no files
+      assignedTo?: string; // NOTE(review): not populated by querySwarmState() at present
+    }>;
+  };
+  messages: Array<{ // Always [] for now - swarm mail is not queried yet (see TODO in querySwarmState)
+    from: string;
+    to: string[];
+    subject: string;
+    body: string;
+    timestamp: number; // NOTE(review): unit (ms vs s since epoch) is not established here - confirm
+    importance?: string;
+  }>;
+  reservations: Array<{ // Always [] for now - reservations are not queried yet
+    agent: string;
+    paths: string[];
+    exclusive: boolean;
+    expiresAt: number; // NOTE(review): unit (ms vs s since epoch) is not established here - confirm
+  }>;
+}
+/**
+ * Query actual swarm state using spawn (like detectSwarm does)
+ *
+ * Returns structured snapshot of current state for LLM compaction.
+ * Shells out to `swarm tool hive_query` (run in projectDirectory) to get real data.
+ *
+ * The CLI query is best-effort: a spawn failure, a non-zero exit code,
+ * unparseable stdout or a payload that is not an array all degrade to a
+ * snapshot without an `epic`. Only a failure of detectSwarm() itself
+ * rejects the returned promise.
+ *
+ * @param sessionID - Session being compacted; copied into the snapshot as-is
+ * @returns Snapshot with detection info, the first non-closed epic (if any)
+ *          plus its subtasks, and empty messages/reservations (not queried yet)
+ */
+async function querySwarmState(sessionID: string): Promise<SwarmStateSnapshot> {
+  // Shape of the cell fields this function reads from the hive_query output
+  type HiveCell = {
+    id: string;
+    title: string;
+    status: string;
+    type?: string;
+    parent_id?: string;
+    files?: string[];
+  };
+  // Run detection once up front - both the full and the minimal snapshot need it
+  const { confidence, reasons } = await detectSwarm();
+  const detection = { confidence, reasons };
+  try {
+    // Query cells via swarm CLI
+    const cellsResult = await new Promise<{ exitCode: number; stdout: string }>(
+      (resolve) => {
+        const proc = spawn(SWARM_CLI, ["tool", "hive_query"], {
+          cwd: projectDirectory,
+          stdio: ["ignore", "pipe", "pipe"],
+        });
+        let stdout = "";
+        // Decode as UTF-8 in the stream so multibyte characters split across
+        // chunks are not corrupted by per-chunk Buffer-to-string conversion
+        proc.stdout.setEncoding("utf8");
+        proc.stdout.on("data", (chunk: string) => {
+          stdout += chunk;
+        });
+        // stderr is piped but unused: drain it so a chatty CLI cannot fill
+        // the pipe and block forever (which would leave this promise pending)
+        proc.stderr.resume();
+        // Without an 'error' listener a failed spawn (e.g. CLI not installed)
+        // raises an unhandled 'error' event instead of degrading gracefully
+        proc.on("error", () => resolve({ exitCode: 1, stdout: "" }));
+        proc.on("close", (exitCode) =>
+          resolve({ exitCode: exitCode ?? 1, stdout }),
+        );
+      },
+    );
+    const parsed: unknown =
+      cellsResult.exitCode === 0 ? JSON.parse(cellsResult.stdout) : [];
+    // Anything other than an array is treated as "no cells"
+    const cells: HiveCell[] = Array.isArray(parsed) ? parsed : [];
+    // Find active epic (first unclosed epic)
+    const epic = cells.find(
+      (c) => c.type === "epic" && c.status !== "closed",
+    );
+    // Get subtasks if we have an epic with a usable id
+    const subtasks =
+      epic && epic.id ? cells.filter((c) => c.parent_id === epic.id) : [];
+    // TODO: Query swarm mail for messages and reservations
+    // For MVP, use empty arrays - the fallback chain handles this
+    const messages: SwarmStateSnapshot["messages"] = [];
+    const reservations: SwarmStateSnapshot["reservations"] = [];
+    return {
+      sessionID,
+      detection,
+      epic: epic
+        ? {
+            id: epic.id,
+            title: epic.title,
+            status: epic.status,
+            subtasks: subtasks.map((s) => ({
+              id: s.id,
+              title: s.title,
+              // Raw CLI value; not validated against the union
+              status: s.status as "open" | "in_progress" | "blocked" | "closed",
+              files: s.files || [],
+            })),
+          }
+        : undefined,
+      messages,
+      reservations,
+    };
+  } catch (err) {
+    // If query fails, return minimal snapshot (but leave a trace of why)
+    console.error(
+      "[Swarm Compaction] hive_query failed, using minimal snapshot:",
+      err,
+    );
+    return {
+      sessionID,
+      detection,
+      messages: [],
+      reservations: [],
+    };
+  }
+}
+/**
+ * Generate compaction prompt using LLM
+ *
+ * Shells out to `opencode run -m <liteModel>` with structured state.
+ * The model comes from OPENCODE_LITE_MODEL, defaulting to
+ * "claude-3-5-haiku-20241022". The snapshot is embedded in the prompt as
+ * pretty-printed JSON and the prompt is passed as a single CLI argument.
+ *
+ * Timeout: 30 seconds (child receives SIGTERM, result is treated as failure)
+ *
+ * @param snapshot - Swarm state to describe to the model
+ * @returns Trimmed stdout of the CLI as the continuation prompt, or null on
+ *          spawn error, timeout, non-zero exit code or empty output
+ */
+async function generateCompactionPrompt(
+  snapshot: SwarmStateSnapshot,
+): Promise<string | null> {
+  const timeoutMs = 30000;
+  try {
+    const liteModel =
+      process.env.OPENCODE_LITE_MODEL || "claude-3-5-haiku-20241022";
+    const promptText = `You are generating a continuation prompt for a compacted swarm coordination session.
+Analyze this swarm state and generate a structured markdown prompt that will be given to the resumed session:
+${JSON.stringify(snapshot, null, 2)}
+Generate a prompt following this structure:
+# 🐝 Swarm Continuation - [Epic Title or "Unknown"]
+You are resuming coordination of an active swarm that was interrupted by context compaction.
+## Epic State
+**ID:** [epic ID or "Unknown"]
+**Title:** [epic title or "No active epic"]
+**Status:** [X/Y subtasks complete]
+**Project:** ${projectDirectory}
+## Subtask Status
+### ✅ Completed (N)
+[List completed subtasks with IDs]
+### 🚧 In Progress (N)
+[List in-progress subtasks with IDs, files, agents if known]
+### 🚫 Blocked (N)
+[List blocked subtasks]
+### ⏳ Pending (N)
+[List pending subtasks]
+## Next Actions (IMMEDIATE)
+[List 3-5 concrete actions with actual commands, using real IDs from the state]
+## Coordinator Reminders
+- **You are the coordinator** - Don't wait for instructions, orchestrate
+- **Monitor actively** - Check messages every ~10 minutes
+- **Unblock aggressively** - Resolve dependencies immediately
+- **Review thoroughly** - 3-strike rule enforced
+- **Ship it** - When all subtasks done, close the epic
+Keep the prompt concise but actionable. Use actual data from the snapshot, not placeholders.`;
+    const result = await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
+      (resolve, reject) => {
+        const proc = spawn("opencode", ["run", "-m", liteModel, "--", promptText], {
+          cwd: projectDirectory,
+          stdio: ["ignore", "pipe", "pipe"],
+        });
+        // Single timeout mechanism. It is cleared as soon as the child
+        // finishes; otherwise the pending timer keeps the event loop alive
+        // for the full 30s and later fires kill() on an exited process.
+        const timer = setTimeout(() => {
+          proc.kill("SIGTERM");
+          reject(new Error("LLM compaction timeout (30s)"));
+        }, timeoutMs);
+        let stdout = "";
+        let stderr = "";
+        // Decode in the stream: the expected output contains emoji, and a
+        // multibyte character split across chunks would otherwise be mangled
+        proc.stdout.setEncoding("utf8");
+        proc.stderr.setEncoding("utf8");
+        proc.stdout.on("data", (chunk: string) => {
+          stdout += chunk;
+        });
+        proc.stderr.on("data", (chunk: string) => {
+          stderr += chunk;
+        });
+        proc.on("close", (exitCode) => {
+          clearTimeout(timer);
+          // exitCode is null when the child was killed by a signal
+          resolve({ exitCode: exitCode ?? 1, stdout, stderr });
+        });
+        proc.on("error", (err) => {
+          clearTimeout(timer);
+          reject(err);
+        });
+      },
+    );
+    if (result.exitCode !== 0) {
+      console.error(
+        "[Swarm Compaction] opencode run failed:",
+        result.stderr,
+      );
+      return null;
+    }
+    // Extract the prompt from stdout (LLM may wrap in markdown)
+    const prompt = result.stdout.trim();
+    return prompt.length > 0 ? prompt : null;
+  } catch (err) {
+    console.error("[Swarm Compaction] LLM generation failed:", err);
+    return null;
+  }
+}
 /**
  * Check for swarm sign - evidence a swarm passed through
  *
@@ -1058,9 +1313,11 @@ Extract from session context:
 1. \`swarm_status(epic_id="<epic>", project_key="<path>")\` - Get current state
 2. \`swarmmail_inbox(limit=5)\` - Check for agent messages
-3. **Spawn ready subtasks** - Don't wait, fire them off
-4. **Unblock blocked work** - Resolve dependencies, reassign if needed
-5. **Collect completed work** - Close done subtasks, verify quality
+3. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Review any completed work
+4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\` - Approve or request changes
+5. **Spawn ready subtasks** - Don't wait, fire them off
+6. **Unblock blocked work** - Resolve dependencies, reassign if needed
+7. **Collect completed work** - Close done subtasks, verify quality
 ### Keep the Swarm Cooking
@@ -1113,17 +1370,26 @@ Include this in your summary:
 "This is an active swarm. Check swarm_status and swarmmail_inbox immediately."
 `;
-// Extended hooks type to include experimental compaction hook
+// Extended hooks type to include experimental compaction hook with new prompt API
+type CompactionOutput = {
+  context: string[]; // Context blocks; the compaction hook appends to this via push()
+  prompt?: string; // NEW API from OpenCode PR #5907 - the hook assigns it only when the key already exists on the object
+};
 type ExtendedHooks = Hooks & {
   "experimental.session.compacting"?: (
     input: { sessionID: string },
-    output: { context: string[] },
+    output: CompactionOutput,
   ) => Promise<void>;
 };
 export const SwarmPlugin: Plugin = async (
-  _input: PluginInput,
+  input: PluginInput,
 ): Promise<ExtendedHooks> => {
+  // CRITICAL: Set project directory from OpenCode input
+  // Without this, CLI uses wrong database path
+  projectDirectory = input.directory;
   return {
     tool: {
       // Beads
@@ -1186,23 +1452,61 @@ export const SwarmPlugin: Plugin = async (
       skills_execute,
     },
-    // Swarm-aware compaction hook - injects context based on detection confidence
+    // Swarm-aware compaction hook with LLM-powered continuation prompts
+    // Fallback chain (in order): LLM-generated prompt → static context → detection fallback → none
     "experimental.session.compacting": async (
-      _input: { sessionID: string },
-      output: { context: string[] },
+      input: { sessionID: string },
+      output: CompactionOutput,
     ) => {
       const detection = await detectSwarm();
       if (detection.confidence === "high" || detection.confidence === "medium") {
-        // Definite or probable swarm - inject full context
+        // Definite or probable swarm - try LLM-powered compaction
+        try {
+          // Level 1: Query actual state
+          const snapshot = await querySwarmState(input.sessionID);
+          // Level 2: Generate prompt with LLM
+          const llmPrompt = await generateCompactionPrompt(snapshot);
+          if (llmPrompt) {
+            // SUCCESS: Use LLM-generated prompt
+            const header = `[Swarm compaction: LLM-generated, ${detection.reasons.join(", ")}]\n\n`;
+            // Progressive enhancement: use new API if available
+            if ("prompt" in output) {
+              output.prompt = header + llmPrompt;
+            } else {
+              output.context.push(header + llmPrompt);
+            }
+            console.log(
+              "[Swarm Compaction] Using LLM-generated continuation prompt",
+            );
+            return;
+          }
+          // LLM failed, fall through to static prompt
+          console.log(
+            "[Swarm Compaction] LLM generation returned null, using static prompt",
+          );
+        } catch (err) {
+          // LLM failed, fall through to static prompt
+          console.error(
+            "[Swarm Compaction] LLM generation failed, using static prompt:",
+            err,
+          );
+        }
+        // Level 3: Fall back to static context
         const header = `[Swarm detected: ${detection.reasons.join(", ")}]\n\n`;
         output.context.push(header + SWARM_COMPACTION_CONTEXT);
       } else if (detection.confidence === "low") {
-        // Possible swarm - inject fallback detection prompt
+        // Level 4: Possible swarm - inject fallback detection prompt
         const header = `[Possible swarm: ${detection.reasons.join(", ")}]\n\n`;
         output.context.push(header + SWARM_DETECTION_FALLBACK);
       }
-      // confidence === "none" - no injection, probably not a swarm
+      // Level 5: confidence === "none" - no injection, probably not a swarm
     },
   };
 };

package/global-skills/swarm-coordination/SKILL.md CHANGED Viewed

@@ -13,6 +13,8 @@ tools:
   - swarm_complete
   - swarm_status
   - swarm_progress
+  - swarm_review
+  - swarm_review_feedback
   - hive_create_epic
   - hive_query
   - swarmmail_init
@@ -442,19 +444,120 @@ for (const subtask of subtasks) {
 }
 ```
-### Phase 6: Monitor & Intervene
+### Phase 6: MANDATORY Review Loop (NON-NEGOTIABLE)
-```typescript
-// Check progress
-const status = await swarm_status({ epic_id, project_key });
+**⚠️ AFTER EVERY Worker Returns, You MUST Complete This Checklist:**
-// Check for messages from workers
-const inbox = await swarmmail_inbox({ limit: 5 });
+This is the **quality gate** that prevents shipping broken code. DO NOT skip this.
-// Read specific message if needed
+```typescript
+// ============================================================
+// Step 1: Check Swarm Mail (Worker may have sent messages)
+// ============================================================
+const inbox = await swarmmail_inbox({ limit: 5 });
 const message = await swarmmail_read_message({ message_id: N });
-// Intervene if needed (see Intervention Patterns)
+// ============================================================
+// Step 2: Review the Work (Generate review prompt with diff)
+// ============================================================
+const reviewPrompt = await swarm_review({
+  project_key: "/abs/path/to/project",
+  epic_id: "epic-id",
+  task_id: "subtask-id",
+  files_touched: ["src/auth/service.ts", "src/auth/service.test.ts"]
+});
+// This generates a review prompt that includes:
+// - Epic context (what we're trying to achieve)
+// - Subtask requirements
+// - Git diff of changes
+// - Dependency status (what came before, what comes next)
+// ============================================================
+// Step 3: Evaluate Against Criteria
+// ============================================================
+// Ask yourself:
+// - Does the work fulfill the subtask requirements?
+// - Does it serve the overall epic goal?
+// - Does it enable downstream tasks?
+// - Type safety, no obvious bugs?
+// ============================================================
+// Step 4: Send Feedback (Approve or Request Changes)
+// ============================================================
+await swarm_review_feedback({
+  project_key: "/abs/path/to/project",
+  task_id: "subtask-id",
+  worker_id: "WorkerName",
+  status: "approved",  // or "needs_changes"
+  summary: "LGTM - auth service looks solid",
+  issues: "[]"  // or "[{file, line, issue, suggestion}]"
+});
+// ============================================================
+// Step 5: ONLY THEN Continue
+// ============================================================
+// If approved:
+//   - Close the cell
+//   - Spawn next worker (if dependencies allow)
+//   - Update swarm status
+//
+// If needs_changes:
+//   - Worker gets feedback
+//   - Worker retries (max 3 attempts)
+//   - Review again when worker re-submits
+//
+// If 3 failures:
+//   - Mark task blocked
+//   - Escalate to human (architectural problem, not "try harder")
+```
+**❌ Anti-Pattern (Skipping Review):**
+```typescript
+// Worker completes
+swarm_complete({ ... });
+// Coordinator immediately spawns next worker
+// ⚠️ WRONG - No quality gate!
+Task({ subagent_type: "swarm/worker", prompt: nextWorkerPrompt });
+```
+**✅ Correct Pattern (Review Before Proceeding):**
+```typescript
+// Worker completes
+swarm_complete({ ... });
+// Coordinator REVIEWS first
+swarm_review({ ... });
+// ... evaluates changes ...
+swarm_review_feedback({ status: "approved" });
+// ONLY THEN spawn next worker
+Task({ subagent_type: "swarm/worker", prompt: nextWorkerPrompt });
+```
+**Review Workflow (3-Strike Rule):**
+1. Worker calls `swarm_complete` → Coordinator notified
+2. Coordinator runs `swarm_review` → Gets diff + epic context
+3. Coordinator evaluates against epic goals
+4. If good: `swarm_review_feedback(status="approved")` → Task closed
+5. If issues: `swarm_review_feedback(status="needs_changes", issues=[...])` → Worker fixes
+6. After 3 rejections → Task marked blocked (architectural problem, not "try harder")
+**Review Criteria:**
+- Does work fulfill subtask requirements?
+- Does it serve the overall epic goal?
+- Does it enable downstream tasks?
+- Type safety, no obvious bugs?
+**Monitoring & Intervention:**
+```typescript
+// Check overall swarm status
+const status = await swarm_status({ epic_id, project_key });
 ```
 ### Phase 7: Aggregate & Complete
@@ -778,6 +881,13 @@ One blocker affects multiple subtasks.
 | `swarmmail_ack`          | Acknowledge message                 |
 | `swarmmail_health`       | Check database health               |
+## Swarm Review Quick Reference
+| Tool                     | Purpose                                    |
+| ------------------------ | ------------------------------------------ |
+| `swarm_review`           | Generate review prompt with epic context + diff |
+| `swarm_review_feedback`  | Send approval/rejection to worker (3-strike rule) |
 ## Full Swarm Flow
 ```typescript

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "opencode-swarm-plugin",
-  "version": "0.31.7",
+  "version": "0.33.0",
   "description": "Multi-agent swarm coordination for OpenCode with learning capabilities, beads integration, and Agent Mail",
   "type": "module",
   "main": "./dist/index.js",
@@ -39,7 +39,8 @@
     "gray-matter": "^4.0.3",
     "ioredis": "^5.4.1",
     "minimatch": "^10.1.1",
-    "swarm-mail": "1.2.2",
+    "swarm-mail": "1.4.0",
+    "yaml": "^2.8.2",
     "zod": "4.1.8"
   },
   "devDependencies": {

package/src/compaction-hook.ts CHANGED Viewed

@@ -88,9 +88,11 @@ Extract from session context:
 1. \`swarm_status(epic_id="<epic>", project_key="<path>")\` - Get current state
 2. \`swarmmail_inbox(limit=5)\` - Check for agent messages
-3. **Spawn ready subtasks** - Don't wait, fire them off
-4. **Unblock blocked work** - Resolve dependencies, reassign if needed
-5. **Collect completed work** - Close done subtasks, verify quality
+3. \`swarm_review(project_key, epic_id, task_id, files_touched)\` - Review any completed work
+4. \`swarm_review_feedback(project_key, task_id, worker_id, status, issues)\` - Approve or request changes
+5. **Spawn ready subtasks** - Don't wait, fire them off
+6. **Unblock blocked work** - Resolve dependencies, reassign if needed
+7. **Collect completed work** - Close done subtasks, verify quality
 ### Keep the Swarm Cooking

package/src/hive.integration.test.ts CHANGED Viewed

@@ -7,6 +7,8 @@
  * Run with: bun test src/hive.integration.test.ts
  */
 import { describe, it, expect, beforeAll, beforeEach, afterAll } from "vitest";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
 import {
   hive_create,
   hive_create_epic,
@@ -56,7 +58,7 @@ const createdBeadIds: string[] = [];
 /**
  * Test project key - use temp directory to isolate tests
  */
-const TEST_PROJECT_KEY = `/tmp/beads-integration-test-${Date.now()}`;
+const TEST_PROJECT_KEY = join(tmpdir(), `beads-integration-test-${Date.now()}`);
 /**
  * Adapter instance for verification
@@ -1353,6 +1355,86 @@ describe("beads integration", () => {
   });
   describe("hive_sync", () => {
+    // Regression test: hive_sync with auto_pull must tolerate unstaged changes
+    // outside .hive/ (stash -> pull -> pop) and leave those changes in place.
+    it("succeeds with unstaged changes outside .hive/ (stash-before-pull)", async () => {
+      const { mkdirSync, rmSync, writeFileSync } = await import("node:fs");
+      const { join } = await import("node:path");
+      const { tmpdir } = await import("node:os");
+      const { execSync } = await import("node:child_process");
+      // Temp git repository plus a bare "remote" (a remote is needed to trigger pull)
+      const tempProject = join(tmpdir(), `hive-sync-stash-test-${Date.now()}`);
+      const remoteProject = join(tmpdir(), `hive-sync-remote-${Date.now()}`);
+      const originalDir = getHiveWorkingDirectory();
+      // Setup lives inside try so the temp directories are removed even when
+      // a git command fails half-way through fixture creation
+      try {
+        // Create "remote" bare repo
+        mkdirSync(remoteProject, { recursive: true });
+        execSync("git init --bare", { cwd: remoteProject });
+        // Create local repo
+        mkdirSync(tempProject, { recursive: true });
+        execSync("git init", { cwd: tempProject });
+        execSync('git config user.email "test@example.com"', { cwd: tempProject });
+        execSync('git config user.name "Test User"', { cwd: tempProject });
+        // Quote the path: the temp directory may contain spaces
+        execSync(`git remote add origin "${remoteProject}"`, { cwd: tempProject });
+        // Create .hive directory and a source file
+        const hiveDir = join(tempProject, ".hive");
+        mkdirSync(hiveDir, { recursive: true });
+        writeFileSync(join(hiveDir, "issues.jsonl"), "");
+        writeFileSync(join(tempProject, "src.ts"), "// initial");
+        // Initial commit and push
+        execSync("git add .", { cwd: tempProject });
+        execSync('git commit -m "initial commit"', { cwd: tempProject });
+        // Do not rely on init.defaultBranch being "main" on this machine
+        execSync("git branch -M main", { cwd: tempProject });
+        execSync("git push -u origin main", { cwd: tempProject });
+        // Now create unstaged changes OUTSIDE .hive/
+        writeFileSync(join(tempProject, "src.ts"), "// modified but not staged");
+        // Set working directory for hive commands
+        setHiveWorkingDirectory(tempProject);
+        // Create a cell (this will mark it dirty and flush will write to JSONL)
+        await hive_create.execute(
+          { title: "Stash test cell", type: "task" },
+          mockContext,
+        );
+        // Sync WITH auto_pull=true (this is where the bug manifests)
+        // Before fix: fails with "cannot pull with rebase: You have unstaged changes"
+        // After fix: stashes, pulls, pops, succeeds
+        const result = await hive_sync.execute(
+          { auto_pull: true },
+          mockContext,
+        );
+        // Should succeed
+        expect(result).toContain("successfully");
+        // Verify .hive changes were committed
+        const hiveStatus = execSync("git status --porcelain .hive/", {
+          cwd: tempProject,
+          encoding: "utf-8",
+        });
+        expect(hiveStatus.trim()).toBe("");
+        // Verify unstaged changes are still there (stash was popped)
+        const srcStatus = execSync("git status --porcelain src.ts", {
+          cwd: tempProject,
+          encoding: "utf-8",
+        });
+        expect(srcStatus.trim()).toContain("M src.ts");
+      } finally {
+        // Restore original working directory
+        setHiveWorkingDirectory(originalDir);
+        // Cleanup (force: true tolerates directories that were never created)
+        rmSync(tempProject, { recursive: true, force: true });
+        rmSync(remoteProject, { recursive: true, force: true });
+      }
+    });
     it("commits .hive changes before pulling (regression test for unstaged changes error)", async () => {
       const { mkdirSync, rmSync, writeFileSync, existsSync } = await import("node:fs");
       const { join } = await import("node:path");