npm - autopilot-code - Versions diffs - 2.0.0 → 2.1.0 - Mend

autopilot-code 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/scripts/issue_runner/agents/__init__.py +10 -2
package/scripts/issue_runner/agents/opencode_client.py +486 -0
package/scripts/issue_runner/agents/opencode_server.py +247 -0
package/scripts/issue_runner/agents/test_opencode_server.py +321 -0
package/scripts/issue_runner/runner.py +11 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "autopilot-code",
-  "version": "2.0.0",
+  "version": "2.1.0",
   "private": false,
   "description": "Repo-issue–driven autopilot runner",
   "license": "MIT",

package/scripts/issue_runner/agents/__init__.py CHANGED

@@ -1,8 +1,15 @@
 from .base import BaseAgent, AgentResult
 from .opencode import OpenCodeAgent
+from .opencode_server import OpenCodeServerAgent
 from .claude import ClaudeCodeAgent
-__all__ = ["BaseAgent", "AgentResult", "OpenCodeAgent", "ClaudeCodeAgent"]
+__all__ = [
+    "BaseAgent",
+    "AgentResult",
+    "OpenCodeAgent",
+    "OpenCodeServerAgent",
+    "ClaudeCodeAgent",
+]
 def get_agent(agent_type: str, config: dict) -> BaseAgent:
@@ -10,7 +17,7 @@ def get_agent(agent_type: str, config: dict) -> BaseAgent:
     Factory function to create the appropriate agent.
     Args:
-        agent_type: "opencode" or "claude"
+        agent_type: "opencode", "opencode-server", or "claude"
         config: Agent configuration from autopilot.json
     Returns:
@@ -21,6 +28,7 @@ def get_agent(agent_type: str, config: dict) -> BaseAgent:
     """
     agents = {
         "opencode": OpenCodeAgent,
+        "opencode-server": OpenCodeServerAgent,
         "claude": ClaudeCodeAgent,
     }

package/scripts/issue_runner/agents/opencode_client.py ADDED

@@ -0,0 +1,486 @@
+"""
+HTTP client for OpenCode server API.
+This module provides a client for interacting with the OpenCode server
+via its HTTP API, enabling session-based conversations with proper
+session persistence across server restarts.
+"""
+import json
+import logging
+import os
+import re
+import signal
+import subprocess
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+logger = logging.getLogger(__name__)
+@dataclass
+class ServerInfo:
+    """Information about a running OpenCode server."""
+    port: int
+    pid: int
+    worktree: Path
+@dataclass
+class MessagePart:
+    """A part of a message response."""
+    type: str
+    text: Optional[str] = None
+    tool: Optional[str] = None
+    tool_input: Optional[Dict[str, Any]] = None
+    tool_output: Optional[str] = None
+@dataclass
+class MessageResponse:
+    """Response from sending a message."""
+    message_id: str
+    session_id: str
+    role: str
+    parts: List[MessagePart]
+    tokens: Dict[str, int]
+    finish_reason: Optional[str] = None
+    def get_text(self) -> str:
+        """Extract all text content from response parts."""
+        text_parts = [p.text for p in self.parts if p.type == "text" and p.text]
+        return "\n".join(text_parts)
+class OpenCodeClient:
+    """
+    HTTP client for OpenCode server.
+    Handles communication with an OpenCode server instance running
+    in a specific worktree directory.
+    """
+    def __init__(self, port: int, host: str = "127.0.0.1", timeout: int = 1800):
+        """
+        Initialize client for a specific server.
+        Args:
+            port: Port the server is listening on
+            host: Hostname (default localhost)
+            timeout: Request timeout in seconds (default 30 minutes for long agent runs)
+        """
+        # 30-minute timeout matches the agent execution limit - complex implementations
+        # or CI fixes can take significant time as the LLM explores the codebase.
+        self.port = port
+        self.host = host
+        self.timeout = timeout
+        self.base_url = f"http://{host}:{port}"
+    def _request(
+        self,
+        method: str,
+        path: str,
+        data: Optional[Dict] = None,
+        timeout: Optional[int] = None,
+    ) -> Optional[Dict]:
+        """
+        Make an HTTP request to the server using curl.
+        We use curl instead of urllib/requests because:
+        1. Zero dependencies - curl is universally available
+        2. Reliable timeout handling for very long requests (30+ min)
+        3. The ~10ms subprocess overhead is negligible vs. agent execution time
+        Args:
+            method: HTTP method (GET, POST, etc.)
+            path: API path (e.g., /session)
+            data: JSON data to send (for POST/PATCH)
+            timeout: Override default timeout
+        Returns:
+            Parsed JSON response or None on error
+        """
+        url = f"{self.base_url}{path}"
+        # Use -w to append HTTP status code on a new line for validation
+        cmd = ["curl", "-s", "-X", method, "-w", "\n%{http_code}", url]
+        if data is not None:
+            cmd.extend(["-H", "Content-Type: application/json"])
+            cmd.extend(["-d", json.dumps(data)])
+        req_timeout = timeout if timeout is not None else self.timeout
+        cmd.extend(["--max-time", str(req_timeout)])
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                # Extra 10s buffer for subprocess overhead beyond curl's timeout
+                timeout=req_timeout + 10,
+            )
+            if result.returncode != 0:
+                logger.warning(f"Request failed: {result.stderr}")
+                return None
+            # Parse response body and HTTP status code
+            output = result.stdout.strip()
+            if not output:
+                return None
+            # Status code is on the last line (added by -w flag)
+            lines = output.rsplit("\n", 1)
+            if len(lines) != 2:
+                # Malformed response - -w flag should always add status code line
+                logger.warning(f"Malformed curl response (no status code): {output[:100]}")
+                return None
+            body, status_code = lines
+            if not status_code.isdigit():
+                logger.warning(f"Invalid HTTP status code: {status_code}")
+                return None
+            if int(status_code) >= 400:
+                logger.warning(f"HTTP {status_code} from {method} {path}")
+                return None
+            if not body:
+                return None
+            return json.loads(body)
+        except subprocess.TimeoutExpired:
+            logger.error(f"Request timed out: {method} {path}")
+            return None
+        except json.JSONDecodeError as e:
+            logger.error(f"Invalid JSON response: {e}")
+            return None
+        except Exception as e:
+            logger.error(f"Request error: {e}")
+            return None
+    def health_check(self) -> bool:
+        """
+        Check if the server is healthy.
+        Returns:
+            True if server is responding and healthy
+        """
+        response = self._request("GET", "/global/health", timeout=5)
+        return response is not None and response.get("healthy", False)
+    def create_session(self, title: Optional[str] = None) -> Optional[str]:
+        """
+        Create a new session.
+        Args:
+            title: Optional title for the session
+        Returns:
+            Session ID or None on error
+        """
+        data = {}
+        if title:
+            data["title"] = title
+        response = self._request("POST", "/session", data=data, timeout=30)
+        if response:
+            session_id = response.get("id")
+            logger.info(f"Created session: {session_id}")
+            return session_id
+        return None
+    def get_session(self, session_id: str) -> Optional[Dict]:
+        """
+        Get session details.
+        Args:
+            session_id: Session ID to retrieve
+        Returns:
+            Session data or None if not found
+        """
+        return self._request("GET", f"/session/{session_id}", timeout=10)
+    def session_exists(self, session_id: str) -> bool:
+        """
+        Check if a session exists.
+        Args:
+            session_id: Session ID to check
+        Returns:
+            True if session exists
+        """
+        session = self.get_session(session_id)
+        return session is not None and "id" in session
+    def send_message(
+        self,
+        session_id: str,
+        text: str,
+        timeout: Optional[int] = None,
+    ) -> Optional[MessageResponse]:
+        """
+        Send a message to a session and wait for response.
+        Args:
+            session_id: Session to send message to
+            text: Message text
+            timeout: Override default timeout
+        Returns:
+            MessageResponse or None on error
+        """
+        data = {
+            "parts": [{"type": "text", "text": text}]
+        }
+        response = self._request(
+            "POST",
+            f"/session/{session_id}/message",
+            data=data,
+            timeout=timeout,
+        )
+        if not response:
+            return None
+        try:
+            info = response.get("info", {})
+            raw_parts = response.get("parts", [])
+            parts = []
+            for p in raw_parts:
+                part = MessagePart(type=p.get("type", "unknown"))
+                if p.get("type") == "text":
+                    part.text = p.get("text", "")
+                elif p.get("type") == "tool":
+                    part.tool = p.get("tool")
+                    state = p.get("state", {})
+                    part.tool_input = state.get("input")
+                    part.tool_output = state.get("output")
+                elif p.get("type") == "reasoning":
+                    part.text = p.get("text", "")
+                parts.append(part)
+            return MessageResponse(
+                message_id=info.get("id", ""),
+                session_id=info.get("sessionID", session_id),
+                role=info.get("role", "assistant"),
+                parts=parts,
+                tokens=info.get("tokens", {}),
+                finish_reason=info.get("finish"),
+            )
+        except Exception as e:
+            logger.error(f"Failed to parse message response: {e}")
+            return None
+    def get_messages(
+        self,
+        session_id: str,
+        limit: Optional[int] = None,
+    ) -> List[Dict]:
+        """
+        Get message history for a session.
+        Args:
+            session_id: Session to get messages from
+            limit: Maximum number of messages to retrieve
+        Returns:
+            List of message dictionaries
+        """
+        path = f"/session/{session_id}/message"
+        if limit:
+            path += f"?limit={limit}"
+        response = self._request("GET", path, timeout=30)
+        return response if isinstance(response, list) else []
+class OpenCodeServerManager:
+    """
+    Manages OpenCode server instances for worktrees.
+    Handles starting, stopping, and connecting to OpenCode servers
+    for different worktree directories.
+    """
+    def __init__(self, opencode_binary: str = "opencode"):
+        """
+        Initialize server manager.
+        Args:
+            opencode_binary: Path to opencode binary
+        """
+        self.binary = opencode_binary
+        self._servers: Dict[str, ServerInfo] = {}  # worktree path -> server info
+    def _parse_port_from_output(self, output: str) -> Optional[int]:
+        """Parse port number from server startup output."""
+        match = re.search(r"listening on http://[^:]+:(\d+)", output)
+        if match:
+            return int(match.group(1))
+        return None
+    def start_server(
+        self,
+        worktree: Path,
+        timeout: int = 30,
+    ) -> Optional[ServerInfo]:
+        """
+        Start an OpenCode server for a worktree.
+        Args:
+            worktree: Path to the worktree directory
+            timeout: Seconds to wait for server to start
+        Returns:
+            ServerInfo or None on failure
+        """
+        worktree_str = str(worktree.resolve())
+        # Check if we already have a server for this worktree.
+        # Note: There's a theoretical TOCTOU race between health_check() and using
+        # the server, but Python's GIL + our single-threaded runner make this safe.
+        # If the server dies between check and use, the HTTP call will fail and
+        # the caller can retry, which will start a fresh server.
+        if worktree_str in self._servers:
+            info = self._servers[worktree_str]
+            client = OpenCodeClient(info.port)
+            if client.health_check():
+                logger.info(f"Reusing existing server on port {info.port}")
+                return info
+            else:
+                # Server died or unresponsive - kill stale process and remove from cache
+                logger.warning(f"Server on port {info.port} not responding, killing PID {info.pid}")
+                try:
+                    os.kill(info.pid, signal.SIGTERM)
+                except ProcessLookupError:
+                    pass  # Already dead
+                except Exception as e:
+                    logger.debug(f"Error killing stale server: {e}")
+                del self._servers[worktree_str]
+        logger.info(f"Starting OpenCode server for {worktree}")
+        # Start server process
+        proc = subprocess.Popen(
+            [self.binary, "serve"],
+            cwd=worktree,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+        )
+        # Wait for server to output port
+        port = None
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            if proc.poll() is not None:
+                # Process exited
+                output = proc.stdout.read() if proc.stdout else ""
+                logger.error(f"Server exited unexpectedly: {output}")
+                return None
+            # Blocking readline is acceptable here because:
+            # 1. OpenCode always outputs the port line quickly on startup
+            # 2. The outer timeout loop + proc.poll() handles hung processes
+            # 3. Non-blocking I/O adds complexity without real benefit
+            if proc.stdout:
+                line = proc.stdout.readline()
+                if line:
+                    logger.debug(f"Server output: {line.strip()}")
+                    port = self._parse_port_from_output(line)
+                    if port:
+                        break
+                    # Warn if we see output but can't parse port (format may have changed)
+                    if "listen" in line.lower():
+                        logger.warning(f"Could not parse port from: {line.strip()}")
+            time.sleep(0.1)
+        if not port:
+            logger.error("Failed to get server port - check OpenCode version/output format")
+            proc.terminate()
+            return None
+        # Close stdout to prevent buffer fill-up since the server runs detached.
+        # The server communicates via HTTP after startup, not stdout, so any
+        # SIGPIPE from further writes is harmless and expected.
+        if proc.stdout:
+            proc.stdout.close()
+        # Verify server is responding
+        client = OpenCodeClient(port)
+        if not self._wait_for_health(client, timeout=10):
+            logger.error("Server not responding to health checks")
+            proc.terminate()
+            return None
+        info = ServerInfo(port=port, pid=proc.pid, worktree=worktree)
+        self._servers[worktree_str] = info
+        logger.info(f"Server started on port {port} (PID {proc.pid})")
+        return info
+    def _wait_for_health(self, client: OpenCodeClient, timeout: int = 10) -> bool:
+        """Wait for server to become healthy."""
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            if client.health_check():
+                return True
+            time.sleep(0.5)
+        return False
+    def get_client(self, worktree: Path) -> Optional[OpenCodeClient]:
+        """
+        Get a client for a worktree, starting server if needed.
+        Args:
+            worktree: Path to worktree
+        Returns:
+            OpenCodeClient or None if server couldn't be started
+        """
+        info = self.start_server(worktree)
+        if info:
+            return OpenCodeClient(info.port)
+        return None
+    def stop_server(self, worktree: Path) -> bool:
+        """
+        Stop the server for a worktree.
+        Args:
+            worktree: Path to worktree
+        Returns:
+            True if server was stopped
+        """
+        worktree_str = str(worktree.resolve())
+        if worktree_str not in self._servers:
+            return False
+        info = self._servers[worktree_str]
+        try:
+            os.kill(info.pid, signal.SIGTERM)
+            logger.info(f"Stopped server on port {info.port} (PID {info.pid})")
+        except ProcessLookupError:
+            logger.debug(f"Server already stopped (PID {info.pid})")
+        except Exception as e:
+            logger.warning(f"Error stopping server: {e}")
+        del self._servers[worktree_str]
+        return True
+    def stop_all(self) -> None:
+        """Stop all managed servers."""
+        for worktree_str in list(self._servers.keys()):
+            self.stop_server(Path(worktree_str))

package/scripts/issue_runner/agents/opencode_server.py ADDED

@@ -0,0 +1,247 @@
+"""
+OpenCode server-based agent implementation.
+This agent uses the OpenCode HTTP server API instead of the CLI,
+providing proper session management and continuity across multiple
+agent calls within the same issue workflow.
+"""
+import logging
+import os
+import shutil
+from pathlib import Path
+from typing import Optional
+from .base import BaseAgent, AgentResult
+from .opencode_client import OpenCodeClient, OpenCodeServerManager
+logger = logging.getLogger(__name__)
+# Global server manager shared across agent instances.
+# This is intentionally global (not a class variable or injected) because:
+# 1. Multiple IssueRunner instances may create separate agent instances
+# 2. The manager must track ALL running servers to properly reuse/cleanup
+# 3. Python's GIL makes this safe for our single-threaded runner
+_server_manager: Optional[OpenCodeServerManager] = None
+def get_server_manager(binary_path: str = "opencode") -> OpenCodeServerManager:
+    """Get or create the global server manager."""
+    global _server_manager
+    if _server_manager is None:
+        _server_manager = OpenCodeServerManager(binary_path)
+    return _server_manager
+def reset_server_manager() -> None:
+    """Reset the global server manager. Used for testing."""
+    global _server_manager
+    if _server_manager is not None:
+        _server_manager.stop_all()
+    _server_manager = None
+class OpenCodeServerAgent(BaseAgent):
+    """
+    Agent implementation using OpenCode HTTP server.
+    Unlike the CLI-based agent, this implementation:
+    - Starts an OpenCode server per worktree
+    - Maintains proper session continuity via session IDs
+    - Persists sessions across server restarts (sessions stored in worktree)
+    """
+    def __init__(self, config: dict):
+        super().__init__(config)
+        self._server_manager: Optional[OpenCodeServerManager] = None
+    @property
+    def name(self) -> str:
+        return "OpenCode Server"
+    @property
+    def supports_sessions(self) -> bool:
+        return True
+    @property
+    def server_manager(self) -> OpenCodeServerManager:
+        """Get the server manager, initializing if needed.
+        Note: self.binary_path is inherited from BaseAgent which caches
+        the result of find_binary() on first access.
+        """
+        if self._server_manager is None:
+            self._server_manager = get_server_manager(self.binary_path)
+        return self._server_manager
+    def find_binary(self) -> str:
+        """
+        Locate the opencode binary.
+        Search order:
+        1. agentPath from config
+        2. PATH
+        3. Common nvm locations
+        4. Other common locations
+        """
+        # 1. Config-specified path
+        agent_path = self.config.get("agentPath", "")
+        if agent_path and os.path.isfile(agent_path) and os.access(agent_path, os.X_OK):
+            return agent_path
+        # 2. Already in PATH
+        which_result = shutil.which("opencode")
+        if which_result:
+            return which_result
+        # 3. Common nvm locations
+        home = Path.home()
+        nvm_dir = home / ".nvm" / "versions" / "node"
+        if nvm_dir.exists():
+            for node_dir in nvm_dir.iterdir():
+                opencode_path = node_dir / "bin" / "opencode"
+                if opencode_path.exists() and os.access(opencode_path, os.X_OK):
+                    return str(opencode_path)
+        # 4. Other common locations
+        common_paths = [
+            home / ".local" / "bin" / "opencode",
+            Path("/usr/local/bin/opencode"),
+            home / ".npm-global" / "bin" / "opencode",
+        ]
+        for path in common_paths:
+            if path.exists() and os.access(path, os.X_OK):
+                return str(path)
+        raise FileNotFoundError(
+            "opencode not found. Set 'agentPath' in autopilot.json or ensure opencode is installed."
+        )
+    def _get_client(self, worktree: Path) -> OpenCodeClient:
+        """
+        Get an OpenCode client for a worktree, starting server if needed.
+        Args:
+            worktree: Path to the worktree
+        Returns:
+            OpenCodeClient connected to server
+        Raises:
+            RuntimeError: If server couldn't be started
+        """
+        client = self.server_manager.get_client(worktree)
+        if client is None:
+            raise RuntimeError(f"Failed to start OpenCode server for {worktree}")
+        return client
+    def _ensure_session(
+        self,
+        client: OpenCodeClient,
+        session_id: Optional[str],
+        title: Optional[str] = None,
+    ) -> str:
+        """
+        Ensure a valid session exists, creating one if needed.
+        Args:
+            client: OpenCode client
+            session_id: Existing session ID (if any)
+            title: Title for new session
+        Returns:
+            Valid session ID
+        Raises:
+            RuntimeError: If session couldn't be created
+        """
+        # Check if existing session is valid
+        if session_id and client.session_exists(session_id):
+            logger.info(f"Reusing existing session: {session_id}")
+            return session_id
+        # Create new session
+        new_session_id = client.create_session(title=title)
+        if not new_session_id:
+            raise RuntimeError("Failed to create OpenCode session")
+        logger.info(f"Created new session: {new_session_id}")
+        return new_session_id
+    def run(
+        self,
+        worktree: Path,
+        prompt: str,
+        session_id: Optional[str] = None,
+    ) -> AgentResult:
+        """
+        Run OpenCode with the given prompt.
+        Args:
+            worktree: Working directory for the agent
+            prompt: The task/prompt
+            session_id: Previous session ID to continue (if any)
+        Returns:
+            AgentResult with session_id for future continuation
+        """
+        try:
+            # Get client (starts server if needed)
+            client = self._get_client(worktree)
+            # Ensure we have a valid session
+            session_id = self._ensure_session(client, session_id, title="autopilot")
+            # Send message
+            logger.info(f"Sending message to session {session_id}")
+            response = client.send_message(session_id, prompt)
+            if response is None:
+                return AgentResult(
+                    success=False,
+                    session_id=session_id,
+                    output="",
+                    error="No response from OpenCode server",
+                )
+            # Extract text from response
+            output_text = response.get_text()
+            # Check finish reason
+            success = response.finish_reason in ("stop", "tool-calls", None)
+            return AgentResult(
+                success=success,
+                session_id=session_id,
+                output=output_text,
+                error=None if success else f"Unexpected finish: {response.finish_reason}",
+            )
+        except Exception as e:
+            logger.exception("Error running OpenCode server agent")
+            return AgentResult(
+                success=False,
+                session_id=session_id,
+                output="",
+                error=str(e),
+            )
+    def stop_server(self, worktree: Path) -> None:
+        """
+        Stop the server for a worktree.
+        Should be called during cleanup to release resources.
+        Args:
+            worktree: Path to the worktree
+        """
+        self.server_manager.stop_server(worktree)
+    def cleanup(self, worktree: Path) -> None:
+        """
+        Clean up resources for a worktree.
+        Args:
+            worktree: Path to the worktree
+        """
+        self.stop_server(worktree)

package/scripts/issue_runner/agents/test_opencode_server.py ADDED

@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""
+Integration tests for OpenCode server agent.
+Run with: python3 -m scripts.issue_runner.agents.test_opencode_server
+These tests require OpenCode to be installed and configured.
+NOTE: Some tests (session_continuity, session_persistence, concurrent_sessions)
+rely on LLM responses and may occasionally fail due to non-deterministic model
+output. This is expected - the tests verify real end-to-end behavior rather
+than mocking. Re-run if a single test fails sporadically.
+"""
+import logging
+import sys
+import tempfile
+import shutil
+from pathlib import Path
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+class TestResult:
+    def __init__(self, name: str):
+        self.name = name
+        self.passed = False
+        self.error = None
+    def __str__(self):
+        status = "✅ PASS" if self.passed else f"❌ FAIL: {self.error}"
+        return f"{self.name}: {status}"
+def test_imports() -> TestResult:
+    """Test that all imports work correctly."""
+    result = TestResult("imports")
+    try:
+        from scripts.issue_runner.agents import get_agent, OpenCodeServerAgent
+        from scripts.issue_runner.agents.opencode_client import (
+            OpenCodeClient,
+            OpenCodeServerManager,
+            ServerInfo,
+            MessageResponse,
+        )
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_agent_factory() -> TestResult:
+    """Test that the agent factory creates the correct agent type."""
+    result = TestResult("agent_factory")
+    try:
+        from scripts.issue_runner.agents import get_agent, OpenCodeServerAgent
+        agent = get_agent("opencode-server", {})
+        assert isinstance(agent, OpenCodeServerAgent), f"Wrong type: {type(agent)}"
+        assert agent.name == "OpenCode Server"
+        assert agent.supports_sessions is True
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_find_binary() -> TestResult:
+    """Test that the opencode binary can be found."""
+    result = TestResult("find_binary")
+    try:
+        from scripts.issue_runner.agents import get_agent
+        agent = get_agent("opencode-server", {})
+        binary = agent.find_binary()
+        assert binary is not None, "Binary not found"
+        assert Path(binary).exists(), f"Binary does not exist: {binary}"
+        result.passed = True
+    except FileNotFoundError as e:
+        result.error = f"OpenCode not installed: {e}"
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_server_start_stop(worktree: Path) -> TestResult:
+    """Test starting and stopping a server."""
+    result = TestResult("server_start_stop")
+    try:
+        from scripts.issue_runner.agents.opencode_client import OpenCodeServerManager
+        manager = OpenCodeServerManager()
+        # Start server
+        info = manager.start_server(worktree)
+        assert info is not None, "Failed to start server"
+        assert info.port > 0, f"Invalid port: {info.port}"
+        assert info.pid > 0, f"Invalid PID: {info.pid}"
+        # Stop server
+        stopped = manager.stop_server(worktree)
+        assert stopped, "Failed to stop server"
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_session_creation(worktree: Path) -> TestResult:
+    """Test creating a session."""
+    result = TestResult("session_creation")
+    try:
+        from scripts.issue_runner.agents import get_agent
+        agent = get_agent("opencode-server", {})
+        # Run a simple command to create session
+        res = agent.run(worktree, "What is 1+1? Reply with just the number.")
+        assert res.success, f"Agent failed: {res.error}"
+        assert res.session_id is not None, "No session ID returned"
+        # OpenCode uses "ses_" prefix as of v1.x - if this fails, check version compatibility
+        assert res.session_id.startswith("ses_"), f"Invalid session ID: {res.session_id}"
+        agent.cleanup(worktree)
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_session_continuity(worktree: Path) -> TestResult:
+    """Test that session continuity works within a single server."""
+    result = TestResult("session_continuity")
+    try:
+        from scripts.issue_runner.agents import get_agent
+        agent = get_agent("opencode-server", {})
+        # First message - set a secret
+        res1 = agent.run(worktree, "Remember: the password is ELEPHANT. Just say OK.")
+        assert res1.success, f"First message failed: {res1.error}"
+        session_id = res1.session_id
+        # Second message - recall the secret
+        res2 = agent.run(worktree, "What was the password?", session_id=session_id)
+        assert res2.success, f"Second message failed: {res2.error}"
+        assert res2.session_id == session_id, "Session ID changed"
+        assert "ELEPHANT" in res2.output.upper(), f"Password not recalled: {res2.output}"
+        agent.cleanup(worktree)
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_session_persistence(worktree: Path) -> TestResult:
+    """Test that sessions persist across server restarts."""
+    result = TestResult("session_persistence")
+    try:
+        from scripts.issue_runner.agents import get_agent
+        # First agent - set secret and stop
+        agent1 = get_agent("opencode-server", {})
+        res1 = agent1.run(worktree, "Remember: the code is ZEBRA. Just say OK.")
+        assert res1.success, f"First message failed: {res1.error}"
+        session_id = res1.session_id
+        # Stop the server
+        agent1.cleanup(worktree)
+        # Second agent - recall secret with same session ID
+        agent2 = get_agent("opencode-server", {})
+        res2 = agent2.run(worktree, "What was the code?", session_id=session_id)
+        assert res2.success, f"Second message failed: {res2.error}"
+        assert res2.session_id == session_id, "Session ID changed after restart"
+        assert "ZEBRA" in res2.output.upper(), f"Code not recalled after restart: {res2.output}"
+        agent2.cleanup(worktree)
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def test_concurrent_sessions(worktree1: Path, worktree2: Path) -> TestResult:
+    """Test that multiple worktrees have independent sessions.
+    "Concurrent" here means multiple worktrees can have active sessions
+    simultaneously (each with its own server), not parallel thread execution.
+    This verifies session isolation - each worktree remembers its own context.
+    """
+    result = TestResult("concurrent_sessions")
+    try:
+        from scripts.issue_runner.agents import get_agent
+        agent = get_agent("opencode-server", {})
+        # Create sessions in both worktrees (runs sequentially but sessions coexist)
+        res1 = agent.run(worktree1, "Remember: worktree1 secret is APPLE. Say OK.")
+        res2 = agent.run(worktree2, "Remember: worktree2 secret is BANANA. Say OK.")
+        assert res1.success, f"Worktree1 failed: {res1.error}"
+        assert res2.success, f"Worktree2 failed: {res2.error}"
+        assert res1.session_id != res2.session_id, "Sessions should be different"
+        # Verify each remembers its own secret
+        res1b = agent.run(worktree1, "What was the secret?", session_id=res1.session_id)
+        res2b = agent.run(worktree2, "What was the secret?", session_id=res2.session_id)
+        assert "APPLE" in res1b.output.upper(), f"Worktree1 wrong: {res1b.output}"
+        assert "BANANA" in res2b.output.upper(), f"Worktree2 wrong: {res2b.output}"
+        agent.cleanup(worktree1)
+        agent.cleanup(worktree2)
+        result.passed = True
+    except Exception as e:
+        result.error = str(e)
+    return result
+def run_tests():
+    """Run all tests and report results."""
+    from scripts.issue_runner.agents.opencode_server import reset_server_manager
+    print("\n" + "=" * 60)
+    print("OpenCode Server Agent - Integration Tests")
+    print("=" * 60 + "\n")
+    results = []
+    # Basic tests that don't need a worktree
+    print("Running basic tests...")
+    results.append(test_imports())
+    results.append(test_agent_factory())
+    results.append(test_find_binary())
+    # Check if we can proceed with integration tests
+    if not all(r.passed for r in results):
+        print("\n⚠️  Basic tests failed, skipping integration tests\n")
+    else:
+        # Create temporary worktrees for integration tests
+        print("\nRunning integration tests (requires OpenCode)...")
+        # Reset global state before integration tests
+        reset_server_manager()
+        # Use the current repo as worktree for single-worktree tests
+        worktree = Path(__file__).parent.parent.parent.parent.resolve()
+        results.append(test_server_start_stop(worktree))
+        reset_server_manager()  # Clean state between tests
+        results.append(test_session_creation(worktree))
+        reset_server_manager()
+        results.append(test_session_continuity(worktree))
+        reset_server_manager()
+        results.append(test_session_persistence(worktree))
+        reset_server_manager()
+        # For concurrent test, we need two different directories
+        # Use temp directories that are git repos
+        temp1 = None
+        temp2 = None
+        try:
+            temp1 = Path(tempfile.mkdtemp(prefix="autopilot-test1-"))
+            temp2 = Path(tempfile.mkdtemp(prefix="autopilot-test2-"))
+            # Initialize as git repos (required by opencode)
+            import subprocess
+            subprocess.run(["git", "init"], cwd=temp1, capture_output=True)
+            subprocess.run(["git", "init"], cwd=temp2, capture_output=True)
+            results.append(test_concurrent_sessions(temp1, temp2))
+        except Exception as e:
+            result = TestResult("concurrent_sessions")
+            result.error = f"Setup failed: {e}"
+            results.append(result)
+        finally:
+            # Always cleanup temp dirs and reset server state
+            reset_server_manager()
+            if temp1:
+                shutil.rmtree(temp1, ignore_errors=True)
+            if temp2:
+                shutil.rmtree(temp2, ignore_errors=True)
+    # Print results
+    print("\n" + "-" * 60)
+    print("Results:")
+    print("-" * 60)
+    for r in results:
+        print(f"  {r}")
+    passed = sum(1 for r in results if r.passed)
+    total = len(results)
+    print("-" * 60)
+    print(f"\n{'✅' if passed == total else '❌'} {passed}/{total} tests passed\n")
+    return passed == total
+if __name__ == "__main__":
+    # Change to repo root so imports work
+    import os
+    repo_root = Path(__file__).parent.parent.parent.parent.resolve()
+    os.chdir(repo_root)
+    sys.path.insert(0, str(repo_root))
+    success = run_tests()
+    sys.exit(0 if success else 1)

package/scripts/issue_runner/runner.py CHANGED Viewed

@@ -81,6 +81,7 @@ class IssueRunner:
                     updated_at=datetime.utcnow().isoformat() + "Z",
                 )
                 self._save_state(state, f"❌ Failed at step {state.step.value}: {e}")
+                self._cleanup_agent(Path(state.worktree))
                 return False
         return state.step == IssueStep.DONE
@@ -110,6 +111,14 @@ class IssueRunner:
             message = STEP_STATUS_MESSAGES.get(state.step, f"Step: {state.step.value}")
         self.github.save_state(state.issue_number, state, message)
+    def _cleanup_agent(self, worktree: Path) -> None:
+        """Clean up agent resources if supported."""
+        if hasattr(self.agent, "cleanup"):
+            try:
+                self.agent.cleanup(worktree)
+            except Exception as e:
+                logger.warning(f"Error cleaning up agent: {e}")
     def _transition(self, state: StateData) -> StateData:
         """Execute one state transition."""
         handlers = {
@@ -672,6 +681,8 @@ This PR is automatically created by Autopilot to implement issue #{state.issue_n
         # Cleanup worktree
         worktree = Path(state.worktree)
+        self._cleanup_agent(worktree)
         if self.git.worktree_exists(worktree):
             self.git.remove_worktree(worktree)