autopilot-code 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autopilot-code",
3
- "version": "2.0.0",
3
+ "version": "2.1.0",
4
4
  "private": false,
5
5
  "description": "Repo-issue–driven autopilot runner",
6
6
  "license": "MIT",
@@ -1,8 +1,15 @@
1
1
  from .base import BaseAgent, AgentResult
2
2
  from .opencode import OpenCodeAgent
3
+ from .opencode_server import OpenCodeServerAgent
3
4
  from .claude import ClaudeCodeAgent
4
5
 
5
- __all__ = ["BaseAgent", "AgentResult", "OpenCodeAgent", "ClaudeCodeAgent"]
6
+ __all__ = [
7
+ "BaseAgent",
8
+ "AgentResult",
9
+ "OpenCodeAgent",
10
+ "OpenCodeServerAgent",
11
+ "ClaudeCodeAgent",
12
+ ]
6
13
 
7
14
 
8
15
  def get_agent(agent_type: str, config: dict) -> BaseAgent:
@@ -10,7 +17,7 @@ def get_agent(agent_type: str, config: dict) -> BaseAgent:
10
17
  Factory function to create the appropriate agent.
11
18
 
12
19
  Args:
13
- agent_type: "opencode" or "claude"
20
+ agent_type: "opencode", "opencode-server", or "claude"
14
21
  config: Agent configuration from autopilot.json
15
22
 
16
23
  Returns:
@@ -21,6 +28,7 @@ def get_agent(agent_type: str, config: dict) -> BaseAgent:
21
28
  """
22
29
  agents = {
23
30
  "opencode": OpenCodeAgent,
31
+ "opencode-server": OpenCodeServerAgent,
24
32
  "claude": ClaudeCodeAgent,
25
33
  }
26
34
 
@@ -0,0 +1,486 @@
1
+ """
2
+ HTTP client for OpenCode server API.
3
+
4
+ This module provides a client for interacting with the OpenCode server
5
+ via its HTTP API, enabling session-based conversations with proper
6
+ session persistence across server restarts.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import os
12
+ import re
13
+ import signal
14
+ import subprocess
15
+ import time
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Optional, List, Dict, Any
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class ServerInfo:
    """Plain record describing one OpenCode server process.

    Instances only carry data; they perform no I/O themselves.
    """

    # TCP port the server's HTTP API was reported to listen on.
    port: int
    # OS process ID of the server process.
    pid: int
    # Worktree directory the server was started for.
    worktree: Path
29
+
30
+
31
@dataclass
class MessagePart:
    """One element of a message returned by the server.

    Only ``type`` is mandatory; every other field defaults to ``None`` and
    is filled in depending on the kind of part.
    """

    # Part kind as reported by the server (e.g. "text", "tool", "reasoning").
    type: str
    # Text payload, when the part carries text.
    text: Optional[str] = None
    # Tool name, when the part describes a tool invocation.
    tool: Optional[str] = None
    # Input given to the tool, if any.
    tool_input: Optional[Dict[str, Any]] = None
    # Output produced by the tool, if any.
    tool_output: Optional[str] = None
39
+
40
+
41
@dataclass
class MessageResponse:
    """Parsed reply to a message sent to a session."""

    message_id: str
    session_id: str
    role: str
    parts: List["MessagePart"]
    tokens: Dict[str, int]
    finish_reason: Optional[str] = None

    def get_text(self) -> str:
        """Return the non-empty text of all "text" parts, joined by newlines."""
        collected = []
        for part in self.parts:
            # Skip non-text parts and text parts whose payload is empty/None.
            if part.type != "text" or not part.text:
                continue
            collected.append(part.text)
        return "\n".join(collected)
55
+
56
+
57
class OpenCodeClient:
    """
    HTTP client for OpenCode server.

    Handles communication with an OpenCode server instance running
    in a specific worktree directory. Requests are executed by invoking
    ``curl`` as a subprocess; failures are logged and reported through
    the return value (``None`` / ``False`` / ``[]``) rather than raised.
    """

    def __init__(self, port: int, host: str = "127.0.0.1", timeout: int = 1800):
        """
        Initialize client for a specific server.

        Args:
            port: Port the server is listening on
            host: Hostname (default localhost)
            timeout: Request timeout in seconds (default 30 minutes for long agent runs)
        """
        # 30-minute timeout matches the agent execution limit - complex implementations
        # or CI fixes can take significant time as the LLM explores the codebase.
        self.port = port
        self.host = host
        self.timeout = timeout
        self.base_url = f"http://{host}:{port}"

    def _build_curl_command(
        self,
        method: str,
        path: str,
        has_body: bool,
        req_timeout: int,
    ) -> List[str]:
        """
        Build the curl argv for one request.

        The request body is never placed on the command line: when
        ``has_body`` is true curl is told to read it from stdin
        (``--data-binary @-``). Prompts can be large, and a body passed as
        an argument is subject to the OS limit on argument size.

        Args:
            method: HTTP method (GET, POST, etc.)
            path: API path appended to ``self.base_url``
            has_body: Whether a JSON body will be supplied on stdin
            req_timeout: Value passed to curl's ``--max-time`` (seconds)

        Returns:
            Argument list suitable for ``subprocess.run``
        """
        # -s silences the progress meter, -S still prints real errors to
        # stderr so the failure log is meaningful.
        # -w appends the HTTP status code on a new line for validation.
        cmd = [
            "curl", "-s", "-S", "-X", method,
            "-w", "\n%{http_code}",
            f"{self.base_url}{path}",
        ]
        if has_body:
            cmd.extend(["-H", "Content-Type: application/json"])
            # --data-binary (not -d) so curl sends stdin verbatim.
            cmd.extend(["--data-binary", "@-"])
        cmd.extend(["--max-time", str(req_timeout)])
        return cmd

    @staticmethod
    def _parse_curl_output(method: str, path: str, stdout: str) -> Optional[Any]:
        """
        Split curl output into body and status code and decode the body.

        Args:
            method: HTTP method, used only for log messages
            path: API path, used only for log messages
            stdout: Raw curl stdout: response body, newline, status code

        Returns:
            Decoded JSON value, or None for an empty body, an HTTP status
            >= 400, or output that lacks a valid status line

        Raises:
            json.JSONDecodeError: If the body is not valid JSON
        """
        # Only strip the right-hand side: stripping the left would remove the
        # newline that separates an empty body from the status code.
        output = stdout.rstrip()
        if not output:
            return None

        if "\n" not in output:
            # Malformed response - -w flag should always add status code line
            logger.warning(f"Malformed curl response (no status code): {output[:100]}")
            return None

        # Status code is on the last line (added by -w flag)
        body, _, status_code = output.rpartition("\n")
        if not status_code.isdigit():
            logger.warning(f"Invalid HTTP status code: {status_code}")
            return None

        if int(status_code) >= 400:
            logger.warning(f"HTTP {status_code} from {method} {path}")
            return None

        if not body.strip():
            return None

        return json.loads(body)

    def _request(
        self,
        method: str,
        path: str,
        data: Optional[Dict] = None,
        timeout: Optional[int] = None,
    ) -> Optional[Any]:
        """
        Make an HTTP request to the server using curl.

        We use curl instead of urllib/requests because:
        1. Zero dependencies - curl is universally available
        2. Reliable timeout handling for very long requests (30+ min)
        3. The ~10ms subprocess overhead is negligible vs. agent execution time

        Args:
            method: HTTP method (GET, POST, etc.)
            path: API path (e.g., /session)
            data: JSON data to send (for POST/PATCH); sent via stdin
            timeout: Override default timeout

        Returns:
            Parsed JSON response (dict or list) or None on error
        """
        req_timeout = timeout if timeout is not None else self.timeout
        payload = json.dumps(data) if data is not None else None
        cmd = self._build_curl_command(method, path, payload is not None, req_timeout)

        try:
            result = subprocess.run(
                cmd,
                input=payload,
                capture_output=True,
                text=True,
                # Extra 10s buffer for subprocess overhead beyond curl's timeout
                timeout=req_timeout + 10,
            )

            if result.returncode != 0:
                logger.warning(f"Request failed: {result.stderr}")
                return None

            return self._parse_curl_output(method, path, result.stdout)
        except subprocess.TimeoutExpired:
            logger.error(f"Request timed out: {method} {path}")
            return None
        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON response: {e}")
            return None
        except Exception as e:
            # Boundary catch: callers rely on None rather than exceptions.
            logger.error(f"Request error: {e}")
            return None

    def health_check(self) -> bool:
        """
        Check if the server is healthy.

        Returns:
            True if server is responding and healthy
        """
        response = self._request("GET", "/global/health", timeout=5)
        # A non-dict reply (e.g. a JSON list) cannot carry the health flag.
        return isinstance(response, dict) and bool(response.get("healthy", False))

    def create_session(self, title: Optional[str] = None) -> Optional[str]:
        """
        Create a new session.

        Args:
            title: Optional title for the session

        Returns:
            Session ID or None on error
        """
        data = {}
        if title:
            data["title"] = title

        response = self._request("POST", "/session", data=data, timeout=30)
        if not isinstance(response, dict):
            return None

        session_id = response.get("id")
        if not session_id:
            # Do not report success for a reply that carries no usable id.
            logger.warning("Session creation response did not include an id")
            return None

        logger.info(f"Created session: {session_id}")
        return session_id

    def get_session(self, session_id: str) -> Optional[Dict]:
        """
        Get session details.

        Args:
            session_id: Session ID to retrieve

        Returns:
            Session data or None if not found
        """
        return self._request("GET", f"/session/{session_id}", timeout=10)

    def session_exists(self, session_id: str) -> bool:
        """
        Check if a session exists.

        Args:
            session_id: Session ID to check

        Returns:
            True if session exists
        """
        session = self.get_session(session_id)
        return isinstance(session, dict) and "id" in session

    def send_message(
        self,
        session_id: str,
        text: str,
        timeout: Optional[int] = None,
    ) -> Optional["MessageResponse"]:
        """
        Send a message to a session and wait for response.

        Args:
            session_id: Session to send message to
            text: Message text
            timeout: Override default timeout

        Returns:
            MessageResponse or None on error
        """
        data = {
            "parts": [{"type": "text", "text": text}]
        }

        response = self._request(
            "POST",
            f"/session/{session_id}/message",
            data=data,
            timeout=timeout,
        )

        if not response:
            return None

        try:
            info = response.get("info", {})

            parts = []
            for raw in response.get("parts", []):
                kind = raw.get("type")
                part = MessagePart(type=raw.get("type", "unknown"))
                if kind in ("text", "reasoning"):
                    part.text = raw.get("text", "")
                elif kind == "tool":
                    part.tool = raw.get("tool")
                    # Tool input/output are nested under the part's "state".
                    state = raw.get("state", {})
                    part.tool_input = state.get("input")
                    part.tool_output = state.get("output")
                parts.append(part)

            return MessageResponse(
                message_id=info.get("id", ""),
                session_id=info.get("sessionID", session_id),
                role=info.get("role", "assistant"),
                parts=parts,
                tokens=info.get("tokens", {}),
                finish_reason=info.get("finish"),
            )
        except Exception as e:
            # Covers unexpected response shapes (e.g. a list instead of a dict).
            logger.error(f"Failed to parse message response: {e}")
            return None

    def get_messages(
        self,
        session_id: str,
        limit: Optional[int] = None,
    ) -> List[Dict]:
        """
        Get message history for a session.

        Args:
            session_id: Session to get messages from
            limit: Maximum number of messages to retrieve

        Returns:
            List of message dictionaries (empty on error or non-list reply)
        """
        path = f"/session/{session_id}/message"
        if limit:
            path += f"?limit={limit}"

        response = self._request("GET", path, timeout=30)
        return response if isinstance(response, list) else []
302
+
303
+
304
class OpenCodeServerManager:
    """
    Manages OpenCode server instances for worktrees.

    Handles starting, stopping, and connecting to OpenCode servers
    for different worktree directories. At most one server is tracked per
    resolved worktree path.
    """

    def __init__(self, opencode_binary: str = "opencode"):
        """
        Initialize server manager.

        Args:
            opencode_binary: Path to opencode binary
        """
        self.binary = opencode_binary
        self._servers: Dict[str, ServerInfo] = {}  # worktree path -> server info
        # worktree path -> process handle, kept so servers can be terminated
        # and reaped through Popen instead of a raw PID.
        self._procs: Dict[str, subprocess.Popen] = {}

    def _parse_port_from_output(self, output: str) -> Optional[int]:
        """
        Parse port number from server startup output.

        Accepts ``http`` or ``https`` URLs whose host is either a plain
        name/IPv4 address or a bracketed IPv6 literal.

        Returns:
            The port as an int, or None when no listening URL is found
        """
        match = re.search(
            r"listening on https?://(?:\[[^\]]+\]|[^:/\s]+):(\d+)",
            output,
        )
        if match:
            return int(match.group(1))
        return None

    @staticmethod
    def _terminate(proc, grace: float = 5.0) -> None:
        """
        Stop a server process and reap it.

        Sends SIGTERM, waits up to ``grace`` seconds, then falls back to
        SIGKILL. Does nothing if the process has already exited.
        """
        if proc.poll() is not None:
            return
        proc.terminate()
        try:
            proc.wait(timeout=grace)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()

    def _kill_process(self, worktree_str: str, info) -> None:
        """
        Terminate the process recorded for ``worktree_str``.

        Uses the stored Popen handle when there is one; otherwise falls back
        to signalling ``info.pid`` directly, which may raise
        ProcessLookupError if that PID no longer exists.
        """
        proc = self._procs.pop(worktree_str, None)
        if proc is not None:
            self._terminate(proc)
        else:
            os.kill(info.pid, signal.SIGTERM)

    @staticmethod
    def _drain_output(stream, lines, startup_done) -> None:
        """
        Read the server's output until EOF (runs in a daemon thread).

        Until ``startup_done`` is set, lines are handed to ``lines`` for the
        port search; afterwards they are only logged at debug level. Reading
        continuously keeps the pipe from filling up and keeps its read end
        open, so the server never writes to a closed pipe. A ``None``
        sentinel is queued when the stream ends.
        """
        try:
            for line in stream:
                if startup_done.is_set():
                    logger.debug(f"Server output: {line.strip()}")
                else:
                    lines.put(line)
        except (OSError, ValueError):
            pass  # Stream closed or unreadable; nothing left to drain
        finally:
            lines.put(None)
            try:
                stream.close()
            except OSError:
                pass

    def start_server(
        self,
        worktree: Path,
        timeout: int = 30,
    ) -> Optional["ServerInfo"]:
        """
        Start an OpenCode server for a worktree.

        A healthy, already-tracked server for the same resolved path is
        reused. An unhealthy one is terminated and replaced.

        Args:
            worktree: Path to the worktree directory
            timeout: Seconds to wait for server to report its port

        Returns:
            ServerInfo or None on failure
        """
        # Function-scope imports: only this method needs them.
        import queue
        import threading

        worktree_str = str(worktree.resolve())

        # Check if we already have a server for this worktree.
        # If the server dies between this check and its use, the HTTP call
        # will fail and the caller can retry, which starts a fresh server.
        cached = self._servers.get(worktree_str)
        if cached is not None:
            if OpenCodeClient(cached.port).health_check():
                logger.info(f"Reusing existing server on port {cached.port}")
                return cached

            # Server died or unresponsive - kill stale process and remove from cache
            logger.warning(f"Server on port {cached.port} not responding, killing PID {cached.pid}")
            try:
                self._kill_process(worktree_str, cached)
            except ProcessLookupError:
                pass  # Already dead
            except Exception as e:
                logger.debug(f"Error killing stale server: {e}")
            del self._servers[worktree_str]

        logger.info(f"Starting OpenCode server for {worktree}")

        # Start server process
        try:
            proc = subprocess.Popen(
                [self.binary, "serve"],
                cwd=worktree,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                # Undecodable bytes must not stop the drain thread.
                errors="replace",
            )
        except OSError as e:
            # Missing/non-executable binary or bad cwd: honour the documented
            # "None on failure" contract instead of raising.
            logger.error(f"Failed to launch OpenCode server: {e}")
            return None

        # A reader thread feeds output lines into a queue so the wait below
        # can honour `timeout` even if the server prints nothing at all.
        lines = queue.Queue()
        startup_done = threading.Event()
        threading.Thread(
            target=self._drain_output,
            args=(proc.stdout, lines, startup_done),
            name=f"opencode-output-{proc.pid}",
            daemon=True,
        ).start()

        # Wait for server to output port
        port = None
        seen = []
        deadline = time.monotonic() + timeout

        while not port:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            try:
                line = lines.get(timeout=remaining)
            except queue.Empty:
                break

            if line is None:
                # Output stream ended before a port was reported
                output = "".join(seen)
                logger.error(f"Server exited unexpectedly: {output}")
                self._terminate(proc)
                return None

            seen.append(line)
            logger.debug(f"Server output: {line.strip()}")
            port = self._parse_port_from_output(line)
            # Warn if we see output but can't parse port (format may have changed)
            if not port and "listen" in line.lower():
                logger.warning(f"Could not parse port from: {line.strip()}")

        # From here on the reader thread only logs what it reads.
        startup_done.set()

        if not port:
            logger.error("Failed to get server port - check OpenCode version/output format")
            self._terminate(proc)
            return None

        # Verify server is responding
        client = OpenCodeClient(port)
        if not self._wait_for_health(client, timeout=10):
            logger.error("Server not responding to health checks")
            self._terminate(proc)
            return None

        info = ServerInfo(port=port, pid=proc.pid, worktree=worktree)
        self._servers[worktree_str] = info
        self._procs[worktree_str] = proc
        logger.info(f"Server started on port {port} (PID {proc.pid})")

        return info

    def _wait_for_health(self, client: "OpenCodeClient", timeout: int = 10) -> bool:
        """
        Wait for server to become healthy.

        Polls ``client.health_check()`` every 0.5 seconds.

        Returns:
            True as soon as a check succeeds, False once ``timeout``
            seconds have elapsed without success
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if client.health_check():
                return True
            time.sleep(0.5)
        return False

    def get_client(self, worktree: Path) -> Optional["OpenCodeClient"]:
        """
        Get a client for a worktree, starting server if needed.

        Args:
            worktree: Path to worktree

        Returns:
            OpenCodeClient or None if server couldn't be started
        """
        info = self.start_server(worktree)
        if info:
            return OpenCodeClient(info.port)
        return None

    def stop_server(self, worktree: Path) -> bool:
        """
        Stop the server for a worktree.

        The server is removed from tracking even when stopping it raises.

        Args:
            worktree: Path to worktree

        Returns:
            True if a server was tracked for this worktree, False otherwise
        """
        worktree_str = str(worktree.resolve())

        info = self._servers.pop(worktree_str, None)
        if info is None:
            return False

        try:
            self._kill_process(worktree_str, info)
            logger.info(f"Stopped server on port {info.port} (PID {info.pid})")
        except ProcessLookupError:
            logger.debug(f"Server already stopped (PID {info.pid})")
        except Exception as e:
            logger.warning(f"Error stopping server: {e}")

        return True

    def stop_all(self) -> None:
        """Stop all managed servers."""
        # Iterate over a snapshot: stop_server mutates the dict.
        for worktree_str in list(self._servers.keys()):
            self.stop_server(Path(worktree_str))
@@ -0,0 +1,247 @@
1
+ """
2
+ OpenCode server-based agent implementation.
3
+
4
+ This agent uses the OpenCode HTTP server API instead of the CLI,
5
+ providing proper session management and continuity across multiple
6
+ agent calls within the same issue workflow.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ import shutil
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ from .base import BaseAgent, AgentResult
16
+ from .opencode_client import OpenCodeClient, OpenCodeServerManager
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Global server manager shared across agent instances.
21
+ # This is intentionally global (not a class variable or injected) because:
22
+ # 1. Multiple IssueRunner instances may create separate agent instances
23
+ # 2. The manager must track ALL running servers to properly reuse/cleanup
24
+ # 3. Python's GIL makes this safe for our single-threaded runner
25
+ _server_manager: Optional[OpenCodeServerManager] = None
26
+
27
+
28
def get_server_manager(binary_path: str = "opencode") -> OpenCodeServerManager:
    """Return the module-wide server manager, creating it on first use.

    ``binary_path`` is only consulted when the manager is created; later
    calls return the existing instance unchanged.
    """
    global _server_manager
    manager = _server_manager
    if manager is None:
        manager = OpenCodeServerManager(binary_path)
        _server_manager = manager
    return manager
34
+
35
+
36
def reset_server_manager() -> None:
    """Stop every managed server and drop the module-wide manager.

    Used for testing. Does nothing when no manager has been created yet.
    """
    global _server_manager
    if _server_manager is None:
        return
    _server_manager.stop_all()
    _server_manager = None
42
+
43
+
44
class OpenCodeServerAgent(BaseAgent):
    """
    Agent implementation using OpenCode HTTP server.

    Unlike the CLI-based agent, this implementation:
    - Obtains an OpenCode server per worktree through the shared server manager
    - Maintains session continuity via session IDs
    - Reuses a caller-supplied session ID when the server still reports it,
      and creates a new session otherwise
    """

    def __init__(self, config: dict):
        super().__init__(config)
        # Resolved lazily by the `server_manager` property.
        self._server_manager: Optional[OpenCodeServerManager] = None

    @property
    def name(self) -> str:
        return "OpenCode Server"

    @property
    def supports_sessions(self) -> bool:
        return True

    @property
    def server_manager(self) -> OpenCodeServerManager:
        """Get the server manager, initializing if needed.

        Note: self.binary_path is not defined in this class; it is
        presumably provided by BaseAgent (per the original author, it caches
        the result of find_binary() on first access) - confirm against
        BaseAgent.
        """
        if self._server_manager is None:
            self._server_manager = get_server_manager(self.binary_path)
        return self._server_manager

    @staticmethod
    def _node_version_key(node_dir: Path):
        """Sort key for an nvm version directory such as ``v20.11.0``.

        Returns a tuple of ints; components that are not plain digits
        become -1 so odd directory names sort below real versions.
        """
        pieces = node_dir.name.lstrip("v").split(".")
        return tuple(int(piece) if piece.isdigit() else -1 for piece in pieces)

    def find_binary(self) -> str:
        """
        Locate the opencode binary.

        Search order:
        1. agentPath from config
        2. PATH
        3. Common nvm locations (highest Node version first)
        4. Other common locations

        Returns:
            Path to an executable opencode binary

        Raises:
            FileNotFoundError: If no candidate location has the binary
        """
        # 1. Config-specified path
        agent_path = self.config.get("agentPath", "")
        if agent_path:
            if os.path.isfile(agent_path) and os.access(agent_path, os.X_OK):
                return agent_path
            # An explicit but unusable setting is almost certainly a
            # misconfiguration; say so instead of silently ignoring it.
            logger.warning(
                f"Configured agentPath is not an executable file, "
                f"falling back to auto-detection: {agent_path}"
            )

        # 2. Already in PATH
        which_result = shutil.which("opencode")
        if which_result:
            return which_result

        # 3. Common nvm locations
        home = Path.home()
        nvm_dir = home / ".nvm" / "versions" / "node"
        if nvm_dir.is_dir():
            # iterdir() order is arbitrary; sort so the choice is
            # deterministic and prefers the highest version.
            candidates = sorted(
                nvm_dir.iterdir(),
                key=self._node_version_key,
                reverse=True,
            )
            for node_dir in candidates:
                opencode_path = node_dir / "bin" / "opencode"
                if opencode_path.exists() and os.access(opencode_path, os.X_OK):
                    return str(opencode_path)

        # 4. Other common locations
        common_paths = [
            home / ".local" / "bin" / "opencode",
            Path("/usr/local/bin/opencode"),
            home / ".npm-global" / "bin" / "opencode",
        ]
        for path in common_paths:
            if path.exists() and os.access(path, os.X_OK):
                return str(path)

        raise FileNotFoundError(
            "opencode not found. Set 'agentPath' in autopilot.json or ensure opencode is installed."
        )

    def _get_client(self, worktree: Path) -> OpenCodeClient:
        """
        Get an OpenCode client for a worktree, starting server if needed.

        Args:
            worktree: Path to the worktree

        Returns:
            OpenCodeClient connected to server

        Raises:
            RuntimeError: If server couldn't be started
        """
        client = self.server_manager.get_client(worktree)
        if client is None:
            raise RuntimeError(f"Failed to start OpenCode server for {worktree}")
        return client

    def _ensure_session(
        self,
        client: OpenCodeClient,
        session_id: Optional[str],
        title: Optional[str] = None,
    ) -> str:
        """
        Ensure a valid session exists, creating one if needed.

        Args:
            client: OpenCode client
            session_id: Existing session ID (if any)
            title: Title for new session

        Returns:
            Valid session ID

        Raises:
            RuntimeError: If session couldn't be created
        """
        # Check if existing session is valid
        if session_id and client.session_exists(session_id):
            logger.info(f"Reusing existing session: {session_id}")
            return session_id

        # Create new session
        new_session_id = client.create_session(title=title)
        if not new_session_id:
            raise RuntimeError("Failed to create OpenCode session")

        logger.info(f"Created new session: {new_session_id}")
        return new_session_id

    def run(
        self,
        worktree: Path,
        prompt: str,
        session_id: Optional[str] = None,
    ) -> AgentResult:
        """
        Run OpenCode with the given prompt.

        Never raises: any exception is logged and converted into a failed
        AgentResult carrying the error text.

        Args:
            worktree: Working directory for the agent
            prompt: The task/prompt
            session_id: Previous session ID to continue (if any)

        Returns:
            AgentResult with session_id for future continuation
        """
        try:
            # Get client (starts server if needed)
            client = self._get_client(worktree)

            # Ensure we have a valid session
            session_id = self._ensure_session(client, session_id, title="autopilot")

            # Send message
            logger.info(f"Sending message to session {session_id}")
            response = client.send_message(session_id, prompt)

            if response is None:
                return AgentResult(
                    success=False,
                    session_id=session_id,
                    output="",
                    error="No response from OpenCode server",
                )

            # Extract text from response
            output_text = response.get_text()

            # A missing finish reason is treated as success, like "stop"
            # and "tool-calls"; anything else is reported as a failure.
            success = response.finish_reason in ("stop", "tool-calls", None)

            return AgentResult(
                success=success,
                session_id=session_id,
                output=output_text,
                error=None if success else f"Unexpected finish: {response.finish_reason}",
            )

        except Exception as e:
            logger.exception("Error running OpenCode server agent")
            return AgentResult(
                success=False,
                session_id=session_id,
                output="",
                error=str(e),
            )

    def stop_server(self, worktree: Path) -> None:
        """
        Stop the server for a worktree.

        Should be called during cleanup to release resources.

        Args:
            worktree: Path to the worktree
        """
        self.server_manager.stop_server(worktree)

    def cleanup(self, worktree: Path) -> None:
        """
        Clean up resources for a worktree.

        Currently this only stops the worktree's server.

        Args:
            worktree: Path to the worktree
        """
        self.stop_server(worktree)
@@ -0,0 +1,321 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Integration tests for OpenCode server agent.
4
+
5
+ Run with: python3 -m scripts.issue_runner.agents.test_opencode_server
6
+
7
+ These tests require OpenCode to be installed and configured.
8
+
9
+ NOTE: Some tests (session_continuity, session_persistence, concurrent_sessions)
10
+ rely on LLM responses and may occasionally fail due to non-deterministic model
11
+ output. This is expected - the tests verify real end-to-end behavior rather
12
+ than mocking. Re-run if a single test fails sporadically.
13
+ """
14
+
15
+ import logging
16
+ import sys
17
+ import tempfile
18
+ import shutil
19
+ from pathlib import Path
20
+
21
+ # Setup logging
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
25
+ )
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class TestResult:
    """Outcome record for one named check.

    A fresh result starts as failed with no error message; the test
    function flips ``passed`` or fills in ``error``.
    """

    def __init__(self, name: str):
        self.name = name
        self.passed = False
        self.error = None

    def __str__(self):
        if self.passed:
            status = "✅ PASS"
        else:
            status = f"❌ FAIL: {self.error}"
        return f"{self.name}: {status}"
38
+
39
+
40
def test_imports() -> TestResult:
    """Check that the agent package and client module import cleanly."""
    outcome = TestResult("imports")
    try:
        # The imported names are unused on purpose: importing is the test.
        from scripts.issue_runner.agents import get_agent, OpenCodeServerAgent
        from scripts.issue_runner.agents.opencode_client import (
            OpenCodeClient,
            OpenCodeServerManager,
            ServerInfo,
            MessageResponse,
        )
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
55
+
56
+
57
def test_agent_factory() -> TestResult:
    """Check that the factory maps "opencode-server" to OpenCodeServerAgent."""
    outcome = TestResult("agent_factory")
    try:
        from scripts.issue_runner.agents import get_agent, OpenCodeServerAgent

        created = get_agent("opencode-server", {})
        assert isinstance(created, OpenCodeServerAgent), f"Wrong type: {type(created)}"
        assert created.name == "OpenCode Server"
        assert created.supports_sessions is True
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
71
+
72
+
73
def test_find_binary() -> TestResult:
    """Check that the agent can locate an existing opencode binary."""
    outcome = TestResult("find_binary")
    try:
        from scripts.issue_runner.agents import get_agent

        located = get_agent("opencode-server", {}).find_binary()
        assert located is not None, "Binary not found"
        assert Path(located).exists(), f"Binary does not exist: {located}"
    except FileNotFoundError as exc:
        # Reported separately so a missing install is obvious in the summary.
        outcome.error = f"OpenCode not installed: {exc}"
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
89
+
90
+
91
def test_server_start_stop(worktree: Path) -> TestResult:
    """Test starting and stopping a server.

    Uses its own OpenCodeServerManager rather than the module-wide one, so
    the server is always stopped here (in ``finally``) - nothing else knows
    about it if an assertion fails after startup.

    Args:
        worktree: Directory to start the server in

    Returns:
        TestResult with ``passed`` set, or ``error`` describing the failure
    """
    result = TestResult("server_start_stop")
    try:
        from scripts.issue_runner.agents.opencode_client import OpenCodeServerManager

        manager = OpenCodeServerManager()
        try:
            # Start server
            info = manager.start_server(worktree)
            assert info is not None, "Failed to start server"
            assert info.port > 0, f"Invalid port: {info.port}"
            assert info.pid > 0, f"Invalid PID: {info.pid}"

            # Stop server
            stopped = manager.stop_server(worktree)
            assert stopped, "Failed to stop server"
        finally:
            # No-op after a successful stop; prevents a leaked server process
            # when one of the assertions above fails.
            manager.stop_all()

        result.passed = True
    except Exception as e:
        result.error = str(e)
    return result
113
+
114
+
115
def test_session_creation(worktree: Path) -> TestResult:
    """Check that a first run succeeds and yields a "ses_"-prefixed session ID."""
    outcome = TestResult("session_creation")
    try:
        from scripts.issue_runner.agents import get_agent

        runner = get_agent("opencode-server", {})

        # A trivial prompt is enough to force session creation.
        reply = runner.run(worktree, "What is 1+1? Reply with just the number.")

        assert reply.success, f"Agent failed: {reply.error}"
        assert reply.session_id is not None, "No session ID returned"
        # OpenCode uses "ses_" prefix as of v1.x - if this fails, check version compatibility
        assert reply.session_id.startswith("ses_"), f"Invalid session ID: {reply.session_id}"

        runner.cleanup(worktree)
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
136
+
137
+
138
def test_session_continuity(worktree: Path) -> TestResult:
    """Check that a second message in the same session sees the first one."""
    outcome = TestResult("session_continuity")
    try:
        from scripts.issue_runner.agents import get_agent

        runner = get_agent("opencode-server", {})

        # Step 1: plant a secret in a fresh session.
        first = runner.run(worktree, "Remember: the password is ELEPHANT. Just say OK.")
        assert first.success, f"First message failed: {first.error}"
        sid = first.session_id

        # Step 2: ask for it back using the same session ID.
        second = runner.run(worktree, "What was the password?", session_id=sid)
        assert second.success, f"Second message failed: {second.error}"
        assert second.session_id == sid, "Session ID changed"
        assert "ELEPHANT" in second.output.upper(), f"Password not recalled: {second.output}"

        runner.cleanup(worktree)
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
162
+
163
+
164
def test_session_persistence(worktree: Path) -> TestResult:
    """Check that a session can be continued after its server was stopped."""
    outcome = TestResult("session_persistence")
    try:
        from scripts.issue_runner.agents import get_agent

        # Phase 1: plant a secret, then stop the server via cleanup().
        writer = get_agent("opencode-server", {})
        first = writer.run(worktree, "Remember: the code is ZEBRA. Just say OK.")
        assert first.success, f"First message failed: {first.error}"
        sid = first.session_id

        writer.cleanup(worktree)

        # Phase 2: a new agent instance continues the same session ID.
        reader = get_agent("opencode-server", {})
        second = reader.run(worktree, "What was the code?", session_id=sid)
        assert second.success, f"Second message failed: {second.error}"
        assert second.session_id == sid, "Session ID changed after restart"
        assert "ZEBRA" in second.output.upper(), f"Code not recalled after restart: {second.output}"

        reader.cleanup(worktree)
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
191
+
192
+
193
def test_concurrent_sessions(worktree1: Path, worktree2: Path) -> TestResult:
    """Check that two worktrees keep independent sessions.

    "Concurrent" means both worktrees have live sessions at the same time,
    not that calls run in parallel - the agent is invoked sequentially.
    Each worktree must recall only the secret it was given.
    """
    outcome = TestResult("concurrent_sessions")
    try:
        from scripts.issue_runner.agents import get_agent

        runner = get_agent("opencode-server", {})

        # Plant a different secret in each worktree (sequential calls, but
        # both sessions exist side by side afterwards).
        planted1 = runner.run(worktree1, "Remember: worktree1 secret is APPLE. Say OK.")
        planted2 = runner.run(worktree2, "Remember: worktree2 secret is BANANA. Say OK.")

        assert planted1.success, f"Worktree1 failed: {planted1.error}"
        assert planted2.success, f"Worktree2 failed: {planted2.error}"
        assert planted1.session_id != planted2.session_id, "Sessions should be different"

        # Each session must return its own secret.
        recalled1 = runner.run(worktree1, "What was the secret?", session_id=planted1.session_id)
        recalled2 = runner.run(worktree2, "What was the secret?", session_id=planted2.session_id)

        assert "APPLE" in recalled1.output.upper(), f"Worktree1 wrong: {recalled1.output}"
        assert "BANANA" in recalled2.output.upper(), f"Worktree2 wrong: {recalled2.output}"

        runner.cleanup(worktree1)
        runner.cleanup(worktree2)
    except Exception as exc:
        outcome.error = str(exc)
    else:
        outcome.passed = True
    return outcome
227
+
228
+
229
def run_tests():
    """Run all tests and report results.

    The basic checks run first; the integration tests are skipped unless
    every basic check passed. The module-wide server manager is reset
    between integration tests so each starts from a clean state.

    Returns:
        True if every executed test passed, False otherwise
    """
    from scripts.issue_runner.agents.opencode_server import reset_server_manager

    print("\n" + "=" * 60)
    print("OpenCode Server Agent - Integration Tests")
    print("=" * 60 + "\n")

    results = []

    # Basic tests that don't need a worktree
    print("Running basic tests...")
    results.append(test_imports())
    results.append(test_agent_factory())
    results.append(test_find_binary())

    # Check if we can proceed with integration tests
    if not all(r.passed for r in results):
        print("\n⚠️ Basic tests failed, skipping integration tests\n")
    else:
        print("\nRunning integration tests (requires OpenCode)...")

        # Reset global state before integration tests
        reset_server_manager()

        # Use the current repo as worktree for single-worktree tests
        worktree = Path(__file__).parent.parent.parent.parent.resolve()

        results.append(test_server_start_stop(worktree))
        reset_server_manager()  # Clean state between tests

        results.append(test_session_creation(worktree))
        reset_server_manager()

        results.append(test_session_continuity(worktree))
        reset_server_manager()

        results.append(test_session_persistence(worktree))
        reset_server_manager()

        # For concurrent test, we need two different directories
        # Use temp directories that are git repos
        temp1 = None
        temp2 = None
        try:
            temp1 = Path(tempfile.mkdtemp(prefix="autopilot-test1-"))
            temp2 = Path(tempfile.mkdtemp(prefix="autopilot-test2-"))

            # Initialize as git repos (required by opencode).
            # check=True: a failed `git init` is reported as a setup failure
            # below instead of surfacing later as a confusing agent error.
            import subprocess
            for repo_dir in (temp1, temp2):
                subprocess.run(
                    ["git", "init"],
                    cwd=repo_dir,
                    capture_output=True,
                    check=True,
                )

            results.append(test_concurrent_sessions(temp1, temp2))
        except Exception as e:
            result = TestResult("concurrent_sessions")
            result.error = f"Setup failed: {e}"
            results.append(result)
        finally:
            # Always cleanup temp dirs and reset server state
            reset_server_manager()
            if temp1:
                shutil.rmtree(temp1, ignore_errors=True)
            if temp2:
                shutil.rmtree(temp2, ignore_errors=True)

    # Print results
    print("\n" + "-" * 60)
    print("Results:")
    print("-" * 60)

    for r in results:
        print(f" {r}")

    passed = sum(1 for r in results if r.passed)
    total = len(results)

    print("-" * 60)
    print(f"\n{'✅' if passed == total else '❌'} {passed}/{total} tests passed\n")

    return passed == total
311
+
312
+
313
if __name__ == "__main__":
    import os

    # Run from the repository root (four levels above this file) and put it
    # on sys.path so the `scripts.` package imports resolve.
    root_dir = Path(__file__).parent.parent.parent.parent.resolve()
    os.chdir(root_dir)
    sys.path.insert(0, str(root_dir))

    # Exit status 0 only when every test passed.
    exit_code = 0 if run_tests() else 1
    sys.exit(exit_code)
@@ -81,6 +81,7 @@ class IssueRunner:
81
81
  updated_at=datetime.utcnow().isoformat() + "Z",
82
82
  )
83
83
  self._save_state(state, f"❌ Failed at step {state.step.value}: {e}")
84
+ self._cleanup_agent(Path(state.worktree))
84
85
  return False
85
86
 
86
87
  return state.step == IssueStep.DONE
@@ -110,6 +111,14 @@ class IssueRunner:
110
111
  message = STEP_STATUS_MESSAGES.get(state.step, f"Step: {state.step.value}")
111
112
  self.github.save_state(state.issue_number, state, message)
112
113
 
114
def _cleanup_agent(self, worktree: Path) -> None:
    """Release the agent's resources for ``worktree``, if it supports that.

    Agents without a ``cleanup`` attribute are left alone. Errors raised by
    the agent's cleanup are logged as warnings and not propagated.
    """
    if not hasattr(self.agent, "cleanup"):
        return
    try:
        self.agent.cleanup(worktree)
    except Exception as exc:
        logger.warning(f"Error cleaning up agent: {exc}")
121
+
113
122
  def _transition(self, state: StateData) -> StateData:
114
123
  """Execute one state transition."""
115
124
  handlers = {
@@ -672,6 +681,8 @@ This PR is automatically created by Autopilot to implement issue #{state.issue_n
672
681
 
673
682
  # Cleanup worktree
674
683
  worktree = Path(state.worktree)
684
+ self._cleanup_agent(worktree)
685
+
675
686
  if self.git.worktree_exists(worktree):
676
687
  self.git.remove_worktree(worktree)
677
688