codetether 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- a2a_server/__init__.py +29 -0
- a2a_server/a2a_agent_card.py +365 -0
- a2a_server/a2a_errors.py +1133 -0
- a2a_server/a2a_executor.py +926 -0
- a2a_server/a2a_router.py +1033 -0
- a2a_server/a2a_types.py +344 -0
- a2a_server/agent_card.py +408 -0
- a2a_server/agents_server.py +271 -0
- a2a_server/auth_api.py +349 -0
- a2a_server/billing_api.py +638 -0
- a2a_server/billing_service.py +712 -0
- a2a_server/billing_webhooks.py +501 -0
- a2a_server/config.py +96 -0
- a2a_server/database.py +2165 -0
- a2a_server/email_inbound.py +398 -0
- a2a_server/email_notifications.py +486 -0
- a2a_server/enhanced_agents.py +919 -0
- a2a_server/enhanced_server.py +160 -0
- a2a_server/hosted_worker.py +1049 -0
- a2a_server/integrated_agents_server.py +347 -0
- a2a_server/keycloak_auth.py +750 -0
- a2a_server/livekit_bridge.py +439 -0
- a2a_server/marketing_tools.py +1364 -0
- a2a_server/mcp_client.py +196 -0
- a2a_server/mcp_http_server.py +2256 -0
- a2a_server/mcp_server.py +191 -0
- a2a_server/message_broker.py +725 -0
- a2a_server/mock_mcp.py +273 -0
- a2a_server/models.py +494 -0
- a2a_server/monitor_api.py +5904 -0
- a2a_server/opencode_bridge.py +1594 -0
- a2a_server/redis_task_manager.py +518 -0
- a2a_server/server.py +726 -0
- a2a_server/task_manager.py +668 -0
- a2a_server/task_queue.py +742 -0
- a2a_server/tenant_api.py +333 -0
- a2a_server/tenant_middleware.py +219 -0
- a2a_server/tenant_service.py +760 -0
- a2a_server/user_auth.py +721 -0
- a2a_server/vault_client.py +576 -0
- a2a_server/worker_sse.py +873 -0
- agent_worker/__init__.py +8 -0
- agent_worker/worker.py +4877 -0
- codetether/__init__.py +10 -0
- codetether/__main__.py +4 -0
- codetether/cli.py +112 -0
- codetether/worker_cli.py +57 -0
- codetether-1.2.2.dist-info/METADATA +570 -0
- codetether-1.2.2.dist-info/RECORD +66 -0
- codetether-1.2.2.dist-info/WHEEL +5 -0
- codetether-1.2.2.dist-info/entry_points.txt +4 -0
- codetether-1.2.2.dist-info/licenses/LICENSE +202 -0
- codetether-1.2.2.dist-info/top_level.txt +5 -0
- codetether_voice_agent/__init__.py +6 -0
- codetether_voice_agent/agent.py +445 -0
- codetether_voice_agent/codetether_mcp.py +345 -0
- codetether_voice_agent/config.py +16 -0
- codetether_voice_agent/functiongemma_caller.py +380 -0
- codetether_voice_agent/session_playback.py +247 -0
- codetether_voice_agent/tools/__init__.py +21 -0
- codetether_voice_agent/tools/definitions.py +135 -0
- codetether_voice_agent/tools/handlers.py +380 -0
- run_server.py +314 -0
- ui/monitor-tailwind.html +1790 -0
- ui/monitor.html +1775 -0
- ui/monitor.js +2662 -0
agent_worker/worker.py
ADDED
@@ -0,0 +1,4877 @@
#!/usr/bin/env python3
"""
A2A Agent Worker - Runs on machines with codebases, connects to A2A server

This worker:
1. Registers itself with the A2A server
2. Registers local codebases it can work on
3. Connects via SSE to receive task assignments pushed from server
4. Executes tasks using OpenCode
5. Reports results back to the server
6. Reports OpenCode session history to the server

Usage:
    python worker.py --server https://api.codetether.run --name "dev-vm-worker"
"""

import argparse
import asyncio
import json
import logging
import os
import signal
import subprocess
import sys
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from enum import StrEnum
from pathlib import Path
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Callable, Set

import aiohttp


# =============================================================================
# VaultClient - HashiCorp Vault integration for secrets
# =============================================================================


class VaultClient:
    """
    Simple Vault client for fetching secrets.

    Supports KV v2 secrets engine.
    """

    def __init__(
        self,
        addr: Optional[str] = None,
        token: Optional[str] = None,
    ):
        self.addr = addr or os.environ.get('VAULT_ADDR')
        self.token = token or os.environ.get('VAULT_TOKEN')
        self._session: Optional[aiohttp.ClientSession] = None

    def is_configured(self) -> bool:
        """Check if Vault is configured."""
        return bool(self.addr and self.token)

    async def _get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=10),
                headers={'X-Vault-Token': self.token or ''},
            )
        return self._session

    async def close(self):
        """Close the HTTP session."""
        if self._session and not self._session.closed:
            await self._session.close()

    async def get_secret(self, path: str) -> Optional[Dict[str, Any]]:
        """
        Get a secret from Vault KV v2.

        Args:
            path: Secret path (e.g., 'secret/spotlessbinco/sendgrid')

        Returns:
            Dictionary of secret data, or None if not found.
        """
        if not self.is_configured():
            return None

        try:
            session = await self._get_session()

            # KV v2 requires /data/ in the path
            # Convert 'secret/foo' to 'secret/data/foo'
            parts = path.split('/', 1)
            if len(parts) == 2:
                mount = parts[0]
                secret_path = parts[1]
                url = f'{self.addr}/v1/{mount}/data/{secret_path}'
            else:
                url = f'{self.addr}/v1/{path}'

            async with session.get(url) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    return data.get('data', {}).get('data', {})
                else:
                    return None

        except Exception as e:
            logging.getLogger('a2a-worker').warning(
                f'Failed to fetch secret from Vault: {e}'
            )
            return None

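
# --- Editor's illustrative sketch (not part of the published wheel) ---
# Minimal VaultClient usage, assuming VAULT_ADDR/VAULT_TOKEN are exported and
# a KV v2 secret exists at the hypothetical path 'secret/myapp/sendgrid'.
async def _example_fetch_sendgrid_key() -> Optional[str]:
    vault = VaultClient()
    if not vault.is_configured():
        return None  # No Vault env vars; nothing to fetch
    try:
        # get_secret() rewrites the path to 'secret/data/myapp/sendgrid'
        secret = await vault.get_secret('secret/myapp/sendgrid')
        return (secret or {}).get('api_key')
    finally:
        await vault.close()
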
class TaskStatus(StrEnum):
    """Status values for tasks in the task queue."""

    PENDING = 'pending'
    RUNNING = 'running'
    COMPLETED = 'completed'
    FAILED = 'failed'


class SpecialCodebaseId(StrEnum):
    """Special codebase ID values with semantic meaning."""

    PENDING = '__pending__'  # Tasks awaiting codebase assignment
    GLOBAL = 'global'  # Global sessions not tied to a specific project


class AgentType(StrEnum):
    """Agent types that determine how tasks are executed."""

    BUILD = 'build'  # Default OpenCode build agent
    ECHO = 'echo'  # Lightweight test agent that echoes input
    NOOP = 'noop'  # Lightweight test agent that does nothing
    REGISTER_CODEBASE = (
        'register_codebase'  # Special task for codebase registration
    )


# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger('a2a-worker')


@dataclass
class WorkerConfig:
    """Configuration for the agent worker."""

    server_url: str
    worker_name: str
    worker_id: str = field(default_factory=lambda: str(uuid.uuid4())[:12])
    codebases: List[Dict[str, str]] = field(default_factory=list)
    poll_interval: int = 5  # Fallback poll interval when SSE is unavailable
    opencode_bin: Optional[str] = None
    # Optional override for OpenCode storage location (directory that contains
    # subdirs like project/, session/, message/, part/).
    opencode_storage_path: Optional[str] = None
    # Optional message sync (for session detail view on remote codebases)
    session_message_sync_max_sessions: int = 3
    session_message_sync_max_messages: int = 100
    capabilities: List[str] = field(
        default_factory=lambda: ['opencode', 'build', 'deploy']
    )
    # Max concurrent tasks (bounded worker pool)
    max_concurrent_tasks: int = 2
    # SSE reconnection settings
    sse_reconnect_delay: float = 1.0
    sse_max_reconnect_delay: float = 60.0
    sse_heartbeat_timeout: float = (
        45.0  # Server should send heartbeats every 30s
    )
    # Auth token for SSE endpoint (from A2A_AUTH_TOKEN env var)
    auth_token: Optional[str] = None
    # Email notifications via SendGrid
    sendgrid_api_key: Optional[str] = None
    sendgrid_from_email: Optional[str] = None
    notification_email: Optional[str] = None  # Recipient for task reports
    # Email reply-to configuration for task continuation
    email_inbound_domain: Optional[str] = None  # e.g., 'inbound.codetether.run'
    email_reply_prefix: str = 'task'  # Prefix for reply-to addresses
    # Email debugging options
    email_dry_run: bool = False  # Log emails instead of sending
    email_verbose: bool = False  # Verbose logging for email operations
    # Auto-compaction settings for task handoffs
    compaction_max_tokens: int = 100000  # Trigger compaction above this
    compaction_target_tokens: int = 50000  # Target size after compaction
    auto_summarize_handoffs: bool = (
        True  # Enable auto-summarization for session resumes
    )
    # Agent registration: Register worker as a discoverable agent in the A2A network
    # This allows other agents to find this worker via discover_agents MCP tool
    register_as_agent: bool = True  # Auto-register as discoverable agent
    agent_name: Optional[str] = (
        None  # Name for agent discovery (defaults to worker_name)
    )
    agent_description: Optional[str] = None  # Description for agent discovery
    agent_url: Optional[str] = (
        None  # URL where this agent can be reached (optional)
    )
    # Instance ID for unique agent identity (role:instance pattern)
    # If not set, generated as hostname:short_uuid
    agent_instance_id: Optional[str] = None


@dataclass
class LocalCodebase:
    """A codebase registered with this worker."""

    id: str  # Server-assigned ID
    name: str
    path: str
    description: str = ''

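
# --- Editor's illustrative sketch (not part of the published wheel) ---
# Building a WorkerConfig the way the CLI in the module docstring implies;
# the codebase entry below is hypothetical.
def _example_worker_config() -> WorkerConfig:
    return WorkerConfig(
        server_url='https://api.codetether.run',
        worker_name='dev-vm-worker',
        codebases=[{'name': 'my-app', 'path': '~/src/my-app'}],
        auth_token=os.environ.get('A2A_AUTH_TOKEN'),
    )
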
# =============================================================================
# WorkerClient - HTTP/SSE communication with the A2A server
# =============================================================================


class WorkerClient:
    """
    Handles HTTP and SSE communication with the A2A server.

    Responsibilities:
    - Manage aiohttp session lifecycle and connection pooling
    - SSE connection establishment and event handling
    - API calls for task status updates, output streaming
    - Worker registration/unregistration and heartbeat management
    """

    def __init__(self, config: WorkerConfig):
        self.config = config
        self.session: Optional[aiohttp.ClientSession] = None
        # SSE connection state
        self._sse_connected = False
        self._sse_reconnect_delay = config.sse_reconnect_delay
        self._last_heartbeat: float = 0.0

    async def get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session with connection pooling."""
        if self.session is None or self.session.closed:
            # Configure connection pool for better performance under load
            connector = aiohttp.TCPConnector(
                limit=100,  # Total connection pool size
                limit_per_host=30,  # Max connections per host
                ttl_dns_cache=300,  # DNS cache TTL in seconds
                enable_cleanup_closed=True,  # Clean up closed connections
            )
            self.session = aiohttp.ClientSession(
                connector=connector,
                timeout=aiohttp.ClientTimeout(total=30),
                headers={'Content-Type': 'application/json'},
            )
        return self.session

    async def close(self):
        """Close the HTTP session."""
        if self.session is not None and not self.session.closed:
            await self.session.close()
            # Wait for underlying connector to close
            await asyncio.sleep(0.1)

    async def register_worker(
        self,
        models: List[Dict[str, Any]],
        global_codebase_id: Optional[str],
    ) -> bool:
        """Register this worker with the A2A server."""
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/workers/register'

            import platform

            payload = {
                'worker_id': self.config.worker_id,
                'name': self.config.worker_name,
                'capabilities': self.config.capabilities,
                'hostname': platform.node(),  # Cross-platform (works on Windows)
                'models': models,
                'global_codebase_id': global_codebase_id,
            }

            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    logger.info(f'Worker registered successfully: {data}')
                    return True
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Worker registration returned {resp.status}: {text}'
                    )
                    return False

        except Exception as e:
            logger.warning(
                f'Failed to register worker (continuing anyway): {e}'
            )
            return False

    async def unregister_worker(self):
        """Unregister this worker from the A2A server."""
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/workers/{self.config.worker_id}/unregister'

            async with session.post(url) as resp:
                if resp.status == 200:
                    logger.info('Worker unregistered successfully')

        except Exception as e:
            logger.debug(f'Failed to unregister worker: {e}')

    async def send_heartbeat(self) -> bool:
        """Send heartbeat to the A2A server to indicate worker is alive.

        Returns True if heartbeat was successful, False otherwise.
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/workers/{self.config.worker_id}/heartbeat'

            async with session.post(
                url, timeout=aiohttp.ClientTimeout(total=10)
            ) as resp:
                if resp.status == 200:
                    logger.debug('Heartbeat sent successfully')
                    return True
                else:
                    logger.warning(f'Heartbeat returned {resp.status}')
                    return False

        except Exception as e:
            logger.debug(f'Failed to send heartbeat: {e}')
            return False

    async def register_as_agent(
        self,
        agent_name: Optional[str] = None,
        description: Optional[str] = None,
        url: Optional[str] = None,
        routing_capabilities: Optional[List[str]] = None,
    ) -> bool:
        """
        Register this worker as a discoverable agent in the A2A network.

        This makes the worker visible to other agents via the discover_agents
        MCP tool, enabling agent-to-agent communication.

        Uses the role:instance pattern for unique identity:
        - role (agent_name): stable role like "code-reviewer" for routing
        - instance_id: unique per-worker instance for disambiguation

        The full discovery name is "{role}:{instance_id}" to handle multiple
        workers with the same role. Routing (send_to_agent) uses the role.

        Args:
            agent_name: Name for agent discovery (defaults to config.agent_name or worker_name)
            description: Human-readable description of what this agent does
            url: Optional URL where this agent can be reached directly
            routing_capabilities: List of task routing capabilities (e.g., ["pytest", "terraform"])

        Returns:
            True if registration succeeded, False otherwise.
        """
        import platform

        try:
            session = await self.get_session()
            # Use the proper MCP JSON-RPC endpoint
            url_endpoint = f'{self.config.server_url}/mcp/v1/rpc'

            # Use platform.node() for cross-platform hostname (works on Windows too)
            hostname = platform.node()

            # Build instance_id for unique identity
            # Format: hostname:short_uuid (e.g., "dev-vm:a1b2c3")
            instance_id = (
                self.config.agent_instance_id
                or f'{hostname}:{self.config.worker_id[:6]}'
            )

            # Role is the routing identity (used by send_to_agent)
            role = (
                agent_name or self.config.agent_name or self.config.worker_name
            )

            # Full discovery name is role:instance for uniqueness
            # This prevents registry collisions when multiple workers have the same role
            discovery_name = f'{role}:{instance_id}'

            # Store the resolved names for heartbeat refresh
            self._agent_role = role
            self._agent_discovery_name = discovery_name

            # Build routing capabilities list for task matching
            caps_list = routing_capabilities or self.config.capabilities or []
            caps_str = ', '.join(caps_list) if caps_list else 'general'

            agent_description = description or (
                f'OpenCode worker agent (role={role}, instance={instance_id}). '
                f'Routing capabilities: {caps_str}'
            )
            agent_url = url or self.config.agent_url or self.config.server_url

            # JSON-RPC 2.0 request to call register_agent tool
            # Note: 'capabilities' here is the A2A protocol AgentCapabilities (dict)
            # for streaming/push_notifications - NOT the routing capabilities (list)
            payload = {
                'jsonrpc': '2.0',
                'id': str(uuid.uuid4()),
                'method': 'tools/call',
                'params': {
                    'name': 'register_agent',
                    'arguments': {
                        'name': discovery_name,
                        'description': agent_description,
                        'url': agent_url,
                        # A2A protocol capabilities (dict) - for streaming/push features
                        'capabilities': {
                            'streaming': True,
                            'push_notifications': True,
                        },
                    },
                },
            }

            headers = {'Content-Type': 'application/json'}
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'

            async with session.post(
                url_endpoint,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=10
                ),  # Best-effort, don't block startup
            ) as resp:
                if resp.status == 200:
                    result = await resp.json()
                    if result.get('error'):
                        logger.warning(
                            f'Agent registration failed: {result["error"]}'
                        )
                        return False
                    logger.info(
                        f"Registered as discoverable agent: '{discovery_name}' (role='{role}')"
                    )
                    return True
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Agent registration returned {resp.status}: {text}'
                    )
                    return False

        except asyncio.TimeoutError:
            logger.warning(
                'Agent registration timed out (continuing without discovery registration)'
            )
            return False
        except Exception as e:
            logger.warning(f'Failed to register as agent (non-fatal): {e}')
            return False

    async def refresh_agent_heartbeat(self) -> bool:
        """
        Refresh the agent's last_seen timestamp to keep it visible in discovery.

        Should be called periodically (every 30-60s). Agents not seen within
        120s are filtered from discover_agents results.

        Returns:
            True if heartbeat was refreshed, False otherwise.
        """
        if (
            not hasattr(self, '_agent_discovery_name')
            or not self._agent_discovery_name
        ):
            return False  # Not registered as agent

        try:
            session = await self.get_session()
            url_endpoint = f'{self.config.server_url}/mcp/v1/rpc'

            payload = {
                'jsonrpc': '2.0',
                'id': str(uuid.uuid4()),
                'method': 'tools/call',
                'params': {
                    'name': 'refresh_agent_heartbeat',
                    'arguments': {
                        'agent_name': self._agent_discovery_name,
                    },
                },
            }

            headers = {'Content-Type': 'application/json'}
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'

            async with session.post(
                url_endpoint,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=5),
            ) as resp:
                if resp.status == 200:
                    result = await resp.json()
                    if result.get('error'):
                        logger.debug(
                            f'Agent heartbeat failed: {result["error"]}'
                        )
                        return False
                    logger.debug(
                        f'Agent heartbeat refreshed: {self._agent_discovery_name}'
                    )
                    return True
                return False

        except Exception as e:
            logger.debug(f'Agent heartbeat error: {e}')
            return False

    async def unregister_agent(self) -> bool:
        """
        Unregister this worker from the agent discovery registry.

        Called during graceful shutdown.

        Returns:
            True if unregistration succeeded, False otherwise.
        """
        # Note: There's no explicit unregister_agent MCP tool currently,
        # but the agent will be marked inactive when heartbeats stop (TTL).
        agent_name = (
            getattr(self, '_agent_discovery_name', None)
            or self.config.agent_name
            or self.config.worker_name
        )
        logger.debug(
            f"Agent '{agent_name}' will be filtered from discovery after TTL expires"
        )
        return True

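    # --- Editor's note (not part of the published wheel) ---
    # Per the docstrings above, a worker keeps itself discoverable by
    # refreshing well inside the 120s TTL, e.g.:
    #
    #     while running:
    #         await client.refresh_agent_heartbeat()
    #         await asyncio.sleep(45)  # 30-60s keeps last_seen fresh
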
    async def register_codebase(
        self, name: str, path: str, description: str = ''
    ) -> Optional[str]:
        """Register a local codebase with the A2A server.

        Returns the server-assigned codebase ID, or None on failure.
        """
        # Validate path exists locally
        if not os.path.isdir(path):
            logger.error(f'Codebase path does not exist: {path}')
            return None

        # Normalize for comparisons / de-duping when re-registering.
        normalized_path = os.path.abspath(os.path.expanduser(path))

        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/codebases'

            payload = {
                'name': name,
                'path': normalized_path,
                'description': description,
                'worker_id': self.config.worker_id,  # Associate with this worker
            }

            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    codebase_data = data.get('codebase', data)
                    codebase_id = codebase_data.get('id')

                    logger.info(
                        f"Registered codebase '{name}' (ID: {codebase_id}) at {path}"
                    )
                    return codebase_id
                else:
                    text = await resp.text()
                    logger.error(
                        f'Failed to register codebase: {resp.status} - {text}'
                    )
                    return None

        except Exception as e:
            logger.error(f'Failed to register codebase: {e}')
            return None

    async def get_pending_tasks(
        self, codebase_ids: List[str]
    ) -> List[Dict[str, Any]]:
        """Get pending tasks from the server (fallback polling method)."""
        try:
            session = await self.get_session()

            url = f'{self.config.server_url}/v1/opencode/tasks'
            params = {
                'status': TaskStatus.PENDING,
            }

            async with session.get(url, params=params) as resp:
                if resp.status == 200:
                    tasks = await resp.json()
                    return tasks
                else:
                    return []

        except Exception as e:
            logger.debug(f'Failed to get pending tasks: {e}')
            return []

    async def claim_task(self, task_id: str) -> bool:
        """
        Atomically claim a task on the server.

        Returns True if claim succeeded, False if task was already claimed
        by another worker.
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/worker/tasks/claim'

            # Build headers including auth token if available
            headers = {'Content-Type': 'application/json'}
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'
            headers['X-Worker-ID'] = self.config.worker_id

            payload = {'task_id': task_id}

            async with session.post(
                url,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                if resp.status == 200:
                    logger.info(f'Successfully claimed task {task_id}')
                    return True
                elif resp.status == 409:
                    # Task already claimed by another worker
                    logger.debug(
                        f'Task {task_id} already claimed by another worker'
                    )
                    return False
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Failed to claim task {task_id}: {resp.status} - {text}'
                    )
                    # On unexpected errors, don't process to be safe
                    return False

        except Exception as e:
            logger.warning(f'Error claiming task {task_id}: {e}')
            # On network errors, don't process to avoid potential duplicates
            return False

    async def release_task(self, task_id: str) -> bool:
        """
        Release a task claim on the server after processing.

        This notifies the server that the worker is done with the task
        (whether successful or failed).
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/worker/tasks/release'

            # Build headers including auth token if available
            headers = {'Content-Type': 'application/json'}
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'
            headers['X-Worker-ID'] = self.config.worker_id

            payload = {'task_id': task_id}

            async with session.post(
                url,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                if resp.status == 200:
                    logger.debug(f'Released task {task_id}')
                    return True
                else:
                    text = await resp.text()
                    logger.debug(
                        f'Failed to release task {task_id}: {resp.status} - {text}'
                    )
                    return False

        except Exception as e:
            logger.debug(f'Error releasing task {task_id}: {e}')
            return False

    async def stream_task_output(self, task_id: str, output: str):
        """Stream output chunk to the server."""
        if not output:
            return
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/tasks/{task_id}/output'

            payload = {
                'worker_id': self.config.worker_id,
                'output': output,
                'timestamp': datetime.now().isoformat(),
            }

            async with session.post(url, json=payload) as resp:
                if resp.status != 200:
                    logger.debug(f'Failed to stream output: {resp.status}')
        except Exception as e:
            logger.debug(f'Failed to stream output: {e}')

    async def update_task_status(
        self,
        task_id: str,
        status: str,
        result: Optional[str] = None,
        error: Optional[str] = None,
        session_id: Optional[str] = None,
        max_retries: int = 4,
        base_delay: float = 1.0,
    ):
        """Update task status on the server with exponential backoff retry.

        Status updates are critical for maintaining consistency between worker
        and server state. This method retries failed updates with exponential
        backoff to handle transient network issues.

        The operation is idempotent - multiple updates to the same status are
        safe as the server will simply acknowledge the current state.

        Args:
            task_id: The task ID to update
            status: New status value
            result: Optional result data
            error: Optional error message
            session_id: Optional session ID
            max_retries: Maximum number of retry attempts (default: 4, total 5 attempts)
            base_delay: Initial delay in seconds before first retry (default: 1.0)
        """
        url = f'{self.config.server_url}/v1/opencode/tasks/{task_id}/status'

        payload = {
            'status': status,
            'worker_id': self.config.worker_id,
        }
        if session_id:
            payload['session_id'] = session_id
        if result:
            payload['result'] = result
        if error:
            payload['error'] = error

        last_exception: Optional[Exception] = None
        last_status_code: Optional[int] = None
        last_response_text: Optional[str] = None

        for attempt in range(max_retries + 1):
            try:
                session = await self.get_session()
                async with session.put(url, json=payload) as resp:
                    if resp.status == 200:
                        if attempt > 0:
                            logger.info(
                                f'Task {task_id} status update to "{status}" succeeded on retry {attempt}'
                            )
                        return  # Success

                    last_status_code = resp.status
                    last_response_text = await resp.text()

                    # Don't retry client errors (4xx) except 429 (rate limit)
                    if 400 <= resp.status < 500 and resp.status != 429:
                        logger.warning(
                            f'Task {task_id} status update failed with client error: '
                            f'{resp.status} - {last_response_text}'
                        )
                        return  # Don't retry client errors

            except asyncio.CancelledError:
                raise  # Don't retry on cancellation
            except Exception as e:
                last_exception = e

            # Calculate delay with exponential backoff (1s, 2s, 4s, 8s)
            if attempt < max_retries:
                delay = base_delay * (2**attempt)
                logger.warning(
                    f'Task {task_id} status update to "{status}" failed '
                    f'(attempt {attempt + 1}/{max_retries + 1}), '
                    f'retrying in {delay:.1f}s...'
                )
                await asyncio.sleep(delay)

        # All retries exhausted - log the final failure
        if last_exception:
            logger.error(
                f'Task {task_id} status update to "{status}" failed after '
                f'{max_retries + 1} attempts. Last error: {last_exception}'
            )
        elif last_status_code:
            logger.error(
                f'Task {task_id} status update to "{status}" failed after '
                f'{max_retries + 1} attempts. Last response: {last_status_code} - '
                f'{last_response_text}'
            )

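    # --- Editor's note (not part of the published wheel) ---
    # With the defaults above (max_retries=4, base_delay=1.0), a persistently
    # failing update is attempted 5 times, sleeping base_delay * 2**attempt
    # (1s, 2s, 4s, 8s) between attempts - roughly 15s before giving up.
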
    async def sync_api_keys_from_server(
        self, user_id: Optional[str] = None
    ) -> bool:
        """
        Sync API keys from the server (Vault-backed) to local OpenCode auth.json.

        This allows users to manage their API keys in the web UI and have them
        automatically synced to workers.

        Args:
            user_id: Optional user ID to sync keys for. If not provided,
                syncs keys for the codebase owner.

        Returns:
            True if sync was successful, False otherwise.
        """
        try:
            session = await self.get_session()

            # Build sync URL with optional user_id
            sync_url = f'{self.config.server_url}/v1/opencode/api-keys/sync'
            params = {'worker_id': self.config.worker_id}
            if user_id:
                params['user_id'] = user_id

            async with session.get(sync_url, params=params) as resp:
                if resp.status != 200:
                    logger.warning(
                        f'Failed to sync API keys: HTTP {resp.status}'
                    )
                    return False

                data = await resp.json()

            # Get paths for auth.json and opencode.json
            data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
                '~/.local/share'
            )
            config_home = os.environ.get(
                'XDG_CONFIG_HOME'
            ) or os.path.expanduser('~/.config')

            auth_path = Path(data_home) / 'opencode' / 'auth.json'
            config_path = Path(config_home) / 'opencode' / 'opencode.json'

            # Merge server keys with existing local auth.json
            server_auth = data.get('auth', {})
            if server_auth:
                existing_auth = {}
                if auth_path.exists():
                    try:
                        with open(auth_path, 'r', encoding='utf-8') as f:
                            existing_auth = json.load(f)
                    except Exception as e:
                        logger.warning(
                            f'Failed to read existing auth.json: {e}'
                        )

                # Merge: server keys override local for same provider
                merged_auth = {**existing_auth, **server_auth}

                # Write merged auth
                auth_path.parent.mkdir(parents=True, exist_ok=True)
                with open(auth_path, 'w', encoding='utf-8') as f:
                    json.dump(merged_auth, f, indent=2)

                logger.info(
                    f'Synced {len(server_auth)} API keys from server '
                    f'(total: {len(merged_auth)} providers)'
                )

            # Merge server provider configs with existing opencode.json
            server_providers = data.get('providers', {})
            if server_providers:
                existing_config = {}
                if config_path.exists():
                    try:
                        with open(config_path, 'r', encoding='utf-8') as f:
                            existing_config = json.load(f)
                    except Exception as e:
                        logger.warning(
                            f'Failed to read existing opencode.json: {e}'
                        )

                # Merge provider configs
                existing_providers = existing_config.get('provider', {})
                merged_providers = {**existing_providers, **server_providers}
                existing_config['provider'] = merged_providers

                # Write merged config
                config_path.parent.mkdir(parents=True, exist_ok=True)
                with open(config_path, 'w', encoding='utf-8') as f:
                    json.dump(existing_config, f, indent=2)

                logger.info(
                    f'Synced {len(server_providers)} provider configs from server'
                )

            return True

        except Exception as e:
            logger.error(f'Failed to sync API keys from server: {e}')
            return False

    async def sync_sessions(
        self,
        codebase_id: str,
        sessions: List[Dict[str, Any]],
    ) -> int:
        """Sync sessions to the server for a codebase.

        Returns the HTTP status code.
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/codebases/{codebase_id}/sessions/sync'
            payload = {
                'worker_id': self.config.worker_id,
                'sessions': sessions,
            }
            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    logger.debug(
                        f'Synced {len(sessions)} sessions (codebase_id={codebase_id})'
                    )
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Session sync failed for codebase_id={codebase_id}: {resp.status} {text[:200]}'
                    )
                return resp.status
        except Exception as e:
            logger.debug(f'Failed to sync sessions: {e}')
            return 0

    async def sync_session_messages(
        self,
        codebase_id: str,
        session_id: str,
        messages: List[Dict[str, Any]],
    ) -> bool:
        """Sync messages for a single session. Returns True on HTTP 200."""
        try:
            if not messages:
                return False

            session = await self.get_session()
            url = (
                f'{self.config.server_url}/v1/opencode/codebases/{codebase_id}'
                f'/sessions/{session_id}/messages/sync'
            )
            payload = {
                'worker_id': self.config.worker_id,
                'messages': messages,
            }
            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    logger.debug(
                        f'Synced {len(messages)} messages for session {session_id}'
                    )
                    return True
                else:
                    text = await resp.text()
                    logger.debug(f'Message sync returned {resp.status}: {text}')
                    return False
        except Exception as e:
            logger.debug(f'Message sync failed for session {session_id}: {e}')
            return False

    @property
    def sse_connected(self) -> bool:
        return self._sse_connected

    @sse_connected.setter
    def sse_connected(self, value: bool):
        self._sse_connected = value

    @property
    def sse_reconnect_delay(self) -> float:
        return self._sse_reconnect_delay

    @sse_reconnect_delay.setter
    def sse_reconnect_delay(self, value: float):
        self._sse_reconnect_delay = value

    @property
    def last_heartbeat(self) -> float:
        return self._last_heartbeat

    @last_heartbeat.setter
    def last_heartbeat(self, value: float):
        self._last_heartbeat = value

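
# --- Editor's illustrative sketch (not part of the published wheel) ---
# How the WorkerClient claim/report/release calls above compose when the
# fallback poller finds a pending task; the 'id'/'prompt' fields are assumed
# shapes of the task dicts returned by get_pending_tasks().
async def _example_process_one(client: WorkerClient, task: Dict[str, Any]) -> None:
    task_id = task['id']
    if not await client.claim_task(task_id):
        return  # Another worker won the race (HTTP 409); skip safely
    try:
        await client.update_task_status(task_id, TaskStatus.RUNNING)
        result = f"echo: {task.get('prompt', '')}"  # stand-in for real execution
        await client.update_task_status(
            task_id, TaskStatus.COMPLETED, result=result
        )
    except Exception as e:
        await client.update_task_status(task_id, TaskStatus.FAILED, error=str(e))
    finally:
        await client.release_task(task_id)
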
# =============================================================================
# EmailNotificationService - SendGrid email notifications
# =============================================================================


def _sanitize_email_for_log(email: str) -> str:
    """
    Sanitize email address for logging (mask local part).

    Security: Prevents sensitive email addresses from appearing in logs.

    Args:
        email: Full email address

    Returns:
        Sanitized email (e.g., 'r***y@example.com')
    """
    if not email or '@' not in email:
        return '***@***'
    local, domain = email.rsplit('@', 1)
    if len(local) <= 2:
        return f'***@{domain}'
    return f'{local[0]}***{local[-1]}@{domain}'


class EmailNotificationService:
    """
    Handles email notifications via SendGrid.

    Sends task completion/failure reports to configured recipients.

    Supports dry-run mode for testing (logs instead of sends) and
    verbose logging for debugging email operations.
    """

    def __init__(self, config: WorkerConfig):
        self.config = config
        self._session: Optional[aiohttp.ClientSession] = None
        self._dry_run = config.email_dry_run
        self._verbose = config.email_verbose

    def is_configured(self) -> bool:
        """Check if email notifications are properly configured."""
        return bool(
            self.config.sendgrid_api_key
            and self.config.sendgrid_from_email
            and self.config.notification_email
        )

    def get_config_status(self) -> Dict[str, Any]:
        """
        Get email configuration status for debugging.

        Returns dict with configuration details and any issues.
        """
        issues = []

        if not self.config.sendgrid_api_key:
            issues.append('SENDGRID_API_KEY not set')
        elif not self.config.sendgrid_api_key.startswith('SG.'):
            issues.append(
                'SENDGRID_API_KEY does not appear valid (should start with SG.)'
            )

        if not self.config.sendgrid_from_email:
            issues.append('SENDGRID_FROM_EMAIL not set')
        elif '@' not in self.config.sendgrid_from_email:
            issues.append(
                'SENDGRID_FROM_EMAIL does not appear to be a valid email'
            )

        if not self.config.notification_email:
            issues.append('notification_email not set')

        return {
            'configured': self.is_configured(),
            'dry_run': self._dry_run,
            'verbose': self._verbose,
            'sendgrid_api_key_set': bool(self.config.sendgrid_api_key),
            'sendgrid_from_email': _sanitize_email_for_log(
                self.config.sendgrid_from_email or ''
            ),
            'notification_email': _sanitize_email_for_log(
                self.config.notification_email or ''
            ),
            'inbound_domain': self.config.email_inbound_domain,
            'reply_prefix': self.config.email_reply_prefix,
            'issues': issues,
        }

    async def _get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session for SendGrid API."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=30)
            )
        return self._session

    async def close(self):
        """Close the HTTP session."""
        if self._session and not self._session.closed:
            await self._session.close()

    def _build_reply_to_address(
        self,
        session_id: Optional[str],
        codebase_id: Optional[str] = None,
    ) -> Optional[str]:
        """
        Build the reply-to address for email replies to continue tasks.

        Format: {prefix}+{session_id}@{domain}
        Or: {prefix}+{session_id}+{codebase_id}@{domain}
        """
        if not session_id:
            return None
        if not self.config.email_inbound_domain:
            return None

        prefix = self.config.email_reply_prefix or 'task'
        domain = self.config.email_inbound_domain

        if codebase_id:
            return f'{prefix}+{session_id}+{codebase_id}@{domain}'
        return f'{prefix}+{session_id}@{domain}'

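    # --- Editor's note (not part of the published wheel) ---
    # Example of the format above: with email_inbound_domain set to
    # 'inbound.codetether.run' and the default 'task' prefix, session 'abc123'
    # and codebase 'cb9' yield 'task+abc123+cb9@inbound.codetether.run'.
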
    async def send_task_report(
        self,
        task_id: str,
        title: str,
        status: str,
        result: Optional[str] = None,
        error: Optional[str] = None,
        duration_ms: Optional[int] = None,
        session_id: Optional[str] = None,
        codebase_id: Optional[str] = None,
    ) -> bool:
        """Send a task completion/failure email report.

        Returns True if email was sent successfully (or logged in dry-run mode).

        In dry-run mode (--email-dry-run), emails are logged instead of sent.
        In verbose mode (--email-verbose), additional debugging info is logged.
        """
        if self._verbose:
            logger.info(
                f'[EMAIL-DEBUG] send_task_report called: task_id={task_id}, '
                f'status={status}, session_id={session_id}'
            )

        if not self.is_configured():
            if self._verbose:
                config_status = self.get_config_status()
                logger.info(
                    f'[EMAIL-DEBUG] Not configured: {config_status["issues"]}'
                )
            else:
                logger.debug('Email notifications not configured, skipping')
            return False

        try:
            session = await self._get_session()

            # Format duration
            duration_str = 'N/A'
            if duration_ms:
                seconds = duration_ms // 1000
                minutes = seconds // 60
                if minutes > 0:
                    duration_str = f'{minutes}m {seconds % 60}s'
                else:
                    duration_str = f'{seconds}s'

            # Build email content
            status_color = '#22c55e' if status == 'completed' else '#ef4444'
            status_icon = '✓' if status == 'completed' else '✗'

            result_section = ''
            if result and status == 'completed':
                # Try to parse and extract meaningful content
                display_result = result
                import json as json_module
                import html as html_module

                # Handle NDJSON (newline-delimited JSON) from OpenCode streaming
                # Extract text content from streaming events
                text_parts = []
                try:
                    lines = result.strip().split('\n')
                    for line in lines:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            parsed = json_module.loads(line)
                            # OpenCode streaming format: look for text events
                            if isinstance(parsed, dict):
                                event_type = parsed.get('type', '')
                                part = parsed.get('part', {})

                                # Extract text from "text" type events
                                if event_type == 'text' and isinstance(
                                    part, dict
                                ):
                                    text = part.get('text', '')
                                    if text:
                                        text_parts.append(text)
                                # Also check for direct text field
                                elif 'text' in parsed and isinstance(
                                    parsed['text'], str
                                ):
                                    text_parts.append(parsed['text'])
                                # Check for result/output/message fields
                                elif 'result' in parsed:
                                    text_parts.append(str(parsed['result']))
                                elif 'output' in parsed:
                                    text_parts.append(str(parsed['output']))
                                elif 'message' in parsed and isinstance(
                                    parsed['message'], str
                                ):
                                    text_parts.append(parsed['message'])
                        except json_module.JSONDecodeError:
                            # Not JSON, might be plain text
                            if line and not line.startswith('{'):
                                text_parts.append(line)

                    if text_parts:
                        # Join all extracted text
                        display_result = ' '.join(text_parts)
                    else:
                        # Fallback: try parsing as single JSON
                        try:
                            parsed = json_module.loads(result)
                            if isinstance(parsed, dict):
                                for key in [
                                    'result',
                                    'output',
                                    'message',
                                    'content',
                                    'response',
                                    'text',
                                ]:
                                    if key in parsed:
                                        display_result = str(parsed[key])
                                        break
                        except json_module.JSONDecodeError:
                            pass

                except Exception:
                    # If all parsing fails, use as-is
                    pass

                # Escape HTML for safety
                display_result = html_module.escape(display_result)

                # Convert newlines to <br> for display
                display_result = display_result.replace('\n', '<br>')

                truncated = (
                    display_result[:3000] + '...'
                    if len(display_result) > 3000
                    else display_result
                )
                result_section = f"""
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151; width: 140px; vertical-align: top;">Output</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">
                        <div style="font-size: 14px; line-height: 1.6; color: #1f2937;">{truncated}</div>
                    </td>
                </tr>"""

            error_section = ''
            if error:
                truncated = error[:1000] + '...' if len(error) > 1000 else error
                error_section = f"""
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151; width: 140px;">Error</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">
                        <pre style="margin: 0; white-space: pre-wrap; word-break: break-word; font-family: monospace; font-size: 13px; background: #fef2f2; padding: 12px; border-radius: 6px; color: #dc2626;">{truncated}</pre>
                    </td>
                </tr>"""

            # Build footer with reply instructions if email reply is configured
            reply_enabled = bool(
                self.config.email_inbound_domain and session_id
            )
            if reply_enabled:
                footer_html = f"""
                <div style="background: #f9fafb; padding: 16px; text-align: center;">
                    <p style="margin: 0 0 8px 0; font-size: 13px; color: #374151; font-weight: 500;">
                        Reply to this email to continue the conversation
                    </p>
                    <p style="margin: 0; font-size: 12px; color: #6b7280;">
                        Your reply will be sent to the worker to continue working on this task.
                    </p>
                    <p style="margin: 8px 0 0 0; font-size: 11px; color: #9ca3af;">
                        Sent by A2A Worker - {self.config.worker_name}
                    </p>
                </div>"""
            else:
                footer_html = f"""
                <div style="background: #f9fafb; padding: 16px; text-align: center; font-size: 12px; color: #6b7280;">
                    Sent by A2A Worker - {self.config.worker_name}
                </div>"""

            html = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body style="margin: 0; padding: 20px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; background-color: #f3f4f6;">
    <div style="max-width: 600px; margin: 0 auto; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
        <div style="background: linear-gradient(135deg, #1e293b 0%, #334155 100%); padding: 24px; text-align: center;">
            <h1 style="margin: 0; color: white; font-size: 20px; font-weight: 600;">A2A Task Report</h1>
        </div>
        <div style="padding: 24px;">
            <div style="display: inline-block; padding: 6px 12px; border-radius: 20px; background: {status_color}20; color: {status_color}; font-weight: 600; font-size: 14px; margin-bottom: 16px;">
                {status_icon} {status.upper()}
            </div>
            <table style="width: 100%; border-collapse: collapse; margin-top: 16px;">
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151; width: 140px;">Task ID</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-family: monospace; font-size: 13px;">{task_id}</td>
                </tr>
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Title</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">{title}</td>
                </tr>
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Session ID</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-family: monospace; font-size: 13px;">{session_id or 'N/A'}</td>
                </tr>
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Worker</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">{self.config.worker_name}</td>
                </tr>
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Duration</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">{duration_str}</td>
                </tr>
                {result_section}
                {error_section}
            </table>
        </div>
        {footer_html}
    </div>
</body>
</html>"""

            subject = f'[A2A] Task {status}: {title}'

            payload = {
                'personalizations': [
                    {'to': [{'email': self.config.notification_email}]}
                ],
                'from': {'email': self.config.sendgrid_from_email},
                'subject': subject,
                'content': [{'type': 'text/html', 'value': html}],
            }

            # Add reply-to address if configured for email reply continuation
            reply_to = self._build_reply_to_address(session_id, codebase_id)
            if reply_to:
                payload['reply_to'] = {'email': reply_to}
                if self._verbose:
                    logger.info(f'[EMAIL-DEBUG] Reply-to set to: {reply_to}')
                else:
                    logger.debug(f'Email reply-to set to: {reply_to}')

            # Verbose logging of email details (with sanitized addresses)
            if self._verbose:
                logger.info(
                    f'[EMAIL-DEBUG] Email payload: subject="{subject}", '
                    f'from={_sanitize_email_for_log(self.config.sendgrid_from_email or "")}, '
                    f'to={_sanitize_email_for_log(self.config.notification_email or "")}, '
                    f'reply_to={reply_to or "none"}'
                )

            # Dry-run mode: log instead of sending
            if self._dry_run:
                logger.info(
                    f'[EMAIL-DRY-RUN] Would send email for task {task_id}:\n'
                    f'  Subject: {subject}\n'
                    f'  To: {_sanitize_email_for_log(self.config.notification_email or "")}\n'
                    f'  From: {_sanitize_email_for_log(self.config.sendgrid_from_email or "")}\n'
                    f'  Reply-To: {reply_to or "none"}\n'
                    f'  Status: {status}'
                )
                return True  # Return success in dry-run mode

            headers = {
                'Authorization': f'Bearer {self.config.sendgrid_api_key}',
                'Content-Type': 'application/json',
            }

            if self._verbose:
                logger.info('[EMAIL-DEBUG] Sending to SendGrid API...')

            async with session.post(
                'https://api.sendgrid.com/v3/mail/send',
                json=payload,
                headers=headers,
            ) as resp:
                if resp.status in (200, 202):
                    logger.info(
                        f'Email report sent for task {task_id} to '
                        f'{_sanitize_email_for_log(self.config.notification_email or "")}'
                    )
                    return True
                else:
                    text = await resp.text()
                    logger.error(
                        f'Failed to send email: {resp.status} - {text}'
|
|
1441
|
+
)
|
|
1442
|
+
if self._verbose:
|
|
1443
|
+
logger.error(f'[EMAIL-DEBUG] SendGrid response: {text}')
|
|
1444
|
+
return False
|
|
1445
|
+
|
|
1446
|
+
except Exception as e:
|
|
1447
|
+
logger.error(f'Failed to send email notification: {e}')
|
|
1448
|
+
if self._verbose:
|
|
1449
|
+
import traceback
|
|
1450
|
+
|
|
1451
|
+
logger.error(
|
|
1452
|
+
f'[EMAIL-DEBUG] Traceback: {traceback.format_exc()}'
|
|
1453
|
+
)
|
|
1454
|
+
return False
|
|
1455
|
+
|
|
    async def send_test_email(self) -> Dict[str, Any]:
        """
        Send a test email to validate email configuration.

        This sends a simple test email to verify SendGrid is properly configured.
        Returns a dict with success status and any errors.

        Used by the --test-email CLI flag.
        """
        result: Dict[str, Any] = {
            'success': False,
            'configured': self.is_configured(),
            'config_status': self.get_config_status(),
            'dry_run': self._dry_run,
            'message': '',
        }

        if not self.is_configured():
            result['message'] = (
                'Email not fully configured. Issues: '
                + ', '.join(result['config_status']['issues'])
            )
            return result

        # Generate a test task ID
        test_task_id = f'test-{uuid.uuid4().hex[:8]}'
        test_session_id = f'ses_test_{uuid.uuid4().hex[:8]}'

        logger.info(f'Sending test email (dry_run={self._dry_run})...')
        logger.info(
            f'  To: {_sanitize_email_for_log(self.config.notification_email or "")}'
        )
        logger.info(
            f'  From: {_sanitize_email_for_log(self.config.sendgrid_from_email or "")}'
        )

        try:
            success = await self.send_task_report(
                task_id=test_task_id,
                title='Test Email from A2A Worker',
                status='completed',
                result='This is a test email to verify your email notification configuration is working correctly.',
                duration_ms=1234,
                session_id=test_session_id,
            )

            if success:
                result['success'] = True
                if self._dry_run:
                    result['message'] = (
                        'Test email logged successfully (dry-run mode - no email sent)'
                    )
                else:
                    result['message'] = (
                        f'Test email sent successfully to {_sanitize_email_for_log(self.config.notification_email or "")}'
                    )
            else:
                result['message'] = (
                    'Failed to send test email - check logs for details'
                )

        except Exception as e:
            result['message'] = f'Exception while sending test email: {e}'
            logger.error(f'Test email failed: {e}')

        return result

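    # Illustrative usage (a sketch, not part of the worker API): the
    # --test-email CLI flag is expected to drive send_test_email() roughly
    # like this. `load_config()` is a hypothetical stand-in for however
    # WorkerConfig is actually constructed in your deployment.
    #
    #     import asyncio
    #
    #     async def _check_email() -> None:
    #         service = EmailNotificationService(load_config())
    #         outcome = await service.send_test_email()
    #         print(outcome['message'])
    #
    #     asyncio.run(_check_email())
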
# =============================================================================
# ConfigManager - Configuration and setup
# =============================================================================


class ConfigManager:
    """
    Handles configuration and setup for the worker.

    Responsibilities:
    - Finding OpenCode binary
    - Managing storage paths
    - Provider authentication discovery
    - Model discovery
    """

    def __init__(self, config: WorkerConfig):
        self.config = config
        self._opencode_storage_path: Optional[Path] = None

    def find_opencode_binary(self) -> str:
        """Find the opencode binary."""
        locations = [
            str(Path.home() / '.local' / 'bin' / 'opencode'),
            str(Path.home() / 'bin' / 'opencode'),
            '/usr/local/bin/opencode',
            '/usr/bin/opencode',
            # Check in the A2A project
            str(
                Path(__file__).parent.parent
                / 'opencode'
                / 'packages'
                / 'opencode'
                / 'bin'
                / 'opencode'
            ),
        ]

        for loc in locations:
            if Path(loc).exists() and os.access(loc, os.X_OK):
                logger.info(f'Found opencode at: {loc}')
                return loc

        # Try PATH
        try:
            result = subprocess.run(
                ['which', 'opencode'], capture_output=True, text=True
            )
            if result.returncode == 0:
                return result.stdout.strip()
        except Exception as e:
            logger.debug(f'Binary search via PATH failed: {e}')

        logger.warning('OpenCode binary not found, some features may not work')
        return 'opencode'

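    # Note: shelling out to `which` only works on Unix-like systems. A
    # portable sketch of the same PATH lookup, using only the standard
    # library, would be:
    #
    #     import shutil
    #
    #     found = shutil.which('opencode')  # None if not on PATH
    #     binary = found or 'opencode'
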
    def get_authenticated_providers(self) -> set:
        """Get set of provider IDs that have authentication configured."""
        authenticated = set()
        try:
            data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
                '~/.local/share'
            )
            auth_path = (
                Path(os.path.expanduser(data_home)) / 'opencode' / 'auth.json'
            )
            if auth_path.exists():
                with open(auth_path, 'r', encoding='utf-8') as f:
                    auth_data = json.load(f)
                for provider_id, provider_auth in auth_data.items():
                    if isinstance(provider_auth, dict):
                        # Check if provider has valid auth (key or oauth tokens)
                        has_key = bool(provider_auth.get('key'))
                        has_oauth = bool(
                            provider_auth.get('access')
                            or provider_auth.get('refresh')
                        )
                        if has_key or has_oauth:
                            authenticated.add(provider_id)
                            logger.debug(
                                f"Provider '{provider_id}' has authentication configured"
                            )
            logger.info(
                f'Found {len(authenticated)} authenticated providers: {sorted(authenticated)}'
            )
        except Exception as e:
            logger.warning(f'Failed to read OpenCode auth.json: {e}')
        return authenticated

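    # The parser above assumes only this much about auth.json: a mapping of
    # provider id -> dict carrying either an API `key` or OAuth
    # `access`/`refresh` tokens. An illustrative (fabricated) file that would
    # yield {'anthropic', 'openai'} -- the empty-key provider is skipped:
    #
    #     {
    #         "anthropic": {"type": "oauth", "access": "...", "refresh": "..."},
    #         "openai": {"type": "api", "key": "sk-..."},
    #         "google": {"type": "api", "key": ""}
    #     }
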
    async def get_available_models(
        self, opencode_bin: str
    ) -> List[Dict[str, Any]]:
        """Fetch available models from local OpenCode instance.

        Only returns models from providers that have authentication configured.
        """
        # Get authenticated providers first
        authenticated_providers = self.get_authenticated_providers()
        if not authenticated_providers:
            logger.warning(
                'No authenticated providers found - no models will be registered'
            )
            return []

        all_models = []

        # Try default port first
        port = 9777
        try:
            url = f'http://localhost:{port}/provider'
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    url, timeout=aiohttp.ClientTimeout(total=2)
                ) as resp:
                    if resp.status == 200:
                        data = await resp.json()
                        all_providers = data.get('all', [])
                        for provider in all_providers:
                            provider_id = provider.get('id')
                            provider_name = provider.get('name', provider_id)
                            for model_id, model_info in provider.get(
                                'models', {}
                            ).items():
                                all_models.append(
                                    {
                                        'id': f'{provider_id}/{model_id}',
                                        'name': model_info.get('name', model_id),
                                        'provider': provider_name,
                                        'provider_id': provider_id,
                                        'capabilities': {
                                            'reasoning': model_info.get('reasoning', False),
                                            'attachment': model_info.get('attachment', False),
                                            'tool_call': model_info.get('tool_call', False),
                                        },
                                    }
                                )
        except Exception as e:
            # OpenCode might not be running
            logger.debug(f'Model discovery via API failed: {e}')

        # Fallback: Try CLI if no models found via API
        if not all_models:
            try:
                logger.info(f'Trying CLI: {opencode_bin} models')
                if opencode_bin and os.path.exists(opencode_bin):
                    proc = await asyncio.create_subprocess_exec(
                        opencode_bin,
                        'models',
                        stdout=asyncio.subprocess.PIPE,
                        stderr=asyncio.subprocess.PIPE,
                    )
                    stdout, stderr = await proc.communicate()
                    if proc.returncode == 0:
                        lines = stdout.decode().strip().splitlines()
                        for line in lines:
                            line = line.strip()
                            if not line:
                                continue
                            # Format is provider/model
                            parts = line.split('/', 1)
                            if len(parts) == 2:
                                provider, model_name = parts
                                all_models.append(
                                    {
                                        'id': line,
                                        'name': model_name,
                                        'provider': provider,
                                        'provider_id': provider,
                                        'capabilities': {
                                            'reasoning': False,
                                            'attachment': False,
                                            'tool_call': True,
                                        },
                                    }
                                )
                    else:
                        logger.warning(
                            f'CLI failed with code {proc.returncode}: {stderr.decode()}'
                        )
                else:
                    logger.warning(
                        f'OpenCode binary not found or not executable: {opencode_bin}'
                    )
            except Exception as e:
                logger.warning(f'Failed to list models via CLI: {e}')

        # Filter to only authenticated providers
        authenticated_models = []
        for model in all_models:
            provider_id = model.get('provider_id') or model.get('provider', '')
            if provider_id in authenticated_providers:
                authenticated_models.append(model)

        logger.info(
            f'Discovered {len(all_models)} total models, '
            f'{len(authenticated_models)} from authenticated providers'
        )

        if authenticated_models:
            providers_with_models = sorted(
                set(
                    m.get('provider_id', m.get('provider'))
                    for m in authenticated_models
                )
            )
            logger.info(
                f'Authenticated providers with models: {providers_with_models}'
            )

        return authenticated_models

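    # For reference, the /provider response shape the parser above assumes
    # (illustrative only -- provider and model ids here are fabricated):
    #
    #     {
    #         "all": [
    #             {
    #                 "id": "anthropic",
    #                 "name": "Anthropic",
    #                 "models": {
    #                     "claude-example": {
    #                         "name": "Claude Example",
    #                         "reasoning": true,
    #                         "attachment": true,
    #                         "tool_call": true
    #                     }
    #                 }
    #             }
    #         ]
    #     }
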
    def get_opencode_storage_path(self) -> Path:
        """Get the OpenCode global storage path.

        We prefer an explicit override, but we also try to "do what I mean" in
        common deployments where the worker runs as a service account while the
        codebases (and OpenCode storage) live under /home/<user>/.
        """

        if self._opencode_storage_path is not None:
            return self._opencode_storage_path

        def _dir_has_any_children(p: Path) -> bool:
            try:
                if not p.exists() or not p.is_dir():
                    return False
                # Fast path: check for any entry without materializing a list.
                for _ in p.iterdir():
                    return True
                return False
            except Exception as e:
                logger.debug(f'Error checking directory children for {p}: {e}')
                return False

        def _storage_has_message_data(storage: Path) -> bool:
            """Return True if this storage appears to contain message/part data."""
            return _dir_has_any_children(
                storage / 'message'
            ) and _dir_has_any_children(storage / 'part')

        def _storage_match_score(storage: Path) -> int:
            """Return how many registered codebases appear in this OpenCode storage's project list."""
            codebase_paths: List[str] = [
                str(cb.get('path'))
                for cb in (self.config.codebases or [])
                if cb.get('path')
            ]
            if not codebase_paths:
                return 0

            project_dir = storage / 'project'
            if not project_dir.exists() or not project_dir.is_dir():
                return 0

            # Compare resolved paths to handle symlinks/relative config.
            try:
                resolved_codebases = {
                    str(Path(p).resolve()) for p in codebase_paths
                }
            except Exception as e:
                logger.debug(
                    f'Failed to resolve codebase paths, using raw paths: {e}'
                )
                resolved_codebases = set(codebase_paths)

            matched: set[str] = set()

            for project_file in project_dir.glob('*.json'):
                if project_file.stem == 'global':
                    continue
                try:
                    with open(project_file, 'r', encoding='utf-8') as f:
                        project = json.load(f)
                    worktree = project.get('worktree')
                    if not worktree:
                        continue
                    try:
                        wt = str(Path(worktree).resolve())
                        if wt in resolved_codebases:
                            matched.add(wt)
                    except Exception as e:
                        logger.debug(
                            f'Failed to resolve worktree path {worktree}: {e}'
                        )
                        if worktree in resolved_codebases:
                            matched.add(worktree)
                except Exception as e:
                    logger.debug(
                        f'Error reading project file {project_file}: {e}'
                    )
                    continue

            return len(matched)

        candidates: List[Path] = []
        override_path: Optional[Path] = None

        # 1) Explicit override (config/env)
        override = (
            self.config.opencode_storage_path
            or os.environ.get('A2A_OPENCODE_STORAGE_PATH')
            or os.environ.get('OPENCODE_STORAGE_PATH')
        )
        if override:
            override_path = Path(os.path.expanduser(override)).resolve()
            candidates.append(override_path)

        # 2) Standard per-user location for the current service user
        xdg_data = os.environ.get(
            'XDG_DATA_HOME', str(Path.home() / '.local' / 'share')
        )
        candidates.append(
            Path(os.path.expanduser(xdg_data)) / 'opencode' / 'storage'
        )

        # 3) Heuristic: infer /home/<user> from codebase paths
        inferred_users: List[str] = []
        for cb in self.config.codebases:
            p = cb.get('path')
            if not p:
                continue
            parts = Path(p).parts
            if len(parts) >= 3 and parts[0] == '/' and parts[1] == 'home':
                inferred_users.append(parts[2])

        # Also infer from the opencode binary path (often /home/<user>/.opencode/bin/opencode)
        opencode_bin = self.config.opencode_bin
        if opencode_bin:
            try:
                bin_parts = Path(opencode_bin).parts
                if (
                    len(bin_parts) >= 3
                    and bin_parts[0] == '/'
                    and bin_parts[1] == 'home'
                ):
                    inferred_users.append(bin_parts[2])
            except Exception as e:
                logger.debug(
                    f'Failed to infer user from opencode binary path: {e}'
                )

        for user in dict.fromkeys(inferred_users):  # preserve order, de-dupe
            candidates.append(
                Path('/home')
                / user
                / '.local'
                / 'share'
                / 'opencode'
                / 'storage'
            )

        inferred_candidate_paths = {
            (
                Path('/home')
                / user
                / '.local'
                / 'share'
                / 'opencode'
                / 'storage'
            ).resolve()
            for user in dict.fromkeys(inferred_users)
        }

        # Pick the best existing candidate.
        first_existing: Optional[Path] = None
        best_match: Optional[Path] = None
        best_tuple: Optional[tuple] = None
        for c in candidates:
            try:
                if c.exists() and c.is_dir():
                    if first_existing is None:
                        first_existing = c

                    # Explicit override wins if it exists.
                    if override_path is not None and c == override_path:
                        self._opencode_storage_path = c
                        logger.info(
                            f'Using OpenCode storage at (override): {c}'
                        )
                        return c

                    # Otherwise, score by how many registered codebases this storage contains.
                    score_codebases = _storage_match_score(c)
                    has_message_data = 1 if _storage_has_message_data(c) else 0
                    inferred_bonus = (
                        1 if c.resolve() in inferred_candidate_paths else 0
                    )

                    # Prefer:
                    # 1) Storage that matches registered codebases
                    # 2) Storage that actually contains message/part data (for session detail UI)
                    # 3) Inferred /home/<user> storage over service-account storage when tied
                    score_tuple = (
                        score_codebases,
                        has_message_data,
                        inferred_bonus,
                    )

                    if best_tuple is None or score_tuple > best_tuple:
                        best_tuple = score_tuple
                        best_match = c
            except Exception as e:
                logger.debug(f'Error evaluating storage candidate {c}: {e}')
                continue

        if best_match is not None and best_tuple is not None:
            if best_tuple[0] > 0:
                self._opencode_storage_path = best_match
                logger.info(
                    f'Using OpenCode storage at: {best_match} (matched {best_tuple[0]} codebase(s))'
                )
                return best_match

            # No project→codebase matches found. Prefer a storage that still looks
            # "real" (has message/part data) and/or was inferred from /home/<user>.
            if best_tuple[1] > 0 or best_tuple[2] > 0:
                self._opencode_storage_path = best_match
                logger.info(
                    'Using OpenCode storage at: %s (best available; message_data=%s, inferred_home=%s)',
                    best_match,
                    bool(best_tuple[1]),
                    bool(best_tuple[2]),
                )
                return best_match

        if first_existing is not None:
            # Fall back to *something* that exists, but warn because it might be empty/wrong.
            self._opencode_storage_path = first_existing
            logger.warning(
                'OpenCode storage path exists but did not match any registered codebase projects; '
                f'falling back to: {first_existing}'
            )
            return first_existing

        # Final fallback (even if it doesn't exist yet)
        self._opencode_storage_path = (
            candidates[0]
            if candidates
            else (Path.home() / '.local' / 'share' / 'opencode' / 'storage')
        )
        logger.warning(
            f'OpenCode storage path not found on disk; defaulting to: {self._opencode_storage_path}'
        )
        return self._opencode_storage_path

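    # The candidate ranking above relies on Python's lexicographic tuple
    # comparison of (codebase_matches, has_message_data, inferred_home_bonus).
    # A quick self-contained illustration of why a single codebase match beats
    # any combination of the other two signals:
    #
    #     assert (1, 0, 0) > (0, 1, 1)   # matching a codebase dominates
    #     assert (0, 1, 0) > (0, 0, 1)   # then message/part data
    #     assert max([(0, 0, 1), (2, 1, 0)]) == (2, 1, 0)
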
# =============================================================================
# SessionSyncService - Session management and syncing
# =============================================================================


class SessionSyncService:
    """
    Handles session management and syncing with the server.

    Responsibilities:
    - Reading sessions from OpenCode storage
    - Reporting sessions to server
    - Message sync for remote codebases
    """

    def __init__(
        self,
        config: WorkerConfig,
        config_manager: ConfigManager,
        client: WorkerClient,
    ):
        self.config = config
        self.config_manager = config_manager
        self.client = client

    def _get_project_id_for_path(self, codebase_path: str) -> Optional[str]:
        """Get the OpenCode project ID (hash) for a given codebase path."""
        storage_path = self.config_manager.get_opencode_storage_path()
        project_dir = storage_path / 'project'

        if not project_dir.exists():
            return None

        # Read all project files to find the matching worktree
        for project_file in project_dir.glob('*.json'):
            if project_file.stem == 'global':
                continue
            try:
                with open(project_file, 'r', encoding='utf-8') as f:
                    project = json.load(f)
                worktree = project.get('worktree')
                if worktree:
                    try:
                        if (
                            Path(worktree).resolve()
                            == Path(codebase_path).resolve()
                        ):
                            return project.get('id')
                    except Exception as e:
                        logger.debug(
                            f'Failed to resolve paths for comparison ({worktree} vs {codebase_path}): {e}'
                        )
                        if worktree == codebase_path:
                            return project.get('id')
            except Exception as e:
                logger.debug(f'Error reading project file {project_file}: {e}')
                continue

        return None

    def get_sessions_for_codebase(
        self, codebase_path: str
    ) -> List[Dict[str, Any]]:
        """Get all OpenCode sessions for a codebase."""
        project_id = self._get_project_id_for_path(codebase_path)
        if not project_id:
            logger.debug(f'No OpenCode project ID found for {codebase_path}')
            return []

        storage_path = self.config_manager.get_opencode_storage_path()
        session_dir = storage_path / 'session' / project_id

        if not session_dir.exists():
            return []

        sessions: List[Dict[str, Any]] = []
        for session_file in session_dir.glob('ses_*.json'):
            try:
                with open(session_file) as f:
                    session_data = json.load(f)
                # Convert timestamps from milliseconds to ISO format
                time_data = session_data.get('time', {})
                created_ms = time_data.get('created', 0)
                updated_ms = time_data.get('updated', 0)

                session_id = session_data.get('id')
                # OpenCode stores messages separately; count message files for UI convenience.
                msg_count = 0
                if session_id:
                    msg_dir = storage_path / 'message' / str(session_id)
                    try:
                        if msg_dir.exists():
                            msg_count = len(
                                list(msg_dir.glob('msg_*.json'))
                            )
                    except Exception:
                        msg_count = 0

                created_iso = (
                    datetime.fromtimestamp(created_ms / 1000).isoformat()
                    if created_ms
                    else None
                )
                updated_iso = (
                    datetime.fromtimestamp(updated_ms / 1000).isoformat()
                    if updated_ms
                    else None
                )

                sessions.append(
                    {
                        'id': session_id,
                        'title': session_data.get('title', 'Untitled'),
                        'directory': session_data.get('directory'),
                        'project_id': project_id,
                        # Match the UI expectations from monitor-tailwind.html
                        'created': created_iso,
                        'updated': updated_iso,
                        'messageCount': msg_count,
                        'summary': session_data.get('summary', {}),
                        'version': session_data.get('version'),
                    }
                )
            except Exception as e:
                logger.debug(f'Error reading session file {session_file}: {e}')
                continue

        # Sort by updated time descending
        sessions.sort(key=lambda s: s.get('updated') or '', reverse=True)
        return sessions

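    # On-disk layout the readers in this class assume (paths relative to the
    # storage root; the project hash and ids below are made up):
    #
    #     project/a1b2c3.json              # {"id": "a1b2c3", "worktree": "/home/dev/repo", ...}
    #     session/a1b2c3/ses_XXXX.json     # session metadata incl. time.created/updated (ms)
    #     session/global/ses_XXXX.json     # sessions not tied to a project
    #     message/ses_XXXX/msg_YYYY.json   # one file per message
    #     part/msg_YYYY/prt_ZZZZ.json      # one file per message part
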
    def get_global_sessions(self) -> List[Dict[str, Any]]:
        """Get all global OpenCode sessions (not associated with a specific project)."""
        storage_path = self.config_manager.get_opencode_storage_path()
        session_dir = storage_path / 'session' / 'global'

        if not session_dir.exists():
            return []

        sessions: List[Dict[str, Any]] = []
        for session_file in session_dir.glob('ses_*.json'):
            try:
                with open(session_file) as f:
                    session_data = json.load(f)
                time_data = session_data.get('time', {})
                created_ms = time_data.get('created', 0)
                updated_ms = time_data.get('updated', 0)

                session_id = session_data.get('id')
                msg_count = 0
                if session_id:
                    msg_dir = storage_path / 'message' / str(session_id)
                    try:
                        if msg_dir.exists():
                            msg_count = len(
                                list(msg_dir.glob('msg_*.json'))
                            )
                    except Exception:
                        msg_count = 0

                created_iso = (
                    datetime.fromtimestamp(created_ms / 1000).isoformat()
                    if created_ms
                    else None
                )
                updated_iso = (
                    datetime.fromtimestamp(updated_ms / 1000).isoformat()
                    if updated_ms
                    else None
                )

                sessions.append(
                    {
                        'id': session_id,
                        'title': session_data.get('title', 'Untitled'),
                        'directory': session_data.get('directory'),
                        'project_id': SpecialCodebaseId.GLOBAL,
                        'created': created_iso,
                        'updated': updated_iso,
                        'messageCount': msg_count,
                        'summary': session_data.get('summary', {}),
                        'version': session_data.get('version'),
                    }
                )
            except Exception as e:
                logger.debug(
                    f'Error reading global session file {session_file}: {e}'
                )
                continue

        sessions.sort(key=lambda s: s.get('updated') or '', reverse=True)
        return sessions

    def get_session_messages(
        self, session_id: str, max_messages: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Get messages (including parts) for a specific session from OpenCode storage."""
        storage_path = self.config_manager.get_opencode_storage_path()
        message_dir = storage_path / 'message' / session_id

        if not message_dir.exists():
            return []

        msg_files = sorted(message_dir.glob('msg_*.json'))
        if (
            max_messages is not None
            and max_messages > 0
            and len(msg_files) > max_messages
        ):
            msg_files = msg_files[-max_messages:]

        messages: List[Dict[str, Any]] = []
        for msg_file in msg_files:
            try:
                with open(msg_file) as f:
                    msg_data = json.load(f)

                msg_id = msg_data.get('id')
                role = msg_data.get('role')
                agent = msg_data.get('agent')
                model_obj = msg_data.get('model') or {}
                model = None
                if isinstance(model_obj, dict):
                    provider_id = model_obj.get('providerID')
                    model_id = model_obj.get('modelID')
                    if provider_id and model_id:
                        model = f'{provider_id}/{model_id}'
                elif isinstance(model_obj, str):
                    model = model_obj

                time_data = msg_data.get('time', {}) or {}
                created_ms = time_data.get('created', 0)
                created_iso = (
                    datetime.fromtimestamp(created_ms / 1000).isoformat()
                    if created_ms
                    else None
                )

                # Load message parts (text/tool/step/etc)
                parts: List[Dict[str, Any]] = []
                if msg_id:
                    parts_dir = storage_path / 'part' / str(msg_id)
                    if parts_dir.exists() and parts_dir.is_dir():
                        for part_file in sorted(parts_dir.glob('prt_*.json')):
                            try:
                                with open(
                                    part_file, 'r', encoding='utf-8'
                                ) as f:
                                    part_data = json.load(f)
                                part_obj: Dict[str, Any] = {
                                    'id': part_data.get('id'),
                                    'type': part_data.get('type'),
                                }
                                for k in (
                                    'text',
                                    'tool',
                                    'state',
                                    'reason',
                                    'callID',
                                    'cost',
                                    'tokens',
                                ):
                                    if k in part_data:
                                        part_obj[k] = part_data.get(k)
                                parts.append(part_obj)
                            except Exception as e:
                                logger.debug(
                                    f'Error reading part file {part_file}: {e}'
                                )

                messages.append(
                    {
                        'id': msg_id,
                        'sessionID': msg_data.get('sessionID') or session_id,
                        'role': role,
                        'time': {'created': created_iso},
                        'agent': agent,
                        'model': model,
                        # OpenCode message-level metadata (preferred for UI stats)
                        'cost': msg_data.get('cost'),
                        'tokens': msg_data.get('tokens'),
                        'tool_calls': msg_data.get('tool_calls')
                        or msg_data.get('toolCalls')
                        or [],
                        'parts': parts,
                    }
                )
            except Exception as e:
                logger.debug(f'Error reading message file {msg_file}: {e}')
                continue

        # Sort by created time ascending (ISO or None)
        messages.sort(key=lambda m: (m.get('time') or {}).get('created') or '')
        return messages

|
2273
|
+
async def report_sessions_to_server(
|
|
2274
|
+
self,
|
|
2275
|
+
codebases: Dict[str, LocalCodebase],
|
|
2276
|
+
global_codebase_id: Optional[str],
|
|
2277
|
+
register_codebase_fn: Callable,
|
|
2278
|
+
):
|
|
2279
|
+
"""Report all sessions for registered codebases to the server."""
|
|
2280
|
+
# Iterate over a snapshot since we may update codebases if we need
|
|
2281
|
+
# to re-register a codebase (e.g., after a server restart).
|
|
2282
|
+
for codebase_id, codebase in list(codebases.items()):
|
|
2283
|
+
try:
|
|
2284
|
+
sessions = self.get_sessions_for_codebase(codebase.path)
|
|
2285
|
+
logger.info(
|
|
2286
|
+
f"Discovered {len(sessions)} OpenCode sessions for codebase '{codebase.name}' "
|
|
2287
|
+
f'(id={codebase_id}, path={codebase.path})'
|
|
2288
|
+
)
|
|
2289
|
+
if not sessions:
|
|
2290
|
+
continue
|
|
2291
|
+
|
|
2292
|
+
status = await self.client.sync_sessions(codebase_id, sessions)
|
|
2293
|
+
|
|
2294
|
+
# Self-heal common failure modes:
|
|
2295
|
+
# - 404: server lost codebase registry (restart / db reset)
|
|
2296
|
+
# - 403: worker_id mismatch for this codebase
|
|
2297
|
+
# In either case, re-register the codebase and retry once.
|
|
2298
|
+
if status in (403, 404):
|
|
2299
|
+
logger.info(
|
|
2300
|
+
"Attempting to re-register codebase '%s' after session sync %s (old_id=%s)",
|
|
2301
|
+
codebase.name,
|
|
2302
|
+
status,
|
|
2303
|
+
codebase_id,
|
|
2304
|
+
)
|
|
2305
|
+
new_codebase_id = await register_codebase_fn(
|
|
2306
|
+
name=codebase.name,
|
|
2307
|
+
path=codebase.path,
|
|
2308
|
+
description=codebase.description,
|
|
2309
|
+
)
|
|
2310
|
+
|
|
2311
|
+
# If a new ID was created/returned, drop the stale mapping.
|
|
2312
|
+
if new_codebase_id and new_codebase_id != codebase_id:
|
|
2313
|
+
codebases.pop(codebase_id, None)
|
|
2314
|
+
codebase_id = new_codebase_id
|
|
2315
|
+
|
|
2316
|
+
if new_codebase_id:
|
|
2317
|
+
await self.client.sync_sessions(codebase_id, sessions)
|
|
2318
|
+
|
|
2319
|
+
# Optionally sync recent session messages so the UI can show session details
|
|
2320
|
+
max_sessions = (
|
|
2321
|
+
getattr(self.config, 'session_message_sync_max_sessions', 0)
|
|
2322
|
+
or 0
|
|
2323
|
+
)
|
|
2324
|
+
max_messages = (
|
|
2325
|
+
getattr(self.config, 'session_message_sync_max_messages', 0)
|
|
2326
|
+
or 0
|
|
2327
|
+
)
|
|
2328
|
+
if max_sessions > 0 and max_messages > 0:
|
|
2329
|
+
await self._report_recent_session_messages_to_server(
|
|
2330
|
+
codebase_id=codebase_id,
|
|
2331
|
+
sessions=sessions[:max_sessions],
|
|
2332
|
+
max_messages=max_messages,
|
|
2333
|
+
)
|
|
2334
|
+
|
|
2335
|
+
except Exception as e:
|
|
2336
|
+
logger.debug(
|
|
2337
|
+
f'Failed to sync sessions for {codebase.name}: {e}'
|
|
2338
|
+
)
|
|
2339
|
+
|
|
2340
|
+
# Also sync global sessions (not associated with any specific project)
|
|
2341
|
+
await self._report_global_sessions_to_server(
|
|
2342
|
+
global_codebase_id, register_codebase_fn
|
|
2343
|
+
)
|
|
2344
|
+
|
|
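    # The 403/404 self-heal above follows a small reusable shape: sync once,
    # re-establish the registration, then retry exactly once. A minimal
    # generic sketch of that pattern (names here are illustrative, not part
    # of WorkerClient):
    #
    #     async def sync_with_reregister(sync, reregister):
    #         status = await sync()
    #         if status in (403, 404) and await reregister():
    #             status = await sync()
    #         return status
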
    async def _report_global_sessions_to_server(
        self,
        global_codebase_id: Optional[str],
        register_codebase_fn: Callable,
    ):
        """Report global sessions to the server under a 'global' pseudo-codebase."""
        try:
            global_sessions = self.get_global_sessions()
            if not global_sessions:
                return

            logger.info(
                f'Discovered {len(global_sessions)} global OpenCode sessions'
            )

            # Ensure we have a "global" codebase registered
            if not global_codebase_id:
                return

            status = await self.client.sync_sessions(
                global_codebase_id, global_sessions
            )

            # Optionally sync recent session messages so the remote UI can show session detail.
            async def _sync_recent_global_messages(
                target_codebase_id: str,
            ) -> None:
                max_sessions = (
                    getattr(self.config, 'session_message_sync_max_sessions', 0)
                    or 0
                )
                max_messages = (
                    getattr(self.config, 'session_message_sync_max_messages', 0)
                    or 0
                )
                if max_sessions > 0 and max_messages > 0:
                    await self._report_recent_session_messages_to_server(
                        codebase_id=target_codebase_id,
                        sessions=global_sessions[:max_sessions],
                        max_messages=max_messages,
                    )

            if status == 200:
                await _sync_recent_global_messages(global_codebase_id)
            elif status in (403, 404):
                # Re-register and retry
                new_id = await register_codebase_fn(
                    name=SpecialCodebaseId.GLOBAL,
                    path=str(Path.home()),
                    description='Global OpenCode sessions (not project-specific)',
                )
                if new_id:
                    retry_status = await self.client.sync_sessions(
                        new_id, global_sessions
                    )
                    if retry_status == 200:
                        await _sync_recent_global_messages(new_id)
                    # Return new_id to caller to update global_codebase_id
                    return new_id

        except Exception as e:
            logger.warning(f'Failed to sync global sessions: {e}')

        return None

    async def _report_recent_session_messages_to_server(
        self,
        codebase_id: str,
        sessions: List[Dict[str, Any]],
        max_messages: int,
    ):
        """Best-effort sync for the most recent sessions' messages."""
        try:
            for ses in sessions:
                session_id = ses.get('id')
                if not session_id:
                    continue

                messages = self.get_session_messages(
                    str(session_id), max_messages=max_messages
                )
                if not messages:
                    continue

                await self.client.sync_session_messages(
                    codebase_id, str(session_id), messages
                )
        except Exception as e:
            logger.debug(
                f'Failed to sync session messages for codebase {codebase_id}: {e}'
            )


# =============================================================================
# ContextCompactionService - Auto-compaction and summarization for sessions
# =============================================================================


class ContextCompactionService:
    """
    Handles automatic context compaction and summarization for task handoffs.

    When sessions grow large or tasks are handed off between workers/agents,
    this service:
    1. Estimates token count from session messages
    2. Generates a summary of completed work
    3. Creates a compacted context for the next agent

    This prevents context overflow errors and ensures clean handoffs.
    """

    # Rough estimate: 1 token ≈ 4 characters for English text
    CHARS_PER_TOKEN = 4

    # Thresholds for compaction
    DEFAULT_MAX_TOKENS = 100000  # Trigger compaction above this
    DEFAULT_TARGET_TOKENS = 50000  # Target size after compaction
    SUMMARY_MAX_TOKENS = 2000  # Max tokens for summary

    def __init__(
        self,
        session_sync: 'SessionSyncService',
        opencode_bin: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        target_tokens: int = DEFAULT_TARGET_TOKENS,
    ):
        self.session_sync = session_sync
        self.opencode_bin = opencode_bin
        self.max_tokens = max_tokens
        self.target_tokens = target_tokens

    def estimate_tokens(self, text: str) -> int:
        """Estimate token count from text length."""
        return len(text) // self.CHARS_PER_TOKEN

    def estimate_session_tokens(self, messages: List[Dict[str, Any]]) -> int:
        """Estimate total tokens in session messages."""
        total_chars = 0
        for msg in messages:
            # Count message content
            parts = msg.get('parts', [])
            for part in parts:
                if isinstance(part, dict):
                    content = part.get('content', '') or part.get('text', '')
                    if isinstance(content, str):
                        total_chars += len(content)
                elif isinstance(part, str):
                    total_chars += len(part)
        return total_chars // self.CHARS_PER_TOKEN

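    # Worked example of the heuristic: with CHARS_PER_TOKEN = 4, the default
    # DEFAULT_MAX_TOKENS = 100000 threshold corresponds to roughly
    # 100000 * 4 = 400000 characters of message text. Because the estimate
    # floors, estimate_tokens('x' * 400001) == 100000, and only a few more
    # characters (one full 4-char chunk) push needs_compaction() past the
    # strict `>` comparison.
    #
    #     svc = ContextCompactionService(session_sync, 'opencode')
    #     assert svc.estimate_tokens('abcd' * 10) == 10
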
    def needs_compaction(self, messages: List[Dict[str, Any]]) -> bool:
        """Check if session needs compaction based on estimated tokens."""
        return self.estimate_session_tokens(messages) > self.max_tokens

    def extract_key_context(
        self, messages: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Extract key context from messages for summarization.

        Returns a structured summary of:
        - Files modified
        - Tools used
        - Key decisions made
        - Current state/progress
        """
        context = {
            'files_modified': set(),
            'files_read': set(),
            'tools_used': set(),
            'errors_encountered': [],
            'key_outputs': [],
            'message_count': len(messages),
        }

        for msg in messages:
            parts = msg.get('parts', [])
            for part in parts:
                if not isinstance(part, dict):
                    continue

                part_type = part.get('type', '')

                # Track tool usage
                if part_type == 'tool-invocation':
                    tool_name = part.get('toolInvocation', {}).get(
                        'toolName', ''
                    )
                    if tool_name:
                        context['tools_used'].add(tool_name)

                    # Track file operations
                    args = part.get('toolInvocation', {}).get('args', {})
                    if isinstance(args, dict):
                        file_path = (
                            args.get('filePath')
                            or args.get('path')
                            or args.get('file')
                        )
                        if file_path:
                            if tool_name in ('write', 'edit', 'Write', 'Edit'):
                                context['files_modified'].add(file_path)
                            elif tool_name in ('read', 'Read', 'glob', 'Glob'):
                                context['files_read'].add(file_path)

                # Track errors
                if part_type == 'tool-result':
                    result = part.get('toolResult', {})
                    if isinstance(result, dict) and result.get('isError'):
                        error_text = str(result.get('content', ''))[:200]
                        context['errors_encountered'].append(error_text)

                # Track key text outputs (assistant messages)
                if part_type == 'text' and msg.get('role') == 'assistant':
                    text = part.get('text', '')
                    if text and len(text) > 50:
                        # Keep first 500 chars of significant outputs
                        context['key_outputs'].append(text[:500])

        # Convert sets to lists for JSON serialization
        context['files_modified'] = list(context['files_modified'])
        context['files_read'] = list(context['files_read'])
        context['tools_used'] = list(context['tools_used'])

        # Limit key outputs to last 5
        context['key_outputs'] = context['key_outputs'][-5:]

        return context

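    # A minimal illustration of the message/part shape extract_key_context
    # walks (all values fabricated):
    #
    #     msg = {
    #         'role': 'assistant',
    #         'parts': [{
    #             'type': 'tool-invocation',
    #             'toolInvocation': {
    #                 'toolName': 'edit',
    #                 'args': {'filePath': 'src/app.py'},
    #             },
    #         }],
    #     }
    #     ctx = service.extract_key_context([msg])
    #     # -> ctx['tools_used'] == ['edit'], ctx['files_modified'] == ['src/app.py']
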
    def generate_summary_prompt(
        self,
        messages: List[Dict[str, Any]],
        original_task: str,
    ) -> str:
        """
        Generate a prompt for the LLM to create a session summary.

        This summary will be prepended to the next task for context.
        """
        context = self.extract_key_context(messages)

        # Get the last few assistant messages for recent context
        recent_outputs = []
        for msg in reversed(messages[-10:]):
            if msg.get('role') == 'assistant':
                for part in msg.get('parts', []):
                    if isinstance(part, dict) and part.get('type') == 'text':
                        text = part.get('text', '')
                        if text:
                            recent_outputs.append(text[:1000])
            if len(recent_outputs) >= 3:
                break

        summary_prompt = f"""Summarize the work done in this coding session for handoff to another agent.

ORIGINAL TASK: {original_task}

SESSION STATISTICS:
- Total messages: {context['message_count']}
- Files modified: {', '.join(context['files_modified'][:20]) or 'None'}
- Files read: {', '.join(context['files_read'][:20]) or 'None'}
- Tools used: {', '.join(context['tools_used']) or 'None'}
- Errors encountered: {len(context['errors_encountered'])}

RECENT WORK:
{chr(10).join(recent_outputs[:3])}

Please provide a concise summary (max 500 words) covering:
1. What was accomplished
2. Current state of the work
3. Any blockers or issues encountered
4. What remains to be done
5. Key files/code that was changed

Format as a handoff note for the next agent."""

        return summary_prompt

    async def generate_summary(
        self,
        session_id: str,
        codebase_path: str,
        original_task: str,
    ) -> Optional[str]:
        """
        Generate a summary of the session using OpenCode.

        Returns the summary text or None if generation fails.
        """
        messages = self.session_sync.get_session_messages(
            session_id, max_messages=100
        )
        if not messages:
            return None

        # Check if compaction is needed
        if not self.needs_compaction(messages):
            logger.debug(f'Session {session_id} does not need compaction')
            return None

        logger.info(
            f'Generating summary for session {session_id} (estimated {self.estimate_session_tokens(messages)} tokens)'
        )

        summary_prompt = self.generate_summary_prompt(messages, original_task)

        # Run a quick summarization using OpenCode with a fast model
        try:
            cmd = [
                self.opencode_bin,
                'run',
                '--agent',
                'general',  # Use general agent for summarization
                '--model',
                'anthropic/claude-3-5-haiku-latest',  # Fast, cheap model
                '--format',
                'json',
                '--',
                summary_prompt,
            ]

            process = await asyncio.create_subprocess_exec(
                *cmd,
                cwd=codebase_path,
                stdin=asyncio.subprocess.DEVNULL,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                env={**os.environ, 'NO_COLOR': '1'},
                limit=16 * 1024 * 1024,
            )

            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=60,  # 1 minute timeout for summarization
            )

            if process.returncode == 0:
                output = stdout.decode('utf-8', errors='replace')
                # Try to extract the summary from JSON output
                for line in output.split('\n'):
                    try:
                        obj = json.loads(line)
                        if isinstance(obj, dict):
                            # Look for text content in the response
                            content = (
                                obj.get('content')
                                or obj.get('text')
                                or obj.get('output')
                            )
                            if content:
                                return content[
                                    : self.SUMMARY_MAX_TOKENS
                                    * self.CHARS_PER_TOKEN
                                ]
                    except json.JSONDecodeError:
                        continue
                # Fallback: return raw output truncated
                return output[: self.SUMMARY_MAX_TOKENS * self.CHARS_PER_TOKEN]
            else:
                logger.warning(
                    f'Summary generation failed: {stderr.decode("utf-8", errors="replace")[:500]}'
                )
                return None

        except asyncio.TimeoutError:
            logger.warning(
                f'Summary generation timed out for session {session_id}'
            )
            return None
        except Exception as e:
            logger.warning(f'Summary generation error: {e}')
            return None

    def create_handoff_context(
        self,
        original_prompt: str,
        summary: Optional[str],
        session_id: Optional[str],
    ) -> str:
        """
        Create a compacted context for task handoff.

        Prepends summary and context to the original prompt.
        """
        if not summary:
            return original_prompt

        handoff_context = f"""## Previous Session Summary

{summary}

## Continuation Task

{original_prompt}

---
Note: This task continues from a previous session. The summary above describes what was already done.
Please review and continue the work, avoiding redundant actions on files already modified."""

        return handoff_context

    async def prepare_task_context(
        self,
        prompt: str,
        resume_session_id: Optional[str],
        codebase_path: str,
        auto_summarize: bool = True,
    ) -> str:
        """
        Prepare task context with auto-compaction if needed.

        Args:
            prompt: The original task prompt
            resume_session_id: Session ID to resume (if any)
            codebase_path: Path to the codebase
            auto_summarize: Whether to auto-generate summary for large sessions

        Returns:
            The (possibly enhanced) prompt with summary context
        """
        if not resume_session_id or not auto_summarize:
            return prompt

        # Check if session needs compaction
        messages = self.session_sync.get_session_messages(
            resume_session_id, max_messages=100
        )
        if not messages or not self.needs_compaction(messages):
            return prompt

        # Generate summary
        summary = await self.generate_summary(
            session_id=resume_session_id,
            codebase_path=codebase_path,
            original_task=prompt,
        )

        # Create handoff context
        return self.create_handoff_context(prompt, summary, resume_session_id)

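# Illustrative end-to-end flow for the compaction service (a sketch; the
# session id and path below are made up):
#
#     prompt = await compaction_service.prepare_task_context(
#         prompt='Continue implementing the retry logic',
#         resume_session_id='ses_abc123',
#         codebase_path='/home/dev/repo',
#     )
#     # Small sessions come back unchanged; oversized ones come back with a
#     # "## Previous Session Summary" header prepended by create_handoff_context().
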
2786
|
+
# =============================================================================
|
|
2787
|
+
# TaskExecutor - Task execution logic
|
|
2788
|
+
# =============================================================================
|
|
2789
|
+
|
|
2790
|
+
|
|
2791
|
+
class TaskExecutor:
|
|
2792
|
+
"""
|
|
2793
|
+
Handles task execution logic.
|
|
2794
|
+
|
|
2795
|
+
Responsibilities:
|
|
2796
|
+
- OpenCode subprocess management
|
|
2797
|
+
- Task claiming/releasing (via client)
|
|
2798
|
+
- Special task handlers (register_codebase, echo, noop)
|
|
2799
|
+
- Semaphore-based concurrency control
|
|
2800
|
+
- Email notifications on task completion/failure
|
|
2801
|
+
- Auto-compaction and summarization for task handoffs
|
|
2802
|
+
"""
|
|
2803
|
+
|
|
2804
|
+
def __init__(
|
|
2805
|
+
self,
|
|
2806
|
+
config: WorkerConfig,
|
|
2807
|
+
client: WorkerClient,
|
|
2808
|
+
config_manager: ConfigManager,
|
|
2809
|
+
session_sync: SessionSyncService,
|
|
2810
|
+
opencode_bin: str,
|
|
2811
|
+
email_service: Optional[EmailNotificationService] = None,
|
|
2812
|
+
):
|
|
2813
|
+
self.config = config
|
|
2814
|
+
self.client = client
|
|
2815
|
+
self.config_manager = config_manager
|
|
2816
|
+
self.session_sync = session_sync
|
|
2817
|
+
self.opencode_bin = opencode_bin
|
|
2818
|
+
self.email_service = email_service
|
|
2819
|
+
self.active_processes: Dict[str, asyncio.subprocess.Process] = {}
|
|
2820
|
+
# Task processing state
|
|
2821
|
+
self._task_semaphore: Optional[asyncio.Semaphore] = None
|
|
2822
|
+
self._active_task_ids: Set[str] = set()
|
|
2823
|
+
# Context compaction service for auto-summarization
|
|
2824
|
+
self.compaction_service = ContextCompactionService(
|
|
2825
|
+
session_sync=session_sync,
|
|
2826
|
+
opencode_bin=opencode_bin,
|
|
2827
|
+
max_tokens=getattr(config, 'compaction_max_tokens', 100000),
|
|
2828
|
+
target_tokens=getattr(config, 'compaction_target_tokens', 50000),
|
|
2829
|
+
)
|
|
2830
|
+
|
|
2831
|
+
def init_semaphore(self):
|
|
2832
|
+
"""Initialize the task semaphore for bounded concurrency."""
|
|
2833
|
+
if self._task_semaphore is None:
|
|
2834
|
+
self._task_semaphore = asyncio.Semaphore(
|
|
2835
|
+
self.config.max_concurrent_tasks
|
|
2836
|
+
)
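
    # The bounded-concurrency pattern this semaphore enables, as a minimal
    # self-contained sketch (the limit of 3 is an assumed value for
    # illustration, not the worker's configured max_concurrent_tasks):
    #
    #     import asyncio
    #
    #     sem = asyncio.Semaphore(3)
    #
    #     async def run_one(n: int) -> None:
    #         async with sem:               # at most 3 coroutines inside at once
    #             await asyncio.sleep(0.1)  # stand-in for real task work
    #
    #     async def main() -> None:
    #         await asyncio.gather(*(run_one(i) for i in range(10)))
    #
    #     asyncio.run(main())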

    async def terminate_all_processes(self):
        """Terminate all active processes."""
        for task_id, process in list(self.active_processes.items()):
            logger.info(f'Terminating process for task {task_id}')
            process.terminate()
            try:
                await asyncio.wait_for(process.wait(), timeout=5)
            except asyncio.TimeoutError:
                process.kill()

    async def process_task_with_semaphore(
        self,
        task: Dict[str, Any],
        codebases: Dict[str, LocalCodebase],
        global_codebase_id: Optional[str],
        register_codebase_fn: Callable,
    ):
        """Process a task with bounded concurrency using semaphore."""
        task_id = task.get('id') or task.get('task_id') or ''

        if self._task_semaphore is None:
            self._task_semaphore = asyncio.Semaphore(
                self.config.max_concurrent_tasks
            )

        if not task_id:
            logger.warning('Task has no ID, skipping')
            return

        # ---------------------------------------------------------------------
        # Belt-and-suspenders validation: ensure this worker can handle the task
        # Server-side routing should already filter, but we validate here for
        # defense-in-depth to prevent workers from claiming tasks they can't execute.
        # ---------------------------------------------------------------------
        codebase_id = task.get('codebase_id', '')
        can_handle = (
            codebase_id in codebases
            or codebase_id == SpecialCodebaseId.PENDING
            or (
                codebase_id == SpecialCodebaseId.GLOBAL
                and global_codebase_id is not None
            )
        )
        if not can_handle:
            logger.warning(
                f'Task {task_id} has codebase_id={codebase_id!r} which this worker '
                f'cannot handle (registered: {list(codebases.keys())}). '
                f'Skipping to prevent incorrect claim.'
            )
            return

        # Mark task as active
        self._active_task_ids.add(task_id)

        try:
            async with self._task_semaphore:
                logger.debug(f'Acquired semaphore for task {task_id}')
                # Atomically claim the task before processing to prevent duplicate work
                claimed = await self.client.claim_task(task_id)
                if not claimed:
                    logger.debug(
                        f'Task {task_id} already claimed by another worker, skipping'
                    )
                    return
                try:
                    await self.execute_task(
                        task,
                        codebases,
                        global_codebase_id,
                        register_codebase_fn,
                    )
                finally:
                    # Release the claim when done (success or failure)
                    await self.client.release_task(task_id)
        finally:
            self._active_task_ids.discard(task_id)
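
    # The claim/execute/release protocol above, reduced to its skeleton (a
    # sketch; `client` stands for any object exposing the same coroutines):
    #
    #     if await client.claim_task(task_id):        # atomic server-side claim
    #         try:
    #             await execute(task)                 # do the work
    #         finally:
    #             await client.release_task(task_id)  # always release the claim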

    def is_task_active(self, task_id: str) -> bool:
        """Check if a task is currently being processed."""
        return task_id in self._active_task_ids

    async def execute_task(
        self,
        task: Dict[str, Any],
        codebases: Dict[str, LocalCodebase],
        global_codebase_id: Optional[str],
        register_codebase_fn: Callable,
    ):
        """Execute a task using OpenCode or handle special task types."""
        task_id: str = task.get('id') or task.get('task_id') or ''
        codebase_id: str = task.get('codebase_id') or ''
        agent_type: str = (
            task.get('agent_type', AgentType.BUILD) or AgentType.BUILD
        )

        if not task_id:
            logger.error('Task has no ID, cannot execute')
            return

        # Handle special task types
        if agent_type == AgentType.REGISTER_CODEBASE:
            await self.handle_register_codebase_task(task, register_codebase_fn)
            return

        # Lightweight test/utility agent types that do not require OpenCode.
        # Useful for end-to-end validation of the CodeTether task queue.
        if agent_type in (AgentType.ECHO, AgentType.NOOP):
            title = task.get('title')
            logger.info(
                f'Executing lightweight task {task_id}: {title} (agent_type={agent_type})'
            )

            await self.client.update_task_status(task_id, TaskStatus.RUNNING)
            try:
                if agent_type == AgentType.NOOP:
                    result = 'ok'
                else:
                    # Echo returns the prompt/description verbatim.
                    result = task.get('prompt', task.get('description', ''))

                await self.client.update_task_status(
                    task_id, TaskStatus.COMPLETED, result=result
                )
                logger.info(
                    f'Task {task_id} completed successfully (agent_type={agent_type})'
                )
            except Exception as e:
                logger.error(
                    f'Task {task_id} execution error (agent_type={agent_type}): {e}'
                )
                await self.client.update_task_status(
                    task_id, TaskStatus.FAILED, error=str(e)
                )
            return

        # Regular task - requires existing codebase
        # Handle special 'global' codebase_id from MCP/UI clients
        effective_codebase_id = codebase_id
        if codebase_id == SpecialCodebaseId.GLOBAL:
            if not global_codebase_id:
                logger.error(
                    f'Cannot process global task {task_id}: worker has no global codebase registered'
                )
                return
            effective_codebase_id = global_codebase_id

        codebase = codebases.get(effective_codebase_id)

        if not codebase:
            logger.error(f'Codebase {codebase_id} not found for task {task_id}')
            return

        # ---------------------------------------------------------------------
        # Defense-in-depth: verify codebase path exists on disk before executing
        # This catches stale registrations, mount issues, or path misconfigurations
        # ---------------------------------------------------------------------
        codebase_path = Path(codebase.path)
        if not codebase_path.exists():
            error_msg = (
                f'Codebase path does not exist on disk: {codebase.path} '
                f'(codebase_id={effective_codebase_id}, task_id={task_id})'
            )
            logger.error(error_msg)
            await self.client.update_task_status(
                task_id, TaskStatus.FAILED, error=error_msg
            )
            return

        if not codebase_path.is_dir():
            error_msg = (
                f'Codebase path is not a directory: {codebase.path} '
                f'(codebase_id={effective_codebase_id}, task_id={task_id})'
            )
            logger.error(error_msg)
            await self.client.update_task_status(
                task_id, TaskStatus.FAILED, error=error_msg
            )
            return

        logger.info(f'Executing task {task_id}: {task.get("title")}')

        # Claim the task
        await self.client.update_task_status(task_id, TaskStatus.RUNNING)

        start_time = time.time()
        try:
            # Build the prompt
            prompt = task.get('prompt', task.get('description', ''))
            metadata = task.get('metadata', {})
            model = metadata.get(
                'model'
            )  # e.g., "anthropic/claude-sonnet-4-20250514"
            resume_session_id = metadata.get(
                'resume_session_id'
            )  # Session to resume

            # Auto-compaction: If resuming a session with large context,
            # generate a summary and prepend it to the prompt
            auto_summarize = metadata.get('auto_summarize', True)
            if resume_session_id and auto_summarize:
                try:
                    enhanced_prompt = (
                        await self.compaction_service.prepare_task_context(
                            prompt=prompt,
                            resume_session_id=resume_session_id,
                            codebase_path=codebase.path,
                            auto_summarize=True,
                        )
                    )
                    if enhanced_prompt != prompt:
                        logger.info(
                            f'Task {task_id}: Added session summary for handoff'
                        )
                        prompt = enhanced_prompt
                except Exception as e:
                    logger.warning(
                        f'Auto-summarization failed for task {task_id}: {e}'
                    )
                    # Continue with original prompt

            # Run OpenCode
            result = await self.run_opencode(
                codebase_id=codebase_id,
                codebase_path=codebase.path,
                prompt=prompt,
                agent_type=agent_type,
                task_id=task_id,
                model=model,
                session_id=resume_session_id,
            )

            # Calculate duration
            duration_ms = int((time.time() - start_time) * 1000)

            if result['success']:
                await self.client.update_task_status(
                    task_id,
                    TaskStatus.COMPLETED,
                    result=result.get('output', 'Task completed successfully'),
                )
                logger.info(f'Task {task_id} completed successfully')

                # Send email notification
                if self.email_service:
                    await self.email_service.send_task_report(
                        task_id=task_id,
                        title=task.get('title', 'Untitled'),
                        status='completed',
                        result=result.get('output'),
                        duration_ms=duration_ms,
                        session_id=resume_session_id,
                        codebase_id=codebase_id,
                    )
            else:
                error_msg = result.get('error', 'Unknown error')
                await self.client.update_task_status(
                    task_id,
                    TaskStatus.FAILED,
                    error=error_msg,
                )
                logger.error(f'Task {task_id} failed: {error_msg}')

                # Send email notification
                if self.email_service:
                    await self.email_service.send_task_report(
                        task_id=task_id,
                        title=task.get('title', 'Untitled'),
                        status='failed',
                        error=error_msg,
                        duration_ms=duration_ms,
                        session_id=resume_session_id,
                        codebase_id=codebase_id,
                    )

        except Exception as e:
            logger.error(f'Task {task_id} execution error: {e}')
            await self.client.update_task_status(
                task_id, TaskStatus.FAILED, error=str(e)
            )

            # Send email notification for exception
            if self.email_service:
                await self.email_service.send_task_report(
                    task_id=task_id,
                    title=task.get('title', 'Untitled'),
                    status='failed',
                    error=str(e),
                    codebase_id=codebase_id,
                )
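
    # For reference, a task payload that exercises the resume/auto-summarize
    # path above might look like this; every literal value is hypothetical,
    # but the key names are exactly the ones execute_task reads:
    #
    #     task = {
    #         'id': 'task_42',
    #         'codebase_id': 'cb_1',
    #         'agent_type': 'build',
    #         'title': 'Continue auth refactor',
    #         'prompt': 'Continue refactoring the auth module',
    #         'metadata': {
    #             'model': 'anthropic/claude-sonnet-4-20250514',
    #             'resume_session_id': 'ses_0123456789',
    #             'auto_summarize': True,
    #         },
    #     }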

    async def handle_register_codebase_task(
        self,
        task: Dict[str, Any],
        register_codebase_fn: Callable,
    ):
        """
        Handle a codebase registration task from the server.

        This validates the path exists locally and registers the codebase
        with this worker's ID.
        """
        task_id: str = task.get('id') or task.get('task_id') or ''
        metadata: Dict[str, Any] = task.get('metadata', {}) or {}

        name = metadata.get('name', 'Unknown')
        path = metadata.get('path')
        description = metadata.get('description', '')

        logger.info(f'Handling registration task {task_id}: {name} at {path}')

        # Claim the task
        await self.client.update_task_status(task_id, TaskStatus.RUNNING)

        try:
            # Validate path exists locally on this worker
            if not path:
                await self.client.update_task_status(
                    task_id,
                    TaskStatus.FAILED,
                    error='No path provided in registration task',
                )
                return

            if not os.path.isdir(path):
                await self.client.update_task_status(
                    task_id,
                    TaskStatus.FAILED,
                    error=f'Path does not exist on this worker: {path}',
                )
                logger.warning(f'Registration failed - path not found: {path}')
                return

            # Path exists! Register it with the server (with our worker_id)
            codebase_id = await register_codebase_fn(
                name=name,
                path=path,
                description=description,
            )

            if codebase_id:
                await self.client.update_task_status(
                    task_id,
                    TaskStatus.COMPLETED,
                    result=f'Codebase registered successfully with ID: {codebase_id}',
                )
                logger.info(
                    f'Registration task {task_id} completed: {name} -> {codebase_id}'
                )
            else:
                await self.client.update_task_status(
                    task_id,
                    TaskStatus.FAILED,
                    error='Failed to register codebase with server',
                )

        except Exception as e:
            logger.error(f'Registration task {task_id} error: {e}')
            await self.client.update_task_status(
                task_id, TaskStatus.FAILED, error=str(e)
            )

    async def run_opencode(
        self,
        codebase_id: str,
        codebase_path: str,
        prompt: str,
        agent_type: str = 'build',
        task_id: Optional[str] = None,
        model: Optional[str] = None,
        session_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Run OpenCode agent on a codebase."""

        def _extract_session_id(obj: Any) -> Optional[str]:
            """Best-effort extraction of an OpenCode session id from JSON output."""
            if isinstance(obj, dict):
                for k in ('sessionID', 'session_id', 'sessionId', 'session'):
                    v = obj.get(k)
                    if isinstance(v, str) and v.startswith('ses_'):
                        return v
                for v in obj.values():
                    found = _extract_session_id(v)
                    if found:
                        return found
            elif isinstance(obj, list):
                for v in obj:
                    found = _extract_session_id(v)
                    if found:
                        return found
            return None

        async def _sync_session_messages_once(
            *,
            target_session_id: str,
            messages: List[Dict[str, Any]],
        ) -> bool:
            """Sync messages for a single session. Returns True on HTTP 200."""
            return await self.client.sync_session_messages(
                codebase_id, target_session_id, messages
            )

        def _messages_fingerprint(messages: List[Dict[str, Any]]) -> str:
            """Fingerprint that detects any message update for reliable sync.

            Computes a fingerprint based on:
            - Total message count
            - Last message ID and parts count
            - Sum of all parts across all messages (detects updates to any message)
            - Last part ID from each message (detects new parts added anywhere)

            This ensures the worker syncs to the database every time any message
            is updated, not just when new messages are added.
            """
            if not messages:
                return ''
            last = messages[-1]
            last_id = last.get('id') or ''
            last_parts = last.get('parts')
            last_parts_len = (
                len(last_parts) if isinstance(last_parts, list) else 0
            )
            # Include total message count and last created timestamp when available.
            created = (
                (last.get('time') or {})
                if isinstance(last.get('time'), dict)
                else {}
            ).get('created') or ''

            # Sum all parts across all messages to detect any message update
            total_parts = 0
            last_part_ids = []
            for msg in messages:
                parts = msg.get('parts')
                if isinstance(parts, list):
                    total_parts += len(parts)
                    # Track the last part ID from each message to detect new parts
                    if parts:
                        last_part = parts[-1]
                        if isinstance(last_part, dict):
                            last_part_ids.append(last_part.get('id') or '')

            # Include hash of last part IDs to detect updates within messages
            part_ids_hash = (
                hash(tuple(last_part_ids)) & 0xFFFFFFFF
            )  # 32-bit hash

            return f'{len(messages)}|{last_id}|{last_parts_len}|{created}|{total_parts}|{part_ids_hash}'
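
        # Worked example (hypothetical data): with two messages where the last
        # has id 'msg_b', 2 parts, and a created timestamp of 1700000000, and
        # 5 parts in total across both messages, the fingerprint would read
        #     '2|msg_b|2|1700000000|5|<32-bit hash of last part ids>'
        # so appending a part to *either* message changes the string.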

        async def _infer_active_session_id(
            *,
            known_before: set[str],
            start_epoch_s: float,
        ) -> Optional[str]:
            """Infer the active session by looking for the most recently updated session."""
            try:
                sessions = self.session_sync.get_sessions_for_codebase(
                    codebase_path
                )
                if not sessions:
                    return None
                top = sessions[0]
                sid = top.get('id')
                if not isinstance(sid, str) or not sid:
                    return None

                # Prefer brand-new sessions.
                if sid not in known_before:
                    return sid

                # Or sessions updated after the task started.
                updated = top.get('updated')
                if isinstance(updated, str) and updated:
                    try:
                        # worker writes naive isoformat; treat as local time.
                        updated_dt = datetime.fromisoformat(updated)
                        if updated_dt.timestamp() >= (start_epoch_s - 2.0):
                            return sid
                    except Exception:
                        return sid  # best-effort
                return sid
            except Exception:
                return None

        def _recent_opencode_log_hint(returncode: int) -> Optional[str]:
            """Best-effort hint for failures where OpenCode logs to file.

            Avoid dumping full logs into task output (can be huge / sensitive).
            Instead, point operators to the most recent log file and surface
            common actionable errors (like missing API keys).
            """

            try:
                data_home = os.environ.get(
                    'XDG_DATA_HOME', str(Path.home() / '.local' / 'share')
                )
                log_dir = (
                    Path(os.path.expanduser(data_home)) / 'opencode' / 'log'
                )
                if not log_dir.exists() or not log_dir.is_dir():
                    return None

                logs = list(log_dir.glob('*.log'))
                if not logs:
                    return None

                latest = max(logs, key=lambda p: p.stat().st_mtime)
                age_s = time.time() - latest.stat().st_mtime
                if age_s > 300:  # don't point at stale logs
                    return None

                try:
                    tail_lines = latest.read_text(
                        encoding='utf-8', errors='replace'
                    ).splitlines()[-80:]
                except Exception:
                    tail_lines = []

                tail_text = '\n'.join(tail_lines)
                if (
                    'API key is missing' in tail_text
                    or 'AI_LoadAPIKeyError' in tail_text
                ):
                    return (
                        'OpenCode is missing LLM credentials (e.g. ANTHROPIC_API_KEY). '
                        'Set the required key(s) in /etc/a2a-worker/env and restart the worker. '
                        f'OpenCode log: {latest}'
                    )

                return f'OpenCode exited with code {returncode}. See OpenCode log: {latest}'
            except Exception:
                return None

        # Check if opencode exists
        if not Path(self.opencode_bin).exists():
            return {
                'success': False,
                'error': f'OpenCode not found at {self.opencode_bin}',
            }

        # Build command using 'opencode run' with proper flags
        cmd = [
            self.opencode_bin,
            'run',
            '--agent',
            agent_type,
            '--format',
            'json',
        ]

        # Add model if specified (format: provider/model)
        if model:
            cmd.extend(['--model', model])

        # Add session resumption if specified
        if session_id:
            cmd.extend(['--session', session_id])
            logger.info(f'Resuming session: {session_id}')

        # Add '--' separator and then the prompt as positional message argument
        # This ensures the prompt isn't interpreted as a flag
        if prompt:
            cmd.append('--')
            cmd.append(prompt)
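
        # With model and session set, the resulting argv looks like this
        # (binary path and values hypothetical):
        #
        #     ['/usr/local/bin/opencode', 'run',
        #      '--agent', 'build', '--format', 'json',
        #      '--model', 'anthropic/claude-sonnet-4-20250514',
        #      '--session', 'ses_0123456789',
        #      '--', 'Fix the failing tests']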

        log_model = f' --model {model}' if model else ''
        log_session = f' --session {session_id}' if session_id else ''
        logger.info(
            f'Running: {self.opencode_bin} run --agent {agent_type}{log_model}{log_session} ...'
        )

        try:
            start_epoch_s = time.time()
            known_sessions_before: set[str] = set()
            if not session_id:
                try:
                    known_sessions_before = {
                        str(s.get('id'))
                        for s in self.session_sync.get_sessions_for_codebase(
                            codebase_path
                        )
                        if s.get('id')
                    }
                except Exception as e:
                    logger.debug(
                        f'Failed to get existing sessions before task start: {e}'
                    )
                    known_sessions_before = set()

            active_session_id: Optional[str] = session_id

            # Run the process using async subprocess to avoid blocking the event loop
            # Use a large buffer limit (16MB) to handle OpenCode's potentially very long
            # JSON output lines (e.g., file contents, large tool results). The default
            # 64KB limit causes "Separator is found, but chunk is longer than limit" errors.
            subprocess_limit = 16 * 1024 * 1024  # 16MB
            process = await asyncio.create_subprocess_exec(
                *cmd,
                cwd=codebase_path,
                stdin=asyncio.subprocess.DEVNULL,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                env={**os.environ, 'NO_COLOR': '1'},
                limit=subprocess_limit,
            )

            if task_id:
                self.active_processes[task_id] = process

            # Eagerly sync the *active* session messages while the task runs.
            eager_sync_interval = 1.0
            try:
                eager_sync_interval = float(
                    os.environ.get(
                        'A2A_ACTIVE_SESSION_SYNC_INTERVAL', eager_sync_interval
                    )
                )
            except Exception:
                eager_sync_interval = 1.0

            max_messages = (
                getattr(self.config, 'session_message_sync_max_messages', 0)
                or 0
            )
            if max_messages <= 0:
                max_messages = 100

            async def _eager_sync_loop():
                nonlocal active_session_id
                last_fp: Optional[str] = None
                session_attached = False

                while process.returncode is None:
                    # Discover session id if needed.
                    if not active_session_id:
                        active_session_id = await _infer_active_session_id(
                            known_before=known_sessions_before,
                            start_epoch_s=start_epoch_s,
                        )

                    if active_session_id and task_id and not session_attached:
                        # Attach the session id to the running task so UIs can deep-link.
                        await self.client.update_task_status(
                            task_id,
                            TaskStatus.RUNNING,
                            session_id=active_session_id,
                        )
                        session_attached = True

                    if active_session_id:
                        # Sync whenever the message fingerprint changes (any message update).
                        try:
                            current_messages = (
                                self.session_sync.get_session_messages(
                                    str(active_session_id),
                                    max_messages=max_messages,
                                )
                            )
                            fp = _messages_fingerprint(current_messages)
                            if fp and fp != last_fp:
                                ok = await _sync_session_messages_once(
                                    target_session_id=str(active_session_id),
                                    messages=current_messages,
                                )
                                if ok:
                                    last_fp = fp
                                    logger.debug(
                                        f'Synced messages for session {active_session_id} (fingerprint changed)'
                                    )
                        except Exception as e:
                            logger.debug(f'Eager sync loop read failed: {e}')

                    await asyncio.sleep(max(0.2, eager_sync_interval))

                # Final flush after process ends.
                if active_session_id:
                    try:
                        final_messages = self.session_sync.get_session_messages(
                            str(active_session_id),
                            max_messages=max_messages,
                        )
                        await _sync_session_messages_once(
                            target_session_id=str(active_session_id),
                            messages=final_messages,
                        )
                    except Exception as e:
                        logger.debug(
                            f'Final message flush failed for session {active_session_id}: {e}'
                        )

            eager_task: Optional[asyncio.Task] = None
            if task_id:
                eager_task = asyncio.create_task(_eager_sync_loop())

            # Stream output in real-time using async iteration
            output_lines: List[str] = []
            stderr_lines: List[str] = []

            async def _read_stdout():
                """Read stdout lines asynchronously.

                Uses readline() with explicit error handling for very long lines.
                OpenCode can produce JSON lines >64KB when including file contents.
                """
                nonlocal active_session_id
                if process.stdout is None:
                    return

                while True:
                    try:
                        # Read line with the increased buffer limit
                        line_bytes = await process.stdout.readline()
                        if not line_bytes:
                            break  # EOF

                        line = line_bytes.decode('utf-8', errors='replace')
                        output_lines.append(line)

                        # Try to detect session id from OpenCode JSON output.
                        if not active_session_id:
                            try:
                                obj = json.loads(line)
                                active_session_id = (
                                    _extract_session_id(obj)
                                    or active_session_id
                                )
                            except json.JSONDecodeError:
                                pass  # Not JSON, skip session extraction

                        # Stream output to server (truncate very long lines to prevent issues)
                        if task_id:
                            # Truncate output for streaming to prevent overwhelming the server
                            stream_line = line.strip()
                            if len(stream_line) > 10000:
                                stream_line = (
                                    stream_line[:10000] + '... [truncated]'
                                )
                            await self.client.stream_task_output(
                                task_id, stream_line
                            )

                    except ValueError as e:
                        # Handle "Separator is found, but chunk is longer than limit"
                        # by reading raw bytes and chunking
                        logger.warning(
                            f'Line too long for readline, reading raw: {e}'
                        )
                        try:
                            raw_chunk = await process.stdout.read(
                                1024 * 1024
                            )  # 1MB chunk
                            if raw_chunk:
                                line = raw_chunk.decode(
                                    'utf-8', errors='replace'
                                )
                                output_lines.append(line)
                            else:
                                break  # EOF
                        except Exception as chunk_err:
                            logger.error(
                                f'Failed to read raw chunk: {chunk_err}'
                            )
                            break

            async def _read_stderr():
                """Read stderr lines asynchronously with error handling for long lines."""
                if process.stderr is None:
                    return

                while True:
                    try:
                        line_bytes = await process.stderr.readline()
                        if not line_bytes:
                            break  # EOF

                        line = line_bytes.decode('utf-8', errors='replace')
                        stderr_lines.append(line)
                        if task_id:
                            # Truncate very long stderr lines
                            stream_line = line.strip()
                            if len(stream_line) > 10000:
                                stream_line = (
                                    stream_line[:10000] + '... [truncated]'
                                )
                            await self.client.stream_task_output(
                                task_id, f'[stderr] {stream_line}'
                            )
                    except ValueError:
                        # Handle very long lines by reading raw
                        try:
                            raw_chunk = await process.stderr.read(1024 * 1024)
                            if raw_chunk:
                                stderr_lines.append(
                                    raw_chunk.decode('utf-8', errors='replace')
                                )
                            else:
                                break
                        except Exception:
                            break

            try:
                # Read stdout and stderr concurrently
                await asyncio.gather(_read_stdout(), _read_stderr())

                # Wait for process to complete
                await process.wait()

                stdout = ''.join(output_lines)
                stderr = ''.join(stderr_lines)

            except asyncio.CancelledError:
                process.kill()
                await process.wait()
                stdout = ''.join(output_lines)
                stderr = ''.join(stderr_lines)
                return {
                    'success': False,
                    'error': 'Task was cancelled',
                }
            finally:
                if task_id and task_id in self.active_processes:
                    del self.active_processes[task_id]
                if task_id and eager_task is not None:
                    try:
                        eager_task.cancel()
                        await eager_task
                    except asyncio.CancelledError:
                        pass  # Expected when cancelling
                    except Exception as e:
                        logger.debug(
                            f'Error awaiting cancelled eager sync task: {e}'
                        )

            returncode = process.returncode or 0
            if returncode == 0:
                return {'success': True, 'output': stdout}
            else:
                hint = _recent_opencode_log_hint(returncode)
                err = (stderr or '').strip()
                return {
                    'success': False,
                    'error': err or hint or f'Exit code: {returncode}',
                }

        except Exception as e:
            return {'success': False, 'error': str(e)}
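

# Standalone sketch (illustrative; defined here for reference, never called):
# the asyncio subprocess pattern run_opencode relies on. Raising `limit` above
# the 64 KiB default keeps readline() from raising ValueError on very long
# single lines; the python3 one-liner emitting a ~1 MiB line is a stand-in
# command, not anything OpenCode-specific.
async def _demo_long_line_subprocess() -> int:
    process = await asyncio.create_subprocess_exec(
        'python3', '-c', "print('x' * (1024 * 1024))",
        stdout=asyncio.subprocess.PIPE,
        limit=16 * 1024 * 1024,  # 16 MiB, matching subprocess_limit above
    )
    assert process.stdout is not None
    line = await process.stdout.readline()  # would fail at the default limit
    await process.wait()
    return len(line)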


# =============================================================================
# AgentWorker - Thin orchestrator composing all services
# =============================================================================


class AgentWorker:
    """
    Agent worker that connects to A2A server and executes tasks locally.

    Uses SSE (Server-Sent Events) for real-time task streaming instead of polling.
    This class acts as a thin orchestrator that composes the following services:
    - WorkerClient: HTTP/SSE communication
    - ConfigManager: Configuration and setup
    - SessionSyncService: Session management and syncing
    - TaskExecutor: Task execution logic
    """

    def __init__(self, config: WorkerConfig):
        self.config = config
        self.codebases: Dict[str, LocalCodebase] = {}
        self.running = False
        self._global_codebase_id: Optional[str] = (
            None  # Cached ID for global sessions codebase
        )
        # Track tasks we've seen to avoid duplicates (LRU cache with max size)
        self._known_task_ids: OrderedDict[str, None] = OrderedDict()
        self._known_task_ids_max_size: int = 10000

        # Initialize services
        self.client = WorkerClient(config)
        self.config_manager = ConfigManager(config)
        self.opencode_bin = (
            config.opencode_bin or self.config_manager.find_opencode_binary()
        )
        self.session_sync = SessionSyncService(
            config, self.config_manager, self.client
        )
        # Initialize email service if configured
        self.email_service = EmailNotificationService(config)
        if self.email_service.is_configured():
            logger.info(
                f'Email notifications enabled: {config.notification_email}'
            )
        else:
            logger.info('Email notifications not configured')

        self.task_executor = TaskExecutor(
            config,
            self.client,
            self.config_manager,
            self.session_sync,
            self.opencode_bin,
            self.email_service if self.email_service.is_configured() else None,
        )

    # -------------------------------------------------------------------------
    # Delegated methods for backward compatibility
    # -------------------------------------------------------------------------

    async def _get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session with connection pooling."""
        return await self.client.get_session()

    def _find_opencode_binary(self) -> str:
        """Find the opencode binary."""
        return self.config_manager.find_opencode_binary()

    def _get_authenticated_providers(self) -> set:
        """Get set of provider IDs that have authentication configured."""
        return self.config_manager.get_authenticated_providers()

    async def _get_available_models(self) -> List[Dict[str, Any]]:
        """Fetch available models from local OpenCode instance."""
        return await self.config_manager.get_available_models(self.opencode_bin)

    def _get_opencode_storage_path(self) -> Path:
        """Get the OpenCode global storage path."""
        return self.config_manager.get_opencode_storage_path()

    def _get_project_id_for_path(self, codebase_path: str) -> Optional[str]:
        """Get the OpenCode project ID (hash) for a given codebase path."""
        return self.session_sync._get_project_id_for_path(codebase_path)

    def get_sessions_for_codebase(
        self, codebase_path: str
    ) -> List[Dict[str, Any]]:
        """Get all OpenCode sessions for a codebase."""
        return self.session_sync.get_sessions_for_codebase(codebase_path)

    def get_global_sessions(self) -> List[Dict[str, Any]]:
        """Get all global OpenCode sessions (not associated with a specific project)."""
        return self.session_sync.get_global_sessions()

    def get_session_messages(
        self, session_id: str, max_messages: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """Get messages (including parts) for a specific session from OpenCode storage."""
        return self.session_sync.get_session_messages(session_id, max_messages)

    async def sync_api_keys_from_server(
        self, user_id: Optional[str] = None
    ) -> bool:
        """Sync API keys from the server to local OpenCode auth.json."""
        return await self.client.sync_api_keys_from_server(user_id)

    async def stream_task_output(self, task_id: str, output: str):
        """Stream output chunk to the server."""
        await self.client.stream_task_output(task_id, output)

    async def update_task_status(
        self,
        task_id: str,
        status: str,
        result: Optional[str] = None,
        error: Optional[str] = None,
        session_id: Optional[str] = None,
        max_retries: int = 4,
        base_delay: float = 1.0,
    ):
        """Update task status on the server with exponential backoff retry."""
        await self.client.update_task_status(
            task_id, status, result, error, session_id, max_retries, base_delay
        )

    async def _claim_task(self, task_id: str) -> bool:
        """Atomically claim a task on the server."""
        return await self.client.claim_task(task_id)

    async def _release_task(self, task_id: str) -> bool:
        """Release a task claim on the server after processing."""
        return await self.client.release_task(task_id)

    async def send_heartbeat(self) -> bool:
        """Send heartbeat to the A2A server to indicate worker is alive."""
        return await self.client.send_heartbeat()

    async def run_opencode(
        self,
        codebase_id: str,
        codebase_path: str,
        prompt: str,
        agent_type: str = 'build',
        task_id: Optional[str] = None,
        model: Optional[str] = None,
        session_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Run OpenCode agent on a codebase."""
        return await self.task_executor.run_opencode(
            codebase_id,
            codebase_path,
            prompt,
            agent_type,
            task_id,
            model,
            session_id,
        )

    async def execute_task(self, task: Dict[str, Any]):
        """Execute a task using OpenCode or handle special task types."""
        await self.task_executor.execute_task(
            task,
            self.codebases,
            self._global_codebase_id,
            self.register_codebase,
        )

    async def handle_register_codebase_task(self, task: Dict[str, Any]):
        """Handle a codebase registration task from the server."""
        await self.task_executor.handle_register_codebase_task(
            task, self.register_codebase
        )

    async def _process_task_with_semaphore(self, task: Dict[str, Any]):
        """Process a task with bounded concurrency using semaphore."""
        await self.task_executor.process_task_with_semaphore(
            task,
            self.codebases,
            self._global_codebase_id,
            self.register_codebase,
        )

    async def report_sessions_to_server(self):
        """Report all sessions for registered codebases to the server."""
        result = await self.session_sync.report_sessions_to_server(
            self.codebases, self._global_codebase_id, self.register_codebase
        )
        # Update global_codebase_id if it was re-registered
        if result is not None:
            self._global_codebase_id = result

    async def _report_global_sessions_to_server(self):
        """Report global sessions to the server under a 'global' pseudo-codebase."""
        result = await self.session_sync._report_global_sessions_to_server(
            self._global_codebase_id, self.register_codebase
        )
        if result is not None:
            self._global_codebase_id = result

    async def _report_recent_session_messages_to_server(
        self,
        codebase_id: str,
        sessions: List[Dict[str, Any]],
        max_messages: int,
    ):
        """Best-effort sync for the most recent sessions' messages."""
        await self.session_sync._report_recent_session_messages_to_server(
            codebase_id, sessions, max_messages
        )

    # -------------------------------------------------------------------------
    # Core orchestration methods
    # -------------------------------------------------------------------------

    async def start(self):
        """Start the worker."""
        logger.info(
            f"Starting worker '{self.config.worker_name}' (ID: {self.config.worker_id})"
        )
        logger.info(f'Connecting to server: {self.config.server_url}')

        # Surface OpenCode credential discovery issues early (common when running under systemd).
        try:
            data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
                '~/.local/share'
            )
            auth_path = (
                Path(os.path.expanduser(data_home)) / 'opencode' / 'auth.json'
            )
            if auth_path.exists():
                logger.info(f'OpenCode auth detected at: {auth_path}')
            else:
                logger.warning(
                    'OpenCode auth.json not found for this worker. '
                    f'Expected at: {auth_path}. '
                    "OpenCode agents may fail with 'missing API key' unless you authenticate as this service user "
                    "or import/copy auth.json into the worker's XDG data directory."
                )
        except Exception as e:
            logger.debug(f'Failed to check OpenCode auth.json presence: {e}')

        self.running = True

        # Initialize task semaphore for bounded concurrency
        self.task_executor.init_semaphore()

        # Register global pseudo-codebase first so we can include its ID in worker registration
        logger.info('Registering global pseudo-codebase...')
        self._global_codebase_id = await self.register_codebase(
            name=SpecialCodebaseId.GLOBAL,
            path=str(Path.home()),
            description='Global OpenCode sessions (not project-specific)',
        )

        # Register worker with server
        await self.register_worker()

        # Register configured codebases
        for cb_config in self.config.codebases:
            await self.register_codebase(
                name=cb_config.get('name', Path(cb_config['path']).name),
                path=cb_config['path'],
                description=cb_config.get('description', ''),
            )

        # Register as a discoverable agent (enables agent-to-agent communication)
        # This is done AFTER codebase registration so the worker is "ready"
        # Registration is best-effort and non-blocking
        if self.config.register_as_agent:
            logger.info('Registering as discoverable agent...')
            await self.client.register_as_agent(
                agent_name=self.config.agent_name,
                description=self.config.agent_description,
                url=self.config.agent_url,
                routing_capabilities=self.config.capabilities,
            )

        # Sync API keys from server (allows web UI key management)
        logger.info('Syncing API keys from server...')
        await self.sync_api_keys_from_server()

        # Immediately sync sessions on startup
        logger.info('Syncing sessions with server...')
        await self.report_sessions_to_server()

        # Start SSE task stream with fallback to polling
        await self._run_with_sse_and_fallback()

    async def stop(self):
        """Stop the worker gracefully."""
        logger.info('Stopping worker...')
        self.running = False

        # Kill any active processes
        await self.task_executor.terminate_all_processes()

        # Unregister from server (best effort)
        try:
            await self.unregister_worker()
        except Exception as e:
            logger.debug(f'Failed to unregister worker during shutdown: {e}')

        # Close sessions properly
        await self.client.close()
        await self.email_service.close()

        logger.info('Worker stopped')

    async def register_worker(self):
        """Register this worker with the A2A server."""
        # Ensure global codebase is registered
        if not self._global_codebase_id:
            logger.info(
                'Global codebase not registered, attempting registration...'
            )
            self._global_codebase_id = await self.register_codebase(
                name=SpecialCodebaseId.GLOBAL,
                path=str(Path.home()),
                description='Global OpenCode sessions (not project-specific)',
            )

        # Get available models before registering
        models = await self._get_available_models()
        logger.info(f'Models to register: {len(models)}')

        await self.client.register_worker(models, self._global_codebase_id)

    async def unregister_worker(self):
        """Unregister this worker from the A2A server."""
        await self.client.unregister_worker()

    async def register_codebase(
        self, name: str, path: str, description: str = ''
    ) -> Optional[str]:
        """Register a local codebase with the A2A server."""
        # Normalize for comparisons / de-duping when re-registering.
        normalized_path = os.path.abspath(os.path.expanduser(path))

        codebase_id = await self.client.register_codebase(
            name, path, description
        )

        if codebase_id:
            # If we're re-registering after a server restart, the
            # server may assign a new codebase ID for the same path.
            # Remove any stale local entries for this path.
            stale_ids = [
                cid
                for cid, cb in self.codebases.items()
                if os.path.abspath(os.path.expanduser(cb.path))
                == normalized_path
                and cid != codebase_id
            ]
            for cid in stale_ids:
                self.codebases.pop(cid, None)

            self.codebases[codebase_id] = LocalCodebase(
                id=codebase_id,
                name=name,
                path=normalized_path,
                description=description,
            )

        return codebase_id
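
    # The normalization above in isolation (sketch; paths are hypothetical):
    # expanduser + abspath collapse different spellings of one directory, so
    # the stale-entry sweep can match them by string equality.
    #
    #     import os
    #     a = os.path.abspath(os.path.expanduser('~/repos/app'))
    #     b = os.path.abspath('/home/user/repos/app/')  # trailing slash dropped
    #     # a == b on a host whose home directory is /home/user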

    async def get_pending_tasks(self) -> List[Dict[str, Any]]:
        """Get pending tasks from the server (fallback polling method)."""
        codebase_ids = list(self.codebases.keys())
        tasks = await self.client.get_pending_tasks(codebase_ids)
        # Filter to:
        # 1. Tasks for our registered codebases
        # 2. Registration tasks (codebase_id = '__pending__') that any worker can claim
        # 3. Global tasks (codebase_id = 'global') for workers with global codebase
        matching = [
            t
            for t in tasks
            if t.get('codebase_id') in self.codebases
            or t.get('codebase_id') == SpecialCodebaseId.PENDING
            or (
                t.get('codebase_id') == SpecialCodebaseId.GLOBAL
                and self._global_codebase_id is not None
            )
        ]
        if matching:
            logger.info(
                f'Found {len(matching)} pending tasks for our codebases'
            )
        return matching

    async def _run_with_sse_and_fallback(self):
        """Run the main loop with SSE streaming, falling back to polling if needed."""
        session_sync_counter = 0
        session_sync_interval = 12  # Sync sessions every 12 cycles (~60s at a 5s poll interval)

        while self.running:
            try:
                # Try SSE streaming first
                logger.info('Attempting SSE connection for task streaming...')
                await self._sse_task_stream()
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.warning(f'SSE connection failed: {e}')
                self.client.sse_connected = False

            if not self.running:
                break

            # SSE failed or disconnected - fall back to polling temporarily
            logger.info(
                f'Falling back to polling (reconnect in {self.client.sse_reconnect_delay}s)...'
            )

            # Do one poll cycle while waiting to reconnect
            try:
                tasks = await self.get_pending_tasks()
                for task in tasks:
                    if not self.running:
                        break
                    codebase_id = task.get('codebase_id')
                    if (
                        codebase_id in self.codebases
                        or codebase_id == SpecialCodebaseId.PENDING
                        or (
                            codebase_id == SpecialCodebaseId.GLOBAL
                            and self._global_codebase_id is not None
                        )
                    ):
                        # Process task with bounded concurrency
                        asyncio.create_task(
                            self._process_task_with_semaphore(task)
                        )

                # Periodic maintenance
                session_sync_counter += 1
                if session_sync_counter >= session_sync_interval:
                    session_sync_counter = 0
                    await self.register_worker()
                    for cb_config in self.config.codebases:
                        await self.register_codebase(
                            name=cb_config.get(
                                'name', Path(cb_config['path']).name
                            ),
                            path=cb_config['path'],
                            description=cb_config.get('description', ''),
                        )
                    await self.report_sessions_to_server()

            except Exception as e:
                logger.error(f'Error in fallback poll: {e}')

            # Wait before trying SSE again (with exponential backoff)
            await asyncio.sleep(self.client.sse_reconnect_delay)
            self.client.sse_reconnect_delay = min(
                self.client.sse_reconnect_delay * 2,
                self.config.sse_max_reconnect_delay,
            )
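
            # The delay therefore doubles after each failed cycle and is
            # capped, e.g. with an initial delay of 5s and a 300s cap (assumed
            # values): 5, 10, 20, 40, 80, 160, 300, 300, ... seconds between
            # SSE attempts. A successful connection resets it in
            # _sse_task_stream below.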

    async def _sse_task_stream(self):
        """Connect to SSE endpoint and receive task assignments in real-time."""
        session = await self._get_session()

        # Build SSE URL with worker_id and agent_name
        # Use agent_name if set, otherwise fall back to worker_name
        # This ensures SSE routing identity matches discovery identity
        sse_url = f'{self.config.server_url}/v1/worker/tasks/stream'
        resolved_agent_name = self.config.agent_name or self.config.worker_name
        params = {
            'worker_id': self.config.worker_id,
            'agent_name': resolved_agent_name,  # Required by SSE endpoint
        }

        logger.info(f'Connecting to SSE stream: {sse_url}')

        # Use a longer timeout for SSE connections
        sse_timeout = aiohttp.ClientTimeout(
            total=None,  # No total timeout
            connect=30,
            sock_read=self.config.sse_heartbeat_timeout
            + 15,  # Allow some slack
        )

        # Build headers including auth token if available
        sse_headers = {'Accept': 'text/event-stream'}
        if self.config.auth_token:
            sse_headers['Authorization'] = f'Bearer {self.config.auth_token}'

        # Add codebase IDs as header for SSE routing
        # Always include 'global' so worker accepts tasks for any codebase
        codebase_ids = list(self.codebases.keys())
        codebase_ids.append('global')
        sse_headers['X-Codebases'] = ','.join(codebase_ids)

        # Add capabilities header
        sse_headers['X-Capabilities'] = 'opencode,build,deploy,test'

        async with session.get(
            sse_url,
            params=params,
            timeout=sse_timeout,
            headers=sse_headers,
        ) as response:
            if response.status != 200:
                text = await response.text()
                raise Exception(
                    f'SSE connection failed: {response.status} - {text}'
                )

            self.client.sse_connected = True
            self.client.sse_reconnect_delay = (
                self.config.sse_reconnect_delay
            )  # Reset backoff
            self.client.last_heartbeat = time.time()
            logger.info('SSE connection established')

            # Start background tasks
            heartbeat_checker = asyncio.create_task(
                self._check_heartbeat_timeout()
            )
            periodic_maintenance = asyncio.create_task(
                self._periodic_maintenance()
            )

            try:
                event_type = None
                event_data_lines = []

                async for line in response.content:
                    if not self.running:
                        break

                    line = line.decode('utf-8').rstrip('\r\n')

                    if line.startswith('event:'):
                        event_type = line[6:].strip()
                    elif line.startswith('data:'):
                        event_data_lines.append(line[5:].strip())
                    elif line == '':
                        # Empty line signals end of event
                        if event_data_lines:
                            event_data = '\n'.join(event_data_lines)
                            await self._handle_sse_event(event_type, event_data)
                            event_data_lines = []
                            event_type = None
                    # Handle comment lines (heartbeats often sent as : comment)
                    elif line.startswith(':'):
                        self.client.last_heartbeat = time.time()
                        logger.debug('Received SSE heartbeat (comment)')

            finally:
                heartbeat_checker.cancel()
                periodic_maintenance.cancel()
                try:
                    await heartbeat_checker
                except asyncio.CancelledError:
                    pass
                try:
                    await periodic_maintenance
                except asyncio.CancelledError:
                    pass
|
|
4244
|
+
|
|
4245
|
+
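    # A hedged illustration of the stream this loop consumes. The GET above
    # amounts to (params and headers taken from the code; ids are made up):
    #
    #   GET {server_url}/v1/worker/tasks/stream?worker_id=w-1&agent_name=dev-vm
    #   Accept: text/event-stream
    #   X-Codebases: cb-123,global
    #   X-Capabilities: opencode,build,deploy,test
    #
    # and the parser expects standard SSE framing, e.g.:
    #
    #   event: task_assigned
    #   data: {"id": "task-1", "codebase_id": "global", "title": "Fix tests"}
    #   <blank line terminates the event>
    #
    #   : comment lines like this one are treated as heartbeats
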
    async def _handle_sse_event(self, event_type: Optional[str], data: str):
        """Handle an SSE event from the server."""
        self.client.last_heartbeat = time.time()

        # Handle heartbeat events
        if event_type == 'heartbeat' or event_type == 'ping':
            logger.debug('Received SSE heartbeat event')
            return

        # Handle task events
        if event_type in (
            'task',
            'task_available',
            'task_assigned',
            'task.created',
            'task.assigned',
        ):
            try:
                task = json.loads(data)
                task_id = task.get('id') or task.get('task_id')

                # Skip if we've already seen this task (LRU deduplication)
                if task_id in self._known_task_ids:
                    logger.debug(f'Skipping duplicate task: {task_id}')
                    return
                # Add to LRU cache, evicting oldest if at capacity
                self._known_task_ids[task_id] = None
                if len(self._known_task_ids) > self._known_task_ids_max_size:
                    self._known_task_ids.popitem(last=False)

                # Skip if already processing
                if self.task_executor.is_task_active(task_id):
                    logger.debug(f'Task already being processed: {task_id}')
                    return

                codebase_id = task.get('codebase_id')
                if (
                    codebase_id in self.codebases
                    or codebase_id == SpecialCodebaseId.PENDING
                    or (
                        codebase_id == SpecialCodebaseId.GLOBAL
                        and self._global_codebase_id is not None
                    )
                ):
                    logger.info(
                        f'Received task via SSE: {task_id} - {task.get("title", "Untitled")}'
                    )
                    # Process task with bounded concurrency (don't await)
                    asyncio.create_task(self._process_task_with_semaphore(task))
                else:
                    logger.debug(
                        f'Task {task_id} not for our codebases, ignoring'
                    )

            except json.JSONDecodeError as e:
                logger.warning(f'Failed to parse task data: {e}')
            except Exception as e:
                logger.error(f'Error handling task event: {e}')

        elif event_type == 'connected':
            logger.info(f'SSE connection confirmed: {data}')

        elif event_type == 'error':
            logger.warning(f'SSE server error: {data}')

        else:
            logger.debug(
                f'Unknown SSE event type: {event_type}, data: {data[:100]}...'
            )

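    # _known_task_ids above is an OrderedDict used as a bounded LRU "seen" set.
    # A minimal self-contained sketch of the same technique (the cap of 1000
    # is illustrative, not the worker's actual setting):
    #
    #   from collections import OrderedDict
    #
    #   seen: 'OrderedDict[str, None]' = OrderedDict()
    #   MAX_SEEN = 1000
    #
    #   def is_duplicate(task_id: str) -> bool:
    #       if task_id in seen:
    #           return True
    #       seen[task_id] = None
    #       if len(seen) > MAX_SEEN:
    #           seen.popitem(last=False)  # evict the oldest insertion
    #       return False
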
    async def _check_heartbeat_timeout(self):
        """Check if we've received a heartbeat recently."""
        while self.running and self.client.sse_connected:
            await asyncio.sleep(10)

            if not self.client.sse_connected:
                break

            elapsed = time.time() - self.client.last_heartbeat
            if elapsed > self.config.sse_heartbeat_timeout:
                logger.warning(
                    f'No SSE heartbeat for {elapsed:.1f}s (timeout: {self.config.sse_heartbeat_timeout}s)'
                )
                # Force reconnection by breaking the SSE loop
                self.client.sse_connected = False
                break

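    # Note: the watchdog above compares wall-clock time.time() stamps, which
    # can misfire if the system clock jumps. A sketch of the same check on a
    # monotonic clock (an alternative, not what this module does):
    #
    #   import asyncio, time
    #   last = time.monotonic()              # re-stamped on each heartbeat
    #   while connected:
    #       await asyncio.sleep(10)
    #       if time.monotonic() - last > timeout:
    #           break                        # caller then reconnects
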
    async def _periodic_maintenance(self):
        """Perform periodic maintenance tasks while SSE is connected."""
        sync_interval = 60  # seconds
        heartbeat_interval = 15  # seconds
        agent_heartbeat_interval = 45  # seconds (must be < 120s TTL)
        last_sync = time.time()
        last_heartbeat = time.time()
        last_agent_heartbeat = time.time()

        while self.running and self.client.sse_connected:
            await asyncio.sleep(5)

            now = time.time()

            # Send heartbeat to server periodically (worker heartbeat)
            if now - last_heartbeat >= heartbeat_interval:
                last_heartbeat = now
                await self.send_heartbeat()

            # Refresh agent discovery heartbeat (keeps agent visible in discover_agents)
            if (
                self.config.register_as_agent
                and now - last_agent_heartbeat >= agent_heartbeat_interval
            ):
                last_agent_heartbeat = now
                await self.client.refresh_agent_heartbeat()

            # Sync sessions and re-register periodically
            if now - last_sync >= sync_interval:
                last_sync = now
                try:
                    await self.register_worker()
                    for cb_config in self.config.codebases:
                        await self.register_codebase(
                            name=cb_config.get(
                                'name', Path(cb_config['path']).name
                            ),
                            path=cb_config['path'],
                            description=cb_config.get('description', ''),
                        )
                    await self.report_sessions_to_server()
                except Exception as e:
                    logger.warning(f'Periodic maintenance error: {e}')

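# The maintenance loop above multiplexes three cadences (15s worker heartbeat,
# 45s agent-discovery heartbeat, 60s session sync) over a single 5s tick. A
# minimal standalone sketch of the tick-based pattern (names and the 1s tick
# are illustrative):
#
#   import asyncio
#   import time
#
#   async def run_every(interval: float, fn, should_run) -> None:
#       last = time.time()
#       while should_run():
#           await asyncio.sleep(1)
#           if time.time() - last >= interval:
#               last = time.time()
#               await fn()
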
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """Load configuration from file."""
    if config_path and Path(config_path).exists():
        try:
            with open(config_path) as f:
                return json.load(f)
        except Exception as e:
            logger.warning(f'Failed to load config from {config_path}: {e}')

    # Check default locations
    default_paths = [
        Path.home() / '.config' / 'a2a-worker' / 'config.json',
        Path('/etc/a2a-worker/config.json'),
        Path('worker-config.json'),
    ]

    for path in default_paths:
        try:
            if path.exists():
                with open(path) as f:
                    return json.load(f)
        except Exception:
            # Skip if we can't read it (e.g. permission denied)
            continue

    return {}

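# load_config() searches, in order: an explicit --config path, then
# ~/.config/a2a-worker/config.json, /etc/a2a-worker/config.json, and
# ./worker-config.json. An illustrative config file (keys mirror how main()
# reads file_config below; the values are made up):
#
#   {
#     "server_url": "https://api.codetether.run",
#     "worker_name": "dev-vm-worker",
#     "worker_id": "worker-01",
#     "poll_interval": 5,
#     "codebases": [{"name": "myrepo", "path": "/srv/myrepo"}],
#     "capabilities": ["opencode", "build"],
#     "max_concurrent_tasks": 2,
#     "sse_heartbeat_timeout": 45
#   }
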
async def main():
    parser = argparse.ArgumentParser(description='A2A Agent Worker')
    parser.add_argument('--server', '-s', default=None, help='A2A server URL')
    parser.add_argument('--name', '-n', default=None, help='Worker name')
    parser.add_argument(
        '--worker-id',
        default=None,
        help='Stable worker id (recommended for systemd/k8s). If omitted, a random id is generated.',
    )
    parser.add_argument('--config', '-c', help='Path to config file')
    parser.add_argument(
        '--codebase',
        '-b',
        action='append',
        help='Codebase to register (format: name:path or just path)',
    )
    parser.add_argument(
        '--poll-interval',
        '-i',
        type=int,
        default=None,
        help='Fallback poll interval in seconds (when SSE unavailable)',
    )
    parser.add_argument('--opencode', help='Path to opencode binary')

    parser.add_argument(
        '--opencode-storage-path',
        default=None,
        help='Override OpenCode storage path (directory containing project/, session/, message/, part/)',
    )
    parser.add_argument(
        '--session-message-sync-max-sessions',
        type=int,
        default=None,
        help='How many most-recent sessions per codebase to sync messages for (0 disables)',
    )
    parser.add_argument(
        '--session-message-sync-max-messages',
        type=int,
        default=None,
        help='How many most-recent messages per session to sync (0 disables)',
    )
    parser.add_argument(
        '--max-concurrent-tasks',
        type=int,
        default=None,
        help='Maximum number of tasks to process concurrently (default: 2)',
    )
    parser.add_argument(
        '--sse-heartbeat-timeout',
        type=float,
        default=None,
        help='SSE heartbeat timeout in seconds (default: 45)',
    )
    # Email notification options
    parser.add_argument(
        '--email',
        '-e',
        default=None,
        help='Email address for task completion reports',
    )
    parser.add_argument(
        '--sendgrid-key',
        default=None,
        help='SendGrid API key (or set SENDGRID_API_KEY env var)',
    )
    parser.add_argument(
        '--sendgrid-from',
        default=None,
        help='SendGrid verified sender email (or set SENDGRID_FROM_EMAIL env var)',
    )
    # Email debugging and testing options
    parser.add_argument(
        '--test-email',
        action='store_true',
        help='Send a test notification email and exit (validates email config)',
    )
    parser.add_argument(
        '--email-dry-run',
        action='store_true',
        help='Log emails instead of sending them (dry run mode)',
    )
    parser.add_argument(
        '--email-verbose',
        action='store_true',
        help='Enable verbose logging for email operations',
    )
    # Agent registration options (for A2A network discovery)
    parser.add_argument(
        '--no-agent-registration',
        action='store_true',
        help='Disable automatic agent registration (worker will not be discoverable via discover_agents)',
    )
    parser.add_argument(
        '--agent-name',
        default=None,
        help='Name for agent discovery and routing (defaults to worker name). '
        'This is the identity used for discover_agents and send_to_agent.',
    )
    parser.add_argument(
        '--agent-description',
        default=None,
        help='Description for agent discovery (what this agent does)',
    )
    parser.add_argument(
        '--agent-url',
        default=None,
        help='URL where this agent can be reached directly (optional, defaults to server URL)',
    )

    args = parser.parse_args()

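    # Illustrative invocations (repository names and paths are made up):
    #
    #   python worker.py --codebase myrepo:/srv/myrepo --agent-name build-bot
    #   python worker.py -c worker-config.json --email-dry-run --email-verbose
    #   python worker.py --test-email --email-dry-run
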
    # Load config from file
    file_config = load_config(args.config)

    # Honor config file values when CLI flags are not explicitly provided.
    # Note: argparse does not tell us whether a value came from a default or
    # from an explicit flag, so we detect explicit flags via sys.argv.
    server_flag_set = ('--server' in sys.argv) or ('-s' in sys.argv)
    name_flag_set = ('--name' in sys.argv) or ('-n' in sys.argv)
    worker_id_flag_set = '--worker-id' in sys.argv
    poll_flag_set = ('--poll-interval' in sys.argv) or ('-i' in sys.argv)

    # Resolve server_url with precedence: CLI flag > env > config > default
    if server_flag_set and args.server:
        server_url = args.server
    elif os.environ.get('A2A_SERVER_URL'):
        server_url = os.environ['A2A_SERVER_URL']
    elif file_config.get('server_url'):
        server_url = file_config['server_url']
    else:
        server_url = 'https://api.codetether.run'

    # Resolve worker_name with precedence: CLI flag > env > config > hostname
    if name_flag_set and args.name:
        worker_name = args.name
    elif os.environ.get('A2A_WORKER_NAME'):
        worker_name = os.environ['A2A_WORKER_NAME']
    elif file_config.get('worker_name'):
        worker_name = file_config['worker_name']
    else:
        import platform

        worker_name = platform.node()  # Cross-platform (works on Windows)

    # Resolve worker_id with precedence: CLI flag > env > config > default
    worker_id: Optional[str] = None
    if worker_id_flag_set and args.worker_id:
        worker_id = args.worker_id
    elif os.environ.get('A2A_WORKER_ID'):
        worker_id = os.environ['A2A_WORKER_ID']
    elif file_config.get('worker_id'):
        worker_id = file_config['worker_id']

    # Resolve poll_interval with precedence: CLI flag > env > config > default
    poll_interval_raw = None
    if poll_flag_set and (args.poll_interval is not None):
        poll_interval_raw = args.poll_interval
    elif os.environ.get('A2A_POLL_INTERVAL'):
        poll_interval_raw = os.environ.get('A2A_POLL_INTERVAL')
    elif file_config.get('poll_interval') is not None:
        poll_interval_raw = file_config.get('poll_interval')
    else:
        poll_interval_raw = 5

    try:
        poll_interval = (
            int(poll_interval_raw) if poll_interval_raw is not None else 5
        )
    except (TypeError, ValueError):
        poll_interval = 5
        logger.warning('Invalid poll_interval value; falling back to 5 seconds')

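    # Every setting above follows the same precedence chain: explicit CLI
    # flag > environment variable > config file > built-in default. A compact
    # sketch of that chain as a helper (hypothetical; this module inlines it):
    #
    #   def resolve(cli_value, env_key, cfg_key, default):
    #       if cli_value is not None:
    #           return cli_value
    #       if os.environ.get(env_key):
    #           return os.environ[env_key]
    #       if file_config.get(cfg_key) is not None:
    #           return file_config[cfg_key]
    #       return default
    #
    #   server_url = resolve(args.server, 'A2A_SERVER_URL', 'server_url',
    #                        'https://api.codetether.run')
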
    capabilities = file_config.get('capabilities')
    if not isinstance(capabilities, list):
        capabilities = None

    # Build codebase list
    codebases = file_config.get('codebases', [])
    if args.codebase:
        for cb in args.codebase:
            if ':' in cb:
                name, path = cb.split(':', 1)
            else:
                name = Path(cb).name
                path = cb
            codebases.append({'name': name, 'path': os.path.abspath(path)})

    # Create config
    config_kwargs: Dict[str, Any] = {
        'server_url': server_url,
        'worker_name': worker_name,
        'codebases': codebases,
        'poll_interval': poll_interval,
        'opencode_bin': args.opencode or file_config.get('opencode_bin'),
        'opencode_storage_path': (
            args.opencode_storage_path
            or os.environ.get('A2A_OPENCODE_STORAGE_PATH')
            or file_config.get('opencode_storage_path')
        ),
    }

    if worker_id:
        config_kwargs['worker_id'] = worker_id

    # Optional session message sync tuning
    if args.session_message_sync_max_sessions is not None:
        config_kwargs['session_message_sync_max_sessions'] = (
            args.session_message_sync_max_sessions
        )
    elif os.environ.get('A2A_SESSION_MESSAGE_SYNC_MAX_SESSIONS'):
        try:
            config_kwargs['session_message_sync_max_sessions'] = int(
                os.environ['A2A_SESSION_MESSAGE_SYNC_MAX_SESSIONS']
            )
        except ValueError as e:
            logger.warning(
                f'Invalid A2A_SESSION_MESSAGE_SYNC_MAX_SESSIONS value: {e}'
            )
    elif file_config.get('session_message_sync_max_sessions') is not None:
        config_kwargs['session_message_sync_max_sessions'] = file_config.get(
            'session_message_sync_max_sessions'
        )

    if args.session_message_sync_max_messages is not None:
        config_kwargs['session_message_sync_max_messages'] = (
            args.session_message_sync_max_messages
        )
    elif os.environ.get('A2A_SESSION_MESSAGE_SYNC_MAX_MESSAGES'):
        try:
            config_kwargs['session_message_sync_max_messages'] = int(
                os.environ['A2A_SESSION_MESSAGE_SYNC_MAX_MESSAGES']
            )
        except ValueError as e:
            logger.warning(
                f'Invalid A2A_SESSION_MESSAGE_SYNC_MAX_MESSAGES value: {e}'
            )
    elif file_config.get('session_message_sync_max_messages') is not None:
        config_kwargs['session_message_sync_max_messages'] = file_config.get(
            'session_message_sync_max_messages'
        )

    # Max concurrent tasks
    if args.max_concurrent_tasks is not None:
        config_kwargs['max_concurrent_tasks'] = args.max_concurrent_tasks
    elif os.environ.get('A2A_MAX_CONCURRENT_TASKS'):
        try:
            config_kwargs['max_concurrent_tasks'] = int(
                os.environ['A2A_MAX_CONCURRENT_TASKS']
            )
        except ValueError as e:
            logger.warning(f'Invalid A2A_MAX_CONCURRENT_TASKS value: {e}')
    elif file_config.get('max_concurrent_tasks') is not None:
        config_kwargs['max_concurrent_tasks'] = file_config.get(
            'max_concurrent_tasks'
        )

    # SSE heartbeat timeout
    if args.sse_heartbeat_timeout is not None:
        config_kwargs['sse_heartbeat_timeout'] = args.sse_heartbeat_timeout
    elif os.environ.get('A2A_SSE_HEARTBEAT_TIMEOUT'):
        try:
            config_kwargs['sse_heartbeat_timeout'] = float(
                os.environ['A2A_SSE_HEARTBEAT_TIMEOUT']
            )
        except ValueError as e:
            logger.warning(f'Invalid A2A_SSE_HEARTBEAT_TIMEOUT value: {e}')
    elif file_config.get('sse_heartbeat_timeout') is not None:
        config_kwargs['sse_heartbeat_timeout'] = file_config.get(
            'sse_heartbeat_timeout'
        )

    if capabilities is not None:
        config_kwargs['capabilities'] = capabilities

    # Auth token for SSE endpoint
    auth_token = os.environ.get('A2A_AUTH_TOKEN')
    if auth_token:
        config_kwargs['auth_token'] = auth_token

    # SendGrid email notification config
    # Precedence: CLI flag > env var > Vault > config file
    sendgrid_key = (
        args.sendgrid_key
        or os.environ.get('SENDGRID_API_KEY')
        or file_config.get('sendgrid_api_key')
    )
    sendgrid_from = (
        args.sendgrid_from
        or os.environ.get('SENDGRID_FROM_EMAIL')
        or file_config.get('sendgrid_from_email')
    )
    notification_email = (
        args.email
        or os.environ.get('A2A_NOTIFICATION_EMAIL')
        or file_config.get('notification_email')
    )

    # Try to fetch from Vault if not configured via CLI/env/config
    vault_path = file_config.get(
        'vault_sendgrid_path', 'secret/spotlessbinco/sendgrid'
    )
    if not sendgrid_key or not sendgrid_from or not notification_email:
        vault = VaultClient()
        if vault.is_configured():
            logger.info(f'Fetching SendGrid config from Vault: {vault_path}')
            try:
                # We are already inside a running event loop (main() runs via
                # asyncio.run), so await the fetch directly; spinning up a
                # second loop with run_until_complete here would raise
                # "Cannot run the event loop while another loop is running".
                async def _fetch_vault():
                    try:
                        return await vault.get_secret(vault_path)
                    finally:
                        await vault.close()

                vault_secrets = await _fetch_vault()

                if vault_secrets:
                    if not sendgrid_key:
                        sendgrid_key = vault_secrets.get('SENDGRID_API_KEY')
                    if not sendgrid_from:
                        sendgrid_from = vault_secrets.get('SENDGRID_FROM_EMAIL')
                    if not notification_email:
                        notification_email = vault_secrets.get(
                            'NOTIFICATION_EMAIL'
                        )
                    logger.info('Loaded SendGrid config from Vault')
            except Exception as e:
                logger.warning(
                    f'Failed to fetch SendGrid config from Vault: {e}'
                )

    if sendgrid_key:
        config_kwargs['sendgrid_api_key'] = sendgrid_key
    if sendgrid_from:
        config_kwargs['sendgrid_from_email'] = sendgrid_from
    if notification_email:
        config_kwargs['notification_email'] = notification_email

    # Email debugging flags
    if args.email_dry_run:
        config_kwargs['email_dry_run'] = True
        logger.info(
            'Email dry-run mode enabled (emails will be logged, not sent)'
        )

    if args.email_verbose:
        config_kwargs['email_verbose'] = True
        logger.info('Email verbose logging enabled')

    # Add email inbound domain from config if available
    email_inbound_domain = os.environ.get(
        'EMAIL_INBOUND_DOMAIN'
    ) or file_config.get('email_inbound_domain')
    if email_inbound_domain:
        config_kwargs['email_inbound_domain'] = email_inbound_domain

    email_reply_prefix = os.environ.get(
        'EMAIL_REPLY_PREFIX'
    ) or file_config.get('email_reply_prefix')
    if email_reply_prefix:
        config_kwargs['email_reply_prefix'] = email_reply_prefix

    # Agent registration options
    # Disable agent registration if --no-agent-registration flag is set
    if args.no_agent_registration:
        config_kwargs['register_as_agent'] = False
        logger.info(
            'Agent registration disabled (worker will not be discoverable)'
        )
    else:
        # Default to True (register as discoverable agent)
        register_as_agent = file_config.get('register_as_agent', True)
        config_kwargs['register_as_agent'] = register_as_agent

    # Agent name (identity for discovery and routing - should match SSE agent_name)
    # This is the key identity used by discover_agents and send_to_agent
    agent_name = (
        args.agent_name
        or os.environ.get('A2A_AGENT_NAME')
        or file_config.get('agent_name')
    )
    if agent_name:
        config_kwargs['agent_name'] = agent_name
        logger.info(f"Agent name set to: '{agent_name}'")

    # Agent description (what this agent does)
    agent_description = (
        args.agent_description
        or os.environ.get('A2A_AGENT_DESCRIPTION')
        or file_config.get('agent_description')
    )
    if agent_description:
        config_kwargs['agent_description'] = agent_description

    # Agent URL (where this agent can be reached directly)
    agent_url = (
        args.agent_url
        or os.environ.get('A2A_AGENT_URL')
        or file_config.get('agent_url')
    )
    if agent_url:
        config_kwargs['agent_url'] = agent_url

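    # config_kwargs assembled above feeds WorkerConfig below. A minimal
    # programmatic construction for reference (assumes the remaining
    # WorkerConfig fields have defaults; values are illustrative):
    #
    #   config = WorkerConfig(
    #       server_url='https://api.codetether.run',
    #       worker_name='dev-vm-worker',
    #       codebases=[{'name': 'myrepo', 'path': '/srv/myrepo'}],
    #       poll_interval=5,
    #   )
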
    config = WorkerConfig(**config_kwargs)

    # Handle --test-email flag: send test email and exit
    if args.test_email:
        logger.info('=== Email Configuration Test ===')
        email_service = EmailNotificationService(config)

        # Print configuration status
        config_status = email_service.get_config_status()
        logger.info('Configuration status:')
        logger.info(f'  Configured: {config_status["configured"]}')
        logger.info(f'  Dry-run mode: {config_status["dry_run"]}')
        logger.info(f'  Verbose mode: {config_status["verbose"]}')
        logger.info(
            f'  SendGrid API key set: {config_status["sendgrid_api_key_set"]}'
        )
        logger.info(f'  From email: {config_status["sendgrid_from_email"]}')
        logger.info(f'  To email: {config_status["notification_email"]}')
        logger.info(f'  Inbound domain: {config_status["inbound_domain"]}')
        logger.info(f'  Reply prefix: {config_status["reply_prefix"]}')

        if config_status['issues']:
            logger.warning('Issues found:')
            for issue in config_status['issues']:
                logger.warning(f'  - {issue}')

        # Send test email
        result = await email_service.send_test_email()

        if result['success']:
            logger.info(f'SUCCESS: {result["message"]}')
        else:
            logger.error(f'FAILED: {result["message"]}')

        await email_service.close()

        # Exit after test
        return

    # Create and start worker
    worker = AgentWorker(config)

    # Handle signals (use the running loop; asyncio.get_event_loop() inside a
    # coroutine is deprecated)
    loop = asyncio.get_running_loop()

    def signal_handler():
        logger.info('Received shutdown signal')
        worker.running = False

    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, signal_handler)

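    # Note: loop.add_signal_handler is only implemented on Unix event loops;
    # on Windows it raises NotImplementedError. A hedged fallback sketch:
    #
    #   try:
    #       loop.add_signal_handler(sig, signal_handler)
    #   except NotImplementedError:
    #       signal.signal(sig, lambda *_: signal_handler())
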
    try:
        await worker.start()
    except KeyboardInterrupt:
        pass
    finally:
        # Always ensure clean shutdown
        await worker.stop()
        # Give aiohttp time to close connections gracefully
        await asyncio.sleep(0.25)


if __name__ == '__main__':
    asyncio.run(main())