codetether 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. a2a_server/__init__.py +29 -0
  2. a2a_server/a2a_agent_card.py +365 -0
  3. a2a_server/a2a_errors.py +1133 -0
  4. a2a_server/a2a_executor.py +926 -0
  5. a2a_server/a2a_router.py +1033 -0
  6. a2a_server/a2a_types.py +344 -0
  7. a2a_server/agent_card.py +408 -0
  8. a2a_server/agents_server.py +271 -0
  9. a2a_server/auth_api.py +349 -0
  10. a2a_server/billing_api.py +638 -0
  11. a2a_server/billing_service.py +712 -0
  12. a2a_server/billing_webhooks.py +501 -0
  13. a2a_server/config.py +96 -0
  14. a2a_server/database.py +2165 -0
  15. a2a_server/email_inbound.py +398 -0
  16. a2a_server/email_notifications.py +486 -0
  17. a2a_server/enhanced_agents.py +919 -0
  18. a2a_server/enhanced_server.py +160 -0
  19. a2a_server/hosted_worker.py +1049 -0
  20. a2a_server/integrated_agents_server.py +347 -0
  21. a2a_server/keycloak_auth.py +750 -0
  22. a2a_server/livekit_bridge.py +439 -0
  23. a2a_server/marketing_tools.py +1364 -0
  24. a2a_server/mcp_client.py +196 -0
  25. a2a_server/mcp_http_server.py +2256 -0
  26. a2a_server/mcp_server.py +191 -0
  27. a2a_server/message_broker.py +725 -0
  28. a2a_server/mock_mcp.py +273 -0
  29. a2a_server/models.py +494 -0
  30. a2a_server/monitor_api.py +5904 -0
  31. a2a_server/opencode_bridge.py +1594 -0
  32. a2a_server/redis_task_manager.py +518 -0
  33. a2a_server/server.py +726 -0
  34. a2a_server/task_manager.py +668 -0
  35. a2a_server/task_queue.py +742 -0
  36. a2a_server/tenant_api.py +333 -0
  37. a2a_server/tenant_middleware.py +219 -0
  38. a2a_server/tenant_service.py +760 -0
  39. a2a_server/user_auth.py +721 -0
  40. a2a_server/vault_client.py +576 -0
  41. a2a_server/worker_sse.py +873 -0
  42. agent_worker/__init__.py +8 -0
  43. agent_worker/worker.py +4877 -0
  44. codetether/__init__.py +10 -0
  45. codetether/__main__.py +4 -0
  46. codetether/cli.py +112 -0
  47. codetether/worker_cli.py +57 -0
  48. codetether-1.2.2.dist-info/METADATA +570 -0
  49. codetether-1.2.2.dist-info/RECORD +66 -0
  50. codetether-1.2.2.dist-info/WHEEL +5 -0
  51. codetether-1.2.2.dist-info/entry_points.txt +4 -0
  52. codetether-1.2.2.dist-info/licenses/LICENSE +202 -0
  53. codetether-1.2.2.dist-info/top_level.txt +5 -0
  54. codetether_voice_agent/__init__.py +6 -0
  55. codetether_voice_agent/agent.py +445 -0
  56. codetether_voice_agent/codetether_mcp.py +345 -0
  57. codetether_voice_agent/config.py +16 -0
  58. codetether_voice_agent/functiongemma_caller.py +380 -0
  59. codetether_voice_agent/session_playback.py +247 -0
  60. codetether_voice_agent/tools/__init__.py +21 -0
  61. codetether_voice_agent/tools/definitions.py +135 -0
  62. codetether_voice_agent/tools/handlers.py +380 -0
  63. run_server.py +314 -0
  64. ui/monitor-tailwind.html +1790 -0
  65. ui/monitor.html +1775 -0
  66. ui/monitor.js +2662 -0
agent_worker/worker.py ADDED
@@ -0,0 +1,4877 @@
#!/usr/bin/env python3
"""
A2A Agent Worker - Runs on machines with codebases, connects to A2A server

This worker:
1. Registers itself with the A2A server
2. Registers local codebases it can work on
3. Connects via SSE to receive task assignments pushed from server
4. Executes tasks using OpenCode
5. Reports results back to the server
6. Reports OpenCode session history to the server

Usage:
    python worker.py --server https://api.codetether.run --name "dev-vm-worker"
"""

import argparse
import asyncio
import json
import logging
import os
import signal
import subprocess
import sys
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from enum import StrEnum  # Requires Python 3.11+
from pathlib import Path
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Callable, Set

import aiohttp


# =============================================================================
# VaultClient - HashiCorp Vault integration for secrets
# =============================================================================

class VaultClient:
    """
    Simple Vault client for fetching secrets.

    Supports KV v2 secrets engine.
    """

    def __init__(
        self,
        addr: Optional[str] = None,
        token: Optional[str] = None,
    ):
        self.addr = addr or os.environ.get('VAULT_ADDR')
        self.token = token or os.environ.get('VAULT_TOKEN')
        self._session: Optional[aiohttp.ClientSession] = None

    def is_configured(self) -> bool:
        """Check if Vault is configured."""
        return bool(self.addr and self.token)

    async def _get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=10),
                headers={'X-Vault-Token': self.token or ''},
            )
        return self._session

    async def close(self):
        """Close the HTTP session."""
        if self._session and not self._session.closed:
            await self._session.close()

    async def get_secret(self, path: str) -> Optional[Dict[str, Any]]:
        """
        Get a secret from Vault KV v2.

        Args:
            path: Secret path (e.g., 'secret/spotlessbinco/sendgrid')

        Returns:
            Dictionary of secret data, or None if not found.
        """
        if not self.is_configured():
            return None

        try:
            session = await self._get_session()

            # KV v2 requires /data/ in the path:
            # convert 'secret/foo' to 'secret/data/foo'
            parts = path.split('/', 1)
            if len(parts) == 2:
                mount = parts[0]
                secret_path = parts[1]
                url = f'{self.addr}/v1/{mount}/data/{secret_path}'
            else:
                url = f'{self.addr}/v1/{path}'

            async with session.get(url) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    return data.get('data', {}).get('data', {})
                else:
                    return None

        except Exception as e:
            logging.getLogger('a2a-worker').warning(
                f'Failed to fetch secret from Vault: {e}'
            )
            return None

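
# Illustrative usage sketch (not called by the worker): fetch one secret with
# VaultClient. The path 'secret/myapp/sendgrid' is a hypothetical example; per
# get_secret() above it resolves to GET {VAULT_ADDR}/v1/secret/data/myapp/sendgrid.
async def _vault_usage_example() -> Optional[str]:
    vault = VaultClient()  # Reads VAULT_ADDR / VAULT_TOKEN from the environment
    try:
        if not vault.is_configured():
            return None
        secret = await vault.get_secret('secret/myapp/sendgrid')
        return (secret or {}).get('api_key')
    finally:
        await vault.close()

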
class TaskStatus(StrEnum):
    """Status values for tasks in the task queue."""

    PENDING = 'pending'
    RUNNING = 'running'
    COMPLETED = 'completed'
    FAILED = 'failed'


class SpecialCodebaseId(StrEnum):
    """Special codebase ID values with semantic meaning."""

    PENDING = '__pending__'  # Tasks awaiting codebase assignment
    GLOBAL = 'global'  # Global sessions not tied to a specific project


class AgentType(StrEnum):
    """Agent types that determine how tasks are executed."""

    BUILD = 'build'  # Default OpenCode build agent
    ECHO = 'echo'  # Lightweight test agent that echoes input
    NOOP = 'noop'  # Lightweight test agent that does nothing
    REGISTER_CODEBASE = (
        'register_codebase'  # Special task for codebase registration
    )

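# Note: StrEnum members (Python 3.11+) are also plain strings, so they can be
# passed directly as JSON values or HTTP query parameters, e.g.:
#
#     TaskStatus.PENDING == 'pending'   # True
#     f'{AgentType.ECHO}'               # 'echo'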

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger('a2a-worker')

@dataclass
class WorkerConfig:
    """Configuration for the agent worker."""

    server_url: str
    worker_name: str
    worker_id: str = field(default_factory=lambda: str(uuid.uuid4())[:12])
    codebases: List[Dict[str, str]] = field(default_factory=list)
    poll_interval: int = 5  # Fallback poll interval when SSE is unavailable
    opencode_bin: Optional[str] = None
    # Optional override for OpenCode storage location (directory that contains
    # subdirs like project/, session/, message/, part/).
    opencode_storage_path: Optional[str] = None
    # Optional message sync (for session detail view on remote codebases)
    session_message_sync_max_sessions: int = 3
    session_message_sync_max_messages: int = 100
    capabilities: List[str] = field(
        default_factory=lambda: ['opencode', 'build', 'deploy']
    )
    # Max concurrent tasks (bounded worker pool)
    max_concurrent_tasks: int = 2
    # SSE reconnection settings
    sse_reconnect_delay: float = 1.0
    sse_max_reconnect_delay: float = 60.0
    sse_heartbeat_timeout: float = (
        45.0  # Server should send heartbeats every 30s
    )
    # Auth token for SSE endpoint (from A2A_AUTH_TOKEN env var)
    auth_token: Optional[str] = None
    # Email notifications via SendGrid
    sendgrid_api_key: Optional[str] = None
    sendgrid_from_email: Optional[str] = None
    notification_email: Optional[str] = None  # Recipient for task reports
    # Email reply-to configuration for task continuation
    email_inbound_domain: Optional[str] = None  # e.g., 'inbound.codetether.run'
    email_reply_prefix: str = 'task'  # Prefix for reply-to addresses
    # Email debugging options
    email_dry_run: bool = False  # Log emails instead of sending
    email_verbose: bool = False  # Verbose logging for email operations
    # Auto-compaction settings for task handoffs
    compaction_max_tokens: int = 100000  # Trigger compaction above this
    compaction_target_tokens: int = 50000  # Target size after compaction
    auto_summarize_handoffs: bool = (
        True  # Enable auto-summarization for session resumes
    )
    # Agent registration: register this worker as a discoverable agent in the
    # A2A network, so other agents can find it via the discover_agents MCP tool.
    register_as_agent: bool = True  # Auto-register as discoverable agent
    agent_name: Optional[str] = (
        None  # Name for agent discovery (defaults to worker_name)
    )
    agent_description: Optional[str] = None  # Description for agent discovery
    agent_url: Optional[str] = (
        None  # URL where this agent can be reached (optional)
    )
    # Instance ID for unique agent identity (role:instance pattern).
    # If not set, generated as hostname:short_uuid.
    agent_instance_id: Optional[str] = None

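# Minimal configuration sketch (illustrative values only; real runs build the
# config from CLI flags and environment variables):
#
#     config = WorkerConfig(
#         server_url='https://api.codetether.run',
#         worker_name='dev-vm-worker',
#         codebases=[{'name': 'myrepo', 'path': '~/code/myrepo'}],
#         max_concurrent_tasks=2,
#     )

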
@dataclass
class LocalCodebase:
    """A codebase registered with this worker."""

    id: str  # Server-assigned ID
    name: str
    path: str
    description: str = ''


# =============================================================================
# WorkerClient - HTTP/SSE communication with the A2A server
# =============================================================================

class WorkerClient:
    """
    Handles HTTP and SSE communication with the A2A server.

    Responsibilities:
    - Manage aiohttp session lifecycle and connection pooling
    - SSE connection establishment and event handling
    - API calls for task status updates, output streaming
    - Worker registration/unregistration and heartbeat management
    """

    def __init__(self, config: WorkerConfig):
        self.config = config
        self.session: Optional[aiohttp.ClientSession] = None
        # SSE connection state
        self._sse_connected = False
        self._sse_reconnect_delay = config.sse_reconnect_delay
        self._last_heartbeat: float = 0.0

    async def get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session with connection pooling."""
        if self.session is None or self.session.closed:
            # Configure connection pool for better performance under load
            connector = aiohttp.TCPConnector(
                limit=100,  # Total connection pool size
                limit_per_host=30,  # Max connections per host
                ttl_dns_cache=300,  # DNS cache TTL in seconds
                enable_cleanup_closed=True,  # Clean up closed connections
            )
            self.session = aiohttp.ClientSession(
                connector=connector,
                timeout=aiohttp.ClientTimeout(total=30),
                headers={'Content-Type': 'application/json'},
            )
        return self.session

    async def close(self):
        """Close the HTTP session."""
        if self.session is not None and not self.session.closed:
            await self.session.close()
            # Wait for underlying connector to close
            await asyncio.sleep(0.1)

    async def register_worker(
        self,
        models: List[Dict[str, Any]],
        global_codebase_id: Optional[str],
    ) -> bool:
        """Register this worker with the A2A server."""
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/workers/register'

            import platform

            payload = {
                'worker_id': self.config.worker_id,
                'name': self.config.worker_name,
                'capabilities': self.config.capabilities,
                'hostname': platform.node(),  # Cross-platform (works on Windows)
                'models': models,
                'global_codebase_id': global_codebase_id,
            }

            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    logger.info(f'Worker registered successfully: {data}')
                    return True
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Worker registration returned {resp.status}: {text}'
                    )
                    return False

        except Exception as e:
            logger.warning(
                f'Failed to register worker (continuing anyway): {e}'
            )
            return False

    async def unregister_worker(self):
        """Unregister this worker from the A2A server."""
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/workers/{self.config.worker_id}/unregister'

            async with session.post(url) as resp:
                if resp.status == 200:
                    logger.info('Worker unregistered successfully')

        except Exception as e:
            logger.debug(f'Failed to unregister worker: {e}')

    async def send_heartbeat(self) -> bool:
        """Send heartbeat to the A2A server to indicate worker is alive.

        Returns True if heartbeat was successful, False otherwise.
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/workers/{self.config.worker_id}/heartbeat'

            async with session.post(
                url, timeout=aiohttp.ClientTimeout(total=10)
            ) as resp:
                if resp.status == 200:
                    logger.debug('Heartbeat sent successfully')
                    return True
                else:
                    logger.warning(f'Heartbeat returned {resp.status}')
                    return False

        except Exception as e:
            logger.debug(f'Failed to send heartbeat: {e}')
            return False

    async def register_as_agent(
        self,
        agent_name: Optional[str] = None,
        description: Optional[str] = None,
        url: Optional[str] = None,
        routing_capabilities: Optional[List[str]] = None,
    ) -> bool:
        """
        Register this worker as a discoverable agent in the A2A network.

        This makes the worker visible to other agents via the discover_agents
        MCP tool, enabling agent-to-agent communication.

        Uses the role:instance pattern for unique identity:
        - role (agent_name): stable role like "code-reviewer" for routing
        - instance_id: unique per-worker instance for disambiguation

        The full discovery name is "{role}:{instance_id}" to handle multiple
        workers with the same role. Routing (send_to_agent) uses the role.

        Args:
            agent_name: Name for agent discovery (defaults to config.agent_name or worker_name)
            description: Human-readable description of what this agent does
            url: Optional URL where this agent can be reached directly
            routing_capabilities: List of task routing capabilities (e.g., ["pytest", "terraform"])

        Returns:
            True if registration succeeded, False otherwise.
        """
        import platform

        try:
            session = await self.get_session()
            # Use the proper MCP JSON-RPC endpoint
            url_endpoint = f'{self.config.server_url}/mcp/v1/rpc'

            # Use platform.node() for cross-platform hostname (works on Windows too)
            hostname = platform.node()

            # Build instance_id for unique identity.
            # Format: hostname:short_uuid (e.g., "dev-vm:a1b2c3")
            instance_id = (
                self.config.agent_instance_id
                or f'{hostname}:{self.config.worker_id[:6]}'
            )

            # Role is the routing identity (used by send_to_agent)
            role = (
                agent_name or self.config.agent_name or self.config.worker_name
            )

            # Full discovery name is role:instance for uniqueness. This
            # prevents registry collisions when multiple workers share a role.
            discovery_name = f'{role}:{instance_id}'

            # Store the resolved names for heartbeat refresh
            self._agent_role = role
            self._agent_discovery_name = discovery_name

            # Build routing capabilities list for task matching
            caps_list = routing_capabilities or self.config.capabilities or []
            caps_str = ', '.join(caps_list) if caps_list else 'general'

            agent_description = description or (
                f'OpenCode worker agent (role={role}, instance={instance_id}). '
                f'Routing capabilities: {caps_str}'
            )
            agent_url = url or self.config.agent_url or self.config.server_url

            # JSON-RPC 2.0 request to call the register_agent tool.
            # Note: 'capabilities' here is the A2A protocol AgentCapabilities
            # (dict) for streaming/push_notifications - NOT the routing
            # capabilities (list).
            payload = {
                'jsonrpc': '2.0',
                'id': str(uuid.uuid4()),
                'method': 'tools/call',
                'params': {
                    'name': 'register_agent',
                    'arguments': {
                        'name': discovery_name,
                        'description': agent_description,
                        'url': agent_url,
                        # A2A protocol capabilities (dict) - for streaming/push features
                        'capabilities': {
                            'streaming': True,
                            'push_notifications': True,
                        },
                    },
                },
            }

            headers = {'Content-Type': 'application/json'}
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'

            async with session.post(
                url_endpoint,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(
                    total=10
                ),  # Best-effort, don't block startup
            ) as resp:
                if resp.status == 200:
                    result = await resp.json()
                    if result.get('error'):
                        logger.warning(
                            f'Agent registration failed: {result["error"]}'
                        )
                        return False
                    logger.info(
                        f"Registered as discoverable agent: '{discovery_name}' (role='{role}')"
                    )
                    return True
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Agent registration returned {resp.status}: {text}'
                    )
                    return False

        except asyncio.TimeoutError:
            logger.warning(
                'Agent registration timed out (continuing without discovery registration)'
            )
            return False
        except Exception as e:
            logger.warning(f'Failed to register as agent (non-fatal): {e}')
            return False

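    # Naming sketch for register_as_agent() above (values illustrative): with
    # role 'code-reviewer' on host 'dev-vm' and worker_id 'a1b2c3d4e5f6',
    #   instance_id    -> 'dev-vm:a1b2c3'
    #   discovery_name -> 'code-reviewer:dev-vm:a1b2c3'
    # Discovery lists the full name; send_to_agent routes by the bare role.
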
    async def refresh_agent_heartbeat(self) -> bool:
        """
        Refresh the agent's last_seen timestamp to keep it visible in discovery.

        Should be called periodically (every 30-60s). Agents not seen within
        120s are filtered from discover_agents results.

        Returns:
            True if heartbeat was refreshed, False otherwise.
        """
        if (
            not hasattr(self, '_agent_discovery_name')
            or not self._agent_discovery_name
        ):
            return False  # Not registered as agent

        try:
            session = await self.get_session()
            url_endpoint = f'{self.config.server_url}/mcp/v1/rpc'

            payload = {
                'jsonrpc': '2.0',
                'id': str(uuid.uuid4()),
                'method': 'tools/call',
                'params': {
                    'name': 'refresh_agent_heartbeat',
                    'arguments': {
                        'agent_name': self._agent_discovery_name,
                    },
                },
            }

            headers = {'Content-Type': 'application/json'}
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'

            async with session.post(
                url_endpoint,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=5),
            ) as resp:
                if resp.status == 200:
                    result = await resp.json()
                    if result.get('error'):
                        logger.debug(
                            f'Agent heartbeat failed: {result["error"]}'
                        )
                        return False
                    logger.debug(
                        f'Agent heartbeat refreshed: {self._agent_discovery_name}'
                    )
                    return True
                return False

        except Exception as e:
            logger.debug(f'Agent heartbeat error: {e}')
            return False

    async def unregister_agent(self) -> bool:
        """
        Unregister this worker from the agent discovery registry.

        Called during graceful shutdown.

        Returns:
            True if unregistration succeeded, False otherwise.
        """
        # Note: There's no explicit unregister_agent MCP tool currently,
        # but the agent will be marked inactive when heartbeats stop (TTL).
        agent_name = (
            getattr(self, '_agent_discovery_name', None)
            or self.config.agent_name
            or self.config.worker_name
        )
        logger.debug(
            f"Agent '{agent_name}' will be filtered from discovery after TTL expires"
        )
        return True

    async def register_codebase(
        self, name: str, path: str, description: str = ''
    ) -> Optional[str]:
        """Register a local codebase with the A2A server.

        Returns the server-assigned codebase ID, or None on failure.
        """
        # Validate path exists locally
        if not os.path.isdir(path):
            logger.error(f'Codebase path does not exist: {path}')
            return None

        # Normalize for comparisons / de-duping when re-registering.
        normalized_path = os.path.abspath(os.path.expanduser(path))

        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/codebases'

            payload = {
                'name': name,
                'path': normalized_path,
                'description': description,
                'worker_id': self.config.worker_id,  # Associate with this worker
            }

            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    codebase_data = data.get('codebase', data)
                    codebase_id = codebase_data.get('id')

                    logger.info(
                        f"Registered codebase '{name}' (ID: {codebase_id}) at {path}"
                    )
                    return codebase_id
                else:
                    text = await resp.text()
                    logger.error(
                        f'Failed to register codebase: {resp.status} - {text}'
                    )
                    return None

        except Exception as e:
            logger.error(f'Failed to register codebase: {e}')
            return None

    async def get_pending_tasks(
        self, codebase_ids: List[str]
    ) -> List[Dict[str, Any]]:
        """Get pending tasks from the server (fallback polling method).

        Note: codebase_ids is currently unused by this fallback path; the
        request only filters by status.
        """
        try:
            session = await self.get_session()

            url = f'{self.config.server_url}/v1/opencode/tasks'
            params = {
                'status': TaskStatus.PENDING,
            }

            async with session.get(url, params=params) as resp:
                if resp.status == 200:
                    tasks = await resp.json()
                    return tasks
                else:
                    return []

        except Exception as e:
            logger.debug(f'Failed to get pending tasks: {e}')
            return []

    async def claim_task(self, task_id: str) -> bool:
        """
        Atomically claim a task on the server.

        Returns True if claim succeeded, False if task was already claimed
        by another worker.
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/worker/tasks/claim'

            # Build headers including auth token if available
            headers = {'Content-Type': 'application/json'}
            headers['X-Worker-ID'] = self.config.worker_id
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'

            payload = {'task_id': task_id}

            async with session.post(
                url,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                if resp.status == 200:
                    logger.info(f'Successfully claimed task {task_id}')
                    return True
                elif resp.status == 409:
                    # Task already claimed by another worker
                    logger.debug(
                        f'Task {task_id} already claimed by another worker'
                    )
                    return False
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Failed to claim task {task_id}: {resp.status} - {text}'
                    )
                    # On unexpected errors, don't process to be safe
                    return False

        except Exception as e:
            logger.warning(f'Error claiming task {task_id}: {e}')
            # On network errors, don't process to avoid potential duplicates
            return False

    async def release_task(self, task_id: str) -> bool:
        """
        Release a task claim on the server after processing.

        This notifies the server that the worker is done with the task
        (whether successful or failed).
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/worker/tasks/release'

            # Build headers including auth token if available
            headers = {'Content-Type': 'application/json'}
            headers['X-Worker-ID'] = self.config.worker_id
            if self.config.auth_token:
                headers['Authorization'] = f'Bearer {self.config.auth_token}'

            payload = {'task_id': task_id}

            async with session.post(
                url,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                if resp.status == 200:
                    logger.debug(f'Released task {task_id}')
                    return True
                else:
                    text = await resp.text()
                    logger.debug(
                        f'Failed to release task {task_id}: {resp.status} - {text}'
                    )
                    return False

        except Exception as e:
            logger.debug(f'Error releasing task {task_id}: {e}')
            return False

    async def stream_task_output(self, task_id: str, output: str):
        """Stream output chunk to the server."""
        if not output:
            return
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/tasks/{task_id}/output'

            payload = {
                'worker_id': self.config.worker_id,
                'output': output,
                'timestamp': datetime.now().isoformat(),
            }

            async with session.post(url, json=payload) as resp:
                if resp.status != 200:
                    logger.debug(f'Failed to stream output: {resp.status}')
        except Exception as e:
            logger.debug(f'Failed to stream output: {e}')

    async def update_task_status(
        self,
        task_id: str,
        status: str,
        result: Optional[str] = None,
        error: Optional[str] = None,
        session_id: Optional[str] = None,
        max_retries: int = 4,
        base_delay: float = 1.0,
    ):
        """Update task status on the server with exponential backoff retry.

        Status updates are critical for maintaining consistency between worker
        and server state. This method retries failed updates with exponential
        backoff to handle transient network issues.

        The operation is idempotent - multiple updates to the same status are
        safe as the server will simply acknowledge the current state.

        Args:
            task_id: The task ID to update
            status: New status value
            result: Optional result data
            error: Optional error message
            session_id: Optional session ID
            max_retries: Maximum number of retry attempts (default: 4, total 5 attempts)
            base_delay: Initial delay in seconds before first retry (default: 1.0)
        """
        url = f'{self.config.server_url}/v1/opencode/tasks/{task_id}/status'

        payload = {
            'status': status,
            'worker_id': self.config.worker_id,
        }
        if session_id:
            payload['session_id'] = session_id
        if result:
            payload['result'] = result
        if error:
            payload['error'] = error

        last_exception: Optional[Exception] = None
        last_status_code: Optional[int] = None
        last_response_text: Optional[str] = None

        for attempt in range(max_retries + 1):
            try:
                session = await self.get_session()
                async with session.put(url, json=payload) as resp:
                    if resp.status == 200:
                        if attempt > 0:
                            logger.info(
                                f'Task {task_id} status update to "{status}" succeeded on retry {attempt}'
                            )
                        return  # Success

                    last_status_code = resp.status
                    last_response_text = await resp.text()

                    # Don't retry client errors (4xx) except 429 (rate limit)
                    if 400 <= resp.status < 500 and resp.status != 429:
                        logger.warning(
                            f'Task {task_id} status update failed with client error: '
                            f'{resp.status} - {last_response_text}'
                        )
                        return  # Don't retry client errors

            except asyncio.CancelledError:
                raise  # Don't retry on cancellation
            except Exception as e:
                last_exception = e

            # Calculate delay with exponential backoff (1s, 2s, 4s, 8s)
            if attempt < max_retries:
                delay = base_delay * (2**attempt)
                logger.warning(
                    f'Task {task_id} status update to "{status}" failed '
                    f'(attempt {attempt + 1}/{max_retries + 1}), '
                    f'retrying in {delay:.1f}s...'
                )
                await asyncio.sleep(delay)

        # All retries exhausted - log the final failure
        if last_exception:
            logger.error(
                f'Task {task_id} status update to "{status}" failed after '
                f'{max_retries + 1} attempts. Last error: {last_exception}'
            )
        elif last_status_code:
            logger.error(
                f'Task {task_id} status update to "{status}" failed after '
                f'{max_retries + 1} attempts. Last response: {last_status_code} - '
                f'{last_response_text}'
            )

    async def sync_api_keys_from_server(
        self, user_id: Optional[str] = None
    ) -> bool:
        """
        Sync API keys from the server (Vault-backed) to local OpenCode auth.json.

        This allows users to manage their API keys in the web UI and have them
        automatically synced to workers.

        Args:
            user_id: Optional user ID to sync keys for. If not provided,
                syncs keys for the codebase owner.

        Returns:
            True if sync was successful, False otherwise.
        """
        try:
            session = await self.get_session()

            # Build sync URL with optional user_id
            sync_url = f'{self.config.server_url}/v1/opencode/api-keys/sync'
            params = {'worker_id': self.config.worker_id}
            if user_id:
                params['user_id'] = user_id

            async with session.get(sync_url, params=params) as resp:
                if resp.status != 200:
                    logger.warning(
                        f'Failed to sync API keys: HTTP {resp.status}'
                    )
                    return False

                data = await resp.json()

            # Get paths for auth.json and opencode.json
            data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
                '~/.local/share'
            )
            config_home = os.environ.get(
                'XDG_CONFIG_HOME'
            ) or os.path.expanduser('~/.config')

            auth_path = Path(data_home) / 'opencode' / 'auth.json'
            config_path = Path(config_home) / 'opencode' / 'opencode.json'

            # Merge server keys with existing local auth.json
            server_auth = data.get('auth', {})
            if server_auth:
                existing_auth = {}
                if auth_path.exists():
                    try:
                        with open(auth_path, 'r', encoding='utf-8') as f:
                            existing_auth = json.load(f)
                    except Exception as e:
                        logger.warning(
                            f'Failed to read existing auth.json: {e}'
                        )

                # Merge: server keys override local for same provider
                merged_auth = {**existing_auth, **server_auth}

                # Write merged auth
                auth_path.parent.mkdir(parents=True, exist_ok=True)
                with open(auth_path, 'w', encoding='utf-8') as f:
                    json.dump(merged_auth, f, indent=2)

                logger.info(
                    f'Synced {len(server_auth)} API keys from server '
                    f'(total: {len(merged_auth)} providers)'
                )

            # Merge server provider configs with existing opencode.json
            server_providers = data.get('providers', {})
            if server_providers:
                existing_config = {}
                if config_path.exists():
                    try:
                        with open(config_path, 'r', encoding='utf-8') as f:
                            existing_config = json.load(f)
                    except Exception as e:
                        logger.warning(
                            f'Failed to read existing opencode.json: {e}'
                        )

                # Merge provider configs
                existing_providers = existing_config.get('provider', {})
                merged_providers = {**existing_providers, **server_providers}
                existing_config['provider'] = merged_providers

                # Write merged config
                config_path.parent.mkdir(parents=True, exist_ok=True)
                with open(config_path, 'w', encoding='utf-8') as f:
                    json.dump(existing_config, f, indent=2)

                logger.info(
                    f'Synced {len(server_providers)} provider configs from server'
                )

            return True

        except Exception as e:
            logger.error(f'Failed to sync API keys from server: {e}')
            return False

    async def sync_sessions(
        self,
        codebase_id: str,
        sessions: List[Dict[str, Any]],
    ) -> int:
        """Sync sessions to the server for a codebase.

        Returns the HTTP status code (0 if the request failed locally).
        """
        try:
            session = await self.get_session()
            url = f'{self.config.server_url}/v1/opencode/codebases/{codebase_id}/sessions/sync'
            payload = {
                'worker_id': self.config.worker_id,
                'sessions': sessions,
            }
            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    logger.debug(
                        f'Synced {len(sessions)} sessions (codebase_id={codebase_id})'
                    )
                else:
                    text = await resp.text()
                    logger.warning(
                        f'Session sync failed for codebase_id={codebase_id}: {resp.status} {text[:200]}'
                    )
                return resp.status
        except Exception as e:
            logger.debug(f'Failed to sync sessions: {e}')
            return 0

    async def sync_session_messages(
        self,
        codebase_id: str,
        session_id: str,
        messages: List[Dict[str, Any]],
    ) -> bool:
        """Sync messages for a single session. Returns True on HTTP 200."""
        try:
            if not messages:
                return False

            session = await self.get_session()
            url = (
                f'{self.config.server_url}/v1/opencode/codebases/{codebase_id}'
                f'/sessions/{session_id}/messages/sync'
            )
            payload = {
                'worker_id': self.config.worker_id,
                'messages': messages,
            }
            async with session.post(url, json=payload) as resp:
                if resp.status == 200:
                    logger.debug(
                        f'Synced {len(messages)} messages for session {session_id}'
                    )
                    return True
                else:
                    text = await resp.text()
                    logger.debug(f'Message sync returned {resp.status}: {text}')
                    return False
        except Exception as e:
            logger.debug(f'Message sync failed for session {session_id}: {e}')
            return False

    @property
    def sse_connected(self) -> bool:
        return self._sse_connected

    @sse_connected.setter
    def sse_connected(self, value: bool):
        self._sse_connected = value

    @property
    def sse_reconnect_delay(self) -> float:
        return self._sse_reconnect_delay

    @sse_reconnect_delay.setter
    def sse_reconnect_delay(self, value: float):
        self._sse_reconnect_delay = value

    @property
    def last_heartbeat(self) -> float:
        return self._last_heartbeat

    @last_heartbeat.setter
    def last_heartbeat(self, value: float):
        self._last_heartbeat = value

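
# Illustrative end-to-end sketch of the client calls above (placeholder task
# ID; not part of the worker's real task loop, which reacts to SSE events):
async def _task_lifecycle_example(client: WorkerClient) -> None:
    task_id = 'task-123'  # Hypothetical ID, normally pushed by the server
    if not await client.claim_task(task_id):
        return  # Another worker claimed it first
    await client.update_task_status(task_id, TaskStatus.RUNNING)
    await client.stream_task_output(task_id, 'hello from the worker\n')
    await client.update_task_status(
        task_id, TaskStatus.COMPLETED, result='done'
    )
    await client.release_task(task_id)

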
# =============================================================================
# EmailNotificationService - SendGrid email notifications
# =============================================================================


def _sanitize_email_for_log(email: str) -> str:
    """
    Sanitize email address for logging (mask local part).

    Security: Prevents sensitive email addresses from appearing in logs.

    Args:
        email: Full email address

    Returns:
        Sanitized email (e.g., 'r***y@example.com')
    """
    if not email or '@' not in email:
        return '***@***'
    local, domain = email.rsplit('@', 1)
    if len(local) <= 2:
        return f'***@{domain}'
    return f'{local[0]}***{local[-1]}@{domain}'


class EmailNotificationService:
    """
    Handles email notifications via SendGrid.

    Sends task completion/failure reports to configured recipients.

    Supports dry-run mode for testing (logs instead of sends) and
    verbose logging for debugging email operations.
    """

    def __init__(self, config: WorkerConfig):
        self.config = config
        self._session: Optional[aiohttp.ClientSession] = None
        self._dry_run = config.email_dry_run
        self._verbose = config.email_verbose

    def is_configured(self) -> bool:
        """Check if email notifications are properly configured."""
        return bool(
            self.config.sendgrid_api_key
            and self.config.sendgrid_from_email
            and self.config.notification_email
        )

    def get_config_status(self) -> Dict[str, Any]:
        """
        Get email configuration status for debugging.

        Returns dict with configuration details and any issues.
        """
        issues = []

        if not self.config.sendgrid_api_key:
            issues.append('SENDGRID_API_KEY not set')
        elif not self.config.sendgrid_api_key.startswith('SG.'):
            issues.append(
                'SENDGRID_API_KEY does not appear valid (should start with SG.)'
            )

        if not self.config.sendgrid_from_email:
            issues.append('SENDGRID_FROM_EMAIL not set')
        elif '@' not in self.config.sendgrid_from_email:
            issues.append(
                'SENDGRID_FROM_EMAIL does not appear to be a valid email'
            )

        if not self.config.notification_email:
            issues.append('notification_email not set')

        return {
            'configured': self.is_configured(),
            'dry_run': self._dry_run,
            'verbose': self._verbose,
            'sendgrid_api_key_set': bool(self.config.sendgrid_api_key),
            'sendgrid_from_email': _sanitize_email_for_log(
                self.config.sendgrid_from_email or ''
            ),
            'notification_email': _sanitize_email_for_log(
                self.config.notification_email or ''
            ),
            'inbound_domain': self.config.email_inbound_domain,
            'reply_prefix': self.config.email_reply_prefix,
            'issues': issues,
        }

    async def _get_session(self) -> aiohttp.ClientSession:
        """Get or create HTTP session for SendGrid API."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=30)
            )
        return self._session

    async def close(self):
        """Close the HTTP session."""
        if self._session and not self._session.closed:
            await self._session.close()

    def _build_reply_to_address(
        self,
        session_id: Optional[str],
        codebase_id: Optional[str] = None,
    ) -> Optional[str]:
        """
        Build the reply-to address for email replies to continue tasks.

        Format: {prefix}+{session_id}@{domain}
        Or:     {prefix}+{session_id}+{codebase_id}@{domain}
        (e.g., 'task+ses_abc123@inbound.codetether.run')
        """
        if not session_id:
            return None
        if not self.config.email_inbound_domain:
            return None

        prefix = self.config.email_reply_prefix or 'task'
        domain = self.config.email_inbound_domain

        if codebase_id:
            return f'{prefix}+{session_id}+{codebase_id}@{domain}'
        return f'{prefix}+{session_id}@{domain}'

    async def send_task_report(
        self,
        task_id: str,
        title: str,
        status: str,
        result: Optional[str] = None,
        error: Optional[str] = None,
        duration_ms: Optional[int] = None,
        session_id: Optional[str] = None,
        codebase_id: Optional[str] = None,
    ) -> bool:
        """Send a task completion/failure email report.

        Returns True if email was sent successfully (or logged in dry-run mode).

        In dry-run mode (--email-dry-run), emails are logged instead of sent.
        In verbose mode (--email-verbose), additional debugging info is logged.
        """
        if self._verbose:
            logger.info(
                f'[EMAIL-DEBUG] send_task_report called: task_id={task_id}, '
                f'status={status}, session_id={session_id}'
            )

        if not self.is_configured():
            if self._verbose:
                config_status = self.get_config_status()
                logger.info(
                    f'[EMAIL-DEBUG] Not configured: {config_status["issues"]}'
                )
            else:
                logger.debug('Email notifications not configured, skipping')
            return False

        try:
            session = await self._get_session()

            # Format duration
            duration_str = 'N/A'
            if duration_ms:
                seconds = duration_ms // 1000
                minutes = seconds // 60
                if minutes > 0:
                    duration_str = f'{minutes}m {seconds % 60}s'
                else:
                    duration_str = f'{seconds}s'

            # Build email content
            status_color = '#22c55e' if status == 'completed' else '#ef4444'
            status_icon = '✓' if status == 'completed' else '✗'

            result_section = ''
            if result and status == 'completed':
                # Try to parse and extract meaningful content
                display_result = result
                import json as json_module
                import html as html_module

                # Handle NDJSON (newline-delimited JSON) from OpenCode
                # streaming: extract text content from streaming events.
                text_parts = []
                try:
                    lines = result.strip().split('\n')
                    for line in lines:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            parsed = json_module.loads(line)
                            # OpenCode streaming format: look for text events
                            if isinstance(parsed, dict):
                                event_type = parsed.get('type', '')
                                part = parsed.get('part', {})

                                # Extract text from "text" type events
                                if event_type == 'text' and isinstance(
                                    part, dict
                                ):
                                    text = part.get('text', '')
                                    if text:
                                        text_parts.append(text)
                                # Also check for direct text field
                                elif 'text' in parsed and isinstance(
                                    parsed['text'], str
                                ):
                                    text_parts.append(parsed['text'])
                                # Check for result/output/message fields
                                elif 'result' in parsed:
                                    text_parts.append(str(parsed['result']))
                                elif 'output' in parsed:
                                    text_parts.append(str(parsed['output']))
                                elif 'message' in parsed and isinstance(
                                    parsed['message'], str
                                ):
                                    text_parts.append(parsed['message'])
                        except json_module.JSONDecodeError:
                            # Not JSON, might be plain text
                            if line and not line.startswith('{'):
                                text_parts.append(line)

                    if text_parts:
                        # Join all extracted text
                        display_result = ' '.join(text_parts)
                    else:
                        # Fallback: try parsing as single JSON
                        try:
                            parsed = json_module.loads(result)
                            if isinstance(parsed, dict):
                                for key in [
                                    'result',
                                    'output',
                                    'message',
                                    'content',
                                    'response',
                                    'text',
                                ]:
                                    if key in parsed:
                                        display_result = str(parsed[key])
                                        break
                        except json_module.JSONDecodeError:
                            pass

                except Exception:
                    # If all parsing fails, use as-is
                    pass

                # Escape HTML for safety
                display_result = html_module.escape(display_result)

                # Convert newlines to <br> for display
                display_result = display_result.replace('\n', '<br>')

                truncated = (
                    display_result[:3000] + '...'
                    if len(display_result) > 3000
                    else display_result
                )
                result_section = f"""
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151; width: 140px; vertical-align: top;">Output</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">
                        <div style="font-size: 14px; line-height: 1.6; color: #1f2937;">{truncated}</div>
                    </td>
                </tr>"""

            error_section = ''
            if error:
                truncated = error[:1000] + '...' if len(error) > 1000 else error
                error_section = f"""
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151; width: 140px;">Error</td>
                    <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">
                        <pre style="margin: 0; white-space: pre-wrap; word-break: break-word; font-family: monospace; font-size: 13px; background: #fef2f2; padding: 12px; border-radius: 6px; color: #dc2626;">{truncated}</pre>
                    </td>
                </tr>"""

            # Build footer with reply instructions if email reply is configured
            reply_enabled = bool(
                self.config.email_inbound_domain and session_id
            )
            if reply_enabled:
                footer_html = f"""
                <div style="background: #f9fafb; padding: 16px; text-align: center;">
                    <p style="margin: 0 0 8px 0; font-size: 13px; color: #374151; font-weight: 500;">
                        Reply to this email to continue the conversation
                    </p>
                    <p style="margin: 0; font-size: 12px; color: #6b7280;">
                        Your reply will be sent to the worker to continue working on this task.
                    </p>
                    <p style="margin: 8px 0 0 0; font-size: 11px; color: #9ca3af;">
                        Sent by A2A Worker - {self.config.worker_name}
                    </p>
                </div>"""
            else:
                footer_html = f"""
                <div style="background: #f9fafb; padding: 16px; text-align: center; font-size: 12px; color: #6b7280;">
                    Sent by A2A Worker - {self.config.worker_name}
                </div>"""

            html = f"""
            <!DOCTYPE html>
            <html>
            <head>
                <meta charset="utf-8">
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
            </head>
            <body style="margin: 0; padding: 20px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; background-color: #f3f4f6;">
                <div style="max-width: 600px; margin: 0 auto; background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
                    <div style="background: linear-gradient(135deg, #1e293b 0%, #334155 100%); padding: 24px; text-align: center;">
                        <h1 style="margin: 0; color: white; font-size: 20px; font-weight: 600;">A2A Task Report</h1>
                    </div>
                    <div style="padding: 24px;">
                        <div style="display: inline-block; padding: 6px 12px; border-radius: 20px; background: {status_color}20; color: {status_color}; font-weight: 600; font-size: 14px; margin-bottom: 16px;">
                            {status_icon} {status.upper()}
                        </div>
                        <table style="width: 100%; border-collapse: collapse; margin-top: 16px;">
                            <tr>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151; width: 140px;">Task ID</td>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-family: monospace; font-size: 13px;">{task_id}</td>
                            </tr>
                            <tr>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Title</td>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">{title}</td>
                            </tr>
                            <tr>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Session ID</td>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-family: monospace; font-size: 13px;">{session_id or 'N/A'}</td>
                            </tr>
                            <tr>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Worker</td>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">{self.config.worker_name}</td>
                            </tr>
                            <tr>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb; font-weight: 600; color: #374151;">Duration</td>
                                <td style="padding: 12px; border-bottom: 1px solid #e5e7eb;">{duration_str}</td>
                            </tr>
                            {result_section}
                            {error_section}
                        </table>
                    </div>
                    {footer_html}
                </div>
            </body>
            </html>"""

            subject = f'[A2A] Task {status}: {title}'

            payload = {
                'personalizations': [
                    {'to': [{'email': self.config.notification_email}]}
                ],
                'from': {'email': self.config.sendgrid_from_email},
                'subject': subject,
                'content': [{'type': 'text/html', 'value': html}],
            }

            # Add reply-to address if configured for email reply continuation
            reply_to = self._build_reply_to_address(session_id, codebase_id)
            if reply_to:
                payload['reply_to'] = {'email': reply_to}
                if self._verbose:
                    logger.info(f'[EMAIL-DEBUG] Reply-to set to: {reply_to}')
                else:
                    logger.debug(f'Email reply-to set to: {reply_to}')

            # Verbose logging of email details (with sanitized addresses)
            if self._verbose:
                logger.info(
                    f'[EMAIL-DEBUG] Email payload: subject="{subject}", '
                    f'from={_sanitize_email_for_log(self.config.sendgrid_from_email or "")}, '
                    f'to={_sanitize_email_for_log(self.config.notification_email or "")}, '
                    f'reply_to={reply_to or "none"}'
                )

            # Dry-run mode: log instead of sending
            if self._dry_run:
                logger.info(
                    f'[EMAIL-DRY-RUN] Would send email for task {task_id}:\n'
                    f'  Subject: {subject}\n'
                    f'  To: {_sanitize_email_for_log(self.config.notification_email or "")}\n'
                    f'  From: {_sanitize_email_for_log(self.config.sendgrid_from_email or "")}\n'
                    f'  Reply-To: {reply_to or "none"}\n'
                    f'  Status: {status}'
                )
                return True  # Return success in dry-run mode

            headers = {
                'Authorization': f'Bearer {self.config.sendgrid_api_key}',
                'Content-Type': 'application/json',
            }

            if self._verbose:
                logger.info('[EMAIL-DEBUG] Sending to SendGrid API...')

            async with session.post(
                'https://api.sendgrid.com/v3/mail/send',
                json=payload,
                headers=headers,
            ) as resp:
                if resp.status in (200, 202):
                    logger.info(
                        f'Email report sent for task {task_id} to '
                        f'{_sanitize_email_for_log(self.config.notification_email or "")}'
                    )
                    return True
                else:
                    text = await resp.text()
                    logger.error(
                        f'Failed to send email: {resp.status} - {text}'
                    )
                    if self._verbose:
                        logger.error(f'[EMAIL-DEBUG] SendGrid response: {text}')
                    return False

        except Exception as e:
            logger.error(f'Failed to send email notification: {e}')
            if self._verbose:
                import traceback

                logger.error(
                    f'[EMAIL-DEBUG] Traceback: {traceback.format_exc()}'
                )
            return False

    async def send_test_email(self) -> Dict[str, Any]:
        """
        Send a test email to validate email configuration.

        This sends a simple test email to verify SendGrid is properly configured.
        Returns a dict with success status and any errors.

        Used by the --test-email CLI flag.
        """
        result: Dict[str, Any] = {
            'success': False,
            'configured': self.is_configured(),
            'config_status': self.get_config_status(),
            'dry_run': self._dry_run,
            'message': '',
        }

        if not self.is_configured():
            result['message'] = (
                'Email not fully configured. Issues: '
                + ', '.join(result['config_status']['issues'])
            )
            return result

        # Generate a test task ID
        test_task_id = f'test-{uuid.uuid4().hex[:8]}'
        test_session_id = f'ses_test_{uuid.uuid4().hex[:8]}'

        logger.info(f'Sending test email (dry_run={self._dry_run})...')
        logger.info(
            f'  To: {_sanitize_email_for_log(self.config.notification_email or "")}'
        )
        logger.info(
            f'  From: {_sanitize_email_for_log(self.config.sendgrid_from_email or "")}'
        )

        try:
            success = await self.send_task_report(
                task_id=test_task_id,
                title='Test Email from A2A Worker',
                status='completed',
                result='This is a test email to verify your email notification configuration is working correctly.',
                duration_ms=1234,
                session_id=test_session_id,
            )

            if success:
                result['success'] = True
                if self._dry_run:
                    result['message'] = (
                        'Test email logged successfully (dry-run mode - no email sent)'
                    )
                else:
                    result['message'] = (
                        f'Test email sent successfully to {_sanitize_email_for_log(self.config.notification_email or "")}'
                    )
            else:
                result['message'] = (
                    'Failed to send test email - check logs for details'
                )

        except Exception as e:
            result['message'] = f'Exception while sending test email: {e}'
            logger.error(f'Test email failed: {e}')

        return result
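

# Illustrative dry-run sketch (placeholder values): with email_dry_run=True,
# send_task_report() logs the email instead of calling SendGrid, which is a
# cheap way to check the reply-to plumbing end to end.
async def _email_dry_run_example() -> None:
    cfg = WorkerConfig(
        server_url='https://api.codetether.run',
        worker_name='demo-worker',
        sendgrid_api_key='SG.placeholder',
        sendgrid_from_email='worker@example.com',
        notification_email='you@example.com',
        email_inbound_domain='inbound.codetether.run',
        email_dry_run=True,
    )
    svc = EmailNotificationService(cfg)
    # Reply-To becomes 'task+ses_demo123@inbound.codetether.run'
    await svc.send_task_report(
        task_id='task-demo',
        title='Dry-run email check',
        status='completed',
        result='All good.',
        session_id='ses_demo123',
    )
    await svc.close()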
+
1523
+
1524
+ # =============================================================================
1525
+ # ConfigManager - Configuration and setup
1526
+ # =============================================================================
1527
+
1528
+
1529
+ class ConfigManager:
1530
+ """
1531
+ Handles configuration and setup for the worker.
1532
+
1533
+ Responsibilities:
1534
+ - Finding OpenCode binary
1535
+ - Managing storage paths
1536
+ - Provider authentication discovery
1537
+ - Model discovery
1538
+ """
1539
+
1540
+ def __init__(self, config: WorkerConfig):
1541
+ self.config = config
1542
+ self._opencode_storage_path: Optional[Path] = None
1543
+
1544
+ def find_opencode_binary(self) -> str:
1545
+ """Find the opencode binary."""
1546
+ locations = [
1547
+ str(Path.home() / '.local' / 'bin' / 'opencode'),
1548
+ str(Path.home() / 'bin' / 'opencode'),
1549
+ '/usr/local/bin/opencode',
1550
+ '/usr/bin/opencode',
1551
+ # Check in the A2A project
1552
+ str(
1553
+ Path(__file__).parent.parent
1554
+ / 'opencode'
1555
+ / 'packages'
1556
+ / 'opencode'
1557
+ / 'bin'
1558
+ / 'opencode'
1559
+ ),
1560
+ ]
1561
+
1562
+ for loc in locations:
1563
+ if Path(loc).exists() and os.access(loc, os.X_OK):
1564
+ logger.info(f'Found opencode at: {loc}')
1565
+ return loc
1566
+
1567
+ # Try PATH
1568
+ try:
1569
+ result = subprocess.run(
1570
+ ['which', 'opencode'], capture_output=True, text=True
1571
+ )
1572
+ if result.returncode == 0:
1573
+ return result.stdout.strip()
1574
+ except Exception as e:
1575
+ logger.debug(f'Binary search via PATH failed: {e}')
1576
+
1577
+ logger.warning('OpenCode binary not found, some features may not work')
1578
+ return 'opencode'
1579
+
1580
+ def get_authenticated_providers(self) -> set:
1581
+ """Get set of provider IDs that have authentication configured."""
1582
+ authenticated = set()
1583
+ try:
1584
+ data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
1585
+ '~/.local/share'
1586
+ )
1587
+ auth_path = (
1588
+ Path(os.path.expanduser(data_home)) / 'opencode' / 'auth.json'
1589
+ )
1590
+ if auth_path.exists():
1591
+ with open(auth_path, 'r', encoding='utf-8') as f:
1592
+ auth_data = json.load(f)
1593
+ for provider_id, provider_auth in auth_data.items():
1594
+ if isinstance(provider_auth, dict):
1595
+ # Check if provider has valid auth (key or oauth tokens)
1596
+ has_key = bool(provider_auth.get('key'))
1597
+ has_oauth = bool(
1598
+ provider_auth.get('access')
1599
+ or provider_auth.get('refresh')
1600
+ )
1601
+ if has_key or has_oauth:
1602
+ authenticated.add(provider_id)
1603
+ logger.debug(
1604
+ f"Provider '{provider_id}' has authentication configured"
1605
+ )
1606
+ logger.info(
1607
+ f'Found {len(authenticated)} authenticated providers: {sorted(authenticated)}'
1608
+ )
1609
+ except Exception as e:
1610
+ logger.warning(f'Failed to read OpenCode auth.json: {e}')
1611
+ return authenticated
1612
+
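To make the authentication rule concrete, here is a self-contained sketch of the `auth.json` shapes the reader above accepts; all provider entries and token values are invented:

```python
# Invented auth.json entries illustrating what counts as "authenticated":
example_auth = {
    'anthropic': {'key': 'sk-ant-fake'},                # API-key auth
    'openai': {'access': 'tok-a', 'refresh': 'tok-r'},  # OAuth-style auth
    'google': {},                                       # no credentials -> skipped
}

def authenticated_ids(auth_data):
    # Same rule as above: a non-empty 'key', or either OAuth token field,
    # marks the provider as usable.
    return {
        pid
        for pid, auth in auth_data.items()
        if isinstance(auth, dict)
        and (auth.get('key') or auth.get('access') or auth.get('refresh'))
    }

assert authenticated_ids(example_auth) == {'anthropic', 'openai'}
```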
1613
+ async def get_available_models(
1614
+ self, opencode_bin: str
1615
+ ) -> List[Dict[str, Any]]:
1616
+ """Fetch available models from local OpenCode instance.
1617
+
1618
+ Only returns models from providers that have authentication configured.
1619
+ """
1620
+ # Get authenticated providers first
1621
+ authenticated_providers = self.get_authenticated_providers()
1622
+ if not authenticated_providers:
1623
+ logger.warning(
1624
+ 'No authenticated providers found - no models will be registered'
1625
+ )
1626
+ return []
1627
+
1628
+ all_models = []
1629
+
1630
+ # Query the default OpenCode HTTP port; no fallback ports are probed
1631
+ port = 9777
1632
+ try:
1633
+ url = f'http://localhost:{port}/provider'
1634
+ async with aiohttp.ClientSession() as session:
1635
+ async with session.get(
1636
+ url, timeout=aiohttp.ClientTimeout(total=2)
1637
+ ) as resp:
1638
+ if resp.status == 200:
1639
+ data = await resp.json()
1640
+ all_providers = data.get('all', [])
1641
+ for provider in all_providers:
1642
+ provider_id = provider.get('id')
1643
+ provider_name = provider.get('name', provider_id)
1644
+ for model_id, model_info in provider.get(
1645
+ 'models', {}
1646
+ ).items():
1647
+ all_models.append(
1648
+ {
1649
+ 'id': f'{provider_id}/{model_id}',
1650
+ 'name': model_info.get(
1651
+ 'name', model_id
1652
+ ),
1653
+ 'provider': provider_name,
1654
+ 'provider_id': provider_id,
1655
+ 'capabilities': {
1656
+ 'reasoning': model_info.get(
1657
+ 'reasoning', False
1658
+ ),
1659
+ 'attachment': model_info.get(
1660
+ 'attachment', False
1661
+ ),
1662
+ 'tool_call': model_info.get(
1663
+ 'tool_call', False
1664
+ ),
1665
+ },
1666
+ }
1667
+ )
1668
+ except Exception as e:
1669
+ # OpenCode might not be running
1670
+ logger.debug(f'Model discovery via API failed: {e}')
1671
+
1672
+ # Fallback: Try CLI if no models found via API
1673
+ if not all_models:
1674
+ try:
1675
+ logger.info(f'Trying CLI: {opencode_bin} models')
1676
+ if opencode_bin and os.path.exists(opencode_bin):
1677
+ proc = await asyncio.create_subprocess_exec(
1678
+ opencode_bin,
1679
+ 'models',
1680
+ stdout=asyncio.subprocess.PIPE,
1681
+ stderr=asyncio.subprocess.PIPE,
1682
+ )
1683
+ stdout, stderr = await proc.communicate()
1684
+ if proc.returncode == 0:
1685
+ lines = stdout.decode().strip().splitlines()
1686
+ for line in lines:
1687
+ line = line.strip()
1688
+ if not line:
1689
+ continue
1690
+ # Format is provider/model
1691
+ parts = line.split('/', 1)
1692
+ if len(parts) == 2:
1693
+ provider, model_name = parts
1694
+ all_models.append(
1695
+ {
1696
+ 'id': line,
1697
+ 'name': model_name,
1698
+ 'provider': provider,
1699
+ 'provider_id': provider,
1700
+ 'capabilities': {
1701
+ 'reasoning': False,
1702
+ 'attachment': False,
1703
+ 'tool_call': True,
1704
+ },
1705
+ }
1706
+ )
1707
+ else:
1708
+ logger.warning(
1709
+ f'CLI failed with code {proc.returncode}: {stderr.decode()}'
1710
+ )
1711
+ else:
1712
+ logger.warning(
1713
+ f'OpenCode binary not found or not executable: {opencode_bin}'
1714
+ )
1715
+ except Exception as e:
1716
+ logger.warning(f'Failed to list models via CLI: {e}')
1717
+
1718
+ # Filter to only authenticated providers
1719
+ authenticated_models = []
1720
+ for model in all_models:
1721
+ provider_id = model.get('provider_id') or model.get('provider', '')
1722
+ if provider_id in authenticated_providers:
1723
+ authenticated_models.append(model)
1724
+
1725
+ logger.info(
1726
+ f'Discovered {len(all_models)} total models, '
1727
+ f'{len(authenticated_models)} from authenticated providers'
1728
+ )
1729
+
1730
+ if authenticated_models:
1731
+ providers_with_models = sorted(
1732
+ set(
1733
+ m.get('provider_id', m.get('provider'))
1734
+ for m in authenticated_models
1735
+ )
1736
+ )
1737
+ logger.info(
1738
+ f'Authenticated providers with models: {providers_with_models}'
1739
+ )
1740
+
1741
+ return authenticated_models
1742
+
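The `/provider` payload handling above boils down to a flatten-and-rename pass. A reduced sketch, assuming the field names read by the code (the provider and model values are invented):

```python
# Reduced flatten-and-rename pass over a /provider payload (field names as
# read above; the provider and model values are invented):
provider_payload = {
    'all': [{
        'id': 'anthropic',
        'name': 'Anthropic',
        'models': {'claude-x': {'name': 'Claude X', 'tool_call': True}},
    }]
}

models = [
    {
        'id': f"{p['id']}/{model_id}",
        'name': info.get('name', model_id),
        'provider': p.get('name', p['id']),
        'provider_id': p['id'],
        'capabilities': {'tool_call': info.get('tool_call', False)},
    }
    for p in provider_payload['all']
    for model_id, info in p['models'].items()
]
assert models[0]['id'] == 'anthropic/claude-x'
```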
1743
+ def get_opencode_storage_path(self) -> Path:
1744
+ """Get the OpenCode global storage path.
1745
+
1746
+ We prefer an explicit override, but we also try to "do what I mean" in
1747
+ common deployments where the worker runs as a service account while the
1748
+ codebases (and OpenCode storage) live under /home/<user>/.
1749
+ """
1750
+
1751
+ if self._opencode_storage_path is not None:
1752
+ return self._opencode_storage_path
1753
+
1754
+ def _dir_has_any_children(p: Path) -> bool:
1755
+ try:
1756
+ if not p.exists() or not p.is_dir():
1757
+ return False
1758
+ # Fast path: check for any entry without materializing a list.
1759
+ for _ in p.iterdir():
1760
+ return True
1761
+ return False
1762
+ except Exception as e:
1763
+ logger.debug(f'Error checking directory children for {p}: {e}')
1764
+ return False
1765
+
1766
+ def _storage_has_message_data(storage: Path) -> bool:
1767
+ """Return True if this storage appears to contain message/part data."""
1768
+ return _dir_has_any_children(
1769
+ storage / 'message'
1770
+ ) and _dir_has_any_children(storage / 'part')
1771
+
1772
+ def _storage_match_score(storage: Path) -> int:
1773
+ """Return how many registered codebases appear in this OpenCode storage's project list."""
1774
+ codebase_paths: List[str] = [
1775
+ str(cb.get('path'))
1776
+ for cb in (self.config.codebases or [])
1777
+ if cb.get('path')
1778
+ ]
1779
+ if not codebase_paths:
1780
+ return 0
1781
+
1782
+ project_dir = storage / 'project'
1783
+ if not project_dir.exists() or not project_dir.is_dir():
1784
+ return 0
1785
+
1786
+ # Compare resolved paths to handle symlinks/relative config.
1787
+ try:
1788
+ resolved_codebases = {
1789
+ str(Path(p).resolve()) for p in codebase_paths
1790
+ }
1791
+ except Exception as e:
1792
+ logger.debug(
1793
+ f'Failed to resolve codebase paths, using raw paths: {e}'
1794
+ )
1795
+ resolved_codebases = set(codebase_paths)
1796
+
1797
+ matched: set[str] = set()
1798
+
1799
+ for project_file in project_dir.glob('*.json'):
1800
+ if project_file.stem == 'global':
1801
+ continue
1802
+ try:
1803
+ with open(project_file, 'r', encoding='utf-8') as f:
1804
+ project = json.load(f)
1805
+ worktree = project.get('worktree')
1806
+ if not worktree:
1807
+ continue
1808
+ try:
1809
+ wt = str(Path(worktree).resolve())
1810
+ if wt in resolved_codebases:
1811
+ matched.add(wt)
1812
+ except Exception as e:
1813
+ logger.debug(
1814
+ f'Failed to resolve worktree path {worktree}: {e}'
1815
+ )
1816
+ if worktree in resolved_codebases:
1817
+ matched.add(worktree)
1818
+ except Exception as e:
1819
+ logger.debug(
1820
+ f'Error reading project file {project_file}: {e}'
1821
+ )
1822
+ continue
1823
+
1824
+ return len(matched)
1825
+
1826
+ candidates: List[Path] = []
1827
+ override_path: Optional[Path] = None
1828
+
1829
+ # 1) Explicit override (config/env)
1830
+ override = (
1831
+ self.config.opencode_storage_path
1832
+ or os.environ.get('A2A_OPENCODE_STORAGE_PATH')
1833
+ or os.environ.get('OPENCODE_STORAGE_PATH')
1834
+ )
1835
+ if override:
1836
+ override_path = Path(os.path.expanduser(override)).resolve()
1837
+ candidates.append(override_path)
1838
+
1839
+ # 2) Standard per-user location for the current service user
1840
+ xdg_data = os.environ.get(
1841
+ 'XDG_DATA_HOME', str(Path.home() / '.local' / 'share')
1842
+ )
1843
+ candidates.append(
1844
+ Path(os.path.expanduser(xdg_data)) / 'opencode' / 'storage'
1845
+ )
1846
+
1847
+ # 3) Heuristic: infer /home/<user> from codebase paths
1848
+ inferred_users: List[str] = []
1849
+ for cb in self.config.codebases:
1850
+ p = cb.get('path')
1851
+ if not p:
1852
+ continue
1853
+ parts = Path(p).parts
1854
+ if len(parts) >= 3 and parts[0] == '/' and parts[1] == 'home':
1855
+ inferred_users.append(parts[2])
1856
+
1857
+ # Also infer from the opencode binary path (often /home/<user>/.opencode/bin/opencode)
1858
+ opencode_bin = self.config.opencode_bin
1859
+ if opencode_bin:
1860
+ try:
1861
+ bin_parts = Path(opencode_bin).parts
1862
+ if (
1863
+ len(bin_parts) >= 3
1864
+ and bin_parts[0] == '/'
1865
+ and bin_parts[1] == 'home'
1866
+ ):
1867
+ inferred_users.append(bin_parts[2])
1868
+ except Exception as e:
1869
+ logger.debug(
1870
+ f'Failed to infer user from opencode binary path: {e}'
1871
+ )
1872
+
1873
+ for user in dict.fromkeys(inferred_users): # preserve order, de-dupe
1874
+ candidates.append(
1875
+ Path('/home')
1876
+ / user
1877
+ / '.local'
1878
+ / 'share'
1879
+ / 'opencode'
1880
+ / 'storage'
1881
+ )
1882
+
1883
+ inferred_candidate_paths = {
1884
+ (
1885
+ Path('/home')
1886
+ / user
1887
+ / '.local'
1888
+ / 'share'
1889
+ / 'opencode'
1890
+ / 'storage'
1891
+ ).resolve()
1892
+ for user in dict.fromkeys(inferred_users)
1893
+ }
1894
+
1895
+ # Pick the best existing candidate.
1896
+ first_existing: Optional[Path] = None
1897
+ best_match: Optional[Path] = None
1898
+ best_tuple: Optional[tuple] = None
1899
+ for c in candidates:
1900
+ try:
1901
+ if c.exists() and c.is_dir():
1902
+ if first_existing is None:
1903
+ first_existing = c
1904
+
1905
+ # Explicit override wins if it exists.
1906
+ if override_path is not None and c == override_path:
1907
+ self._opencode_storage_path = c
1908
+ logger.info(
1909
+ f'Using OpenCode storage at (override): {c}'
1910
+ )
1911
+ return c
1912
+
1913
+ # Otherwise, score by how many registered codebases this storage contains.
1914
+ score_codebases = _storage_match_score(c)
1915
+ has_message_data = 1 if _storage_has_message_data(c) else 0
1916
+ inferred_bonus = (
1917
+ 1 if c.resolve() in inferred_candidate_paths else 0
1918
+ )
1919
+
1920
+ # Prefer:
1921
+ # 1) Storage that matches registered codebases
1922
+ # 2) Storage that actually contains message/part data (for session detail UI)
1923
+ # 3) Inferred /home/<user> storage over service-account storage when tied
1924
+ score_tuple = (
1925
+ score_codebases,
1926
+ has_message_data,
1927
+ inferred_bonus,
1928
+ )
1929
+
1930
+ if best_tuple is None or score_tuple > best_tuple:
1931
+ best_tuple = score_tuple
1932
+ best_match = c
1933
+ except Exception as e:
1934
+ logger.debug(f'Error evaluating storage candidate {c}: {e}')
1935
+ continue
1936
+
1937
+ if best_match is not None and best_tuple is not None:
1938
+ if best_tuple[0] > 0:
1939
+ self._opencode_storage_path = best_match
1940
+ logger.info(
1941
+ f'Using OpenCode storage at: {best_match} (matched {best_tuple[0]} codebase(s))'
1942
+ )
1943
+ return best_match
1944
+
1945
+ # No project→codebase matches found. Prefer a storage that still looks
1946
+ # "real" (has message/part data) and/or was inferred from /home/<user>.
1947
+ if best_tuple[1] > 0 or best_tuple[2] > 0:
1948
+ self._opencode_storage_path = best_match
1949
+ logger.info(
1950
+ 'Using OpenCode storage at: %s (best available; message_data=%s, inferred_home=%s)',
1951
+ best_match,
1952
+ bool(best_tuple[1]),
1953
+ bool(best_tuple[2]),
1954
+ )
1955
+ return best_match
1956
+
1957
+ if first_existing is not None:
1958
+ # Fall back to *something* that exists, but warn because it might be empty/wrong.
1959
+ self._opencode_storage_path = first_existing
1960
+ logger.warning(
1961
+ 'OpenCode storage path exists but did not match any registered codebase projects; '
1962
+ f'falling back to: {first_existing}'
1963
+ )
1964
+ return first_existing
1965
+
1966
+ # Final fallback (even if it doesn't exist yet)
1967
+ self._opencode_storage_path = (
1968
+ candidates[0]
1969
+ if candidates
1970
+ else (Path.home() / '.local' / 'share' / 'opencode' / 'storage')
1971
+ )
1972
+ logger.warning(
1973
+ f'OpenCode storage path not found on disk; defaulting to: {self._opencode_storage_path}'
1974
+ )
1975
+ return self._opencode_storage_path
1976
+
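The candidate selection above is an ordinary lexicographic tuple comparison. A quick illustration with invented paths and scores:

```python
# Invented storage paths scored as (codebase_matches, has_message_data,
# inferred_home_bonus); max() applies the same lexicographic preference
# as the loop above: codebase matches first, then message/part data,
# then the inferred-/home bonus as the tie-breaker.
scores = {
    '/var/lib/svc/.local/share/opencode/storage': (0, 1, 0),
    '/home/alice/.local/share/opencode/storage': (2, 1, 1),
    '/home/bob/.local/share/opencode/storage': (0, 0, 1),
}
best = max(scores, key=scores.get)
assert best == '/home/alice/.local/share/opencode/storage'
```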
1977
+
1978
+ # =============================================================================
1979
+ # SessionSyncService - Session management and syncing
1980
+ # =============================================================================
1981
+
1982
+
1983
+ class SessionSyncService:
1984
+ """
1985
+ Handles session management and syncing with the server.
1986
+
1987
+ Responsibilities:
1988
+ - Reading sessions from OpenCode storage
1989
+ - Reporting sessions to server
1990
+ - Message sync for remote codebases
1991
+ """
1992
+
1993
+ def __init__(
1994
+ self,
1995
+ config: WorkerConfig,
1996
+ config_manager: ConfigManager,
1997
+ client: WorkerClient,
1998
+ ):
1999
+ self.config = config
2000
+ self.config_manager = config_manager
2001
+ self.client = client
2002
+
2003
+ def _get_project_id_for_path(self, codebase_path: str) -> Optional[str]:
2004
+ """Get the OpenCode project ID (hash) for a given codebase path."""
2005
+ storage_path = self.config_manager.get_opencode_storage_path()
2006
+ project_dir = storage_path / 'project'
2007
+
2008
+ if not project_dir.exists():
2009
+ return None
2010
+
2011
+ # Read all project files to find the matching worktree
2012
+ for project_file in project_dir.glob('*.json'):
2013
+ if project_file.stem == 'global':
2014
+ continue
2015
+ try:
2016
+ with open(project_file, 'r', encoding='utf-8') as f:
2017
+ project = json.load(f)
2018
+ worktree = project.get('worktree')
2019
+ if worktree:
2020
+ try:
2021
+ if (
2022
+ Path(worktree).resolve()
2023
+ == Path(codebase_path).resolve()
2024
+ ):
2025
+ return project.get('id')
2026
+ except Exception as e:
2027
+ logger.debug(
2028
+ f'Failed to resolve paths for comparison ({worktree} vs {codebase_path}): {e}'
2029
+ )
2030
+ if worktree == codebase_path:
2031
+ return project.get('id')
2032
+ except Exception as e:
2033
+ logger.debug(f'Error reading project file {project_file}: {e}')
2034
+ continue
2035
+
2036
+ return None
2037
+
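A reduced sketch of the worktree comparison this lookup performs, with an invented project file; `matches` is a hypothetical helper, not part of the worker:

```python
from pathlib import Path

# Invented project file; OpenCode keys projects by a hash ID and records
# the worktree path each one maps to.
project_json = {'id': 'prj_fake1234', 'worktree': '/home/alice/myrepo'}

def matches(project, codebase_path):
    # Resolve both sides so symlinked or relative paths still compare
    # equal, falling back to a raw string comparison as the code above does.
    try:
        return Path(project['worktree']).resolve() == Path(codebase_path).resolve()
    except OSError:
        return project['worktree'] == codebase_path

# matches(project_json, '/home/alice/myrepo') -> True on a plain POSIX layout
```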
2038
+ def get_sessions_for_codebase(
2039
+ self, codebase_path: str
2040
+ ) -> List[Dict[str, Any]]:
2041
+ """Get all OpenCode sessions for a codebase."""
2042
+ project_id = self._get_project_id_for_path(codebase_path)
2043
+ if not project_id:
2044
+ logger.debug(f'No OpenCode project ID found for {codebase_path}')
2045
+ return []
2046
+
2047
+ storage_path = self.config_manager.get_opencode_storage_path()
2048
+ session_dir = storage_path / 'session' / project_id
2049
+
2050
+ if not session_dir.exists():
2051
+ return []
2052
+
2053
+ sessions: List[Dict[str, Any]] = []
2054
+ for session_file in session_dir.glob('ses_*.json'):
2055
+ try:
2056
+ with open(session_file) as f:
2057
+ session_data = json.load(f)
2058
+ # Convert timestamps from milliseconds to ISO format
2059
+ time_data = session_data.get('time', {})
2060
+ created_ms = time_data.get('created', 0)
2061
+ updated_ms = time_data.get('updated', 0)
2062
+
2063
+ session_id = session_data.get('id')
2064
+ # OpenCode stores messages separately; count message files for UI convenience.
2065
+ msg_count = 0
2066
+ if session_id:
2067
+ msg_dir = storage_path / 'message' / str(session_id)
2068
+ try:
2069
+ if msg_dir.exists():
2070
+ msg_count = len(
2071
+ list(msg_dir.glob('msg_*.json'))
2072
+ )
2073
+ except Exception:
2074
+ msg_count = 0
2075
+
2076
+ created_iso = (
2077
+ datetime.fromtimestamp(created_ms / 1000).isoformat()
2078
+ if created_ms
2079
+ else None
2080
+ )
2081
+ updated_iso = (
2082
+ datetime.fromtimestamp(updated_ms / 1000).isoformat()
2083
+ if updated_ms
2084
+ else None
2085
+ )
2086
+
2087
+ sessions.append(
2088
+ {
2089
+ 'id': session_id,
2090
+ 'title': session_data.get('title', 'Untitled'),
2091
+ 'directory': session_data.get('directory'),
2092
+ 'project_id': project_id,
2093
+ # Match the UI expectations from monitor-tailwind.html
2094
+ 'created': created_iso,
2095
+ 'updated': updated_iso,
2096
+ 'messageCount': msg_count,
2097
+ 'summary': session_data.get('summary', {}),
2098
+ 'version': session_data.get('version'),
2099
+ }
2100
+ )
2101
+ except Exception as e:
2102
+ logger.debug(f'Error reading session file {session_file}: {e}')
2103
+ continue
2104
+
2105
+ # Sort by updated time descending
2106
+ sessions.sort(key=lambda s: s.get('updated') or '', reverse=True)
2107
+ return sessions
2108
+
2109
+ def get_global_sessions(self) -> List[Dict[str, Any]]:
2110
+ """Get all global OpenCode sessions (not associated with a specific project)."""
2111
+ storage_path = self.config_manager.get_opencode_storage_path()
2112
+ session_dir = storage_path / 'session' / 'global'
2113
+
2114
+ if not session_dir.exists():
2115
+ return []
2116
+
2117
+ sessions: List[Dict[str, Any]] = []
2118
+ for session_file in session_dir.glob('ses_*.json'):
2119
+ try:
2120
+ with open(session_file) as f:
2121
+ session_data = json.load(f)
2122
+ time_data = session_data.get('time', {})
2123
+ created_ms = time_data.get('created', 0)
2124
+ updated_ms = time_data.get('updated', 0)
2125
+
2126
+ session_id = session_data.get('id')
2127
+ msg_count = 0
2128
+ if session_id:
2129
+ msg_dir = storage_path / 'message' / str(session_id)
2130
+ try:
2131
+ if msg_dir.exists():
2132
+ msg_count = len(
2133
+ list(msg_dir.glob('msg_*.json'))
2134
+ )
2135
+ except Exception:
2136
+ msg_count = 0
2137
+
2138
+ created_iso = (
2139
+ datetime.fromtimestamp(created_ms / 1000).isoformat()
2140
+ if created_ms
2141
+ else None
2142
+ )
2143
+ updated_iso = (
2144
+ datetime.fromtimestamp(updated_ms / 1000).isoformat()
2145
+ if updated_ms
2146
+ else None
2147
+ )
2148
+
2149
+ sessions.append(
2150
+ {
2151
+ 'id': session_id,
2152
+ 'title': session_data.get('title', 'Untitled'),
2153
+ 'directory': session_data.get('directory'),
2154
+ 'project_id': SpecialCodebaseId.GLOBAL,
2155
+ 'created': created_iso,
2156
+ 'updated': updated_iso,
2157
+ 'messageCount': msg_count,
2158
+ 'summary': session_data.get('summary', {}),
2159
+ 'version': session_data.get('version'),
2160
+ }
2161
+ )
2162
+ except Exception as e:
2163
+ logger.debug(
2164
+ f'Error reading global session file {session_file}: {e}'
2165
+ )
2166
+ continue
2167
+
2168
+ sessions.sort(key=lambda s: s.get('updated') or '', reverse=True)
2169
+ return sessions
2170
+
2171
+ def get_session_messages(
2172
+ self, session_id: str, max_messages: Optional[int] = None
2173
+ ) -> List[Dict[str, Any]]:
2174
+ """Get messages (including parts) for a specific session from OpenCode storage."""
2175
+ storage_path = self.config_manager.get_opencode_storage_path()
2176
+ message_dir = storage_path / 'message' / session_id
2177
+
2178
+ if not message_dir.exists():
2179
+ return []
2180
+
2181
+ msg_files = sorted(message_dir.glob('msg_*.json'))
2182
+ if (
2183
+ max_messages is not None
2184
+ and max_messages > 0
2185
+ and len(msg_files) > max_messages
2186
+ ):
2187
+ msg_files = msg_files[-max_messages:]
2188
+
2189
+ messages: List[Dict[str, Any]] = []
2190
+ for msg_file in msg_files:
2191
+ try:
2192
+ with open(msg_file) as f:
2193
+ msg_data = json.load(f)
2194
+
2195
+ msg_id = msg_data.get('id')
2196
+ role = msg_data.get('role')
2197
+ agent = msg_data.get('agent')
2198
+ model_obj = msg_data.get('model') or {}
2199
+ model = None
2200
+ if isinstance(model_obj, dict):
2201
+ provider_id = model_obj.get('providerID')
2202
+ model_id = model_obj.get('modelID')
2203
+ if provider_id and model_id:
2204
+ model = f'{provider_id}/{model_id}'
2205
+ elif isinstance(model_obj, str):
2206
+ model = model_obj
2207
+
2208
+ time_data = msg_data.get('time', {}) or {}
2209
+ created_ms = time_data.get('created', 0)
2210
+ created_iso = (
2211
+ datetime.fromtimestamp(created_ms / 1000).isoformat()
2212
+ if created_ms
2213
+ else None
2214
+ )
2215
+
2216
+ # Load message parts (text/tool/step/etc)
2217
+ parts: List[Dict[str, Any]] = []
2218
+ if msg_id:
2219
+ parts_dir = storage_path / 'part' / str(msg_id)
2220
+ if parts_dir.exists() and parts_dir.is_dir():
2221
+ for part_file in sorted(parts_dir.glob('prt_*.json')):
2222
+ try:
2223
+ with open(
2224
+ part_file, 'r', encoding='utf-8'
2225
+ ) as f:
2226
+ part_data = json.load(f)
2227
+ part_obj: Dict[str, Any] = {
2228
+ 'id': part_data.get('id'),
2229
+ 'type': part_data.get('type'),
2230
+ }
2231
+ for k in (
2232
+ 'text',
2233
+ 'tool',
2234
+ 'state',
2235
+ 'reason',
2236
+ 'callID',
2237
+ 'cost',
2238
+ 'tokens',
2239
+ ):
2240
+ if k in part_data:
2241
+ part_obj[k] = part_data.get(k)
2242
+ parts.append(part_obj)
2243
+ except Exception as e:
2244
+ logger.debug(
2245
+ f'Error reading part file {part_file}: {e}'
2246
+ )
2247
+
2248
+ messages.append(
2249
+ {
2250
+ 'id': msg_id,
2251
+ 'sessionID': msg_data.get('sessionID') or session_id,
2252
+ 'role': role,
2253
+ 'time': {'created': created_iso},
2254
+ 'agent': agent,
2255
+ 'model': model,
2256
+ # OpenCode message-level metadata (preferred for UI stats)
2257
+ 'cost': msg_data.get('cost'),
2258
+ 'tokens': msg_data.get('tokens'),
2259
+ 'tool_calls': msg_data.get('tool_calls')
2260
+ or msg_data.get('toolCalls')
2261
+ or [],
2262
+ 'parts': parts,
2263
+ }
2264
+ )
2265
+ except Exception as e:
2266
+ logger.debug(f'Error reading message file {msg_file}: {e}')
2267
+ continue
2268
+
2269
+ # Sort by created time ascending (ISO or None)
2270
+ messages.sort(key=lambda m: (m.get('time') or {}).get('created') or '')
2271
+ return messages
2272
+
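For orientation, the on-disk layout these readers assume, inferred from the glob patterns above; the `parts_for` helper is hypothetical:

```python
from pathlib import Path

# Layout relative to the storage root, inferred from the glob patterns above:
#
#   session/<project_id>/ses_*.json   # session metadata (or session/global/)
#   message/<session_id>/msg_*.json   # one file per message
#   part/<message_id>/prt_*.json      # one file per message part
#
storage = Path.home() / '.local' / 'share' / 'opencode' / 'storage'

def parts_for(message_id: str):
    # Sorted, as above, so parts come back in creation order.
    return sorted((storage / 'part' / message_id).glob('prt_*.json'))
```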
2273
+ async def report_sessions_to_server(
2274
+ self,
2275
+ codebases: Dict[str, LocalCodebase],
2276
+ global_codebase_id: Optional[str],
2277
+ register_codebase_fn: Callable,
2278
+ ):
2279
+ """Report all sessions for registered codebases to the server."""
2280
+ # Iterate over a snapshot since we may update codebases if we need
2281
+ # to re-register a codebase (e.g., after a server restart).
2282
+ for codebase_id, codebase in list(codebases.items()):
2283
+ try:
2284
+ sessions = self.get_sessions_for_codebase(codebase.path)
2285
+ logger.info(
2286
+ f"Discovered {len(sessions)} OpenCode sessions for codebase '{codebase.name}' "
2287
+ f'(id={codebase_id}, path={codebase.path})'
2288
+ )
2289
+ if not sessions:
2290
+ continue
2291
+
2292
+ status = await self.client.sync_sessions(codebase_id, sessions)
2293
+
2294
+ # Self-heal common failure modes:
2295
+ # - 404: server lost codebase registry (restart / db reset)
2296
+ # - 403: worker_id mismatch for this codebase
2297
+ # In either case, re-register the codebase and retry once.
2298
+ if status in (403, 404):
2299
+ logger.info(
2300
+ "Attempting to re-register codebase '%s' after session sync %s (old_id=%s)",
2301
+ codebase.name,
2302
+ status,
2303
+ codebase_id,
2304
+ )
2305
+ new_codebase_id = await register_codebase_fn(
2306
+ name=codebase.name,
2307
+ path=codebase.path,
2308
+ description=codebase.description,
2309
+ )
2310
+
2311
+ # If a new ID was created/returned, drop the stale mapping.
2312
+ if new_codebase_id and new_codebase_id != codebase_id:
2313
+ codebases.pop(codebase_id, None)
2314
+ codebase_id = new_codebase_id
2315
+
2316
+ if new_codebase_id:
2317
+ await self.client.sync_sessions(codebase_id, sessions)
2318
+
2319
+ # Optionally sync recent session messages so the UI can show session details
2320
+ max_sessions = (
2321
+ getattr(self.config, 'session_message_sync_max_sessions', 0)
2322
+ or 0
2323
+ )
2324
+ max_messages = (
2325
+ getattr(self.config, 'session_message_sync_max_messages', 0)
2326
+ or 0
2327
+ )
2328
+ if max_sessions > 0 and max_messages > 0:
2329
+ await self._report_recent_session_messages_to_server(
2330
+ codebase_id=codebase_id,
2331
+ sessions=sessions[:max_sessions],
2332
+ max_messages=max_messages,
2333
+ )
2334
+
2335
+ except Exception as e:
2336
+ logger.debug(
2337
+ f'Failed to sync sessions for {codebase.name}: {e}'
2338
+ )
2339
+
2340
+ # Also sync global sessions (not associated with any specific project).
+ # Propagate any re-registered global codebase ID back to the caller so a
+ # stale mapping can be refreshed.
2341
+ return await self._report_global_sessions_to_server(
2342
+ global_codebase_id, register_codebase_fn
2343
+ )
2344
+
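The sync-then-self-heal shape used above, reduced to its essentials; `client` and `register` stand in for `WorkerClient.sync_sessions` and the registration callback, and the sketch omits the message-sync follow-up:

```python
# Sketch of the self-heal pattern: sync, and on 403/404 re-register once
# and retry; client and register are stand-ins for the real calls.
async def sync_with_reregister(client, register, codebase, sessions):
    status = await client.sync_sessions(codebase.id, sessions)
    if status in (403, 404):  # registry lost (404) or worker_id mismatch (403)
        new_id = await register(
            name=codebase.name,
            path=codebase.path,
            description=codebase.description,
        )
        if new_id:
            status = await client.sync_sessions(new_id, sessions)
    return status
```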
2345
+ async def _report_global_sessions_to_server(
2346
+ self,
2347
+ global_codebase_id: Optional[str],
2348
+ register_codebase_fn: Callable,
2349
+ ) -> Optional[str]:
2350
+ """Report global sessions to the server under a 'global' pseudo-codebase."""
2351
+ try:
2352
+ global_sessions = self.get_global_sessions()
2353
+ if not global_sessions:
2354
+ return
2355
+
2356
+ logger.info(
2357
+ f'Discovered {len(global_sessions)} global OpenCode sessions'
2358
+ )
2359
+
2360
+ # Ensure we have a "global" codebase registered
2361
+ if not global_codebase_id:
2362
+ return
2363
+
2364
+ status = await self.client.sync_sessions(
2365
+ global_codebase_id, global_sessions
2366
+ )
2367
+
2368
+ # Optionally sync recent session messages so the remote UI can show session detail.
2369
+ async def _sync_recent_global_messages(
2370
+ target_codebase_id: str,
2371
+ ) -> None:
2372
+ max_sessions = (
2373
+ getattr(self.config, 'session_message_sync_max_sessions', 0)
2374
+ or 0
2375
+ )
2376
+ max_messages = (
2377
+ getattr(self.config, 'session_message_sync_max_messages', 0)
2378
+ or 0
2379
+ )
2380
+ if max_sessions > 0 and max_messages > 0:
2381
+ await self._report_recent_session_messages_to_server(
2382
+ codebase_id=target_codebase_id,
2383
+ sessions=global_sessions[:max_sessions],
2384
+ max_messages=max_messages,
2385
+ )
2386
+
2387
+ if status == 200:
2388
+ await _sync_recent_global_messages(global_codebase_id)
2389
+ elif status in (403, 404):
2390
+ # Re-register and retry
2391
+ new_id = await register_codebase_fn(
2392
+ name=SpecialCodebaseId.GLOBAL,
2393
+ path=str(Path.home()),
2394
+ description='Global OpenCode sessions (not project-specific)',
2395
+ )
2396
+ if new_id:
2397
+ retry_status = await self.client.sync_sessions(
2398
+ new_id, global_sessions
2399
+ )
2400
+ if retry_status == 200:
2401
+ await _sync_recent_global_messages(new_id)
2402
+ # Return new_id to caller to update global_codebase_id
2403
+ return new_id
2404
+
2405
+ except Exception as e:
2406
+ logger.warning(f'Failed to sync global sessions: {e}')
2407
+
2408
+ return None
2409
+
2410
+ async def _report_recent_session_messages_to_server(
2411
+ self,
2412
+ codebase_id: str,
2413
+ sessions: List[Dict[str, Any]],
2414
+ max_messages: int,
2415
+ ):
2416
+ """Best-effort sync for the most recent sessions' messages."""
2417
+ try:
2418
+ for ses in sessions:
2419
+ session_id = ses.get('id')
2420
+ if not session_id:
2421
+ continue
2422
+
2423
+ messages = self.get_session_messages(
2424
+ str(session_id), max_messages=max_messages
2425
+ )
2426
+ if not messages:
2427
+ continue
2428
+
2429
+ await self.client.sync_session_messages(
2430
+ codebase_id, str(session_id), messages
2431
+ )
2432
+ except Exception as e:
2433
+ logger.debug(
2434
+ f'Failed to sync session messages for codebase {codebase_id}: {e}'
2435
+ )
2436
+
2437
+
2438
+ # =============================================================================
2439
+ # ContextCompactionService - Auto-compaction and summarization for sessions
2440
+ # =============================================================================
2441
+
2442
+
2443
+ class ContextCompactionService:
2444
+ """
2445
+ Handles automatic context compaction and summarization for task handoffs.
2446
+
2447
+ When sessions grow large or tasks are handed off between workers/agents,
2448
+ this service:
2449
+ 1. Estimates token count from session messages
2450
+ 2. Generates a summary of completed work
2451
+ 3. Creates a compacted context for the next agent
2452
+
2453
+ This prevents context overflow errors and ensures clean handoffs.
2454
+ """
2455
+
2456
+ # Rough estimate: 1 token ≈ 4 characters for English text
2457
+ CHARS_PER_TOKEN = 4
2458
+
2459
+ # Thresholds for compaction
2460
+ DEFAULT_MAX_TOKENS = 100000 # Trigger compaction above this
2461
+ DEFAULT_TARGET_TOKENS = 50000 # Target size after compaction
2462
+ SUMMARY_MAX_TOKENS = 2000 # Max tokens for summary
2463
+
2464
+ def __init__(
2465
+ self,
2466
+ session_sync: 'SessionSyncService',
2467
+ opencode_bin: str,
2468
+ max_tokens: int = DEFAULT_MAX_TOKENS,
2469
+ target_tokens: int = DEFAULT_TARGET_TOKENS,
2470
+ ):
2471
+ self.session_sync = session_sync
2472
+ self.opencode_bin = opencode_bin
2473
+ self.max_tokens = max_tokens
2474
+ self.target_tokens = target_tokens
2475
+
2476
+ def estimate_tokens(self, text: str) -> int:
2477
+ """Estimate token count from text length."""
2478
+ return len(text) // self.CHARS_PER_TOKEN
2479
+
2480
+ def estimate_session_tokens(self, messages: List[Dict[str, Any]]) -> int:
2481
+ """Estimate total tokens in session messages."""
2482
+ total_chars = 0
2483
+ for msg in messages:
2484
+ # Count message content
2485
+ parts = msg.get('parts', [])
2486
+ for part in parts:
2487
+ if isinstance(part, dict):
2488
+ content = part.get('content', '') or part.get('text', '')
2489
+ if isinstance(content, str):
2490
+ total_chars += len(content)
2491
+ elif isinstance(part, str):
2492
+ total_chars += len(part)
2493
+ return total_chars // self.CHARS_PER_TOKEN
2494
+
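A quick check of the 4-characters-per-token heuristic (rough by design; real tokenizers vary by model and language):

```python
# The 4-chars-per-token heuristic in action:
CHARS_PER_TOKEN = 4
text = 'def handler(evt):\n    return evt["payload"]\n'
assert len(text) == 44
assert len(text) // CHARS_PER_TOKEN == 11  # ~11 estimated tokens
```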
2495
+ def needs_compaction(self, messages: List[Dict[str, Any]]) -> bool:
2496
+ """Check if session needs compaction based on estimated tokens."""
2497
+ return self.estimate_session_tokens(messages) > self.max_tokens
2498
+
2499
+ def extract_key_context(
2500
+ self, messages: List[Dict[str, Any]]
2501
+ ) -> Dict[str, Any]:
2502
+ """
2503
+ Extract key context from messages for summarization.
2504
+
2505
+ Returns a structured summary of:
2506
+ - Files modified
2507
+ - Tools used
2508
+ - Key decisions made
2509
+ - Current state/progress
2510
+ """
2511
+ context = {
2512
+ 'files_modified': set(),
2513
+ 'files_read': set(),
2514
+ 'tools_used': set(),
2515
+ 'errors_encountered': [],
2516
+ 'key_outputs': [],
2517
+ 'message_count': len(messages),
2518
+ }
2519
+
2520
+ for msg in messages:
2521
+ parts = msg.get('parts', [])
2522
+ for part in parts:
2523
+ if not isinstance(part, dict):
2524
+ continue
2525
+
2526
+ part_type = part.get('type', '')
2527
+
2528
+ # Track tool usage
2529
+ if part_type == 'tool-invocation':
2530
+ tool_name = part.get('toolInvocation', {}).get(
2531
+ 'toolName', ''
2532
+ )
2533
+ if tool_name:
2534
+ context['tools_used'].add(tool_name)
2535
+
2536
+ # Track file operations
2537
+ args = part.get('toolInvocation', {}).get('args', {})
2538
+ if isinstance(args, dict):
2539
+ file_path = (
2540
+ args.get('filePath')
2541
+ or args.get('path')
2542
+ or args.get('file')
2543
+ )
2544
+ if file_path:
2545
+ if tool_name in ('write', 'edit', 'Write', 'Edit'):
2546
+ context['files_modified'].add(file_path)
2547
+ elif tool_name in ('read', 'Read', 'glob', 'Glob'):
2548
+ context['files_read'].add(file_path)
2549
+
2550
+ # Track errors
2551
+ if part_type == 'tool-result':
2552
+ result = part.get('toolResult', {})
2553
+ if isinstance(result, dict) and result.get('isError'):
2554
+ error_text = str(result.get('content', ''))[:200]
2555
+ context['errors_encountered'].append(error_text)
2556
+
2557
+ # Track key text outputs (assistant messages)
2558
+ if part_type == 'text' and msg.get('role') == 'assistant':
2559
+ text = part.get('text', '')
2560
+ if text and len(text) > 50:
2561
+ # Keep first 500 chars of significant outputs
2562
+ context['key_outputs'].append(text[:500])
2563
+
2564
+ # Convert sets to lists for JSON serialization
2565
+ context['files_modified'] = list(context['files_modified'])
2566
+ context['files_read'] = list(context['files_read'])
2567
+ context['tools_used'] = list(context['tools_used'])
2568
+
2569
+ # Limit key outputs to last 5
2570
+ context['key_outputs'] = context['key_outputs'][-5:]
2571
+
2572
+ return context
2573
+
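One invented message shaped like the OpenCode part stream, and the context fields the extractor above would populate from it:

```python
# An invented message shaped like the OpenCode part stream:
msg = {
    'role': 'assistant',
    'parts': [{
        'type': 'tool-invocation',
        'toolInvocation': {
            'toolName': 'edit',
            'args': {'filePath': 'src/app.py'},
        },
    }],
}
# -> tools_used == {'edit'} and files_modified == {'src/app.py'}
```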
2574
+ def generate_summary_prompt(
2575
+ self,
2576
+ messages: List[Dict[str, Any]],
2577
+ original_task: str,
2578
+ ) -> str:
2579
+ """
2580
+ Generate a prompt for the LLM to create a session summary.
2581
+
2582
+ This summary will be prepended to the next task for context.
2583
+ """
2584
+ context = self.extract_key_context(messages)
2585
+
2586
+ # Get the last few assistant messages for recent context
2587
+ recent_outputs = []
2588
+ for msg in reversed(messages[-10:]):
2589
+ if msg.get('role') == 'assistant':
2590
+ for part in msg.get('parts', []):
2591
+ if isinstance(part, dict) and part.get('type') == 'text':
2592
+ text = part.get('text', '')
2593
+ if text:
2594
+ recent_outputs.append(text[:1000])
2595
+ if len(recent_outputs) >= 3:
2596
+ break
2597
+
2598
+ summary_prompt = f"""Summarize the work done in this coding session for handoff to another agent.
2599
+
2600
+ ORIGINAL TASK: {original_task}
2601
+
2602
+ SESSION STATISTICS:
2603
+ - Total messages: {context['message_count']}
2604
+ - Files modified: {', '.join(context['files_modified'][:20]) or 'None'}
2605
+ - Files read: {', '.join(context['files_read'][:20]) or 'None'}
2606
+ - Tools used: {', '.join(context['tools_used']) or 'None'}
2607
+ - Errors encountered: {len(context['errors_encountered'])}
2608
+
2609
+ RECENT WORK:
2610
+ {chr(10).join(recent_outputs[:3])}
2611
+
2612
+ Please provide a concise summary (max 500 words) covering:
2613
+ 1. What was accomplished
2614
+ 2. Current state of the work
2615
+ 3. Any blockers or issues encountered
2616
+ 4. What remains to be done
2617
+ 5. Key files/code that was changed
2618
+
2619
+ Format as a handoff note for the next agent."""
2620
+
2621
+ return summary_prompt
2622
+
2623
+ async def generate_summary(
2624
+ self,
2625
+ session_id: str,
2626
+ codebase_path: str,
2627
+ original_task: str,
2628
+ ) -> Optional[str]:
2629
+ """
2630
+ Generate a summary of the session using OpenCode.
2631
+
2632
+ Returns the summary text or None if generation fails.
2633
+ """
2634
+ messages = self.session_sync.get_session_messages(
2635
+ session_id, max_messages=100
2636
+ )
2637
+ if not messages:
2638
+ return None
2639
+
2640
+ # Check if compaction is needed
2641
+ if not self.needs_compaction(messages):
2642
+ logger.debug(f'Session {session_id} does not need compaction')
2643
+ return None
2644
+
2645
+ logger.info(
2646
+ f'Generating summary for session {session_id} (estimated {self.estimate_session_tokens(messages)} tokens)'
2647
+ )
2648
+
2649
+ summary_prompt = self.generate_summary_prompt(messages, original_task)
2650
+
2651
+ # Run a quick summarization using OpenCode with a fast model
2652
+ try:
2653
+ cmd = [
2654
+ self.opencode_bin,
2655
+ 'run',
2656
+ '--agent',
2657
+ 'general', # Use general agent for summarization
2658
+ '--model',
2659
+ 'anthropic/claude-3-5-haiku-latest', # Fast, cheap model
2660
+ '--format',
2661
+ 'json',
2662
+ '--',
2663
+ summary_prompt,
2664
+ ]
2665
+
2666
+ process = await asyncio.create_subprocess_exec(
2667
+ *cmd,
2668
+ cwd=codebase_path,
2669
+ stdin=asyncio.subprocess.DEVNULL,
2670
+ stdout=asyncio.subprocess.PIPE,
2671
+ stderr=asyncio.subprocess.PIPE,
2672
+ env={**os.environ, 'NO_COLOR': '1'},
2673
+ limit=16 * 1024 * 1024,
2674
+ )
2675
+
2676
+ stdout, stderr = await asyncio.wait_for(
2677
+ process.communicate(),
2678
+ timeout=60, # 1 minute timeout for summarization
2679
+ )
2680
+
2681
+ if process.returncode == 0:
2682
+ output = stdout.decode('utf-8', errors='replace')
2683
+ # Try to extract the summary from JSON output
2684
+ for line in output.split('\n'):
2685
+ try:
2686
+ obj = json.loads(line)
2687
+ if isinstance(obj, dict):
2688
+ # Look for text content in the response
2689
+ content = (
2690
+ obj.get('content')
2691
+ or obj.get('text')
2692
+ or obj.get('output')
2693
+ )
2694
+ if content:
2695
+ return content[
2696
+ : self.SUMMARY_MAX_TOKENS
2697
+ * self.CHARS_PER_TOKEN
2698
+ ]
2699
+ except json.JSONDecodeError:
2700
+ continue
2701
+ # Fallback: return raw output truncated
2702
+ return output[: self.SUMMARY_MAX_TOKENS * self.CHARS_PER_TOKEN]
2703
+ else:
2704
+ logger.warning(
2705
+ f'Summary generation failed: {stderr.decode("utf-8", errors="replace")[:500]}'
2706
+ )
2707
+ return None
2708
+
2709
+ except asyncio.TimeoutError:
2710
+ logger.warning(
2711
+ f'Summary generation timed out for session {session_id}'
2712
+ )
2713
+ return None
2714
+ except Exception as e:
2715
+ logger.warning(f'Summary generation error: {e}')
2716
+ return None
2717
+
2718
+ def create_handoff_context(
2719
+ self,
2720
+ original_prompt: str,
2721
+ summary: Optional[str],
2722
+ session_id: Optional[str],
2723
+ ) -> str:
2724
+ """
2725
+ Create a compacted context for task handoff.
2726
+
2727
+ Prepends summary and context to the original prompt.
2728
+ """
2729
+ if not summary:
2730
+ return original_prompt
2731
+
2732
+ handoff_context = f"""## Previous Session Summary
2733
+
2734
+ {summary}
2735
+
2736
+ ## Continuation Task
2737
+
2738
+ {original_prompt}
2739
+
2740
+ ---
2741
+ Note: This task continues from a previous session. The summary above describes what was already done.
2742
+ Please review and continue the work, avoiding redundant actions on files already modified."""
2743
+
2744
+ return handoff_context
2745
+
2746
+ async def prepare_task_context(
2747
+ self,
2748
+ prompt: str,
2749
+ resume_session_id: Optional[str],
2750
+ codebase_path: str,
2751
+ auto_summarize: bool = True,
2752
+ ) -> str:
2753
+ """
2754
+ Prepare task context with auto-compaction if needed.
2755
+
2756
+ Args:
2757
+ prompt: The original task prompt
2758
+ resume_session_id: Session ID to resume (if any)
2759
+ codebase_path: Path to the codebase
2760
+ auto_summarize: Whether to auto-generate summary for large sessions
2761
+
2762
+ Returns:
2763
+ The (possibly enhanced) prompt with summary context
2764
+ """
2765
+ if not resume_session_id or not auto_summarize:
2766
+ return prompt
2767
+
2768
+ # Check if session needs compaction
2769
+ messages = self.session_sync.get_session_messages(
2770
+ resume_session_id, max_messages=100
2771
+ )
2772
+ if not messages or not self.needs_compaction(messages):
2773
+ return prompt
2774
+
2775
+ # Generate summary
2776
+ summary = await self.generate_summary(
2777
+ session_id=resume_session_id,
2778
+ codebase_path=codebase_path,
2779
+ original_task=prompt,
2780
+ )
2781
+
2782
+ # Create handoff context
2783
+ return self.create_handoff_context(prompt, summary, resume_session_id)
2784
+
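A hypothetical call site tying the pieces together: only large resumed sessions get a prepended summary; everything else passes through unchanged (the `task` and `codebase` shapes are assumed from the executor code below):

```python
# Hypothetical call site; task/codebase shapes assumed from the executor.
async def build_prompt(compaction, task, codebase):
    metadata = task.get('metadata', {})
    return await compaction.prepare_task_context(
        prompt=task.get('prompt', ''),
        resume_session_id=metadata.get('resume_session_id'),
        codebase_path=codebase.path,
        auto_summarize=metadata.get('auto_summarize', True),
    )
```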
2785
+
2786
+ # =============================================================================
2787
+ # TaskExecutor - Task execution logic
2788
+ # =============================================================================
2789
+
2790
+
2791
+ class TaskExecutor:
2792
+ """
2793
+ Handles task execution logic.
2794
+
2795
+ Responsibilities:
2796
+ - OpenCode subprocess management
2797
+ - Task claiming/releasing (via client)
2798
+ - Special task handlers (register_codebase, echo, noop)
2799
+ - Semaphore-based concurrency control
2800
+ - Email notifications on task completion/failure
2801
+ - Auto-compaction and summarization for task handoffs
2802
+ """
2803
+
2804
+ def __init__(
2805
+ self,
2806
+ config: WorkerConfig,
2807
+ client: WorkerClient,
2808
+ config_manager: ConfigManager,
2809
+ session_sync: SessionSyncService,
2810
+ opencode_bin: str,
2811
+ email_service: Optional[EmailNotificationService] = None,
2812
+ ):
2813
+ self.config = config
2814
+ self.client = client
2815
+ self.config_manager = config_manager
2816
+ self.session_sync = session_sync
2817
+ self.opencode_bin = opencode_bin
2818
+ self.email_service = email_service
2819
+ self.active_processes: Dict[str, asyncio.subprocess.Process] = {}
2820
+ # Task processing state
2821
+ self._task_semaphore: Optional[asyncio.Semaphore] = None
2822
+ self._active_task_ids: Set[str] = set()
2823
+ # Context compaction service for auto-summarization
2824
+ self.compaction_service = ContextCompactionService(
2825
+ session_sync=session_sync,
2826
+ opencode_bin=opencode_bin,
2827
+ max_tokens=getattr(config, 'compaction_max_tokens', 100000),
2828
+ target_tokens=getattr(config, 'compaction_target_tokens', 50000),
2829
+ )
2830
+
2831
+ def init_semaphore(self):
2832
+ """Initialize the task semaphore for bounded concurrency."""
2833
+ if self._task_semaphore is None:
2834
+ self._task_semaphore = asyncio.Semaphore(
2835
+ self.config.max_concurrent_tasks
2836
+ )
2837
+
2838
+ async def terminate_all_processes(self):
2839
+ """Terminate all active processes."""
2840
+ for task_id, process in list(self.active_processes.items()):
2841
+ logger.info(f'Terminating process for task {task_id}')
2842
+ process.terminate()
2843
+ try:
2844
+ await asyncio.wait_for(process.wait(), timeout=5)
2845
+ except asyncio.TimeoutError:
2846
+ process.kill()
2847
+
2848
+ async def process_task_with_semaphore(
2849
+ self,
2850
+ task: Dict[str, Any],
2851
+ codebases: Dict[str, LocalCodebase],
2852
+ global_codebase_id: Optional[str],
2853
+ register_codebase_fn: Callable,
2854
+ ):
2855
+ """Process a task with bounded concurrency using semaphore."""
2856
+ task_id = task.get('id') or task.get('task_id') or ''
2857
+
2858
+ if self._task_semaphore is None:
2859
+ self._task_semaphore = asyncio.Semaphore(
2860
+ self.config.max_concurrent_tasks
2861
+ )
2862
+
2863
+ if not task_id:
2864
+ logger.warning('Task has no ID, skipping')
2865
+ return
2866
+
2867
+ # -------------------------------------------------------------------------
2868
+ # Belt-and-suspenders validation: ensure this worker can handle the task
2869
+ # Server-side routing should already filter, but we validate here for
2870
+ # defense-in-depth to prevent workers from claiming tasks they can't execute.
2871
+ # -------------------------------------------------------------------------
2872
+ codebase_id = task.get('codebase_id', '')
2873
+ can_handle = (
2874
+ codebase_id in codebases
2875
+ or codebase_id == SpecialCodebaseId.PENDING
2876
+ or (
2877
+ codebase_id == SpecialCodebaseId.GLOBAL
2878
+ and global_codebase_id is not None
2879
+ )
2880
+ )
2881
+ if not can_handle:
2882
+ logger.warning(
2883
+ f'Task {task_id} has codebase_id={codebase_id!r} which this worker '
2884
+ f'cannot handle (registered: {list(codebases.keys())}). '
2885
+ f'Skipping to prevent incorrect claim.'
2886
+ )
2887
+ return
2888
+
2889
+ # Mark task as active
2890
+ self._active_task_ids.add(task_id)
2891
+
2892
+ try:
2893
+ async with self._task_semaphore:
2894
+ logger.debug(f'Acquired semaphore for task {task_id}')
2895
+ # Atomically claim the task before processing to prevent duplicate work
2896
+ claimed = await self.client.claim_task(task_id)
2897
+ if not claimed:
2898
+ logger.debug(
2899
+ f'Task {task_id} already claimed by another worker, skipping'
2900
+ )
2901
+ return
2902
+ try:
2903
+ await self.execute_task(
2904
+ task,
2905
+ codebases,
2906
+ global_codebase_id,
2907
+ register_codebase_fn,
2908
+ )
2909
+ finally:
2910
+ # Release the claim when done (success or failure)
2911
+ await self.client.release_task(task_id)
2912
+ finally:
2913
+ self._active_task_ids.discard(task_id)
2914
+
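The claim/execute/release skeleton above, reduced to its essentials; `client` is assumed to expose the same `claim_task`/`release_task` calls used here:

```python
# Reduced concurrency skeleton; client is a stand-in for WorkerClient.
async def run_bounded(sem, client, task_id, execute):
    async with sem:  # cap concurrent tasks at the semaphore's size
        if not await client.claim_task(task_id):
            return  # another worker won the claim; nothing to do
        try:
            await execute()
        finally:
            await client.release_task(task_id)  # release on success or failure
```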
2915
+ def is_task_active(self, task_id: str) -> bool:
2916
+ """Check if a task is currently being processed."""
2917
+ return task_id in self._active_task_ids
2918
+
2919
+ async def execute_task(
2920
+ self,
2921
+ task: Dict[str, Any],
2922
+ codebases: Dict[str, LocalCodebase],
2923
+ global_codebase_id: Optional[str],
2924
+ register_codebase_fn: Callable,
2925
+ ):
2926
+ """Execute a task using OpenCode or handle special task types."""
2927
+ task_id: str = task.get('id') or task.get('task_id') or ''
2928
+ codebase_id: str = task.get('codebase_id') or ''
2929
+ agent_type: str = (
2930
+ task.get('agent_type', AgentType.BUILD) or AgentType.BUILD
2931
+ )
2932
+
2933
+ if not task_id:
2934
+ logger.error('Task has no ID, cannot execute')
2935
+ return
2936
+
2937
+ # Handle special task types
2938
+ if agent_type == AgentType.REGISTER_CODEBASE:
2939
+ await self.handle_register_codebase_task(task, register_codebase_fn)
2940
+ return
2941
+
2942
+ # Lightweight test/utility agent types that do not require OpenCode.
2943
+ # Useful for end-to-end validation of the CodeTether task queue.
2944
+ if agent_type in (AgentType.ECHO, AgentType.NOOP):
2945
+ title = task.get('title')
2946
+ logger.info(
2947
+ f'Executing lightweight task {task_id}: {title} (agent_type={agent_type})'
2948
+ )
2949
+
2950
+ await self.client.update_task_status(task_id, TaskStatus.RUNNING)
2951
+ try:
2952
+ if agent_type == AgentType.NOOP:
2953
+ result = 'ok'
2954
+ else:
2955
+ # Echo returns the prompt/description verbatim.
2956
+ result = task.get('prompt', task.get('description', ''))
2957
+
2958
+ await self.client.update_task_status(
2959
+ task_id, TaskStatus.COMPLETED, result=result
2960
+ )
2961
+ logger.info(
2962
+ f'Task {task_id} completed successfully (agent_type={agent_type})'
2963
+ )
2964
+ except Exception as e:
2965
+ logger.error(
2966
+ f'Task {task_id} execution error (agent_type={agent_type}): {e}'
2967
+ )
2968
+ await self.client.update_task_status(
2969
+ task_id, TaskStatus.FAILED, error=str(e)
2970
+ )
2971
+ return
2972
+
2973
+ # Regular task - requires existing codebase
2974
+ # Handle special 'global' codebase_id from MCP/UI clients
2975
+ effective_codebase_id = codebase_id
2976
+ if codebase_id == SpecialCodebaseId.GLOBAL:
2977
+ if not global_codebase_id:
2978
+ logger.error(
2979
+ f'Cannot process global task {task_id}: worker has no global codebase registered'
2980
+ )
2981
+ return
2982
+ effective_codebase_id = global_codebase_id
2983
+
2984
+ codebase = codebases.get(effective_codebase_id)
2985
+
2986
+ if not codebase:
2987
+ logger.error(f'Codebase {codebase_id} not found for task {task_id}')
2988
+ return
2989
+
2990
+ # -------------------------------------------------------------------------
2991
+ # Defense-in-depth: verify codebase path exists on disk before executing
2992
+ # This catches stale registrations, mount issues, or path misconfigurations
2993
+ # -------------------------------------------------------------------------
2994
+ codebase_path = Path(codebase.path)
2995
+ if not codebase_path.exists():
2996
+ error_msg = (
2997
+ f'Codebase path does not exist on disk: {codebase.path} '
2998
+ f'(codebase_id={effective_codebase_id}, task_id={task_id})'
2999
+ )
3000
+ logger.error(error_msg)
3001
+ await self.client.update_task_status(
3002
+ task_id, TaskStatus.FAILED, error=error_msg
3003
+ )
3004
+ return
3005
+
3006
+ if not codebase_path.is_dir():
3007
+ error_msg = (
3008
+ f'Codebase path is not a directory: {codebase.path} '
3009
+ f'(codebase_id={effective_codebase_id}, task_id={task_id})'
3010
+ )
3011
+ logger.error(error_msg)
3012
+ await self.client.update_task_status(
3013
+ task_id, TaskStatus.FAILED, error=error_msg
3014
+ )
3015
+ return
3016
+
3017
+ logger.info(f'Executing task {task_id}: {task.get("title")}')
3018
+
3019
+ # Mark the task as running (the atomic claim happened in process_task_with_semaphore)
3020
+ await self.client.update_task_status(task_id, TaskStatus.RUNNING)
3021
+
3022
+ start_time = time.time()
3023
+ try:
3024
+ # Build the prompt
3025
+ prompt = task.get('prompt', task.get('description', ''))
3026
+ metadata = task.get('metadata', {})
3027
+ model = metadata.get(
3028
+ 'model'
3029
+ ) # e.g., "anthropic/claude-sonnet-4-20250514"
3030
+ resume_session_id = metadata.get(
3031
+ 'resume_session_id'
3032
+ ) # Session to resume
3033
+
3034
+ # Auto-compaction: If resuming a session with large context,
3035
+ # generate a summary and prepend it to the prompt
3036
+ auto_summarize = metadata.get('auto_summarize', True)
3037
+ if resume_session_id and auto_summarize:
3038
+ try:
3039
+ enhanced_prompt = (
3040
+ await self.compaction_service.prepare_task_context(
3041
+ prompt=prompt,
3042
+ resume_session_id=resume_session_id,
3043
+ codebase_path=codebase.path,
3044
+ auto_summarize=True,
3045
+ )
3046
+ )
3047
+ if enhanced_prompt != prompt:
3048
+ logger.info(
3049
+ f'Task {task_id}: Added session summary for handoff'
3050
+ )
3051
+ prompt = enhanced_prompt
3052
+ except Exception as e:
3053
+ logger.warning(
3054
+ f'Auto-summarization failed for task {task_id}: {e}'
3055
+ )
3056
+ # Continue with original prompt
3057
+
3058
+ # Run OpenCode
3059
+ result = await self.run_opencode(
3060
+ codebase_id=codebase_id,
3061
+ codebase_path=codebase.path,
3062
+ prompt=prompt,
3063
+ agent_type=agent_type,
3064
+ task_id=task_id,
3065
+ model=model,
3066
+ session_id=resume_session_id,
3067
+ )
3068
+
3069
+ # Calculate duration
3070
+ duration_ms = int((time.time() - start_time) * 1000)
3071
+
3072
+ if result['success']:
3073
+ await self.client.update_task_status(
3074
+ task_id,
3075
+ TaskStatus.COMPLETED,
3076
+ result=result.get('output', 'Task completed successfully'),
3077
+ )
3078
+ logger.info(f'Task {task_id} completed successfully')
3079
+
3080
+ # Send email notification
3081
+ if self.email_service:
3082
+ await self.email_service.send_task_report(
3083
+ task_id=task_id,
3084
+ title=task.get('title', 'Untitled'),
3085
+ status='completed',
3086
+ result=result.get('output'),
3087
+ duration_ms=duration_ms,
3088
+ session_id=resume_session_id,
3089
+ codebase_id=codebase_id,
3090
+ )
3091
+ else:
3092
+ error_msg = result.get('error', 'Unknown error')
3093
+ await self.client.update_task_status(
3094
+ task_id,
3095
+ TaskStatus.FAILED,
3096
+ error=error_msg,
3097
+ )
3098
+ logger.error(f'Task {task_id} failed: {error_msg}')
3099
+
3100
+ # Send email notification
3101
+ if self.email_service:
3102
+ await self.email_service.send_task_report(
3103
+ task_id=task_id,
3104
+ title=task.get('title', 'Untitled'),
3105
+ status='failed',
3106
+ error=error_msg,
3107
+ duration_ms=duration_ms,
3108
+ session_id=resume_session_id,
3109
+ codebase_id=codebase_id,
3110
+ )
3111
+
3112
+ except Exception as e:
3113
+ logger.error(f'Task {task_id} execution error: {e}')
3114
+ await self.client.update_task_status(
3115
+ task_id, TaskStatus.FAILED, error=str(e)
3116
+ )
3117
+
3118
+ # Send email notification for exception
3119
+ if self.email_service:
3120
+ await self.email_service.send_task_report(
3121
+ task_id=task_id,
3122
+ title=task.get('title', 'Untitled'),
3123
+ status='failed',
3124
+ error=str(e),
3125
+ codebase_id=codebase_id,
3126
+ )
3127
+
3128
+ async def handle_register_codebase_task(
3129
+ self,
3130
+ task: Dict[str, Any],
3131
+ register_codebase_fn: Callable,
3132
+ ):
3133
+ """
3134
+ Handle a codebase registration task from the server.
3135
+
3136
+ This validates the path exists locally and registers the codebase
3137
+ with this worker's ID.
3138
+ """
3139
+ task_id: str = task.get('id') or task.get('task_id') or ''
3140
+ metadata: Dict[str, Any] = task.get('metadata', {}) or {}
3141
+
3142
+ name = metadata.get('name', 'Unknown')
3143
+ path = metadata.get('path')
3144
+ description = metadata.get('description', '')
3145
+
3146
+ logger.info(f'Handling registration task {task_id}: {name} at {path}')
3147
+
3148
+ # Mark the task as running
3149
+ await self.client.update_task_status(task_id, TaskStatus.RUNNING)
3150
+
3151
+ try:
3152
+ # Validate path exists locally on this worker
3153
+ if not path:
3154
+ await self.client.update_task_status(
3155
+ task_id,
3156
+ TaskStatus.FAILED,
3157
+ error='No path provided in registration task',
3158
+ )
3159
+ return
3160
+
3161
+ if not os.path.isdir(path):
3162
+ await self.client.update_task_status(
3163
+ task_id,
3164
+ TaskStatus.FAILED,
3165
+ error=f'Path does not exist on this worker: {path}',
3166
+ )
3167
+ logger.warning(f'Registration failed - path not found: {path}')
3168
+ return
3169
+
3170
+ # Path exists! Register it with the server (with our worker_id)
3171
+ codebase_id = await register_codebase_fn(
3172
+ name=name,
3173
+ path=path,
3174
+ description=description,
3175
+ )
3176
+
3177
+ if codebase_id:
3178
+ await self.client.update_task_status(
3179
+ task_id,
3180
+ TaskStatus.COMPLETED,
3181
+ result=f'Codebase registered successfully with ID: {codebase_id}',
3182
+ )
3183
+ logger.info(
3184
+ f'Registration task {task_id} completed: {name} -> {codebase_id}'
3185
+ )
3186
+ else:
3187
+ await self.client.update_task_status(
3188
+ task_id,
3189
+ TaskStatus.FAILED,
3190
+ error='Failed to register codebase with server',
3191
+ )
3192
+
3193
+ except Exception as e:
3194
+ logger.error(f'Registration task {task_id} error: {e}')
3195
+ await self.client.update_task_status(
3196
+ task_id, TaskStatus.FAILED, error=str(e)
3197
+ )
3198
+
3199
+ async def run_opencode(
3200
+ self,
3201
+ codebase_id: str,
3202
+ codebase_path: str,
3203
+ prompt: str,
3204
+ agent_type: str = 'build',
3205
+ task_id: Optional[str] = None,
3206
+ model: Optional[str] = None,
3207
+ session_id: Optional[str] = None,
3208
+ ) -> Dict[str, Any]:
3209
+ """Run OpenCode agent on a codebase."""
3210
+
3211
+ def _extract_session_id(obj: Any) -> Optional[str]:
3212
+ """Best-effort extraction of an OpenCode session id from JSON output."""
3213
+ if isinstance(obj, dict):
3214
+ for k in ('sessionID', 'session_id', 'sessionId', 'session'):
3215
+ v = obj.get(k)
3216
+ if isinstance(v, str) and v.startswith('ses_'):
3217
+ return v
3218
+ for v in obj.values():
3219
+ found = _extract_session_id(v)
3220
+ if found:
3221
+ return found
3222
+ elif isinstance(obj, list):
3223
+ for v in obj:
3224
+ found = _extract_session_id(v)
3225
+ if found:
3226
+ return found
3227
+ return None
3228
+
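A reduced standalone version of the recursive scan, for illustration (the sample payload is invented):

```python
# Reduced version of the recursive session-ID scan: first string under a
# session-ish key that starts with 'ses_' wins, at any nesting depth.
def find_ses(obj):
    if isinstance(obj, dict):
        for key in ('sessionID', 'session_id', 'sessionId', 'session'):
            value = obj.get(key)
            if isinstance(value, str) and value.startswith('ses_'):
                return value
        for value in obj.values():
            found = find_ses(value)
            if found:
                return found
    elif isinstance(obj, list):
        for value in obj:
            found = find_ses(value)
            if found:
                return found
    return None

assert find_ses({'result': [{'meta': {'sessionID': 'ses_0123abcd'}}]}) == 'ses_0123abcd'
```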
3229
+ async def _sync_session_messages_once(
3230
+ *,
3231
+ target_session_id: str,
3232
+ messages: List[Dict[str, Any]],
3233
+ ) -> bool:
3234
+ """Sync messages for a single session. Returns True on HTTP 200."""
3235
+ return await self.client.sync_session_messages(
3236
+ codebase_id, target_session_id, messages
3237
+ )
3238
+
3239
+ def _messages_fingerprint(messages: List[Dict[str, Any]]) -> str:
3240
+ """Fingerprint that detects any message update for reliable sync.
3241
+
3242
+ Computes a fingerprint based on:
3243
+ - Total message count
3244
+ - Last message ID and parts count
3245
+ - Sum of all parts across all messages (detects updates to any message)
3246
+ - Last part ID from each message (detects new parts added anywhere)
3247
+
3248
+ This ensures the worker syncs to the database every time any message
3249
+ is updated, not just when new messages are added.
3250
+ """
3251
+ if not messages:
3252
+ return ''
3253
+ last = messages[-1]
3254
+ last_id = last.get('id') or ''
3255
+ last_parts = last.get('parts')
3256
+ last_parts_len = (
3257
+ len(last_parts) if isinstance(last_parts, list) else 0
3258
+ )
3259
+ # Include total message count and last created timestamp when available.
3260
+ created = (
3261
+ (last.get('time') or {})
3262
+ if isinstance(last.get('time'), dict)
3263
+ else {}
3264
+ ).get('created') or ''
3265
+
3266
+ # Sum all parts across all messages to detect any message update
3267
+ total_parts = 0
3268
+ last_part_ids = []
3269
+ for msg in messages:
3270
+ parts = msg.get('parts')
3271
+ if isinstance(parts, list):
3272
+ total_parts += len(parts)
3273
+ # Track the last part ID from each message to detect new parts
3274
+ if parts:
3275
+ last_part = parts[-1]
3276
+ if isinstance(last_part, dict):
3277
+ last_part_ids.append(last_part.get('id') or '')
3278
+
3279
+ # Include a hash of the last part IDs to detect new parts added anywhere.
+ # Note: built-in hash() is salted per process (PYTHONHASHSEED), so the
+ # fingerprint resets after a worker restart, costing at most one
+ # redundant re-sync.
3280
+ part_ids_hash = (
3281
+ hash(tuple(last_part_ids)) & 0xFFFFFFFF
3282
+ ) # 32-bit hash
3283
+
3284
+ return f'{len(messages)}|{last_id}|{last_parts_len}|{created}|{total_parts}|{part_ids_hash}'
3285
+
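Why the fingerprint moves on in-place edits, shown with a reduced standalone fingerprint; `mini_fp` is illustrative, not the worker's exact format:

```python
# mini_fp is illustrative: counting parts across *all* messages is what
# makes in-place updates to older messages visible, even when the last
# message is unchanged.
def mini_fp(messages):
    total_parts = sum(len(m.get('parts') or []) for m in messages)
    last = messages[-1] if messages else {}
    return (len(messages), last.get('id'), total_parts)

msgs = [
    {'id': 'msg_a', 'parts': [{'id': 'prt_1'}]},
    {'id': 'msg_b', 'parts': [{'id': 'prt_2'}]},
]
before = mini_fp(msgs)
msgs[0]['parts'].append({'id': 'prt_3'})  # update an older message
assert mini_fp(msgs) != before            # -> a re-sync is triggered
```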
3286
+ async def _infer_active_session_id(
3287
+ *,
3288
+ known_before: set[str],
3289
+ start_epoch_s: float,
3290
+ ) -> Optional[str]:
3291
+ """Infer the active session by looking for the most recently updated session."""
3292
+ try:
3293
+ sessions = self.session_sync.get_sessions_for_codebase(
3294
+ codebase_path
3295
+ )
3296
+ if not sessions:
3297
+ return None
3298
+ top = sessions[0]
3299
+ sid = top.get('id')
3300
+ if not isinstance(sid, str) or not sid:
3301
+ return None
3302
+
3303
+ # Prefer brand-new sessions.
3304
+ if sid not in known_before:
3305
+ return sid
3306
+
3307
+ # Or sessions updated after the task started.
3308
+ updated = top.get('updated')
3309
+ if isinstance(updated, str) and updated:
3310
+ try:
3311
+ # worker writes naive isoformat; treat as local time.
3312
+ updated_dt = datetime.fromisoformat(updated)
3313
+ if updated_dt.timestamp() >= (start_epoch_s - 2.0):
3314
+ return sid
3315
+ except Exception:
3316
+ return sid # best-effort
3317
+ return sid
3318
+ except Exception:
3319
+ return None
3320
+
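The freshness heuristic in isolation, assuming the naive local-time isoformat stamps that the worker writes:

```python
# A session counts as "active" if its 'updated' stamp falls at or after
# (task start - 2s grace); the grace window absorbs clock jitter.
import time
from datetime import datetime

start_epoch_s = time.time()           # when the task was launched
updated = datetime.now().isoformat()  # naive isoformat, as the worker writes it

updated_dt = datetime.fromisoformat(updated)
print(updated_dt.timestamp() >= (start_epoch_s - 2.0))  # True -> treat as active
```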
3321
+ def _recent_opencode_log_hint(returncode: int) -> Optional[str]:
3322
+ """Best-effort hint for failures where OpenCode logs to file.
3323
+
3324
+ Avoid dumping full logs into task output (can be huge / sensitive).
3325
+ Instead, point operators to the most recent log file and surface
3326
+ common actionable errors (like missing API keys).
3327
+ """
3328
+
3329
+ try:
3330
+ data_home = os.environ.get(
3331
+ 'XDG_DATA_HOME', str(Path.home() / '.local' / 'share')
3332
+ )
3333
+ log_dir = (
3334
+ Path(os.path.expanduser(data_home)) / 'opencode' / 'log'
3335
+ )
3336
+ if not log_dir.exists() or not log_dir.is_dir():
3337
+ return None
3338
+
3339
+ logs = list(log_dir.glob('*.log'))
3340
+ if not logs:
3341
+ return None
3342
+
3343
+ latest = max(logs, key=lambda p: p.stat().st_mtime)
3344
+ age_s = time.time() - latest.stat().st_mtime
3345
+ if age_s > 300: # don't point at stale logs
3346
+ return None
3347
+
3348
+ try:
3349
+ tail_lines = latest.read_text(
3350
+ encoding='utf-8', errors='replace'
3351
+ ).splitlines()[-80:]
3352
+ except Exception:
3353
+ tail_lines = []
3354
+
3355
+ tail_text = '\n'.join(tail_lines)
3356
+ if (
3357
+ 'API key is missing' in tail_text
3358
+ or 'AI_LoadAPIKeyError' in tail_text
3359
+ ):
3360
+ return (
3361
+ 'OpenCode is missing LLM credentials (e.g. ANTHROPIC_API_KEY). '
3362
+ 'Set the required key(s) in /etc/a2a-worker/env and restart the worker. '
3363
+ f'OpenCode log: {latest}'
3364
+ )
3365
+
3366
+ return f'OpenCode exited with code {returncode}. See OpenCode log: {latest}'
3367
+ except Exception:
3368
+ return None
3369
+
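A minimal sketch of the same strategy outside the worker, assuming the default XDG layout: find the newest log, skip it if stale, and scan only the tail for a known actionable marker rather than dumping the whole file:

```python
# Tail-scan the most recent OpenCode log for a missing-credentials hint.
import os
import time
from pathlib import Path

data_home = os.environ.get('XDG_DATA_HOME', str(Path.home() / '.local' / 'share'))
log_dir = Path(os.path.expanduser(data_home)) / 'opencode' / 'log'
logs = list(log_dir.glob('*.log')) if log_dir.is_dir() else []
if logs:
    latest = max(logs, key=lambda p: p.stat().st_mtime)
    if time.time() - latest.stat().st_mtime <= 300:  # ignore stale logs
        tail = latest.read_text(errors='replace').splitlines()[-80:]
        if any('API key is missing' in line for line in tail):
            print(f'Missing LLM credentials; see {latest}')
```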
3370
+ # Check if opencode exists
3371
+ if not Path(self.opencode_bin).exists():
3372
+ return {
3373
+ 'success': False,
3374
+ 'error': f'OpenCode not found at {self.opencode_bin}',
3375
+ }
3376
+
3377
+ # Build command using 'opencode run' with proper flags
3378
+ cmd = [
3379
+ self.opencode_bin,
3380
+ 'run',
3381
+ '--agent',
3382
+ agent_type,
3383
+ '--format',
3384
+ 'json',
3385
+ ]
3386
+
3387
+ # Add model if specified (format: provider/model)
3388
+ if model:
3389
+ cmd.extend(['--model', model])
3390
+
3391
+ # Add session resumption if specified
3392
+ if session_id:
3393
+ cmd.extend(['--session', session_id])
3394
+ logger.info(f'Resuming session: {session_id}')
3395
+
3396
+ # Add '--' separator and then the prompt as positional message argument
3397
+ # This ensures the prompt isn't interpreted as a flag
3398
+ if prompt:
3399
+ cmd.append('--')
3400
+ cmd.append(prompt)
3401
+
3402
+ log_model = f' --model {model}' if model else ''
3403
+ log_session = f' --session {session_id}' if session_id else ''
3404
+ logger.info(
3405
+ f'Running: {self.opencode_bin} run --agent {agent_type}{log_model}{log_session} ...'
3406
+ )
3407
+
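For illustration, the argv assembled above for a hypothetical task (the model and session values are invented examples) — note how the `--` separator keeps even a flag-like prompt positional:

```python
# Sketch of the final command line; shlex.join shows the shell-quoted form.
import shlex

cmd = [
    'opencode', 'run',
    '--agent', 'build',
    '--format', 'json',
    '--model', 'anthropic/claude-sonnet-4',  # provider/model (example value)
    '--session', 'ses_abc123',               # optional resumption (example value)
    '--', '--help me refactor this module',  # prompt stays a positional message
]
print(shlex.join(cmd))
```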
3408
+ try:
3409
+ start_epoch_s = time.time()
3410
+ known_sessions_before: set[str] = set()
3411
+ if not session_id:
3412
+ try:
3413
+ known_sessions_before = {
3414
+ str(s.get('id'))
3415
+ for s in self.session_sync.get_sessions_for_codebase(
3416
+ codebase_path
3417
+ )
3418
+ if s.get('id')
3419
+ }
3420
+ except Exception as e:
3421
+ logger.debug(
3422
+ f'Failed to get existing sessions before task start: {e}'
3423
+ )
3424
+ known_sessions_before = set()
3425
+
3426
+ active_session_id: Optional[str] = session_id
3427
+
3428
+ # Run the process using async subprocess to avoid blocking the event loop
3429
+ # Use a large buffer limit (16MB) to handle OpenCode's potentially very long
3430
+ # JSON output lines (e.g., file contents, large tool results). The default
3431
+ # 64KB limit causes "Separator is found, but chunk is longer than limit" errors.
3432
+ subprocess_limit = 16 * 1024 * 1024 # 16MB
3433
+ process = await asyncio.create_subprocess_exec(
3434
+ *cmd,
3435
+ cwd=codebase_path,
3436
+ stdin=asyncio.subprocess.DEVNULL,
3437
+ stdout=asyncio.subprocess.PIPE,
3438
+ stderr=asyncio.subprocess.PIPE,
3439
+ env={**os.environ, 'NO_COLOR': '1'},
3440
+ limit=subprocess_limit,
3441
+ )
3442
+
3443
+ if task_id:
3444
+ self.active_processes[task_id] = process
3445
+
3446
+ # Eagerly sync the *active* session messages while the task runs.
3447
+ eager_sync_interval = 1.0
3448
+ try:
3449
+ eager_sync_interval = float(
3450
+ os.environ.get(
3451
+ 'A2A_ACTIVE_SESSION_SYNC_INTERVAL', eager_sync_interval
3452
+ )
3453
+ )
3454
+ except Exception:
3455
+ eager_sync_interval = 1.0
3456
+
3457
+ max_messages = (
3458
+ getattr(self.config, 'session_message_sync_max_messages', 0)
3459
+ or 0
3460
+ )
3461
+ if max_messages <= 0:
3462
+ max_messages = 100
3463
+
3464
+ async def _eager_sync_loop():
3465
+ nonlocal active_session_id
3466
+ last_fp: Optional[str] = None
3467
+ session_attached = False
3468
+
3469
+ while process.returncode is None:
3470
+ # Discover session id if needed.
3471
+ if not active_session_id:
3472
+ active_session_id = await _infer_active_session_id(
3473
+ known_before=known_sessions_before,
3474
+ start_epoch_s=start_epoch_s,
3475
+ )
3476
+
3477
+ if active_session_id and task_id and not session_attached:
3478
+ # Attach the session id to the running task so UIs can deep-link.
3479
+ await self.client.update_task_status(
3480
+ task_id,
3481
+ TaskStatus.RUNNING,
3482
+ session_id=active_session_id,
3483
+ )
3484
+ session_attached = True
3485
+
3486
+ if active_session_id:
3487
+ # Sync whenever the message fingerprint changes (any message update).
3488
+ try:
3489
+ current_messages = (
3490
+ self.session_sync.get_session_messages(
3491
+ str(active_session_id),
3492
+ max_messages=max_messages,
3493
+ )
3494
+ )
3495
+ fp = _messages_fingerprint(current_messages)
3496
+ if fp and fp != last_fp:
3497
+ ok = await _sync_session_messages_once(
3498
+ target_session_id=str(active_session_id),
3499
+ messages=current_messages,
3500
+ )
3501
+ if ok:
3502
+ last_fp = fp
3503
+ logger.debug(
3504
+ f'Synced messages for session {active_session_id} (fingerprint changed)'
3505
+ )
3506
+ except Exception as e:
3507
+ logger.debug(f'Eager sync loop read failed: {e}')
3508
+
3509
+ await asyncio.sleep(max(0.2, eager_sync_interval))
3510
+
3511
+ # Final flush after process ends.
3512
+ if active_session_id:
3513
+ try:
3514
+ final_messages = self.session_sync.get_session_messages(
3515
+ str(active_session_id),
3516
+ max_messages=max_messages,
3517
+ )
3518
+ await _sync_session_messages_once(
3519
+ target_session_id=str(active_session_id),
3520
+ messages=final_messages,
3521
+ )
3522
+ except Exception as e:
3523
+ logger.debug(
3524
+ f'Final message flush failed for session {active_session_id}: {e}'
3525
+ )
3526
+
3527
+ eager_task: Optional[asyncio.Task] = None
3528
+ if task_id:
3529
+ eager_task = asyncio.create_task(_eager_sync_loop())
3530
+
3531
+ # Stream output in real-time using async iteration
3532
+ output_lines: List[str] = []
3533
+ stderr_lines: List[str] = []
3534
+
3535
+ async def _read_stdout():
3536
+ """Read stdout lines asynchronously.
3537
+
3538
+ Uses readline() with explicit error handling for very long lines.
3539
+ OpenCode can produce JSON lines >64KB when including file contents.
3540
+ """
3541
+ nonlocal active_session_id
3542
+ if process.stdout is None:
3543
+ return
3544
+
3545
+ while True:
3546
+ try:
3547
+ # Read line with the increased buffer limit
3548
+ line_bytes = await process.stdout.readline()
3549
+ if not line_bytes:
3550
+ break # EOF
3551
+
3552
+ line = line_bytes.decode('utf-8', errors='replace')
3553
+ output_lines.append(line)
3554
+
3555
+ # Try to detect session id from OpenCode JSON output.
3556
+ if not active_session_id:
3557
+ try:
3558
+ obj = json.loads(line)
3559
+ active_session_id = (
3560
+ _extract_session_id(obj)
3561
+ or active_session_id
3562
+ )
3563
+ except json.JSONDecodeError:
3564
+ pass # Not JSON, skip session extraction
3565
+
3566
+ # Stream output to server (truncate very long lines to prevent issues)
3567
+ if task_id:
3568
+ # Truncate output for streaming to prevent overwhelming the server
3569
+ stream_line = line.strip()
3570
+ if len(stream_line) > 10000:
3571
+ stream_line = (
3572
+ stream_line[:10000] + '... [truncated]'
3573
+ )
3574
+ await self.client.stream_task_output(
3575
+ task_id, stream_line
3576
+ )
3577
+
3578
+ except ValueError as e:
3579
+ # Handle "Separator is found, but chunk is longer than limit"
3580
+ # by reading raw bytes and chunking
3581
+ logger.warning(
3582
+ f'Line too long for readline, reading raw: {e}'
3583
+ )
3584
+ try:
3585
+ raw_chunk = await process.stdout.read(
3586
+ 1024 * 1024
3587
+ ) # 1MB chunk
3588
+ if raw_chunk:
3589
+ line = raw_chunk.decode(
3590
+ 'utf-8', errors='replace'
3591
+ )
3592
+ output_lines.append(line)
3593
+ else:
3594
+ break # EOF
3595
+ except Exception as chunk_err:
3596
+ logger.error(
3597
+ f'Failed to read raw chunk: {chunk_err}'
3598
+ )
3599
+ break
3600
+
3601
+ async def _read_stderr():
3602
+ """Read stderr lines asynchronously with error handling for long lines."""
3603
+ if process.stderr is None:
3604
+ return
3605
+
3606
+ while True:
3607
+ try:
3608
+ line_bytes = await process.stderr.readline()
3609
+ if not line_bytes:
3610
+ break # EOF
3611
+
3612
+ line = line_bytes.decode('utf-8', errors='replace')
3613
+ stderr_lines.append(line)
3614
+ if task_id:
3615
+ # Truncate very long stderr lines
3616
+ stream_line = line.strip()
3617
+ if len(stream_line) > 10000:
3618
+ stream_line = (
3619
+ stream_line[:10000] + '... [truncated]'
3620
+ )
3621
+ await self.client.stream_task_output(
3622
+ task_id, f'[stderr] {stream_line}'
3623
+ )
3624
+ except ValueError:
3625
+ # Handle very long lines by reading raw
3626
+ try:
3627
+ raw_chunk = await process.stderr.read(1024 * 1024)
3628
+ if raw_chunk:
3629
+ stderr_lines.append(
3630
+ raw_chunk.decode('utf-8', errors='replace')
3631
+ )
3632
+ else:
3633
+ break
3634
+ except Exception:
3635
+ break
3636
+
3637
+ try:
3638
+ # Read stdout and stderr concurrently
3639
+ await asyncio.gather(_read_stdout(), _read_stderr())
3640
+
3641
+ # Wait for process to complete
3642
+ await process.wait()
3643
+
3644
+ stdout = ''.join(output_lines)
3645
+ stderr = ''.join(stderr_lines)
3646
+
3647
+ except asyncio.CancelledError:
3648
+ process.kill()
3649
+ await process.wait()
3650
+ stdout = ''.join(output_lines)
3651
+ stderr = ''.join(stderr_lines)
3652
+ return {
3653
+ 'success': False,
3654
+ 'error': 'Task was cancelled',
3655
+ }
3656
+ finally:
3657
+ if task_id and task_id in self.active_processes:
3658
+ del self.active_processes[task_id]
3659
+ if task_id and eager_task is not None:
3660
+ try:
3661
+ eager_task.cancel()
3662
+ await eager_task
3663
+ except asyncio.CancelledError:
3664
+ pass # Expected when cancelling
3665
+ except Exception as e:
3666
+ logger.debug(
3667
+ f'Error awaiting cancelled eager sync task: {e}'
3668
+ )
3669
+
3670
+ returncode = process.returncode or 0
3671
+ if returncode == 0:
3672
+ return {'success': True, 'output': stdout}
3673
+ else:
3674
+ hint = _recent_opencode_log_hint(returncode)
3675
+ err = (stderr or '').strip()
3676
+ return {
3677
+ 'success': False,
3678
+ 'error': err or hint or f'Exit code: {returncode}',
3679
+ }
3680
+
3681
+ except Exception as e:
3682
+ return {'success': False, 'error': str(e)}
3683
+
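A self-contained repro of the buffer-limit handling used by `_read_stdout` and `_read_stderr` above: with a small `limit`, `StreamReader.readline()` raises `ValueError` on an over-long line, and the reader falls back to draining raw chunks with `read()`. This is a sketch, not the worker's code path:

```python
# Demonstrate readline() overrunning the StreamReader limit and recovering.
import asyncio
import sys

async def demo() -> None:
    proc = await asyncio.create_subprocess_exec(
        sys.executable, '-c', "print('x' * 200_000)",
        stdout=asyncio.subprocess.PIPE,
        limit=64 * 1024,  # deliberately small to trigger the overrun
    )
    assert proc.stdout is not None
    chunks = []
    while True:
        try:
            line = await proc.stdout.readline()
            if not line:
                break  # EOF
            chunks.append(line)
        except ValueError:
            # Raised when a line exceeds the limit; drain raw bytes instead.
            chunks.append(await proc.stdout.read(1024 * 1024))
    await proc.wait()
    print(sum(map(len, chunks)), 'bytes recovered')

asyncio.run(demo())
```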
3684
+
3685
+ # =============================================================================
3686
+ # AgentWorker - Thin orchestrator composing all services
3687
+ # =============================================================================
3688
+
3689
+
3690
+ class AgentWorker:
3691
+ """
3692
+ Agent worker that connects to A2A server and executes tasks locally.
3693
+
3694
+ Uses SSE (Server-Sent Events) for real-time task streaming instead of polling.
3695
+ This class acts as a thin orchestrator that composes the following services:
3696
+ - WorkerClient: HTTP/SSE communication
3697
+ - ConfigManager: Configuration and setup
3698
+ - SessionSyncService: Session management and syncing
3699
+ - TaskExecutor: Task execution logic
3700
+ """
3701
+
3702
+ def __init__(self, config: WorkerConfig):
3703
+ self.config = config
3704
+ self.codebases: Dict[str, LocalCodebase] = {}
3705
+ self.running = False
3706
+ self._global_codebase_id: Optional[str] = (
3707
+ None # Cached ID for global sessions codebase
3708
+ )
3709
+ # Track tasks we've seen to avoid duplicates (LRU cache with max size)
3710
+ self._known_task_ids: OrderedDict[str, None] = OrderedDict()
3711
+ self._known_task_ids_max_size: int = 10000
3712
+
3713
+ # Initialize services
3714
+ self.client = WorkerClient(config)
3715
+ self.config_manager = ConfigManager(config)
3716
+ self.opencode_bin = (
3717
+ config.opencode_bin or self.config_manager.find_opencode_binary()
3718
+ )
3719
+ self.session_sync = SessionSyncService(
3720
+ config, self.config_manager, self.client
3721
+ )
3722
+ # Initialize email service if configured
3723
+ self.email_service = EmailNotificationService(config)
3724
+ if self.email_service.is_configured():
3725
+ logger.info(
3726
+ f'Email notifications enabled: {config.notification_email}'
3727
+ )
3728
+ else:
3729
+ logger.info('Email notifications not configured')
3730
+
3731
+ self.task_executor = TaskExecutor(
3732
+ config,
3733
+ self.client,
3734
+ self.config_manager,
3735
+ self.session_sync,
3736
+ self.opencode_bin,
3737
+ self.email_service if self.email_service.is_configured() else None,
3738
+ )
3739
+
3740
+ # -------------------------------------------------------------------------
3741
+ # Delegated methods for backward compatibility
3742
+ # -------------------------------------------------------------------------
3743
+
3744
+ async def _get_session(self) -> aiohttp.ClientSession:
3745
+ """Get or create HTTP session with connection pooling."""
3746
+ return await self.client.get_session()
3747
+
3748
+ def _find_opencode_binary(self) -> str:
3749
+ """Find the opencode binary."""
3750
+ return self.config_manager.find_opencode_binary()
3751
+
3752
+ def _get_authenticated_providers(self) -> set:
3753
+ """Get set of provider IDs that have authentication configured."""
3754
+ return self.config_manager.get_authenticated_providers()
3755
+
3756
+ async def _get_available_models(self) -> List[Dict[str, Any]]:
3757
+ """Fetch available models from local OpenCode instance."""
3758
+ return await self.config_manager.get_available_models(self.opencode_bin)
3759
+
3760
+ def _get_opencode_storage_path(self) -> Path:
3761
+ """Get the OpenCode global storage path."""
3762
+ return self.config_manager.get_opencode_storage_path()
3763
+
3764
+ def _get_project_id_for_path(self, codebase_path: str) -> Optional[str]:
3765
+ """Get the OpenCode project ID (hash) for a given codebase path."""
3766
+ return self.session_sync._get_project_id_for_path(codebase_path)
3767
+
3768
+ def get_sessions_for_codebase(
3769
+ self, codebase_path: str
3770
+ ) -> List[Dict[str, Any]]:
3771
+ """Get all OpenCode sessions for a codebase."""
3772
+ return self.session_sync.get_sessions_for_codebase(codebase_path)
3773
+
3774
+ def get_global_sessions(self) -> List[Dict[str, Any]]:
3775
+ """Get all global OpenCode sessions (not associated with a specific project)."""
3776
+ return self.session_sync.get_global_sessions()
3777
+
3778
+ def get_session_messages(
3779
+ self, session_id: str, max_messages: Optional[int] = None
3780
+ ) -> List[Dict[str, Any]]:
3781
+ """Get messages (including parts) for a specific session from OpenCode storage."""
3782
+ return self.session_sync.get_session_messages(session_id, max_messages)
3783
+
3784
+ async def sync_api_keys_from_server(
3785
+ self, user_id: Optional[str] = None
3786
+ ) -> bool:
3787
+ """Sync API keys from the server to local OpenCode auth.json."""
3788
+ return await self.client.sync_api_keys_from_server(user_id)
3789
+
3790
+ async def stream_task_output(self, task_id: str, output: str):
3791
+ """Stream output chunk to the server."""
3792
+ await self.client.stream_task_output(task_id, output)
3793
+
3794
+ async def update_task_status(
3795
+ self,
3796
+ task_id: str,
3797
+ status: str,
3798
+ result: Optional[str] = None,
3799
+ error: Optional[str] = None,
3800
+ session_id: Optional[str] = None,
3801
+ max_retries: int = 4,
3802
+ base_delay: float = 1.0,
3803
+ ):
3804
+ """Update task status on the server with exponential backoff retry."""
3805
+ await self.client.update_task_status(
3806
+ task_id, status, result, error, session_id, max_retries, base_delay
3807
+ )
3808
+
3809
+ async def _claim_task(self, task_id: str) -> bool:
3810
+ """Atomically claim a task on the server."""
3811
+ return await self.client.claim_task(task_id)
3812
+
3813
+ async def _release_task(self, task_id: str) -> bool:
3814
+ """Release a task claim on the server after processing."""
3815
+ return await self.client.release_task(task_id)
3816
+
3817
+ async def send_heartbeat(self) -> bool:
3818
+ """Send heartbeat to the A2A server to indicate worker is alive."""
3819
+ return await self.client.send_heartbeat()
3820
+
3821
+ async def run_opencode(
3822
+ self,
3823
+ codebase_id: str,
3824
+ codebase_path: str,
3825
+ prompt: str,
3826
+ agent_type: str = 'build',
3827
+ task_id: Optional[str] = None,
3828
+ model: Optional[str] = None,
3829
+ session_id: Optional[str] = None,
3830
+ ) -> Dict[str, Any]:
3831
+ """Run OpenCode agent on a codebase."""
3832
+ return await self.task_executor.run_opencode(
3833
+ codebase_id,
3834
+ codebase_path,
3835
+ prompt,
3836
+ agent_type,
3837
+ task_id,
3838
+ model,
3839
+ session_id,
3840
+ )
3841
+
3842
+ async def execute_task(self, task: Dict[str, Any]):
3843
+ """Execute a task using OpenCode or handle special task types."""
3844
+ await self.task_executor.execute_task(
3845
+ task,
3846
+ self.codebases,
3847
+ self._global_codebase_id,
3848
+ self.register_codebase,
3849
+ )
3850
+
3851
+ async def handle_register_codebase_task(self, task: Dict[str, Any]):
3852
+ """Handle a codebase registration task from the server."""
3853
+ await self.task_executor.handle_register_codebase_task(
3854
+ task, self.register_codebase
3855
+ )
3856
+
3857
+ async def _process_task_with_semaphore(self, task: Dict[str, Any]):
3858
+ """Process a task with bounded concurrency using semaphore."""
3859
+ await self.task_executor.process_task_with_semaphore(
3860
+ task,
3861
+ self.codebases,
3862
+ self._global_codebase_id,
3863
+ self.register_codebase,
3864
+ )
3865
+
3866
+ async def report_sessions_to_server(self):
3867
+ """Report all sessions for registered codebases to the server."""
3868
+ result = await self.session_sync.report_sessions_to_server(
3869
+ self.codebases, self._global_codebase_id, self.register_codebase
3870
+ )
3871
+ # Update global_codebase_id if it was re-registered
3872
+ if result is not None:
3873
+ self._global_codebase_id = result
3874
+
3875
+ async def _report_global_sessions_to_server(self):
3876
+ """Report global sessions to the server under a 'global' pseudo-codebase."""
3877
+ result = await self.session_sync._report_global_sessions_to_server(
3878
+ self._global_codebase_id, self.register_codebase
3879
+ )
3880
+ if result is not None:
3881
+ self._global_codebase_id = result
3882
+
3883
+ async def _report_recent_session_messages_to_server(
3884
+ self,
3885
+ codebase_id: str,
3886
+ sessions: List[Dict[str, Any]],
3887
+ max_messages: int,
3888
+ ):
3889
+ """Best-effort sync for the most recent sessions' messages."""
3890
+ await self.session_sync._report_recent_session_messages_to_server(
3891
+ codebase_id, sessions, max_messages
3892
+ )
3893
+
3894
+ # -------------------------------------------------------------------------
3895
+ # Core orchestration methods
3896
+ # -------------------------------------------------------------------------
3897
+
3898
+ async def start(self):
3899
+ """Start the worker."""
3900
+ logger.info(
3901
+ f"Starting worker '{self.config.worker_name}' (ID: {self.config.worker_id})"
3902
+ )
3903
+ logger.info(f'Connecting to server: {self.config.server_url}')
3904
+
3905
+ # Surface OpenCode credential discovery issues early (common when running under systemd).
3906
+ try:
3907
+ data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
3908
+ '~/.local/share'
3909
+ )
3910
+ auth_path = (
3911
+ Path(os.path.expanduser(data_home)) / 'opencode' / 'auth.json'
3912
+ )
3913
+ if auth_path.exists():
3914
+ logger.info(f'OpenCode auth detected at: {auth_path}')
3915
+ else:
3916
+ logger.warning(
3917
+ 'OpenCode auth.json not found for this worker. '
3918
+ f'Expected at: {auth_path}. '
3919
+ "OpenCode agents may fail with 'missing API key' unless you authenticate as this service user "
3920
+ "or import/copy auth.json into the worker's XDG data directory."
3921
+ )
3922
+ except Exception as e:
3923
+ logger.debug(f'Failed to check OpenCode auth.json presence: {e}')
3924
+
3925
+ self.running = True
3926
+
3927
+ # Initialize task semaphore for bounded concurrency
3928
+ self.task_executor.init_semaphore()
3929
+
3930
+ # Register global pseudo-codebase first so we can include its ID in worker registration
3931
+ logger.info('Registering global pseudo-codebase...')
3932
+ self._global_codebase_id = await self.register_codebase(
3933
+ name=SpecialCodebaseId.GLOBAL,
3934
+ path=str(Path.home()),
3935
+ description='Global OpenCode sessions (not project-specific)',
3936
+ )
3937
+
3938
+ # Register worker with server
3939
+ await self.register_worker()
3940
+
3941
+ # Register configured codebases
3942
+ for cb_config in self.config.codebases:
3943
+ await self.register_codebase(
3944
+ name=cb_config.get('name', Path(cb_config['path']).name),
3945
+ path=cb_config['path'],
3946
+ description=cb_config.get('description', ''),
3947
+ )
3948
+
3949
+ # Register as a discoverable agent (enables agent-to-agent communication)
3950
+ # This is done AFTER codebase registration so the worker is "ready"
3951
+ # Registration is best-effort and non-blocking
3952
+ if self.config.register_as_agent:
3953
+ logger.info('Registering as discoverable agent...')
3954
+ await self.client.register_as_agent(
3955
+ agent_name=self.config.agent_name,
3956
+ description=self.config.agent_description,
3957
+ url=self.config.agent_url,
3958
+ routing_capabilities=self.config.capabilities,
3959
+ )
3960
+
3961
+ # Sync API keys from server (allows web UI key management)
3962
+ logger.info('Syncing API keys from server...')
3963
+ await self.sync_api_keys_from_server()
3964
+
3965
+ # Immediately sync sessions on startup
3966
+ logger.info('Syncing sessions with server...')
3967
+ await self.report_sessions_to_server()
3968
+
3969
+ # Start SSE task stream with fallback to polling
3970
+ await self._run_with_sse_and_fallback()
3971
+
3972
+ async def stop(self):
3973
+ """Stop the worker gracefully."""
3974
+ logger.info('Stopping worker...')
3975
+ self.running = False
3976
+
3977
+ # Kill any active processes
3978
+ await self.task_executor.terminate_all_processes()
3979
+
3980
+ # Unregister from server (best effort)
3981
+ try:
3982
+ await self.unregister_worker()
3983
+ except Exception as e:
3984
+ logger.debug(f'Failed to unregister worker during shutdown: {e}')
3985
+
3986
+ # Close sessions properly
3987
+ await self.client.close()
3988
+ await self.email_service.close()
3989
+
3990
+ logger.info('Worker stopped')
3991
+
3992
+ async def register_worker(self):
3993
+ """Register this worker with the A2A server."""
3994
+ # Ensure global codebase is registered
3995
+ if not self._global_codebase_id:
3996
+ logger.info(
3997
+ 'Global codebase not registered, attempting registration...'
3998
+ )
3999
+ self._global_codebase_id = await self.register_codebase(
4000
+ name=SpecialCodebaseId.GLOBAL,
4001
+ path=str(Path.home()),
4002
+ description='Global OpenCode sessions (not project-specific)',
4003
+ )
4004
+
4005
+ # Get available models before registering
4006
+ models = await self._get_available_models()
4007
+ logger.info(f'Models to register: {len(models)}')
4008
+
4009
+ await self.client.register_worker(models, self._global_codebase_id)
4010
+
4011
+ async def unregister_worker(self):
4012
+ """Unregister this worker from the A2A server."""
4013
+ await self.client.unregister_worker()
4014
+
4015
+ async def register_codebase(
4016
+ self, name: str, path: str, description: str = ''
4017
+ ) -> Optional[str]:
4018
+ """Register a local codebase with the A2A server."""
4019
+ # Normalize for comparisons / de-duping when re-registering.
4020
+ normalized_path = os.path.abspath(os.path.expanduser(path))
4021
+
4022
+ codebase_id = await self.client.register_codebase(
4023
+ name, path, description
4024
+ )
4025
+
4026
+ if codebase_id:
4027
+ # If we're re-registering after a server restart, the
4028
+ # server may assign a new codebase ID for the same path.
4029
+ # Remove any stale local entries for this path.
4030
+ stale_ids = [
4031
+ cid
4032
+ for cid, cb in self.codebases.items()
4033
+ if os.path.abspath(os.path.expanduser(cb.path))
4034
+ == normalized_path
4035
+ and cid != codebase_id
4036
+ ]
4037
+ for cid in stale_ids:
4038
+ self.codebases.pop(cid, None)
4039
+
4040
+ self.codebases[codebase_id] = LocalCodebase(
4041
+ id=codebase_id,
4042
+ name=name,
4043
+ path=normalized_path,
4044
+ description=description,
4045
+ )
4046
+
4047
+ return codebase_id
4048
+
4049
+ async def get_pending_tasks(self) -> List[Dict[str, Any]]:
4050
+ """Get pending tasks from the server (fallback polling method)."""
4051
+ codebase_ids = list(self.codebases.keys())
4052
+ tasks = await self.client.get_pending_tasks(codebase_ids)
4053
+ # Filter to:
4054
+ # 1. Tasks for our registered codebases
4055
+ # 2. Registration tasks (codebase_id = '__pending__') that any worker can claim
4056
+ # 3. Global tasks (codebase_id = 'global') for workers with global codebase
4057
+ matching = [
4058
+ t
4059
+ for t in tasks
4060
+ if t.get('codebase_id') in self.codebases
4061
+ or t.get('codebase_id') == SpecialCodebaseId.PENDING
4062
+ or (
4063
+ t.get('codebase_id') == SpecialCodebaseId.GLOBAL
4064
+ and self._global_codebase_id is not None
4065
+ )
4066
+ ]
4067
+ if matching:
4068
+ logger.info(
4069
+ f'Found {len(matching)} pending tasks for our codebases'
4070
+ )
4071
+ return matching
4072
+
4073
+ async def _run_with_sse_and_fallback(self):
4074
+ """Run the main loop with SSE streaming, falling back to polling if needed."""
4075
+ session_sync_counter = 0
4076
+ session_sync_interval = 12 # Sync sessions every 12 fallback cycles (~60s at the default 5s delay)

4077
+
4078
+ while self.running:
4079
+ try:
4080
+ # Try SSE streaming first
4081
+ logger.info('Attempting SSE connection for task streaming...')
4082
+ await self._sse_task_stream()
4083
+ except asyncio.CancelledError:
4084
+ break
4085
+ except Exception as e:
4086
+ logger.warning(f'SSE connection failed: {e}')
4087
+ self.client.sse_connected = False
4088
+
4089
+ if not self.running:
4090
+ break
4091
+
4092
+ # SSE failed or disconnected - fall back to polling temporarily
4093
+ logger.info(
4094
+ f'Falling back to polling (reconnect in {self.client.sse_reconnect_delay}s)...'
4095
+ )
4096
+
4097
+ # Do one poll cycle while waiting to reconnect
4098
+ try:
4099
+ tasks = await self.get_pending_tasks()
4100
+ for task in tasks:
4101
+ if not self.running:
4102
+ break
4103
+ codebase_id = task.get('codebase_id')
4104
+ if (
4105
+ codebase_id in self.codebases
4106
+ or codebase_id == SpecialCodebaseId.PENDING
4107
+ or (
4108
+ codebase_id == SpecialCodebaseId.GLOBAL
4109
+ and self._global_codebase_id is not None
4110
+ )
4111
+ ):
4112
+ # Process task with bounded concurrency
4113
+ asyncio.create_task(
4114
+ self._process_task_with_semaphore(task)
4115
+ )
4116
+
4117
+ # Periodic maintenance
4118
+ session_sync_counter += 1
4119
+ if session_sync_counter >= session_sync_interval:
4120
+ session_sync_counter = 0
4121
+ await self.register_worker()
4122
+ for cb_config in self.config.codebases:
4123
+ await self.register_codebase(
4124
+ name=cb_config.get(
4125
+ 'name', Path(cb_config['path']).name
4126
+ ),
4127
+ path=cb_config['path'],
4128
+ description=cb_config.get('description', ''),
4129
+ )
4130
+ await self.report_sessions_to_server()
4131
+
4132
+ except Exception as e:
4133
+ logger.error(f'Error in fallback poll: {e}')
4134
+
4135
+ # Wait before trying SSE again (with exponential backoff)
4136
+ await asyncio.sleep(self.client.sse_reconnect_delay)
4137
+ self.client.sse_reconnect_delay = min(
4138
+ self.client.sse_reconnect_delay * 2,
4139
+ self.config.sse_max_reconnect_delay,
4140
+ )
4141
+
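The reconnect schedule in isolation (the base and cap below are example values standing in for `sse_reconnect_delay` and `sse_max_reconnect_delay`):

```python
# Capped exponential backoff: double the wait after each failure.
base_delay, max_delay = 5.0, 300.0  # example base and cap

delay = base_delay
for attempt in range(1, 8):
    print(f'attempt {attempt}: wait {delay:.0f}s before retrying SSE')
    delay = min(delay * 2, max_delay)  # double, but never beyond the cap
# On a successful connect, the worker resets delay back to base_delay.
```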
4142
+ async def _sse_task_stream(self):
4143
+ """Connect to SSE endpoint and receive task assignments in real-time."""
4144
+ session = await self._get_session()
4145
+
4146
+ # Build SSE URL with worker_id and agent_name
4147
+ # Use agent_name if set, otherwise fall back to worker_name
4148
+ # This ensures SSE routing identity matches discovery identity
4149
+ sse_url = f'{self.config.server_url}/v1/worker/tasks/stream'
4150
+ resolved_agent_name = self.config.agent_name or self.config.worker_name
4151
+ params = {
4152
+ 'worker_id': self.config.worker_id,
4153
+ 'agent_name': resolved_agent_name, # Required by SSE endpoint
4154
+ }
4155
+
4156
+ logger.info(f'Connecting to SSE stream: {sse_url}')
4157
+
4158
+ # Use a longer timeout for SSE connections
4159
+ sse_timeout = aiohttp.ClientTimeout(
4160
+ total=None, # No total timeout
4161
+ connect=30,
4162
+ sock_read=self.config.sse_heartbeat_timeout
4163
+ + 15, # Allow some slack
4164
+ )
4165
+
4166
+ # Build headers including auth token if available
4167
+ sse_headers = {'Accept': 'text/event-stream'}
4168
+ if self.config.auth_token:
4169
+ sse_headers['Authorization'] = f'Bearer {self.config.auth_token}'
4170
+
4171
+ # Add codebase IDs as header for SSE routing
4172
+ # Always include 'global' so worker accepts tasks for any codebase
4173
+ codebase_ids = list(self.codebases.keys())
4174
+ codebase_ids.append('global')
4175
+ sse_headers['X-Codebases'] = ','.join(codebase_ids)
4176
+
4177
+ # Add capabilities header
4178
+ sse_headers['X-Capabilities'] = 'opencode,build,deploy,test'
4179
+
4180
+ async with session.get(
4181
+ sse_url,
4182
+ params=params,
4183
+ timeout=sse_timeout,
4184
+ headers=sse_headers,
4185
+ ) as response:
4186
+ if response.status != 200:
4187
+ text = await response.text()
4188
+ raise Exception(
4189
+ f'SSE connection failed: {response.status} - {text}'
4190
+ )
4191
+
4192
+ self.client.sse_connected = True
4193
+ self.client.sse_reconnect_delay = (
4194
+ self.config.sse_reconnect_delay
4195
+ ) # Reset backoff
4196
+ self.client.last_heartbeat = time.time()
4197
+ logger.info('SSE connection established')
4198
+
4199
+ # Start background tasks
4200
+ heartbeat_checker = asyncio.create_task(
4201
+ self._check_heartbeat_timeout()
4202
+ )
4203
+ periodic_maintenance = asyncio.create_task(
4204
+ self._periodic_maintenance()
4205
+ )
4206
+
4207
+ try:
4208
+ event_type = None
4209
+ event_data_lines = []
4210
+
4211
+ async for line in response.content:
4212
+ if not self.running:
4213
+ break
4214
+
4215
+ line = line.decode('utf-8', errors='replace').rstrip('\r\n')
4216
+
4217
+ if line.startswith('event:'):
4218
+ event_type = line[6:].strip()
4219
+ elif line.startswith('data:'):
4220
+ event_data_lines.append(line[5:].strip())
4221
+ elif line == '':
4222
+ # Empty line signals end of event
4223
+ if event_data_lines:
4224
+ event_data = '\n'.join(event_data_lines)
4225
+ await self._handle_sse_event(event_type, event_data)
4226
+ event_data_lines = []
4227
+ event_type = None
4228
+ # Handle comment lines (heartbeats often sent as : comment)
4229
+ elif line.startswith(':'):
4230
+ self.client.last_heartbeat = time.time()
4231
+ logger.debug('Received SSE heartbeat (comment)')
4232
+
4233
+ finally:
4234
+ heartbeat_checker.cancel()
4235
+ periodic_maintenance.cancel()
4236
+ try:
4237
+ await heartbeat_checker
4238
+ except asyncio.CancelledError:
4239
+ pass
4240
+ try:
4241
+ await periodic_maintenance
4242
+ except asyncio.CancelledError:
4243
+ pass
4244
+
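The wire format this loop parses, distilled into a pure function for illustration: `event:` and `data:` lines accumulate, an empty line dispatches the buffered event, and `:` comment lines only refresh the heartbeat clock:

```python
# Minimal SSE line parser mirroring the loop above.
from typing import List, Optional, Tuple

def parse_sse(lines: List[str]) -> List[Tuple[Optional[str], str]]:
    events = []
    event_type, data_lines = None, []
    for line in lines:
        if line.startswith('event:'):
            event_type = line[6:].strip()
        elif line.startswith('data:'):
            data_lines.append(line[5:].strip())
        elif line == '':
            # Blank line: dispatch the accumulated event, then reset.
            if data_lines:
                events.append((event_type, '\n'.join(data_lines)))
            event_type, data_lines = None, []
        # Lines starting with ':' are keep-alive comments; ignored here.
    return events

stream = ['event: task_assigned', 'data: {"id": "t1"}', '', ': keepalive']
assert parse_sse(stream) == [('task_assigned', '{"id": "t1"}')]
```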
4245
+ async def _handle_sse_event(self, event_type: Optional[str], data: str):
4246
+ """Handle an SSE event from the server."""
4247
+ self.client.last_heartbeat = time.time()
4248
+
4249
+ # Handle heartbeat events
4250
+ if event_type == 'heartbeat' or event_type == 'ping':
4251
+ logger.debug('Received SSE heartbeat event')
4252
+ return
4253
+
4254
+ # Handle task events
4255
+ if event_type in (
4256
+ 'task',
4257
+ 'task_available',
4258
+ 'task_assigned',
4259
+ 'task.created',
4260
+ 'task.assigned',
4261
+ ):
4262
+ try:
4263
+ task = json.loads(data)
4264
+ task_id = task.get('id') or task.get('task_id')
4265
+
4266
+ # Skip if we've already seen this task (LRU deduplication)
4267
+ if task_id in self._known_task_ids:
4268
+ logger.debug(f'Skipping duplicate task: {task_id}')
4269
+ return
4270
+ # Add to LRU cache, evicting oldest if at capacity
4271
+ self._known_task_ids[task_id] = None
4272
+ if len(self._known_task_ids) > self._known_task_ids_max_size:
4273
+ self._known_task_ids.popitem(last=False)
4274
+
4275
+ # Skip if already processing
4276
+ if self.task_executor.is_task_active(task_id):
4277
+ logger.debug(f'Task already being processed: {task_id}')
4278
+ return
4279
+
4280
+ codebase_id = task.get('codebase_id')
4281
+ if (
4282
+ codebase_id in self.codebases
4283
+ or codebase_id == SpecialCodebaseId.PENDING
4284
+ or (
4285
+ codebase_id == SpecialCodebaseId.GLOBAL
4286
+ and self._global_codebase_id is not None
4287
+ )
4288
+ ):
4289
+ logger.info(
4290
+ f'Received task via SSE: {task_id} - {task.get("title", "Untitled")}'
4291
+ )
4292
+ # Process task with bounded concurrency (don't await)
4293
+ asyncio.create_task(self._process_task_with_semaphore(task))
4294
+ else:
4295
+ logger.debug(
4296
+ f'Task {task_id} not for our codebases, ignoring'
4297
+ )
4298
+
4299
+ except json.JSONDecodeError as e:
4300
+ logger.warning(f'Failed to parse task data: {e}')
4301
+ except Exception as e:
4302
+ logger.error(f'Error handling task event: {e}')
4303
+
4304
+ elif event_type == 'connected':
4305
+ logger.info(f'SSE connection confirmed: {data}')
4306
+
4307
+ elif event_type == 'error':
4308
+ logger.warning(f'SSE server error: {data}')
4309
+
4310
+ else:
4311
+ logger.debug(
4312
+ f'Unknown SSE event type: {event_type}, data: {data[:100]}...'
4313
+ )
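The dedup cache used above is an `OrderedDict` acting as a bounded LRU set: inserting a key records it, and once the cap is exceeded the oldest entry is evicted with `popitem(last=False)`. A tiny cap makes the eviction visible:

```python
# OrderedDict as a bounded seen-set, as in _known_task_ids above.
from collections import OrderedDict

seen: OrderedDict[str, None] = OrderedDict()
MAX = 3

for task_id in ['t1', 't2', 't3', 't4']:
    if task_id in seen:
        continue  # duplicate delivery: skip
    seen[task_id] = None
    if len(seen) > MAX:
        seen.popitem(last=False)  # evict the oldest ('t1')

assert list(seen) == ['t2', 't3', 't4']
```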
4314
+
4315
+ async def _check_heartbeat_timeout(self):
4316
+ """Check if we've received a heartbeat recently."""
4317
+ while self.running and self.client.sse_connected:
4318
+ await asyncio.sleep(10)
4319
+
4320
+ if not self.client.sse_connected:
4321
+ break
4322
+
4323
+ elapsed = time.time() - self.client.last_heartbeat
4324
+ if elapsed > self.config.sse_heartbeat_timeout:
4325
+ logger.warning(
4326
+ f'No SSE heartbeat for {elapsed:.1f}s (timeout: {self.config.sse_heartbeat_timeout}s)'
4327
+ )
4328
+ # Force reconnection by breaking the SSE loop
4329
+ self.client.sse_connected = False
4330
+ break
4331
+
4332
+ async def _periodic_maintenance(self):
4333
+ """Perform periodic maintenance tasks while SSE is connected."""
4334
+ sync_interval = 60 # seconds
4335
+ heartbeat_interval = 15 # seconds
4336
+ agent_heartbeat_interval = 45 # seconds (must be < 120s TTL)
4337
+ last_sync = time.time()
4338
+ last_heartbeat = time.time()
4339
+ last_agent_heartbeat = time.time()
4340
+
4341
+ while self.running and self.client.sse_connected:
4342
+ await asyncio.sleep(5)
4343
+
4344
+ now = time.time()
4345
+
4346
+ # Send heartbeat to server periodically (worker heartbeat)
4347
+ if now - last_heartbeat >= heartbeat_interval:
4348
+ last_heartbeat = now
4349
+ await self.send_heartbeat()
4350
+
4351
+ # Refresh agent discovery heartbeat (keeps agent visible in discover_agents)
4352
+ if (
4353
+ self.config.register_as_agent
4354
+ and now - last_agent_heartbeat >= agent_heartbeat_interval
4355
+ ):
4356
+ last_agent_heartbeat = now
4357
+ await self.client.refresh_agent_heartbeat()
4358
+
4359
+ # Sync sessions and re-register periodically
4360
+ if now - last_sync >= sync_interval:
4361
+ last_sync = now
4362
+ try:
4363
+ await self.register_worker()
4364
+ for cb_config in self.config.codebases:
4365
+ await self.register_codebase(
4366
+ name=cb_config.get(
4367
+ 'name', Path(cb_config['path']).name
4368
+ ),
4369
+ path=cb_config['path'],
4370
+ description=cb_config.get('description', ''),
4371
+ )
4372
+ await self.report_sessions_to_server()
4373
+ except Exception as e:
4374
+ logger.warning(f'Periodic maintenance error: {e}')
4375
+
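The timer pattern in this loop, reduced to a sketch: one short tick drives several independent intervals by comparing elapsed wall-clock time, so a slow sync cannot starve the faster heartbeats (interval values copied from above):

```python
# One tick, many intervals: fire each job when its own period elapses.
import time

intervals = {'heartbeat': 15, 'agent_heartbeat': 45, 'session_sync': 60}
last = {name: time.time() for name in intervals}

def due(now: float):
    """Yield every job whose interval has elapsed, and restamp it."""
    for name, every in intervals.items():
        if now - last[name] >= every:
            last[name] = now
            yield name

# Simulate the loop waking up 61 seconds later: all three jobs fire.
print(list(due(time.time() + 61)))
```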
4376
+
4377
+ def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
4378
+ """Load configuration from file."""
4379
+ if config_path and Path(config_path).exists():
4380
+ try:
4381
+ with open(config_path) as f:
4382
+ return json.load(f)
4383
+ except Exception as e:
4384
+ logger.warning(f'Failed to load config from {config_path}: {e}')
4385
+
4386
+ # Check default locations
4387
+ default_paths = [
4388
+ Path.home() / '.config' / 'a2a-worker' / 'config.json',
4389
+ Path('/etc/a2a-worker/config.json'),
4390
+ Path('worker-config.json'),
4391
+ ]
4392
+
4393
+ for path in default_paths:
4394
+ try:
4395
+ if path.exists():
4396
+ with open(path) as f:
4397
+ return json.load(f)
4398
+ except Exception:
4399
+ # Skip if we can't read it (e.g. permission denied)
4400
+ continue
4401
+
4402
+ return {}
4403
+
4404
+
4405
+ async def main():
4406
+ parser = argparse.ArgumentParser(description='A2A Agent Worker')
4407
+ parser.add_argument('--server', '-s', default=None, help='A2A server URL')
4408
+ parser.add_argument('--name', '-n', default=None, help='Worker name')
4409
+ parser.add_argument(
4410
+ '--worker-id',
4411
+ default=None,
4412
+ help='Stable worker id (recommended for systemd/k8s). If omitted, a random id is generated.',
4413
+ )
4414
+ parser.add_argument('--config', '-c', help='Path to config file')
4415
+ parser.add_argument(
4416
+ '--codebase',
4417
+ '-b',
4418
+ action='append',
4419
+ help='Codebase to register (format: name:path or just path)',
4420
+ )
4421
+ parser.add_argument(
4422
+ '--poll-interval',
4423
+ '-i',
4424
+ type=int,
4425
+ default=None,
4426
+ help='Fallback poll interval in seconds (when SSE unavailable)',
4427
+ )
4428
+ parser.add_argument('--opencode', help='Path to opencode binary')
4429
+
4430
+ parser.add_argument(
4431
+ '--opencode-storage-path',
4432
+ default=None,
4433
+ help='Override OpenCode storage path (directory containing project/, session/, message/, part/)',
4434
+ )
4435
+ parser.add_argument(
4436
+ '--session-message-sync-max-sessions',
4437
+ type=int,
4438
+ default=None,
4439
+ help='How many most-recent sessions per codebase to sync messages for (0 disables)',
4440
+ )
4441
+ parser.add_argument(
4442
+ '--session-message-sync-max-messages',
4443
+ type=int,
4444
+ default=None,
4445
+ help='How many most-recent messages per session to sync (0 disables)',
4446
+ )
4447
+ parser.add_argument(
4448
+ '--max-concurrent-tasks',
4449
+ type=int,
4450
+ default=None,
4451
+ help='Maximum number of tasks to process concurrently (default: 2)',
4452
+ )
4453
+ parser.add_argument(
4454
+ '--sse-heartbeat-timeout',
4455
+ type=float,
4456
+ default=None,
4457
+ help='SSE heartbeat timeout in seconds (default: 45)',
4458
+ )
4459
+ # Email notification options
4460
+ parser.add_argument(
4461
+ '--email',
4462
+ '-e',
4463
+ default=None,
4464
+ help='Email address for task completion reports',
4465
+ )
4466
+ parser.add_argument(
4467
+ '--sendgrid-key',
4468
+ default=None,
4469
+ help='SendGrid API key (or set SENDGRID_API_KEY env var)',
4470
+ )
4471
+ parser.add_argument(
4472
+ '--sendgrid-from',
4473
+ default=None,
4474
+ help='SendGrid verified sender email (or set SENDGRID_FROM_EMAIL env var)',
4475
+ )
4476
+ # Email debugging and testing options
4477
+ parser.add_argument(
4478
+ '--test-email',
4479
+ action='store_true',
4480
+ help='Send a test notification email and exit (validates email config)',
4481
+ )
4482
+ parser.add_argument(
4483
+ '--email-dry-run',
4484
+ action='store_true',
4485
+ help='Log emails instead of sending them (dry run mode)',
4486
+ )
4487
+ parser.add_argument(
4488
+ '--email-verbose',
4489
+ action='store_true',
4490
+ help='Enable verbose logging for email operations',
4491
+ )
4492
+ # Agent registration options (for A2A network discovery)
4493
+ parser.add_argument(
4494
+ '--no-agent-registration',
4495
+ action='store_true',
4496
+ help='Disable automatic agent registration (worker will not be discoverable via discover_agents)',
4497
+ )
4498
+ parser.add_argument(
4499
+ '--agent-name',
4500
+ default=None,
4501
+ help='Name for agent discovery and routing (defaults to worker name). '
4502
+ 'This is the identity used for discover_agents and send_to_agent.',
4503
+ )
4504
+ parser.add_argument(
4505
+ '--agent-description',
4506
+ default=None,
4507
+ help='Description for agent discovery (what this agent does)',
4508
+ )
4509
+ parser.add_argument(
4510
+ '--agent-url',
4511
+ default=None,
4512
+ help='URL where this agent can be reached directly (optional, defaults to server URL)',
4513
+ )
4514
+
4515
+ args = parser.parse_args()
4516
+
4517
+ # Load config from file
4518
+ file_config = load_config(args.config)
4519
+
4520
+ # Honor config file values when CLI flags are not explicitly provided.
4521
+ # Note: argparse does not tell us whether a value came from a default or
4522
+ # from an explicit flag, so we detect explicit flags via sys.argv.
4523
+ server_flag_set = ('--server' in sys.argv) or ('-s' in sys.argv)
4524
+ name_flag_set = ('--name' in sys.argv) or ('-n' in sys.argv)
4525
+ worker_id_flag_set = '--worker-id' in sys.argv
4526
+ poll_flag_set = ('--poll-interval' in sys.argv) or ('-i' in sys.argv)
4527
+
4528
+ # Resolve server_url with precedence: CLI flag > env > config > default
4529
+ if server_flag_set and args.server:
4530
+ server_url = args.server
4531
+ elif os.environ.get('A2A_SERVER_URL'):
4532
+ server_url = os.environ['A2A_SERVER_URL']
4533
+ elif file_config.get('server_url'):
4534
+ server_url = file_config['server_url']
4535
+ else:
4536
+ server_url = 'https://api.codetether.run'
4537
+
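The same precedence chain (explicit CLI flag > env var > config file > default) factored into an illustrative helper — `flag_set` stands in for the `sys.argv` sniffing used here, and the helper itself is a sketch, not part of the worker:

```python
# Resolve a setting through the four-level precedence chain.
import os
from typing import Any, Dict, Optional

def resolve(flag_set: bool, flag_value: Optional[str], env_var: str,
            file_config: Dict[str, Any], key: str, default: str) -> str:
    if flag_set and flag_value:
        return flag_value          # 1. explicit CLI flag
    if os.environ.get(env_var):
        return os.environ[env_var]  # 2. environment variable
    if file_config.get(key):
        return file_config[key]     # 3. config file
    return default                  # 4. built-in default

url = resolve(False, None, 'A2A_SERVER_URL', {}, 'server_url',
              'https://api.codetether.run')
print(url)
```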
4538
+ # Resolve worker_name with precedence: CLI flag > env > config > hostname
4539
+ if name_flag_set and args.name:
4540
+ worker_name = args.name
4541
+ elif os.environ.get('A2A_WORKER_NAME'):
4542
+ worker_name = os.environ['A2A_WORKER_NAME']
4543
+ elif file_config.get('worker_name'):
4544
+ worker_name = file_config['worker_name']
4545
+ else:
4546
+ import platform
4547
+
4548
+ worker_name = platform.node() # Cross-platform (works on Windows)
4549
+
4550
+ # Resolve worker_id with precedence: CLI flag > env > config > default
4551
+ worker_id: Optional[str] = None
4552
+ if worker_id_flag_set and args.worker_id:
4553
+ worker_id = args.worker_id
4554
+ elif os.environ.get('A2A_WORKER_ID'):
4555
+ worker_id = os.environ['A2A_WORKER_ID']
4556
+ elif file_config.get('worker_id'):
4557
+ worker_id = file_config['worker_id']
4558
+
4559
+ # Resolve poll_interval with precedence: CLI flag > env > config > default
4560
+ poll_interval_raw = None
4561
+ if poll_flag_set and (args.poll_interval is not None):
4562
+ poll_interval_raw = args.poll_interval
4563
+ elif os.environ.get('A2A_POLL_INTERVAL'):
4564
+ poll_interval_raw = os.environ.get('A2A_POLL_INTERVAL')
4565
+ elif file_config.get('poll_interval') is not None:
4566
+ poll_interval_raw = file_config.get('poll_interval')
4567
+ else:
4568
+ poll_interval_raw = 5
4569
+
4570
+ try:
4571
+ poll_interval = (
4572
+ int(poll_interval_raw) if poll_interval_raw is not None else 5
4573
+ )
4574
+ except (TypeError, ValueError):
4575
+ poll_interval = 5
4576
+ logger.warning('Invalid poll_interval value; falling back to 5 seconds')
4577
+
4578
+ capabilities = file_config.get('capabilities')
4579
+ if not isinstance(capabilities, list):
4580
+ capabilities = None
4581
+
4582
+ # Build codebase list
4583
+ codebases = file_config.get('codebases', [])
4584
+ if args.codebase:
4585
+ for cb in args.codebase:
4586
+ if ':' in cb:
4587
+ name, path = cb.split(':', 1)
4588
+ else:
4589
+ name = Path(cb).name
4590
+ path = cb
4591
+ codebases.append({'name': name, 'path': os.path.abspath(path)})
4592
+
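How `--codebase` values are normalized (the paths below are invented examples): an optional `name:` prefix wins, otherwise the directory basename is used, and the path is made absolute:

```python
# Parse 'name:path' or bare 'path' codebase arguments.
import os
from pathlib import Path

for cb in ['api:/srv/repos/api-server', '/srv/repos/web']:
    if ':' in cb:
        name, path = cb.split(':', 1)
    else:
        name, path = Path(cb).name, cb
    print(name, os.path.abspath(path))
```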
4593
+ # Create config
4594
+ config_kwargs: Dict[str, Any] = {
4595
+ 'server_url': server_url,
4596
+ 'worker_name': worker_name,
4597
+ 'codebases': codebases,
4598
+ 'poll_interval': poll_interval,
4599
+ 'opencode_bin': args.opencode or file_config.get('opencode_bin'),
4600
+ 'opencode_storage_path': (
4601
+ args.opencode_storage_path
4602
+ or os.environ.get('A2A_OPENCODE_STORAGE_PATH')
4603
+ or file_config.get('opencode_storage_path')
4604
+ ),
4605
+ }
4606
+
4607
+ if worker_id:
4608
+ config_kwargs['worker_id'] = worker_id
4609
+
4610
+ # Optional session message sync tuning
4611
+ if args.session_message_sync_max_sessions is not None:
4612
+ config_kwargs['session_message_sync_max_sessions'] = (
4613
+ args.session_message_sync_max_sessions
4614
+ )
4615
+ elif os.environ.get('A2A_SESSION_MESSAGE_SYNC_MAX_SESSIONS'):
4616
+ try:
4617
+ config_kwargs['session_message_sync_max_sessions'] = int(
4618
+ os.environ['A2A_SESSION_MESSAGE_SYNC_MAX_SESSIONS']
4619
+ )
4620
+ except ValueError as e:
4621
+ logger.warning(
4622
+ f'Invalid A2A_SESSION_MESSAGE_SYNC_MAX_SESSIONS value: {e}'
4623
+ )
4624
+ elif file_config.get('session_message_sync_max_sessions') is not None:
4625
+ config_kwargs['session_message_sync_max_sessions'] = file_config.get(
4626
+ 'session_message_sync_max_sessions'
4627
+ )
4628
+
4629
+ if args.session_message_sync_max_messages is not None:
4630
+ config_kwargs['session_message_sync_max_messages'] = (
4631
+ args.session_message_sync_max_messages
4632
+ )
4633
+ elif os.environ.get('A2A_SESSION_MESSAGE_SYNC_MAX_MESSAGES'):
4634
+ try:
4635
+ config_kwargs['session_message_sync_max_messages'] = int(
4636
+ os.environ['A2A_SESSION_MESSAGE_SYNC_MAX_MESSAGES']
4637
+ )
4638
+ except ValueError as e:
4639
+ logger.warning(
4640
+ f'Invalid A2A_SESSION_MESSAGE_SYNC_MAX_MESSAGES value: {e}'
4641
+ )
4642
+ elif file_config.get('session_message_sync_max_messages') is not None:
4643
+ config_kwargs['session_message_sync_max_messages'] = file_config.get(
4644
+ 'session_message_sync_max_messages'
4645
+ )
4646
+
4647
+ # Max concurrent tasks
4648
+ if args.max_concurrent_tasks is not None:
4649
+ config_kwargs['max_concurrent_tasks'] = args.max_concurrent_tasks
4650
+ elif os.environ.get('A2A_MAX_CONCURRENT_TASKS'):
4651
+ try:
4652
+ config_kwargs['max_concurrent_tasks'] = int(
4653
+ os.environ['A2A_MAX_CONCURRENT_TASKS']
4654
+ )
4655
+ except ValueError as e:
4656
+ logger.warning(f'Invalid A2A_MAX_CONCURRENT_TASKS value: {e}')
4657
+ elif file_config.get('max_concurrent_tasks') is not None:
4658
+ config_kwargs['max_concurrent_tasks'] = file_config.get(
4659
+ 'max_concurrent_tasks'
4660
+ )
4661
+
4662
+ # SSE heartbeat timeout
4663
+ if args.sse_heartbeat_timeout is not None:
4664
+ config_kwargs['sse_heartbeat_timeout'] = args.sse_heartbeat_timeout
4665
+ elif os.environ.get('A2A_SSE_HEARTBEAT_TIMEOUT'):
4666
+ try:
4667
+ config_kwargs['sse_heartbeat_timeout'] = float(
4668
+ os.environ['A2A_SSE_HEARTBEAT_TIMEOUT']
4669
+ )
4670
+ except ValueError as e:
4671
+ logger.warning(f'Invalid A2A_SSE_HEARTBEAT_TIMEOUT value: {e}')
4672
+ elif file_config.get('sse_heartbeat_timeout') is not None:
4673
+ config_kwargs['sse_heartbeat_timeout'] = file_config.get(
4674
+ 'sse_heartbeat_timeout'
4675
+ )
4676
+
4677
+ if capabilities is not None:
4678
+ config_kwargs['capabilities'] = capabilities
4679
+
4680
+ # Auth token for SSE endpoint
4681
+ auth_token = os.environ.get('A2A_AUTH_TOKEN')
4682
+ if auth_token:
4683
+ config_kwargs['auth_token'] = auth_token
4684
+
4685
+ # SendGrid email notification config
4686
+ # Precedence: CLI flag > env var > Vault > config file
4687
+ sendgrid_key = (
4688
+ args.sendgrid_key
4689
+ or os.environ.get('SENDGRID_API_KEY')
4690
+ or file_config.get('sendgrid_api_key')
4691
+ )
4692
+ sendgrid_from = (
4693
+ args.sendgrid_from
4694
+ or os.environ.get('SENDGRID_FROM_EMAIL')
4695
+ or file_config.get('sendgrid_from_email')
4696
+ )
4697
+ notification_email = (
4698
+ args.email
4699
+ or os.environ.get('A2A_NOTIFICATION_EMAIL')
4700
+ or file_config.get('notification_email')
4701
+ )
4702
+
4703
+ # Try to fetch from Vault if not configured via CLI/env/config
4704
+ vault_path = file_config.get(
4705
+ 'vault_sendgrid_path', 'secret/spotlessbinco/sendgrid'
4706
+ )
4707
+ if not sendgrid_key or not sendgrid_from or not notification_email:
4708
+ vault = VaultClient()
4709
+ if vault.is_configured():
4710
+ logger.info(f'Fetching SendGrid config from Vault: {vault_path}')
4711
+ try:
4712
+ # We're already inside main()'s running event loop, so we can
4713
+ # simply await the helper coroutine. Creating a second event loop
4714
+ # and calling run_until_complete() on it here would raise
4715
+ # RuntimeError, since a loop is already running in this thread.
4716
+
4717
+ async def _fetch_vault():
4718
+ try:
4719
+ secrets = await vault.get_secret(vault_path)
4720
+ return secrets
4721
+ finally:
4722
+ await vault.close()
4723
+
4724
+ vault_secrets = await _fetch_vault()
4725
+
4726
+ if vault_secrets:
4727
+ if not sendgrid_key:
4728
+ sendgrid_key = vault_secrets.get('SENDGRID_API_KEY')
4729
+ if not sendgrid_from:
4730
+ sendgrid_from = vault_secrets.get('SENDGRID_FROM_EMAIL')
4731
+ if not notification_email:
4732
+ notification_email = vault_secrets.get(
4733
+ 'NOTIFICATION_EMAIL'
4734
+ )
4735
+ logger.info('Loaded SendGrid config from Vault')
4736
+ except Exception as e:
4737
+ logger.warning(
4738
+ f'Failed to fetch SendGrid config from Vault: {e}'
4739
+ )
4740
+
4741
+ if sendgrid_key:
4742
+ config_kwargs['sendgrid_api_key'] = sendgrid_key
4743
+ if sendgrid_from:
4744
+ config_kwargs['sendgrid_from_email'] = sendgrid_from
4745
+ if notification_email:
4746
+ config_kwargs['notification_email'] = notification_email
4747
+
4748
+ # Email debugging flags
4749
+ if args.email_dry_run:
4750
+ config_kwargs['email_dry_run'] = True
4751
+ logger.info(
4752
+ 'Email dry-run mode enabled (emails will be logged, not sent)'
4753
+ )
4754
+
4755
+ if args.email_verbose:
4756
+ config_kwargs['email_verbose'] = True
4757
+ logger.info('Email verbose logging enabled')
4758
+
4759
+ # Add email inbound domain from config if available
4760
+ email_inbound_domain = os.environ.get(
4761
+ 'EMAIL_INBOUND_DOMAIN'
4762
+ ) or file_config.get('email_inbound_domain')
4763
+ if email_inbound_domain:
4764
+ config_kwargs['email_inbound_domain'] = email_inbound_domain
4765
+
4766
+ email_reply_prefix = os.environ.get(
4767
+ 'EMAIL_REPLY_PREFIX'
4768
+ ) or file_config.get('email_reply_prefix')
4769
+ if email_reply_prefix:
4770
+ config_kwargs['email_reply_prefix'] = email_reply_prefix
4771
+
4772
+ # Agent registration options
4773
+ # Disable agent registration if --no-agent-registration flag is set
4774
+ if args.no_agent_registration:
4775
+ config_kwargs['register_as_agent'] = False
4776
+ logger.info(
4777
+ 'Agent registration disabled (worker will not be discoverable)'
4778
+ )
4779
+ else:
4780
+ # Default to True (register as discoverable agent)
4781
+ register_as_agent = file_config.get('register_as_agent', True)
4782
+ config_kwargs['register_as_agent'] = register_as_agent
4783
+
4784
+ # Agent name (identity for discovery and routing - should match SSE agent_name)
4785
+ # This is the key identity used by discover_agents and send_to_agent
4786
+ agent_name = (
4787
+ args.agent_name
4788
+ or os.environ.get('A2A_AGENT_NAME')
4789
+ or file_config.get('agent_name')
4790
+ )
4791
+ if agent_name:
4792
+ config_kwargs['agent_name'] = agent_name
4793
+ logger.info(f"Agent name set to: '{agent_name}'")
4794
+
4795
+ # Agent description (what this agent does)
4796
+ agent_description = (
4797
+ args.agent_description
4798
+ or os.environ.get('A2A_AGENT_DESCRIPTION')
4799
+ or file_config.get('agent_description')
4800
+ )
4801
+ if agent_description:
4802
+ config_kwargs['agent_description'] = agent_description
4803
+
4804
+ # Agent URL (where this agent can be reached directly)
4805
+ agent_url = (
4806
+ args.agent_url
4807
+ or os.environ.get('A2A_AGENT_URL')
4808
+ or file_config.get('agent_url')
4809
+ )
4810
+ if agent_url:
4811
+ config_kwargs['agent_url'] = agent_url
4812
+
4813
+ config = WorkerConfig(**config_kwargs)
4814
+
4815
+ # Handle --test-email flag: send test email and exit
4816
+ if args.test_email:
4817
+ logger.info('=== Email Configuration Test ===')
4818
+ email_service = EmailNotificationService(config)
4819
+
4820
+ # Print configuration status
4821
+ config_status = email_service.get_config_status()
4822
+ logger.info('Configuration status:')
4823
+ logger.info(f' Configured: {config_status["configured"]}')
4824
+ logger.info(f' Dry-run mode: {config_status["dry_run"]}')
4825
+ logger.info(f' Verbose mode: {config_status["verbose"]}')
4826
+ logger.info(
4827
+ f' SendGrid API key set: {config_status["sendgrid_api_key_set"]}'
4828
+ )
4829
+ logger.info(f' From email: {config_status["sendgrid_from_email"]}')
4830
+ logger.info(f' To email: {config_status["notification_email"]}')
4831
+ logger.info(f' Inbound domain: {config_status["inbound_domain"]}')
4832
+ logger.info(f' Reply prefix: {config_status["reply_prefix"]}')
4833
+
4834
+ if config_status['issues']:
4835
+ logger.warning('Issues found:')
4836
+ for issue in config_status['issues']:
4837
+ logger.warning(f' - {issue}')
4838
+
4839
+ # Send test email
4840
+ result = await email_service.send_test_email()
4841
+
4842
+ if result['success']:
4843
+ logger.info(f'SUCCESS: {result["message"]}')
4844
+ else:
4845
+ logger.error(f'FAILED: {result["message"]}')
4846
+
4847
+ await email_service.close()
4848
+
4849
+ # Exit after test
4850
+ return
4851
+
4852
+ # Create and start worker
4853
+ worker = AgentWorker(config)
4854
+
4855
+ # Handle signals
4856
+ loop = asyncio.get_running_loop() # main() is a coroutine, so a loop is running
4857
+
4858
+ def signal_handler():
4859
+ logger.info('Received shutdown signal')
4860
+ worker.running = False
4861
+
4862
+ for sig in (signal.SIGTERM, signal.SIGINT):
4863
+ try:
+ loop.add_signal_handler(sig, signal_handler)
+ except NotImplementedError:
+ # Not supported on Windows event loops; the KeyboardInterrupt
+ # handler below covers Ctrl+C there.
+ pass
4864
+
4865
+ try:
4866
+ await worker.start()
4867
+ except KeyboardInterrupt:
4868
+ pass
4869
+ finally:
4870
+ # Always ensure clean shutdown
4871
+ await worker.stop()
4872
+ # Give aiohttp time to close connections gracefully
4873
+ await asyncio.sleep(0.25)
4874
+
4875
+
4876
+ if __name__ == '__main__':
4877
+ asyncio.run(main())