agent-relay 3.0.2 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134):
  1. package/README.md +8 -0
  2. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  3. package/bin/agent-relay-broker-darwin-x64 +0 -0
  4. package/bin/agent-relay-broker-linux-arm64 +0 -0
  5. package/bin/agent-relay-broker-linux-x64 +0 -0
  6. package/dist/index.cjs +273 -56
  7. package/dist/src/cli/commands/core.d.ts +2 -0
  8. package/dist/src/cli/commands/core.d.ts.map +1 -1
  9. package/dist/src/cli/commands/core.js +9 -2
  10. package/dist/src/cli/commands/core.js.map +1 -1
  11. package/dist/src/cli/lib/broker-lifecycle.d.ts.map +1 -1
  12. package/dist/src/cli/lib/broker-lifecycle.js +87 -28
  13. package/dist/src/cli/lib/broker-lifecycle.js.map +1 -1
  14. package/package.json +8 -8
  15. package/packages/acp-bridge/README.md +50 -67
  16. package/packages/acp-bridge/package.json +2 -2
  17. package/packages/config/package.json +1 -1
  18. package/packages/hooks/package.json +4 -4
  19. package/packages/memory/package.json +2 -2
  20. package/packages/policy/package.json +2 -2
  21. package/packages/sdk/README.md +169 -64
  22. package/packages/sdk/dist/__tests__/contract-fixtures.test.js +76 -9
  23. package/packages/sdk/dist/__tests__/contract-fixtures.test.js.map +1 -1
  24. package/packages/sdk/dist/__tests__/integration.test.js +5 -4
  25. package/packages/sdk/dist/__tests__/integration.test.js.map +1 -1
  26. package/packages/sdk/dist/client.d.ts +34 -3
  27. package/packages/sdk/dist/client.d.ts.map +1 -1
  28. package/packages/sdk/dist/client.js +120 -10
  29. package/packages/sdk/dist/client.js.map +1 -1
  30. package/packages/sdk/dist/protocol.d.ts +7 -1
  31. package/packages/sdk/dist/protocol.d.ts.map +1 -1
  32. package/packages/sdk/dist/relay.d.ts +47 -11
  33. package/packages/sdk/dist/relay.d.ts.map +1 -1
  34. package/packages/sdk/dist/relay.js +114 -23
  35. package/packages/sdk/dist/relay.js.map +1 -1
  36. package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
  37. package/packages/sdk/dist/workflows/runner.js +71 -36
  38. package/packages/sdk/dist/workflows/runner.js.map +1 -1
  39. package/packages/sdk/dist/workflows/types.d.ts +1 -1
  40. package/packages/sdk/dist/workflows/types.d.ts.map +1 -1
  41. package/packages/sdk/package.json +2 -2
  42. package/packages/sdk/src/__tests__/contract-fixtures.test.ts +88 -9
  43. package/packages/sdk/src/__tests__/error-scenarios.test.ts +1 -1
  44. package/packages/sdk/src/__tests__/idle-nudge.test.ts +205 -257
  45. package/packages/sdk/src/__tests__/integration.test.ts +5 -4
  46. package/packages/sdk/src/__tests__/orchestration-upgrades.test.ts +277 -13
  47. package/packages/sdk/src/__tests__/swarm-coordinator.test.ts +1 -0
  48. package/packages/sdk/src/__tests__/workflow-runner.test.ts +67 -7
  49. package/packages/sdk/src/__tests__/workflow-trajectory.test.ts +4 -5
  50. package/packages/sdk/src/client.ts +171 -14
  51. package/packages/sdk/src/examples/workflows/runner-idle-refactor.yaml +306 -0
  52. package/packages/sdk/src/protocol.ts +7 -2
  53. package/packages/sdk/src/relay.ts +196 -34
  54. package/packages/sdk/src/workflows/runner.ts +73 -42
  55. package/packages/sdk/src/workflows/schema.json +1 -1
  56. package/packages/sdk/src/workflows/types.ts +1 -1
  57. package/packages/sdk/vitest.config.ts +1 -0
  58. package/packages/sdk-py/README.md +89 -102
  59. package/packages/sdk-py/agent_relay/__init__.py +16 -19
  60. package/packages/sdk-py/pyproject.toml +5 -1
  61. package/packages/sdk-py/src/agent_relay/__init__.py +35 -1
  62. package/packages/sdk-py/src/agent_relay/client.py +776 -0
  63. package/packages/sdk-py/src/agent_relay/models.py +27 -0
  64. package/packages/sdk-py/src/agent_relay/protocol.py +114 -0
  65. package/packages/sdk-py/src/agent_relay/relay.py +860 -0
  66. package/packages/sdk-py/tests/test_relay_lifecycle_hooks.py +250 -0
  67. package/packages/telemetry/package.json +1 -1
  68. package/packages/trajectory/package.json +2 -2
  69. package/packages/user-directory/package.json +2 -2
  70. package/packages/utils/package.json +2 -2
  71. package/packages/sdk/.trajectories/active/traj_1771875803391_84ca57b2.json +0 -50
  72. package/packages/sdk/.trajectories/active/traj_1771891934534_06504121.json +0 -50
  73. package/packages/sdk/.trajectories/active/traj_1771891957929_211afc4e.json +0 -50
  74. package/packages/sdk/.trajectories/active/traj_1771891982509_38c84638.json +0 -50
  75. package/packages/sdk/.trajectories/completed/traj_1771875803188_cd6d181c.json +0 -80
  76. package/packages/sdk/.trajectories/completed/traj_1771875803204_f2aeb8c8.json +0 -80
  77. package/packages/sdk/.trajectories/completed/traj_1771875803210_d65f3f1a.json +0 -80
  78. package/packages/sdk/.trajectories/completed/traj_1771875803218_e454a25d.json +0 -80
  79. package/packages/sdk/.trajectories/completed/traj_1771875803223_d7a64815.json +0 -80
  80. package/packages/sdk/.trajectories/completed/traj_1771875803227_7e56da5b.json +0 -80
  81. package/packages/sdk/.trajectories/completed/traj_1771875803235_4fbf93b4.json +0 -80
  82. package/packages/sdk/.trajectories/completed/traj_1771875803243_47931c71.json +0 -80
  83. package/packages/sdk/.trajectories/completed/traj_1771875803258_3816f3fe.json +0 -80
  84. package/packages/sdk/.trajectories/completed/traj_1771875803268_8061140e.json +0 -80
  85. package/packages/sdk/.trajectories/completed/traj_1771875803326_ae6f9c78.json +0 -80
  86. package/packages/sdk/.trajectories/completed/traj_1771875808396_cbde0a6c.json +0 -91
  87. package/packages/sdk/.trajectories/completed/traj_1771875812026_aa2442bb.json +0 -91
  88. package/packages/sdk/.trajectories/completed/traj_1771875815431_c2c656c5.json +0 -91
  89. package/packages/sdk/.trajectories/completed/traj_1771875818645_3a4dbf02.json +0 -91
  90. package/packages/sdk/.trajectories/completed/traj_1771891934403_24923c03.json +0 -80
  91. package/packages/sdk/.trajectories/completed/traj_1771891934421_dca16e24.json +0 -80
  92. package/packages/sdk/.trajectories/completed/traj_1771891934430_057706f7.json +0 -80
  93. package/packages/sdk/.trajectories/completed/traj_1771891934442_faf97382.json +0 -80
  94. package/packages/sdk/.trajectories/completed/traj_1771891934454_5542ecd5.json +0 -80
  95. package/packages/sdk/.trajectories/completed/traj_1771891934464_12202a08.json +0 -80
  96. package/packages/sdk/.trajectories/completed/traj_1771891934487_94378275.json +0 -80
  97. package/packages/sdk/.trajectories/completed/traj_1771891934503_ca728c13.json +0 -80
  98. package/packages/sdk/.trajectories/completed/traj_1771891934519_100af69a.json +0 -80
  99. package/packages/sdk/.trajectories/completed/traj_1771891934536_62ad39d9.json +0 -80
  100. package/packages/sdk/.trajectories/completed/traj_1771891934553_d6798a52.json +0 -80
  101. package/packages/sdk/.trajectories/completed/traj_1771891939537_541c8096.json +0 -91
  102. package/packages/sdk/.trajectories/completed/traj_1771891942985_36ab9a4d.json +0 -91
  103. package/packages/sdk/.trajectories/completed/traj_1771891946453_e8a6e05f.json +0 -91
  104. package/packages/sdk/.trajectories/completed/traj_1771891949838_5de0de84.json +0 -91
  105. package/packages/sdk/.trajectories/completed/traj_1771891957807_0ecfb4f4.json +0 -80
  106. package/packages/sdk/.trajectories/completed/traj_1771891957827_c4539239.json +0 -80
  107. package/packages/sdk/.trajectories/completed/traj_1771891957836_91168b48.json +0 -80
  108. package/packages/sdk/.trajectories/completed/traj_1771891957848_8c5cad0b.json +0 -80
  109. package/packages/sdk/.trajectories/completed/traj_1771891957857_0986b293.json +0 -80
  110. package/packages/sdk/.trajectories/completed/traj_1771891957872_8a3113af.json +0 -80
  111. package/packages/sdk/.trajectories/completed/traj_1771891957884_0bb85208.json +0 -80
  112. package/packages/sdk/.trajectories/completed/traj_1771891957892_86c75e2e.json +0 -80
  113. package/packages/sdk/.trajectories/completed/traj_1771891957907_98ca0e6f.json +0 -80
  114. package/packages/sdk/.trajectories/completed/traj_1771891957918_d9091231.json +0 -80
  115. package/packages/sdk/.trajectories/completed/traj_1771891957931_dcaf77ed.json +0 -80
  116. package/packages/sdk/.trajectories/completed/traj_1771891962931_eb1fdee2.json +0 -91
  117. package/packages/sdk/.trajectories/completed/traj_1771891966262_9061a93f.json +0 -91
  118. package/packages/sdk/.trajectories/completed/traj_1771891969915_1adaba19.json +0 -91
  119. package/packages/sdk/.trajectories/completed/traj_1771891973588_f08b79e9.json +0 -91
  120. package/packages/sdk/.trajectories/completed/traj_1771891982421_f1985bce.json +0 -80
  121. package/packages/sdk/.trajectories/completed/traj_1771891982432_e7a84163.json +0 -80
  122. package/packages/sdk/.trajectories/completed/traj_1771891982447_369b842a.json +0 -80
  123. package/packages/sdk/.trajectories/completed/traj_1771891982469_5fc45199.json +0 -80
  124. package/packages/sdk/.trajectories/completed/traj_1771891982495_454c7cb3.json +0 -80
  125. package/packages/sdk/.trajectories/completed/traj_1771891982514_08098e03.json +0 -80
  126. package/packages/sdk/.trajectories/completed/traj_1771891982526_b351d778.json +0 -80
  127. package/packages/sdk/.trajectories/completed/traj_1771891982533_fa542d83.json +0 -80
  128. package/packages/sdk/.trajectories/completed/traj_1771891982540_18ab24dc.json +0 -80
  129. package/packages/sdk/.trajectories/completed/traj_1771891982544_5b4fa163.json +0 -80
  130. package/packages/sdk/.trajectories/completed/traj_1771891982548_c13f089a.json +0 -80
  131. package/packages/sdk/.trajectories/completed/traj_1771891987510_23f6da1f.json +0 -91
  132. package/packages/sdk/.trajectories/completed/traj_1771891991466_912c2e04.json +0 -91
  133. package/packages/sdk/.trajectories/completed/traj_1771891994891_60604be2.json +0 -91
  134. package/packages/sdk/.trajectories/completed/traj_1771891998370_cfaf9b8b.json +0 -91
@@ -0,0 +1,776 @@
1
+ """Low-level async client for the Agent Relay broker subprocess.
2
+
3
+ Manages the broker process lifecycle, line-delimited JSON protocol,
4
+ request/response correlation, and event dispatch.
5
+
6
+ Mirrors packages/sdk/src/client.ts.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import json
13
+ import os
14
+ import platform
15
+ import shutil
16
+ import stat
17
+ import subprocess
18
+ import sys
19
+ import urllib.request
20
+ from pathlib import Path
21
+ from typing import Any, Callable, Literal, Optional
22
+
23
+ from .protocol import (
24
+ PROTOCOL_VERSION,
25
+ AgentSpec,
26
+ BrokerEvent,
27
+ HeadlessProvider,
28
+ ProtocolEnvelope,
29
+ )
30
+
31
+ # ── Errors ────────────────────────────────────────────────────────────────────
32
+
33
+
34
class AgentRelayProtocolError(Exception):
    """Error reported by the broker at the protocol level.

    The human-readable message becomes the exception text; the remaining
    fields of the broker's error payload are kept as attributes.
    """

    def __init__(
        self,
        code: str,
        message: str,
        retryable: bool = False,
        data: Any = None,
    ):
        # Only the message is forwarded to Exception, so str(err) == message.
        super().__init__(message)
        # code: error code string; retryable: retry hint flag;
        # data: optional extra payload attached to the error.
        self.code, self.retryable, self.data = code, retryable, data
44
+
45
+
46
class AgentRelayProcessError(Exception):
    """Signals a failure in the broker process lifecycle.

    Used in this module for problems such as a missing or unverifiable
    binary, an unsupported platform, a broker that is not running, a
    request timeout, or the broker exiting.
    """
48
+
49
+
50
+ AgentTransport = Literal["pty", "headless"]
51
+
52
+
53
+ # ── CLI / model helpers ───────────────────────────────────────────────────────
54
+
55
+ _CLI_MODEL_FLAG_CLIS = {"claude", "codex", "gemini", "goose", "aider"}
56
+
57
+ _CLI_DEFAULT_ARGS: dict[str, list[str]] = {
58
+ "codex": ["-c", "check_for_update_on_startup=false"],
59
+ }
60
+
61
+
62
+ def _has_model_arg(args: list[str]) -> bool:
63
+ for arg in args:
64
+ if arg == "--model" or arg.startswith("--model="):
65
+ return True
66
+ return False
67
+
68
+
69
def _build_pty_args_with_model(
    cli: str, args: list[str], model: Optional[str] = None
) -> list[str]:
    """Assemble the argument list for a PTY-spawned CLI.

    The CLI name is the part of *cli* before the first ``:``, stripped and
    lower-cased. Any default arguments registered for that name come first,
    followed by the caller's *args*. ``--model <model>`` is prepended only
    when a model was given, the CLI is known to accept the flag, and the
    combined arguments do not already specify a model.
    """
    cli_key = cli.split(":")[0].strip().lower()
    combined = [*_CLI_DEFAULT_ARGS.get(cli_key, []), *args]

    needs_model_flag = (
        bool(model)
        and cli_key in _CLI_MODEL_FLAG_CLIS
        and not _has_model_arg(combined)
    )
    if not needs_model_flag:
        return combined
    return ["--model", model, *combined]
82
+
83
+
84
+ def _expand_tilde(p: str) -> str:
85
+ if p == "~" or p.startswith("~/") or p.startswith("~\\"):
86
+ return str(Path.home() / p[2:])
87
+ return p
88
+
89
+
90
+ def _is_explicit_path(binary_path: str) -> bool:
91
+ return (
92
+ "/" in binary_path
93
+ or "\\" in binary_path
94
+ or binary_path.startswith(".")
95
+ or binary_path.startswith("~")
96
+ )
97
+
98
+
99
+ def _detect_platform() -> str:
100
+ """Detect platform string matching GitHub release binary names."""
101
+ system = platform.system().lower()
102
+ machine = platform.machine().lower()
103
+
104
+ if system == "darwin":
105
+ os_name = "darwin"
106
+ elif system == "linux":
107
+ os_name = "linux"
108
+ else:
109
+ raise AgentRelayProcessError(f"Unsupported OS: {system}")
110
+
111
+ if machine in ("x86_64", "amd64"):
112
+ arch = "x64"
113
+ elif machine in ("arm64", "aarch64"):
114
+ arch = "arm64"
115
+ else:
116
+ raise AgentRelayProcessError(f"Unsupported architecture: {machine}")
117
+
118
+ return f"{os_name}-{arch}"
119
+
120
+
121
def _get_latest_version() -> str:
    """Fetch the latest release version tag from GitHub.

    Queries the GitHub "latest release" endpoint for AgentWorkforce/relay,
    sending ``GITHUB_TOKEN`` as an authorization header when that environment
    variable is set.

    Returns:
        The release's ``tag_name`` with leading ``v`` characters stripped, or
        an empty string when the response has no ``tag_name``.
    """
    request_headers = {"Accept": "application/vnd.github.v3+json"}
    github_token = os.environ.get("GITHUB_TOKEN")
    if github_token:
        request_headers["Authorization"] = f"token {github_token}"

    request = urllib.request.Request(
        "https://api.github.com/repos/AgentWorkforce/relay/releases/latest",
        headers=request_headers,
    )
    with urllib.request.urlopen(request, timeout=15) as response:
        release = json.loads(response.read().decode())

    return release.get("tag_name", "").lstrip("v")
133
+
134
+
135
def _install_broker_binary() -> str:
    """Download the broker binary from GitHub releases. Returns the installed path.

    The binary for the current platform is downloaded into a staging file
    inside ``~/.agent-relay/bin``, made executable, ad-hoc re-signed on macOS,
    and verified by running it with ``--help``. Only a verified binary is
    moved to ``~/.agent-relay/bin/agent-relay-broker``, so a failed or
    interrupted install never leaves an unusable file at the final path.

    Returns:
        The path of the installed binary as a string.

    Raises:
        AgentRelayProcessError: if the platform is unsupported, the latest
            version cannot be determined, the download fails, or the
            downloaded binary cannot be executed successfully.
    """
    bin_dir = Path.home() / ".agent-relay" / "bin"
    target_path = bin_dir / "agent-relay-broker"
    # Stage in the same directory so the final os.replace() is a rename on
    # one filesystem; the PID keeps concurrent installers from colliding.
    staging_path = bin_dir / f"agent-relay-broker.{os.getpid()}.download"

    plat = _detect_platform()
    print(f"[agent-relay] Broker binary not found, installing for {plat}...")

    version = _get_latest_version()
    if not version:
        raise AgentRelayProcessError(
            "Failed to fetch latest agent-relay version from GitHub"
        )

    binary_name = f"agent-relay-broker-{plat}"
    download_url = f"https://github.com/AgentWorkforce/relay/releases/download/v{version}/{binary_name}"

    bin_dir.mkdir(parents=True, exist_ok=True)

    print(f"[agent-relay] Downloading v{version} from {download_url}")
    try:
        # urlopen with a timeout so a stalled connection cannot hang forever.
        with urllib.request.urlopen(download_url, timeout=60) as resp, open(
            staging_path, "wb"
        ) as out:
            shutil.copyfileobj(resp, out)
    except Exception as e:
        staging_path.unlink(missing_ok=True)
        raise AgentRelayProcessError(
            f"Failed to download broker binary: {e}\n"
            "You can install manually: curl -fsSL https://raw.githubusercontent.com/AgentWorkforce/relay/main/install.sh | bash"
        ) from e

    try:
        # Make executable
        staging_path.chmod(
            staging_path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
        )

        # macOS: re-sign to avoid Gatekeeper issues
        if platform.system() == "Darwin":
            try:
                subprocess.run(
                    ["codesign", "--force", "--sign", "-", str(staging_path)],
                    capture_output=True,
                    timeout=10,
                )
            except (OSError, subprocess.SubprocessError):
                pass  # Non-fatal — binary may still work

        # Verify
        try:
            result = subprocess.run(
                [str(staging_path), "--help"],
                capture_output=True,
                timeout=10,
            )
        except subprocess.TimeoutExpired as e:
            raise AgentRelayProcessError(
                "Downloaded broker binary timed out during verification"
            ) from e
        except OSError as e:
            # e.g. exec format error or permission denied: the file is not a
            # runnable binary for this machine.
            raise AgentRelayProcessError(
                f"Downloaded broker binary failed verification: {e}"
            ) from e
        if result.returncode != 0:
            raise AgentRelayProcessError("Downloaded broker binary failed verification")

        os.replace(staging_path, target_path)
    except BaseException:
        # Never leave a partial or unverified download behind.
        staging_path.unlink(missing_ok=True)
        raise

    print(f"[agent-relay] Broker installed to {target_path}")
    return str(target_path)
199
+
200
+
201
def _resolve_default_binary_path() -> str:
    """Locate the broker binary, installing it when it cannot be found.

    Lookup order:
      1. ``~/.agent-relay/bin/agent-relay-broker``
      2. ``agent-relay-broker`` on ``PATH``
      3. a fresh download from GitHub releases
    """
    exe_name = "agent-relay-broker"

    candidate = Path.home() / ".agent-relay" / "bin" / exe_name
    if candidate.exists():
        return str(candidate)

    # Prefer an existing binary on PATH; download only as a last resort.
    return shutil.which(exe_name) or _install_broker_binary()
217
+
218
+
219
+ # ── Pending request tracking ─────────────────────────────────────────────────
220
+
221
+
222
+ class _PendingRequest:
223
+ __slots__ = ("expected_type", "future", "timeout_handle")
224
+
225
+ def __init__(
226
+ self,
227
+ expected_type: str,
228
+ future: asyncio.Future[ProtocolEnvelope],
229
+ timeout_handle: asyncio.TimerHandle,
230
+ ):
231
+ self.expected_type = expected_type
232
+ self.future = future
233
+ self.timeout_handle = timeout_handle
234
+
235
+
236
+ # ── Client ────────────────────────────────────────────────────────────────────
237
+
238
+
239
class AgentRelayClient:
    """Manages a broker subprocess and communicates over line-delimited JSON.

    Requests are written to the broker's stdin as one JSON envelope per line
    and correlated with responses read from its stdout via ``request_id``.
    Envelopes of type ``"event"`` are kept in a bounded in-memory buffer and
    passed to listeners registered with :meth:`on_event`. Lines the broker
    writes to stderr are passed to listeners registered with
    :meth:`on_broker_stderr`.
    """

    # Per-line buffer limit for the broker's stdout/stderr stream readers.
    # asyncio's default (64 KiB) makes readline() raise on longer lines, which
    # would stop the reader task; broker frames are single JSON lines and may
    # exceed that.
    _STREAM_LIMIT_BYTES = 16 * 1024 * 1024

    def __init__(
        self,
        *,
        binary_path: Optional[str] = None,
        binary_args: Optional[list[str]] = None,
        broker_name: Optional[str] = None,
        channels: Optional[list[str]] = None,
        cwd: Optional[str] = None,
        env: Optional[dict[str, str]] = None,
        request_timeout_ms: int = 10_000,
        shutdown_timeout_ms: int = 3_000,
        client_name: str = "agent-relay-sdk-py",
        client_version: str = "0.3.0",
    ):
        """Configure the client; the broker is not started here.

        Args:
            binary_path: Broker executable. When omitted, the default location
                is resolved immediately, which may download the binary.
            binary_args: Extra arguments appended to the broker's ``init``
                command line.
            broker_name: Broker name. Defaults to the basename of the working
                directory, or ``"project"`` when that is empty.
            channels: Channels passed to the broker. Defaults to
                ``["general"]``.
            cwd: Working directory for the broker and the default for spawned
                PTY agents. Defaults to the current directory.
            env: Environment for the broker process. When omitted (or empty)
                the current process environment is used.
            request_timeout_ms: Time to wait for each response.
            shutdown_timeout_ms: Time :meth:`shutdown` waits for a voluntary
                exit before terminating the broker.
            client_name: Name sent in the hello handshake.
            client_version: Version sent in the hello handshake.
        """
        self._binary_path = binary_path or _resolve_default_binary_path()
        self._binary_args = binary_args or []
        self._broker_name = (
            broker_name or os.path.basename(cwd or os.getcwd()) or "project"
        )
        self._channels = channels or ["general"]
        self._cwd = cwd or os.getcwd()
        self._env = env
        self._request_timeout_ms = request_timeout_ms
        self._shutdown_timeout_ms = shutdown_timeout_ms
        self._client_name = client_name
        self._client_version = client_version

        self._process: Optional[asyncio.subprocess.Process] = None
        self._request_seq = 0
        self._pending: dict[str, _PendingRequest] = {}
        self._event_listeners: list[Callable[[BrokerEvent], None]] = []
        self._stderr_listeners: list[Callable[[str], None]] = []
        self._event_buffer: list[BrokerEvent] = []
        self._max_buffer_size = 1000
        self._last_stderr_line: Optional[str] = None
        self._starting_lock = asyncio.Lock()
        self._started = False
        self._reader_task: Optional[asyncio.Task[None]] = None
        self._stderr_task: Optional[asyncio.Task[None]] = None
        # Strong reference to the exit monitor: the event loop only keeps weak
        # references to tasks, so an unreferenced task may be collected.
        self._monitor_task: Optional[asyncio.Task[None]] = None
        self._exit_future: Optional[asyncio.Future[None]] = None
        self.workspace_key: Optional[str] = None

    @classmethod
    async def start(cls, **kwargs: Any) -> AgentRelayClient:
        """Construct a client from *kwargs* and start its broker."""
        client = cls(**kwargs)
        await client.start_client()
        return client

    # ── Event subscription ────────────────────────────────────────────────

    def on_event(self, listener: Callable[[BrokerEvent], None]) -> Callable[[], None]:
        """Register *listener* for broker events.

        Returns:
            A function that removes the listener; calling it again is a no-op.
        """
        self._event_listeners.append(listener)

        def unsubscribe() -> None:
            try:
                self._event_listeners.remove(listener)
            except ValueError:
                pass

        return unsubscribe

    def on_broker_stderr(self, listener: Callable[[str], None]) -> Callable[[], None]:
        """Register *listener* for lines the broker writes to stderr.

        Returns:
            A function that removes the listener; calling it again is a no-op.
        """
        self._stderr_listeners.append(listener)

        def unsubscribe() -> None:
            try:
                self._stderr_listeners.remove(listener)
            except ValueError:
                pass

        return unsubscribe

    def query_events(
        self,
        *,
        kind: Optional[str] = None,
        name: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> list[BrokerEvent]:
        """Return buffered events, oldest first, optionally filtered.

        Args:
            kind: Keep only events whose ``"kind"`` equals this value.
            name: Keep only events whose ``"name"`` equals this value.
            limit: Keep at most this many of the most recent matches. A limit
                of zero (or less) yields an empty list.
        """
        events = list(self._event_buffer)
        if kind:
            events = [e for e in events if e.get("kind") == kind]
        if name:
            events = [e for e in events if e.get("name") == name]
        if limit is not None:
            # events[-0:] is the whole list, so non-positive limits need
            # explicit handling.
            events = events[-limit:] if limit > 0 else []
        return events

    # ── Lifecycle ─────────────────────────────────────────────────────────

    async def start_client(self) -> None:
        """Start the broker if it is not already started.

        The started flag is re-checked after acquiring the lock so that
        concurrent callers launch only one broker.
        """
        if self._started:
            return
        async with self._starting_lock:
            if self._started:
                return
            await self._start_internal()

    async def _start_internal(self) -> None:
        """Spawn the broker, start the I/O tasks and perform the handshake.

        Raises:
            AgentRelayProcessError: if an explicit binary path does not exist,
                or the handshake fails. On any handshake failure (including
                cancellation) the spawned process is stopped again.
        """
        resolved_binary = _expand_tilde(self._binary_path)
        explicit = _is_explicit_path(self._binary_path)
        if explicit and not Path(resolved_binary).exists():
            raise AgentRelayProcessError(
                f"broker binary not found: {self._binary_path}"
            )

        args = [
            "init",
            "--name",
            self._broker_name,
            "--channels",
            ",".join(self._channels),
            *self._binary_args,
        ]

        env = dict(self._env) if self._env else dict(os.environ)
        if explicit:
            # Put the binary's own directory on PATH for the broker process.
            bin_dir = str(Path(resolved_binary).resolve().parent)
            current_path = env.get("PATH", "")
            if bin_dir not in current_path.split(os.pathsep):
                env["PATH"] = f"{bin_dir}{os.pathsep}{current_path}"

        self._last_stderr_line = None

        self._process = await asyncio.create_subprocess_exec(
            resolved_binary,
            *args,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=self._cwd,
            env=env,
            limit=self._STREAM_LIMIT_BYTES,
        )

        loop = asyncio.get_running_loop()
        self._exit_future = loop.create_future()

        self._reader_task = asyncio.create_task(self._read_stdout())
        self._stderr_task = asyncio.create_task(self._read_stderr())

        # Monitor process exit
        self._monitor_task = asyncio.create_task(self._monitor_exit())

        # Hello handshake
        try:
            hello_ack = await self._request_hello()
        except BaseException:
            # Failed or cancelled handshake: do not leave an orphaned broker
            # or dangling reader tasks behind.
            process, self._process = self._process, None
            if process is not None:
                await self._stop_process(process, 2.0)
            await self._cancel_io_tasks()
            raise
        self._started = True
        if hello_ack.get("workspace_key"):
            self.workspace_key = hello_ack["workspace_key"]

    async def _monitor_exit(self) -> None:
        """Wait for the broker to exit, then fail pending requests.

        The process and exit future are captured up front so that this task
        always settles the future belonging to the process it watches.
        """
        process = self._process
        exit_future = self._exit_future
        if not process:
            return
        code = await process.wait()
        detail = f": {self._last_stderr_line}" if self._last_stderr_line else ""
        error = AgentRelayProcessError(f"broker exited (code={code}){detail}")
        self._fail_all_pending(error)
        if exit_future and not exit_future.done():
            exit_future.set_result(None)

    async def _read_stdout(self) -> None:
        """Feed each stdout line to the protocol handler until EOF."""
        assert self._process and self._process.stdout
        stdout = self._process.stdout
        while True:
            line = await stdout.readline()
            if not line:
                break
            self._handle_stdout_line(
                line.decode("utf-8", errors="replace").rstrip("\n")
            )

    async def _read_stderr(self) -> None:
        """Forward stderr lines to listeners until EOF.

        The most recent non-blank line is remembered so it can be included in
        the "broker exited" error.
        """
        assert self._process and self._process.stderr
        stderr = self._process.stderr
        while True:
            line = await stderr.readline()
            if not line:
                break
            text = line.decode("utf-8", errors="replace").rstrip("\n")
            trimmed = text.strip()
            if trimmed:
                self._last_stderr_line = trimmed
            # Snapshot: a listener may unsubscribe itself while being called.
            for listener in list(self._stderr_listeners):
                listener(text)

    def _handle_stdout_line(self, line: str) -> None:
        """Parse one stdout line and dispatch it as an event or a response.

        Lines that are not JSON objects, carry a different protocol version,
        or lack a string ``type`` are ignored, as are responses without a
        matching pending request.
        """
        try:
            parsed = json.loads(line)
        except (json.JSONDecodeError, ValueError):
            return

        if not isinstance(parsed, dict):
            return
        if parsed.get("v") != PROTOCOL_VERSION or not isinstance(
            parsed.get("type"), str
        ):
            return

        envelope = ProtocolEnvelope.from_dict(parsed)

        # Events are dispatched to listeners (no request_id)
        if envelope.type == "event":
            event: BrokerEvent = envelope.payload
            self._event_buffer.append(event)
            if len(self._event_buffer) > self._max_buffer_size:
                self._event_buffer.pop(0)
            # Snapshot: a listener may unsubscribe itself while being called.
            for listener in list(self._event_listeners):
                listener(event)
            return

        # Responses are correlated to pending requests
        if not envelope.request_id:
            return

        pending = self._pending.pop(envelope.request_id, None)
        if not pending:
            return

        pending.timeout_handle.cancel()

        if pending.future.done():
            # The waiter was cancelled or already failed; settling the future
            # again would raise InvalidStateError and stop the reader task.
            return

        if envelope.type == "error":
            payload = envelope.payload
            pending.future.set_exception(
                AgentRelayProtocolError(
                    code=payload.get("code", "unknown"),
                    message=payload.get("message", "unknown error"),
                    retryable=payload.get("retryable", False),
                    data=payload.get("data"),
                )
            )
            return

        if envelope.type != pending.expected_type:
            pending.future.set_exception(
                AgentRelayProcessError(
                    f"unexpected response type '{envelope.type}' for request "
                    f"'{envelope.request_id}' (expected '{pending.expected_type}')"
                )
            )
            return

        pending.future.set_result(envelope)

    def _fail_all_pending(self, error: Exception) -> None:
        """Fail every outstanding request with *error* and forget them."""
        outstanding = list(self._pending.values())
        self._pending.clear()
        for pending in outstanding:
            pending.timeout_handle.cancel()
            if not pending.future.done():
                pending.future.set_exception(error)

    async def _stop_process(
        self, process: asyncio.subprocess.Process, grace_secs: float
    ) -> None:
        """Terminate *process*, escalate to kill after *grace_secs*, and reap it."""
        if process.returncode is None:
            try:
                process.terminate()
            except ProcessLookupError:
                pass  # exited between the returncode check and the signal
            try:
                await asyncio.wait_for(process.wait(), timeout=grace_secs)
            except asyncio.TimeoutError:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass
        # Reap the child so it does not linger as a zombie.
        await process.wait()

    async def _cancel_io_tasks(self) -> None:
        """Cancel the stdout/stderr reader tasks and wait for them to finish.

        The exit monitor is deliberately left running: it settles the exit
        future once the process has exited.
        """
        tasks = [
            task
            for task in (self._reader_task, self._stderr_task)
            if task and not task.done()
        ]
        for task in tasks:
            task.cancel()
        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)

    # ── Request helpers ───────────────────────────────────────────────────

    async def _send_request(
        self, type_: str, payload: Any, expected_type: str
    ) -> ProtocolEnvelope:
        """Send one request envelope and wait for its response.

        Args:
            type_: Envelope type of the request.
            payload: Request payload.
            expected_type: Envelope type a successful response must have.

        Returns:
            The response envelope.

        Raises:
            AgentRelayProcessError: if the broker is not running, the request
                times out, the response has an unexpected type, or the broker
                exits while the request is pending.
            AgentRelayProtocolError: if the broker answers with an error.
        """
        process = self._process
        if not process or not process.stdin or process.returncode is not None:
            raise AgentRelayProcessError("broker is not running")

        self._request_seq += 1
        request_id = f"req_{self._request_seq}"

        envelope = ProtocolEnvelope(
            v=PROTOCOL_VERSION,
            type=type_,
            payload=payload,
            request_id=request_id,
        )

        loop = asyncio.get_running_loop()
        future: asyncio.Future[ProtocolEnvelope] = loop.create_future()

        def on_timeout() -> None:
            self._pending.pop(request_id, None)
            if not future.done():
                future.set_exception(
                    AgentRelayProcessError(
                        f"request timed out after {self._request_timeout_ms}ms "
                        f"(type='{type_}', request_id='{request_id}')"
                    )
                )

        timeout_handle = loop.call_later(self._request_timeout_ms / 1000, on_timeout)
        self._pending[request_id] = _PendingRequest(
            expected_type, future, timeout_handle
        )

        try:
            line = json.dumps(envelope.to_dict()) + "\n"
            process.stdin.write(line.encode("utf-8"))
            await process.stdin.drain()
            return await future
        finally:
            # Covers write failures and caller cancellation as well as the
            # normal paths, where both operations are no-ops.
            timeout_handle.cancel()
            self._pending.pop(request_id, None)
            if not future.done():
                future.cancel()

    async def _request_hello(self) -> dict[str, Any]:
        """Perform the hello handshake and return the ``hello_ack`` payload."""
        payload = {
            "client_name": self._client_name,
            "client_version": self._client_version,
        }
        frame = await self._send_request("hello", payload, "hello_ack")
        return frame.payload

    async def _request_ok(self, type_: str, payload: Any) -> Any:
        """Send a request expecting an ``ok`` response; return its ``result``."""
        frame = await self._send_request(type_, payload, "ok")
        return frame.payload.get("result")

    # ── Public API methods ────────────────────────────────────────────────

    async def spawn_pty(
        self,
        *,
        name: str,
        cli: str,
        args: Optional[list[str]] = None,
        channels: Optional[list[str]] = None,
        task: Optional[str] = None,
        model: Optional[str] = None,
        cwd: Optional[str] = None,
        team: Optional[str] = None,
        shadow_of: Optional[str] = None,
        shadow_mode: Optional[str] = None,
        idle_threshold_secs: Optional[int] = None,
        restart_policy: Optional[dict[str, Any]] = None,
        continue_from: Optional[str] = None,
    ) -> dict[str, Any]:
        """Ask the broker to spawn an agent with the ``pty`` runtime.

        The CLI arguments are built by ``_build_pty_args_with_model`` (CLI
        default arguments plus an injected ``--model`` where applicable).
        ``cwd`` defaults to the client's working directory. ``task``,
        ``idle_threshold_secs`` and ``continue_from`` are sent only when not
        ``None``; ``restart_policy`` is converted to the protocol's
        ``RestartPolicy`` when non-empty.

        Returns:
            The ``result`` of the broker's ``spawn_agent`` response.
        """
        await self.start_client()
        built_args = _build_pty_args_with_model(cli, args or [], model)
        from .protocol import RestartPolicy as ProtocolRestartPolicy

        rp = None
        if restart_policy:
            rp = ProtocolRestartPolicy(**restart_policy)
        agent = AgentSpec(
            name=name,
            runtime="pty",
            cli=cli,
            args=built_args,
            channels=channels or [],
            model=model,
            cwd=cwd or self._cwd,
            team=team,
            shadow_of=shadow_of,
            shadow_mode=shadow_mode,
            restart_policy=rp,
        )
        request_payload: dict[str, Any] = {"agent": agent.to_dict()}
        if task is not None:
            request_payload["initial_task"] = task
        if idle_threshold_secs is not None:
            request_payload["idle_threshold_secs"] = idle_threshold_secs
        if continue_from is not None:
            request_payload["continue_from"] = continue_from
        return await self._request_ok("spawn_agent", request_payload)

    async def spawn_headless(
        self,
        *,
        name: str,
        provider: HeadlessProvider,
        args: Optional[list[str]] = None,
        channels: Optional[list[str]] = None,
        task: Optional[str] = None,
    ) -> dict[str, Any]:
        """Ask the broker to spawn an agent with the ``headless`` runtime.

        Returns:
            The ``result`` of the broker's ``spawn_agent`` response.
        """
        await self.start_client()
        agent = AgentSpec(
            name=name,
            runtime="headless",
            provider=provider,
            args=args or [],
            channels=channels or [],
        )
        request_payload: dict[str, Any] = {"agent": agent.to_dict()}
        if task is not None:
            request_payload["initial_task"] = task
        return await self._request_ok("spawn_agent", request_payload)

    async def spawn_provider(
        self,
        *,
        name: str,
        provider: str,
        transport: Optional[AgentTransport] = None,
        args: Optional[list[str]] = None,
        channels: Optional[list[str]] = None,
        task: Optional[str] = None,
        model: Optional[str] = None,
        cwd: Optional[str] = None,
        team: Optional[str] = None,
        shadow_of: Optional[str] = None,
        shadow_mode: Optional[str] = None,
        idle_threshold_secs: Optional[int] = None,
        restart_policy: Optional[dict[str, Any]] = None,
        continue_from: Optional[str] = None,
    ) -> dict[str, Any]:
        """Spawn an agent for *provider* over the chosen transport.

        When *transport* is omitted, ``"opencode"`` uses headless and every
        other provider uses pty. On the headless path only ``name``,
        ``provider``, ``args``, ``channels`` and ``task`` are forwarded.

        Raises:
            AgentRelayProcessError: if headless transport is requested for a
                provider other than ``claude`` or ``opencode``.
        """
        resolved_transport: AgentTransport = transport or (
            "headless" if provider == "opencode" else "pty"
        )

        if resolved_transport == "headless":
            if provider not in ("claude", "opencode"):
                raise AgentRelayProcessError(
                    f"provider '{provider}' does not support headless transport (supported: claude, opencode)"
                )
            headless_provider: HeadlessProvider = (
                "claude" if provider == "claude" else "opencode"
            )
            return await self.spawn_headless(
                name=name,
                provider=headless_provider,
                args=args,
                channels=channels,
                task=task,
            )

        return await self.spawn_pty(
            name=name,
            cli=provider,
            args=args,
            channels=channels,
            task=task,
            model=model,
            cwd=cwd,
            team=team,
            shadow_of=shadow_of,
            shadow_mode=shadow_mode,
            idle_threshold_secs=idle_threshold_secs,
            restart_policy=restart_policy,
            continue_from=continue_from,
        )

    async def spawn_claude(self, **kwargs: Any) -> dict[str, Any]:
        """Shorthand for :meth:`spawn_provider` with ``provider="claude"``."""
        return await self.spawn_provider(provider="claude", **kwargs)

    async def spawn_opencode(self, **kwargs: Any) -> dict[str, Any]:
        """Shorthand for :meth:`spawn_provider` with ``provider="opencode"``."""
        return await self.spawn_provider(provider="opencode", **kwargs)

    async def release(self, name: str, reason: Optional[str] = None) -> dict[str, Any]:
        """Send ``release_agent`` for *name*, with *reason* when given."""
        await self.start_client()
        payload: dict[str, Any] = {"name": name}
        if reason is not None:
            payload["reason"] = reason
        return await self._request_ok("release_agent", payload)

    async def send_input(self, name: str, data: str) -> dict[str, Any]:
        """Send ``send_input`` carrying *data* for the agent *name*."""
        await self.start_client()
        return await self._request_ok("send_input", {"name": name, "data": data})

    async def set_model(
        self, name: str, model: str, *, timeout_ms: Optional[int] = None
    ) -> dict[str, Any]:
        """Send ``set_model`` for the agent *name*.

        ``timeout_ms`` is included in the request payload only when given.
        """
        await self.start_client()
        payload: dict[str, Any] = {"name": name, "model": model}
        if timeout_ms is not None:
            payload["timeout_ms"] = timeout_ms
        return await self._request_ok("set_model", payload)

    async def send_message(
        self,
        *,
        to: str,
        text: str,
        from_: Optional[str] = None,
        thread_id: Optional[str] = None,
        priority: Optional[int] = None,
        data: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        """Send a message through the broker.

        Optional fields are included only when not ``None``; ``from_`` is sent
        under the key ``"from"``.

        Returns:
            The ``result`` of the broker's response. If the broker rejects the
            request with code ``unsupported_operation``, a placeholder
            ``{"event_id": "unsupported_operation", "targets": []}`` is
            returned instead of raising.
        """
        await self.start_client()
        payload: dict[str, Any] = {"to": to, "text": text}
        if from_ is not None:
            payload["from"] = from_
        if thread_id is not None:
            payload["thread_id"] = thread_id
        if priority is not None:
            payload["priority"] = priority
        if data is not None:
            payload["data"] = data
        try:
            return await self._request_ok("send_message", payload)
        except AgentRelayProtocolError as e:
            if e.code == "unsupported_operation":
                return {"event_id": "unsupported_operation", "targets": []}
            raise

    async def list_agents(self) -> list[dict[str, Any]]:
        """Return the ``agents`` list from ``list_agents`` (empty if absent)."""
        await self.start_client()
        result = await self._request_ok("list_agents", {})
        return result.get("agents", []) if isinstance(result, dict) else []

    async def get_status(self) -> dict[str, Any]:
        """Return the result of the broker's ``get_status`` request."""
        await self.start_client()
        return await self._request_ok("get_status", {})

    async def get_metrics(self, agent: Optional[str] = None) -> dict[str, Any]:
        """Return the result of ``get_metrics``, naming *agent* when given."""
        await self.start_client()
        return await self._request_ok("get_metrics", {"agent": agent} if agent else {})

    async def get_crash_insights(self) -> dict[str, Any]:
        """Return the result of the broker's ``get_crash_insights`` request."""
        await self.start_client()
        return await self._request_ok("get_crash_insights", {})

    async def preflight_agents(self, agents: list[dict[str, str]]) -> None:
        """Send ``preflight_agents`` for *agents*; does nothing for an empty list."""
        if not agents:
            return
        await self.start_client()
        await self._request_ok("preflight_agents", {"agents": agents})

    async def shutdown(self) -> None:
        """Stop the broker and release the client's process resources.

        A ``shutdown`` request is sent first. If the broker has not exited
        within ``shutdown_timeout_ms`` it is terminated, and killed if it
        still has not exited two seconds later. Does nothing when no broker
        process is attached.
        """
        process = self._process
        if not process:
            return

        try:
            await self._request_ok("shutdown", {})
        except Exception:
            # Best effort: the broker may already be gone or unresponsive;
            # forced termination below handles that.
            pass

        # shield() keeps wait_for() from cancelling the shared exit future on
        # timeout, which would otherwise cancel wait_for_exit() callers.
        exit_waiter = (
            asyncio.shield(self._exit_future)
            if self._exit_future
            else process.wait()
        )
        try:
            await asyncio.wait_for(
                exit_waiter,
                timeout=self._shutdown_timeout_ms / 1000,
            )
        except asyncio.TimeoutError:
            await self._stop_process(process, 2.0)

        # Clean up reader tasks
        await self._cancel_io_tasks()

        self._process = None
        self._started = False

    async def wait_for_exit(self) -> None:
        """Wait until the broker process has exited.

        Returns immediately if the broker was never started.
        """
        if self._exit_future:
            await self._exit_future