avp-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avp_cli/agent.py ADDED
@@ -0,0 +1,566 @@
1
+ """Run a real AVP agent against one Commission, inside a sandbox.
2
+
3
+ Every conforming AVP agent honors the same run contract the conformance
4
+ harness uses:
5
+
6
+ <command> run --commission <path> --out <ndjson>
7
+
8
+ Here that command executes inside an OpenSandbox container built from the
9
+ environment's derived image (see `avp_cli.images`): the per-run host workspace
10
+ and an io dir are bind-mounted in, the Commission goes in as a file, and the
11
+ NDJSON trajectory lands back on the host where it is tailed live and parsed.
12
+ The host machine is not part of the agent's world; everything the agent sees
13
+ is declared (image, mounts, env vars, egress policy).
14
+
15
+ `describe_agent` stays a host-side subprocess: it is the free pre-flight view
16
+ (no model turn, no tools), driven from the agent's manifest like the
17
+ conformance harness does.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import contextlib
23
+ import json
24
+ import os
25
+ import shlex
26
+ import shutil
27
+ import subprocess
28
+ import time
29
+ import uuid
30
+ from collections.abc import Callable
31
+ from concurrent.futures import ThreadPoolExecutor
32
+ from dataclasses import dataclass, field
33
+ from datetime import timedelta
34
+ from pathlib import Path
35
+ from typing import Any
36
+ from urllib.parse import urlparse
37
+
38
+ from opensandbox import SandboxSync
39
+ from opensandbox.models.execd import RunCommandOpts
40
+ from opensandbox.models.sandboxes import Host, Volume
41
+ from pydantic import BaseModel
42
+
43
+ from avp.commission import Commission
44
+ from avp.descriptor import AgentDescriptor
45
+ from avp.trajectory import parse_event
46
+ from avp_cli import broker, osb, paths, vault
47
+ from avp_cli.agent_manifest import AgentManifest
48
+
49
# How often the streaming path re-reads the growing --out file (seconds).
_TAIL_INTERVAL = 0.06

# In-sandbox layout: one rw mount for the (possibly run-shared) workspace, one
# rw mount for this run's io (commission in, trajectory + stderr out).
_WORKSPACE_MNT = "/avp/workspace"
_IO_MNT = "/avp/io"

# Host env vars forwarded into the sandbox: model-provider credentials and
# agent routing knobs. CLAUDE_ covers CLAUDE_CODE_OAUTH_TOKEN (the
# `claude setup-token` subscription credential the claude CLI accepts in
# place of an API key). The rest of the host environment stays on the host;
# the sandbox env is otherwise fully declared.
# Kept as a tuple because it is handed directly to `str.startswith`.
_ENV_PASSTHROUGH_PREFIXES = (
    "ANTHROPIC_",
    "CLAUDE_",
    "OPENAI_",
    "GOOGLE_",
    "GEMINI_",
    "MISTRAL_",
    "OPENROUTER_",
    "GOOSE_",
)

# Sandbox lifetime margin beyond the run timeout: covers image boot, setup
# commands, and trajectory readback before the server reaps the sandbox.
_SANDBOX_TTL_MARGIN_S = 180.0

# Baseline sandbox resource request; entries in `SandboxContext.resources`
# override these key by key when the sandbox is created.
_DEFAULT_RESOURCES = {"cpu": "2", "memory": "4Gi"}
78
+
79
+
80
@dataclass(frozen=True)
class SandboxedAgent:
    """An agent resolved to its in-sandbox form: the derived image that has it
    installed, the argv that runs it there, and its manifest env."""

    # Agent name.
    name: str
    # Image the sandbox is created from.
    image: str
    # argv prefix; `run --commission <path> --out <ndjson>` is appended to it.
    command: tuple[str, ...]
    # Manifest env, layered over the provider routing vars in the sandbox env.
    env: dict[str, str] = field(default_factory=dict)
89
+
90
+
91
@dataclass
class SandboxContext:
    """The run-level sandbox facts shared by every cell of an eval (or the one
    cell of `avp run`): server connection, the seeded host workspace, and the
    environment's setup / egress / resource asks."""

    # Sandbox server connection; its `config()` is passed at sandbox creation.
    connection: osb.Connection
    # Host directory bind-mounted into the sandbox as the workspace.
    workspace: Path
    # Commands run inside the workspace mount before the agent starts.
    setup: list[str] = field(default_factory=list)
    # Extra egress hosts added to the sandbox network policy.
    net: list[str] = field(default_factory=list)
    # Per-key overrides of the default sandbox resource request.
    resources: dict[str, str] = field(default_factory=dict)
102
+
103
+
104
def load_manifest(path: str | Path) -> tuple[AgentManifest, Path]:
    """Read and validate an agent manifest file.

    Returns the validated manifest together with its working directory: the
    manifest's `cwd` entry resolved against the directory that holds the
    manifest file itself.
    """
    manifest_file = Path(path).resolve()
    raw = json.loads(manifest_file.read_text())
    manifest = AgentManifest.model_validate(raw)
    return manifest, (manifest_file.parent / manifest.cwd).resolve()
110
+
111
+
112
def run_agent(
    agent: SandboxedAgent,
    ctx: SandboxContext,
    commission: Commission,
    *,
    out_path: str | Path,
    timeout_s: float = 300.0,
    on_event: Callable[[BaseModel | dict[str, Any]], None] | None = None,
) -> tuple[list[BaseModel | dict[str, Any]] | None, str | None]:
    """Run the agent for one Commission in a fresh sandbox. Returns (events, error).

    On success `error` is None and `events` is the parsed NDJSON trajectory
    (custom event types pass through as dicts). On failure `events` is None and
    `error` is a short diagnostic (nonzero exit, timeout, sandbox create
    failure, unparseable trajectory) so one bad run is recorded per-cell rather
    than aborting a matrix.

    If `on_event` is given, the trajectory file is tailed on the host (through
    the io bind mount) while the agent runs and `on_event` fires per event for
    live progress. The returned list is always re-parsed from the finished
    file, so it stays authoritative regardless of what the live tail saw.

    The sandbox is always killed before returning; the workspace mount is the
    only place agent writes survive.

    Args:
        agent: The agent's in-sandbox form (image, argv, manifest env).
        ctx: Run-level sandbox facts (connection, workspace, setup, egress,
            resources).
        commission: The Commission to run.
        out_path: Host path the finished trajectory is moved to.
        timeout_s: Agent run timeout in seconds; the sandbox itself lives
            `_SANDBOX_TTL_MARGIN_S` longer.
        on_event: Optional live-progress callback, called once per event.
    """
    out = Path(out_path)
    out.parent.mkdir(parents=True, exist_ok=True)
    # io lives under ~/.avp: the server's allowed_host_paths confines bind
    # mounts there, and --out may point anywhere on the host. The trajectory
    # is moved to `out` once the run settles.
    io_dir = paths.avp_home() / "tmp" / uuid.uuid4().hex
    io_dir.mkdir(parents=True)
    traj_host = io_dir / "trajectory.ndjson"

    # Vault broker: when the Commission references any secret (provider
    # credential or MCP auth), a host-side credential-injecting proxy serves
    # those endpoints so the resolved value never enters the sandbox. The
    # written commission + sandbox env then point at the broker with sentinels.
    brk: broker.Broker | None = None
    try:
        brk = _start_broker(commission)
    except vault.VaultError as exc:
        shutil.rmtree(io_dir, ignore_errors=True)
        return None, f"vault: {exc}"
    except Exception as exc:
        shutil.rmtree(io_dir, ignore_errors=True)
        return None, f"vault broker startup failed: {exc}"

    try:
        (io_dir / "commission.json").write_text(_commission_for_sandbox(commission, brk))
        if brk is not None:
            (io_dir / "broker-preflight.sh").write_text(_BROKER_PREFLIGHT_SH)
        try:
            box = SandboxSync.create(
                agent.image,
                connection_config=ctx.connection.config(),
                env=_sandbox_env(agent, commission, brk),
                volumes=[
                    Volume(
                        name="workspace",
                        host=Host(path=str(ctx.workspace.resolve())),
                        mount_path=_WORKSPACE_MNT,
                    ),
                    Volume(name="io", host=Host(path=str(io_dir)), mount_path=_IO_MNT),
                ],
                # Egress is default-deny floored by runtime-base domains, plus the
                # env's `net`, plus the hosts the run reaches: under broker mode
                # just the broker host (the broker reaches providers/MCP from the
                # host); otherwise the provider + MCP hosts the Commission names.
                # A run can never be commissioned to call a URL the sandbox blocks.
                network_policy=osb.network_policy([*ctx.net, *_egress_extra(commission, brk)]),
                resource={**_DEFAULT_RESOURCES, **ctx.resources},
                timeout=timedelta(seconds=timeout_s + _SANDBOX_TTL_MARGIN_S),
            )
        except Exception as exc:
            return None, f"sandbox create failed: {exc}"
        try:
            # Fail closed: if the vault broker isn't reachable from the sandbox,
            # refuse the run rather than ever placing the secret inside it.
            err = _preflight_broker(box, brk) if brk is not None else None
            if err is None:
                err = _run_setup(box, ctx.setup)
            if err is None:
                argv = [
                    *agent.command,
                    "run",
                    "--commission",
                    f"{_IO_MNT}/commission.json",
                    "--out",
                    f"{_IO_MNT}/trajectory.ndjson",
                ]
                # stderr goes to the io mount so the host can quote its tail.
                command = f"{shlex.join(argv)} 2>{_IO_MNT}/stderr.log"
                if on_event is None:
                    err = _exec(box, command, timeout_s, io_dir)
                else:
                    err = _exec_streaming(box, command, timeout_s, io_dir, traj_host, on_event)
        finally:
            with contextlib.suppress(Exception):  # reaped by TTL if the kill call fails
                box.kill()
        if err is not None:
            return None, err
        if not traj_host.exists():
            return None, "agent exited 0 but wrote no trajectory"
        if traj_host != out:
            shutil.move(traj_host, out)
        # A malformed trajectory is one bad cell, not a crash: report it through
        # the error slot like every other per-run failure.
        try:
            return read_trajectory(out), None
        except Exception as exc:  # malformed / not a trajectory
            return None, f"unparseable trajectory: {exc}"
    finally:
        # Runs on every exit path above, including the early error returns.
        if brk is not None:
            brk.stop()
        shutil.rmtree(io_dir, ignore_errors=True)
221
+
222
+
223
# The sentinel key the agent receives in place of a vault-managed credential.
# Non-empty so SDKs that require a key present are satisfied; the broker
# overwrites it with the real value on the host before forwarding.
# `_sandbox_env` sets it as the provider API-key variable under broker mode.
_VAULT_SENTINEL = "avp-vault-managed"
227
+
228
+
229
def _provider_credentialed(commission: Commission) -> bool:
    """Return True when the Commission names a provider that carries a credential."""
    provider = commission.provider
    if provider is None:
        return False
    return provider.credential is not None
232
+
233
+
234
def _start_broker(commission: Commission) -> broker.Broker | None:
    """Bring up a vault broker when, and only when, the Commission needs one.

    A route is planned for a credentialed provider and for every MCP server
    that declares `auth`. Secret handles are resolved here on the host (which
    may raise VaultError), so the resolved values live only in the broker.
    Returns None when no route is needed.
    """
    planned: list[tuple[str, broker.Route]] = []

    provider = commission.provider
    if provider is not None and provider.credential is not None:
        endpoint = (
            provider.base_url
            or (osb.PROVIDER_REGISTRY.get(provider.id) or (None, None))[1]
            or ""
        )
        # Anthropic takes the bare key in x-api-key; everyone else a Bearer token.
        if provider.id == "anthropic":
            header, prefix = "x-api-key", ""
        else:
            header, prefix = "authorization", "Bearer "
        llm_route = broker.Route(
            upstream=broker.origin_of(endpoint),
            header=header,
            prefix=prefix,
            secret=vault.resolve_ref(provider.credential),
        )
        planned.append((f"llm/{provider.id}", llm_route))

    for server in commission.mcp_servers or []:
        auth = getattr(server, "auth", None)
        if auth is None:
            continue
        mcp_route = broker.Route(
            upstream=server.url,
            header="authorization",
            prefix="Bearer ",
            secret=vault.resolve(auth.vault),
        )
        planned.append((f"mcp/{server.id}", mcp_route))

    if not planned:
        return None

    started = broker.Broker()
    for key, route in planned:
        started.add_route(key, route)
    started.start()
    return started
276
+
277
+
278
def _sandbox_env(
    agent: SandboxedAgent, commission: Commission, brk: broker.Broker | None
) -> dict[str, str]:
    """Assemble the environment the sandbox is created with.

    Layers, later ones winning: host variables matching the passthrough
    prefixes (the user's ambient provider keys for the no-vault case), provider
    routing (broker url + sentinel key for a vault-credentialed provider, the
    real base url otherwise), the manifest's env, and finally the AVP workspace
    convention (AVP_WORKSPACE / AVP_ENV_ROOT).
    """
    env: dict[str, str] = {}
    for key, value in os.environ.items():
        if key.startswith(_ENV_PASSTHROUGH_PREFIXES):
            env[key] = value

    provider = commission.provider
    if provider is not None:
        is_anthropic = provider.id == "anthropic"
        var_stem = provider.id.upper().replace("-", "_")
        if provider.credential is not None and brk is not None:
            # Vault: route through the broker, hand the agent only a sentinel.
            broker_url = brk.route_url(f"llm/{provider.id}")
            if is_anthropic:
                env["ANTHROPIC_BASE_URL"] = broker_url
                env["ANTHROPIC_API_KEY"] = _VAULT_SENTINEL
            else:
                env[f"{var_stem}_HOST"] = broker_url
                env[f"{var_stem}_API_KEY"] = _VAULT_SENTINEL
                env["GOOSE_PROVIDER"] = provider.id
        else:
            # Non-vault provider: real endpoint, ambient key (forwarded above).
            endpoint = (
                provider.base_url
                or (osb.PROVIDER_REGISTRY.get(provider.id) or (None, None))[1]
            )
            if endpoint:
                host_var = "ANTHROPIC_BASE_URL" if is_anthropic else f"{var_stem}_HOST"
                env[host_var] = endpoint
            if not is_anthropic:
                env["GOOSE_PROVIDER"] = provider.id

    env.update(agent.env)
    env["AVP_WORKSPACE"] = _WORKSPACE_MNT
    env["AVP_ENV_ROOT"] = "/avp"
    return env
315
+
316
+
317
def _commission_for_sandbox(commission: Commission, brk: broker.Broker | None) -> str:
    """Serialize the Commission into the commission.json the agent reads.

    Provider routing and MCP auth are supervisor concerns delivered through
    the sandbox env and the broker, so both are removed from the agent's copy.
    That also keeps the file acceptable to released agents whose older wire
    types reject unknown fields such as `provider`/`auth`, and keeps the
    resolved value out of the agent's run_requested snapshot. Under broker
    mode, each authed http MCP server's url is replaced by its broker route
    (the broker injects the credential).
    """
    payload = commission.model_dump(by_alias=True, exclude_none=True, mode="json")
    payload.pop("provider", None)

    servers = payload.get("mcp_servers") or []
    for entry in servers:
        authed_http = entry.get("type") == "http" and entry.get("auth") is not None
        if brk is not None and authed_http:
            entry["url"] = brk.route_url(f"mcp/{entry['id']}")
        # Auth is stripped from every server, rewritten or not.
        entry.pop("auth", None)

    return json.dumps(payload)
334
+
335
+
336
def _egress_extra(commission: Commission, brk: broker.Broker | None) -> list[str]:
    """Hosts to add to the sandbox egress policy for this run.

    With no broker, that is whatever provider + MCP hosts the Commission names.
    With a broker, it is the broker host plus the hosts of endpoints that do
    NOT go through it: a provider without a credential and MCP servers without
    auth. Empty hostnames are dropped.
    """
    if brk is None:
        return osb.commission_egress_hosts(commission)

    candidates = [broker.SANDBOX_HOST_ALIAS]

    provider = commission.provider
    if provider is not None and provider.credential is None:
        endpoint = (
            provider.base_url
            or (osb.PROVIDER_REGISTRY.get(provider.id) or (None, None))[1]
        )
        if endpoint:
            candidates.append(urlparse(endpoint).hostname or "")

    for server in commission.mcp_servers or []:
        has_url = getattr(server, "url", None)
        unauthed = getattr(server, "auth", None) is None
        if has_url and unauthed:
            candidates.append(urlparse(server.url).hostname or "")

    return [host for host in candidates if host]
352
+
353
+
354
# Reach the host-side vault broker from inside the sandbox, cross-platform.
# Docker Desktop / OrbStack inject `host.docker.internal`; plain Linux Docker
# does not, so we map it to the default-route gateway (the host) in /etc/hosts.
# That keeps a single broker address (host.docker.internal) working everywhere —
# the agent's base_urls / MCP urls never have to change per host. Written to the
# io dir and run by `_preflight_broker`; exit 0 iff the broker is reachable.
# `$1` is the broker port. The probe tries curl, then wget, then python3, so it
# works with whichever of the three the image provides.
_BROKER_PREFLIGHT_SH = r"""#!/bin/sh
URL="http://host.docker.internal:$1/health"
fetch() {
curl -fsS -m 3 "$URL" >/dev/null 2>&1 && return 0
wget -q -T 3 -O - "$URL" >/dev/null 2>&1 && return 0
python3 -c 'import urllib.request,sys; urllib.request.urlopen(sys.argv[1],timeout=3)' "$URL" >/dev/null 2>&1
}
fetch && exit 0
gw="$(ip -4 route show default 2>/dev/null | awk '{print $3; exit}')"
if [ -z "$gw" ]; then
gw="$(python3 -c 'import socket, struct
for line in open("/proc/net/route").read().splitlines()[1:]:
    f = line.split()
    if len(f) > 3 and f[1] == "00000000" and int(f[3], 16) & 2:
        print(socket.inet_ntoa(struct.pack("<L", int(f[2], 16)))); break' 2>/dev/null)"
fi
[ -n "$gw" ] && printf '%s host.docker.internal\n' "$gw" >> /etc/hosts 2>/dev/null
fetch
"""
379
+
380
+
381
def _preflight_broker(box: SandboxSync, brk: broker.Broker) -> str | None:
    """Check from inside the sandbox that the vault broker answers.

    Runs the preflight script from the io mount with the broker port (the
    script also makes host.docker.internal resolve on plain Linux Docker).
    Returns None when the broker is reachable, otherwise an error string: the
    run fails closed and never falls back to placing the secret in the sandbox.
    """
    try:
        execution = box.commands.run(
            f"sh {_IO_MNT}/broker-preflight.sh {brk.port}",
            opts=RunCommandOpts(timeout=timedelta(seconds=25)),
        )
    except Exception as exc:
        return f"vault broker preflight failed to run ({exc}); refusing to run"

    # A missing exit code is treated the same as success.
    if execution.exit_code in (0, None):
        return None
    return (
        f"vault broker unreachable from the sandbox (host broker on port {brk.port}); "
        "refusing to run rather than expose the secret"
    )
399
+
400
+
401
def _run_setup(box: SandboxSync, setup: list[str]) -> str | None:
    """Execute the env's setup commands in order inside the workspace mount.

    Stops at the first command that raises or exits nonzero and returns a
    diagnostic for it; returns None when every command succeeded (or there
    were none).
    """
    for step in setup:
        try:
            execution = box.commands.run(
                step,
                opts=RunCommandOpts(
                    working_directory=_WORKSPACE_MNT, timeout=timedelta(minutes=10)
                ),
            )
        except Exception as exc:
            return f"setup failed ({step!r}): {exc}"
        # A missing exit code is treated the same as success.
        if execution.exit_code in (0, None):
            continue
        tail = _logs_tail(execution)
        return f"setup exit {execution.exit_code} ({step!r}): {tail}"
    return None
417
+
418
+
419
def _exec(box: SandboxSync, command: str, timeout_s: float, io_dir: Path) -> str | None:
    """Run the agent command in the workspace mount and block until it ends.

    Returns None on success, otherwise an error string: a timeout message when
    the SDK exception looks like one, the exception text for other failures,
    or the exit-code diagnostic built from the run's stderr.
    """
    try:
        execution = box.commands.run(
            command,
            opts=RunCommandOpts(
                working_directory=_WORKSPACE_MNT,
                timeout=timedelta(seconds=timeout_s),
            ),
        )
    except Exception as exc:
        failure = f"agent run failed: {exc}"
        return _timeout_or(failure, exc, timeout_s)
    return _exit_error(execution, io_dir)
431
+
432
+
433
def _exec_streaming(
    box: SandboxSync,
    command: str,
    timeout_s: float,
    io_dir: Path,
    traj_host: Path,
    on_event: Callable[[BaseModel | dict[str, Any]], None],
) -> str | None:
    """Run the agent command in a worker thread while tailing the growing
    trajectory file (visible on the host through the io bind mount).

    Args:
        box: The sandbox the command runs in.
        command: Shell command line for the agent run.
        timeout_s: Agent run timeout in seconds, enforced by the blocking run.
        io_dir: Host side of the io mount (where stderr.log lands).
        traj_host: Host path of the trajectory file being tailed.
        on_event: Callback fired for each event seen by the live tail.

    Returns:
        None on success, otherwise an error string. A host-side deadline of
        `timeout_s + _SANDBOX_TTL_MARGIN_S` backs up the sandbox-side timeout.

    Raises:
        KeyboardInterrupt: re-raised after the sandbox has been killed.
    """
    with ThreadPoolExecutor(max_workers=1) as pool:
        future = pool.submit(_exec, box, command, timeout_s, io_dir)
        deadline = time.monotonic() + timeout_s + _SANDBOX_TTL_MARGIN_S
        pos = 0
        buf = ""
        try:
            while not future.done():
                pos, buf = _drain(traj_host, pos, buf, on_event)
                if time.monotonic() > deadline:  # belt-and-suspenders over execd's timeout
                    # Leaving this `with` block waits for the worker, which is
                    # still blocked in the sandbox call. Kill the sandbox first
                    # so the worker returns and the timeout actually surfaces.
                    with contextlib.suppress(Exception):
                        box.kill()
                    return f"timed out after {timeout_s:.0f}s"
                time.sleep(_TAIL_INTERVAL)
        except KeyboardInterrupt:
            with contextlib.suppress(Exception):
                box.kill()  # unblocks the worker; the caller decides what stops
            raise
        _drain(traj_host, pos, buf, on_event)  # lines written between last poll and exit
        return future.result()
460
+
461
+
462
def _exit_error(execution: Any, io_dir: Path) -> str | None:
    """Turn a finished execution into an error string, or None on success.

    A zero or missing exit code counts as success. Otherwise the message quotes
    the last three lines of `stderr.log` in `io_dir`, falling back to the
    execution's own captured logs, then to "(no stderr)".
    """
    if execution.exit_code in (0, None):
        return None
    # Agent stderr is arbitrary bytes: decode leniently so a stray byte cannot
    # turn error reporting into a crash, and treat an unreadable or missing
    # file as having no stderr.
    try:
        text = (io_dir / "stderr.log").read_text(encoding="utf-8", errors="replace")
    except OSError:
        text = ""
    tail = "\n".join(text.strip().splitlines()[-3:])
    return f"exit {execution.exit_code}: {tail or _logs_tail(execution) or '(no stderr)'}"
470
+
471
+
472
def _logs_tail(execution: Any, lines: int = 3) -> str:
    """Return the last `lines` lines of an execution's captured output.

    Uses the stderr log entries when there are any, otherwise stdout; the
    entries' text is concatenated and stripped before the tail is taken.
    """
    logs = execution.logs
    entries = logs.stderr or logs.stdout or []
    combined = "".join(entry.text for entry in entries)
    return "\n".join(combined.strip().splitlines()[-lines:])
475
+
476
+
477
def _timeout_or(message: str, exc: Exception, timeout_s: float) -> str:
    """Pick the error text for a failed run.

    execd reports a run timeout as an SDK exception, so an exception whose text
    mentions a timeout is reported as our own timeout message; anything else
    keeps `message`.
    """
    detail = str(exc).lower()
    if any(marker in detail for marker in ("timeout", "timed out")):
        return f"timed out after {timeout_s:.0f}s"
    return message
482
+
483
+
484
def read_trajectory(path: Path) -> list[BaseModel | dict[str, Any]]:
    """Parse a finished NDJSON trajectory file into events (custom types as dicts).

    The inverse of what an agent's `--out` stream produces; used both to return a
    run's events and to re-read a previously completed run for `--resume`.

    The file is read as UTF-8 and records are separated on "\\n" only, matching
    the live tail. `str.splitlines()` would also break on characters such as
    U+2028 that may appear unescaped inside a JSON string, cutting a record in
    two. Blank lines are skipped.

    Raises:
        OSError: the file cannot be read.
        ValueError: a line is not valid UTF-8 or not valid JSON.
    """
    events: list[BaseModel | dict[str, Any]] = []
    for raw in path.read_text(encoding="utf-8").split("\n"):
        line = raw.strip()  # also drops a trailing "\r"
        if line:
            events.append(parse_event(json.loads(line)))
    return events
496
+
497
+
498
def describe_agent(
    manifest: AgentManifest,
    manifest_cwd: Path,
    *,
    timeout_s: float = 120.0,
) -> tuple[AgentDescriptor | None, str | None]:
    """Ask an agent to describe itself via `<command> describe --out <file>`.

    This is the spec's pre-flight view: the agent boots, lists its surface, and
    exits without a model turn, so it's free, and it runs on the host (no
    sandbox; nothing untrusted executes).

    Returns (descriptor, None) on success, or (None, error) when the command
    fails, writes no file, or writes something that is not a descriptor.
    """
    import tempfile

    # Host environment with the manifest's env layered on top.
    env = {**os.environ, **manifest.env}
    with tempfile.TemporaryDirectory() as scratch:
        descriptor_file = Path(scratch) / "descriptor.json"
        argv = [*manifest.command, "describe", "--out", str(descriptor_file)]

        failure = _run_blocking(argv, manifest_cwd, env, timeout_s)
        if failure is not None:
            return None, failure
        if not descriptor_file.exists():
            return None, "agent exited 0 but wrote no descriptor"

        try:
            payload = json.loads(descriptor_file.read_text())
            return AgentDescriptor.model_validate(payload), None
        except Exception as exc:  # malformed / not a descriptor
            return None, f"unparseable descriptor: {exc}"
525
+
526
+
527
def _run_blocking(cmd: list[str], cwd: Path, env: dict[str, str], timeout_s: float) -> str | None:
    """Run a host subprocess to completion.

    Returns None when it exits 0. Otherwise returns a short diagnostic: a
    timeout message, a spawn failure, or the exit code with the last three
    lines of stderr.
    """
    try:
        proc = subprocess.run(
            cmd,
            cwd=cwd,
            env=env,
            capture_output=True,
            text=True,
            timeout=timeout_s,
        )
    except subprocess.TimeoutExpired:
        return f"timed out after {timeout_s:.0f}s"
    except OSError as exc:
        return f"spawn failed: {exc}"

    if proc.returncode == 0:
        return None
    last_lines = proc.stderr.strip().splitlines()[-3:]
    tail = "\n".join(last_lines) or "(no stderr)"
    return f"exit {proc.returncode}: {tail}"
541
+
542
+
543
def _drain(
    out: Path, pos: int, buf: str, on_event: Callable[[BaseModel | dict[str, Any]], None]
) -> tuple[int, str]:
    """Read `out` from byte offset `pos` and emit each complete NDJSON line to
    `on_event`. Returns the new offset and any trailing partial line.

    The file is read as bytes and only consumed up to the last newline, so
    `pos` is a true byte offset and a record (or multi-byte character) caught
    mid-write stays in the file for the next tick; the returned partial is
    therefore always "". Complete lines are decoded as UTF-8 with replacement,
    so a stray invalid byte cannot stall the tail. Any `buf` the caller carries
    in is prepended to the first line.
    """
    try:
        with out.open("rb") as f:
            f.seek(pos)
            chunk = f.read()
    except OSError:
        return pos, buf  # not created yet / transient; retry next tick

    complete = chunk.rfind(b"\n") + 1
    if complete == 0:
        return pos, buf  # no finished line since the last tick

    text = buf + chunk[:complete].decode("utf-8", errors="replace")
    for raw in text.split("\n"):
        line = raw.strip()
        if not line:
            continue
        # A bad line or a live-display glitch must not abort the run; the
        # final full-file parse in run_agent is authoritative anyway.
        with contextlib.suppress(Exception):
            on_event(parse_event(json.loads(line)))
    return pos + complete, ""