optio-codex 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optio_codex/__init__.py +66 -0
- optio_codex/conversation.py +553 -0
- optio_codex/conversation_listener.py +322 -0
- optio_codex/cred_watcher.py +138 -0
- optio_codex/fs_allowlist.py +149 -0
- optio_codex/host_actions.py +1070 -0
- optio_codex/models.py +68 -0
- optio_codex/prompt.py +184 -0
- optio_codex/seed_manifest.py +91 -0
- optio_codex/session.py +731 -0
- optio_codex/snapshots.py +147 -0
- optio_codex/types.py +325 -0
- optio_codex/verify.py +352 -0
- optio_codex-0.1.0.dist-info/METADATA +220 -0
- optio_codex-0.1.0.dist-info/RECORD +17 -0
- optio_codex-0.1.0.dist-info/WHEEL +5 -0
- optio_codex-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1070 @@
|
|
|
1
|
+
"""Codex-specific actions over a generic Host."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import hashlib
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import shlex
|
|
11
|
+
import uuid
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
from optio_agents import RESUME_NOTICE, SYSTEM_MESSAGE_PREFIX
|
|
16
|
+
from optio_host.host import proc_wait
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from optio_agents import HookContextProtocol
|
|
20
|
+
from optio_host import Host
|
|
21
|
+
from optio_host.host import ProcessHandle
|
|
22
|
+
|
|
23
|
+
# Module-scoped logger, named after this module.
_LOG = logging.getLogger(__name__)

# Pulls a port number out of a line of text: either a "port <n>" /
# "port: <n>" phrase (group 1) or the port of an "http://host:<n>" URL
# (group 2). Presumably applied to ttyd's startup output — its consumer is
# outside the visible part of the file.
_TTYD_READY_RE = re.compile(
    r"(?:port[\s:]+(\d+))|(?:http://[^\s]+?:(\d+)(?:/|\s|$))"
)
# Kept as a string, not a float; NOTE(review): presumably a settle delay in
# seconds interpolated into a shell command — confirm at the use site.
_SUBMIT_SETTLE_S = "1.0"
# ttyd release to fetch, and the GitHub download prefix for that release.
_TTYD_VERSION = "1.7.7"
_TTYD_RELEASE_BASE = f"https://github.com/tsl0922/ttyd/releases/download/{_TTYD_VERSION}"
# Install location used when no install dir is configured, relative to the
# host's home directory.
_DEFAULT_INSTALL_SUBDIR = ".local/bin"

# Provider credential variables removed from the headless verify probe's
# environment. Codex can authenticate through the seed's planted
# ChatGPT-mode auth.json or, as a fallback, through an ambient
# ``OPENAI_API_KEY``. The probe must exercise only the seed under test: an
# inherited API key would let the probe succeed even when the seed's refresh
# token is dead, so a dead seed would be reported alive. Removing the keys
# here makes that impossible by construction instead of relying on callers.
_PROBE_SCRUB_ENV_KEYS = ("OPENAI_API_KEY",)

# Shell expression for the optio-owned codex binary cache. The cache lives
# on the WORKER, outside every task workdir and never in the operator's
# ``~/.codex``. ``OPTIO_CODEX_CACHE_DIR`` overrides the default of
# ``${XDG_CACHE_HOME:-$HOME/.cache}/optio-codex/bin``. It is expanded by a
# shell on the host so a RemoteHost yields the remote location, and so the
# cache stays shared and evictable (never snapshotted; re-seeded or
# re-downloaded on a miss).
_CODEX_CACHE_DIR_SHELL_DEFAULT = (
    "${OPTIO_CODEX_CACHE_DIR:-${XDG_CACHE_HOME:-$HOME/.cache}/optio-codex/bin}"
)

# Codex release pinned for auto-download: the version the design doc's live
# probes ran against (2026-07-02). Bump deliberately, and re-probe the wire
# facts (exec JSONL vocabulary, app-server surface) when upgrading.
_CODEX_VERSION = "0.142.5"
_CODEX_RELEASE_BASE = f"https://github.com/openai/codex/releases/download/rust-v{_CODEX_VERSION}"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
async def _expand_user_path(host: "Host", path: str) -> str:
    """Resolve a leading ``~`` in ``path`` against the host's home directory.

    Paths are shlex-quoted further down the line, and quoting suppresses the
    shell's own tilde expansion. ``~`` and ``~/...`` are therefore expanded
    here using ``host.resolve_host_home()`` — the worker's home, never the
    engine's. A path that does not start with ``~`` is returned unchanged.

    Raises:
        ValueError: for ``~user`` forms; resolving another user's home on
            the host is not supported.
    """
    if not path.startswith("~"):
        return path
    if path != "~" and not path.startswith("~/"):
        raise ValueError(
            f"install dir {path!r}: '~user' paths are not supported; use an "
            f"absolute path or plain '~/'."
        )
    # Trailing slashes on the home are dropped so the join below never
    # produces a doubled separator.
    home = (await host.resolve_host_home()).rstrip("/")
    if path == "~":
        return home
    return f"{home}/{path[2:]}"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
async def _resolve_install_dir(host: "Host", install_dir: str | None) -> str:
    """Return the directory tools are installed into on the host.

    An explicit ``install_dir`` is used after ``~`` expansion against the
    host's home. When it is ``None`` the default is
    ``<host home>/.local/bin``.
    """
    if install_dir is None:
        host_home = await host.resolve_host_home()
        return f"{host_home}/{_DEFAULT_INSTALL_SUBDIR}"
    return await _expand_user_path(host, install_dir)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
async def resolve_codex(
    host: "Host",
    *,
    install_dir: str | None = None,
    install_if_missing: bool = True,
) -> str:
    """Locate an executable ``codex`` on the host and return its path.

    With ``install_dir`` given, only ``<install_dir>/codex`` is considered
    (after ``~`` expansion against the host's home) and it must pass an
    ``[ -x ... ]`` check. Without it, the host's login shell is asked via
    ``command -v codex``.

    This function never installs anything. ``install_if_missing`` only
    selects which error message is raised when the PATH lookup comes up
    empty.

    Raises:
        RuntimeError: when no executable ``codex`` is found.
        ValueError: when ``install_dir`` is a ``~user`` path.
    """
    if install_dir is None:
        # A login shell is used so the worker's profile-provided PATH applies.
        result = await host.run_command("bash -lc 'command -v codex'")
        path = (result.stdout or "").strip()
        if result.exit_code == 0 and path:
            return path
        if install_if_missing:
            raise RuntimeError(
                "codex not found on the worker (looked via 'command -v codex'). "
                "Install codex manually (npm i -g @openai/codex) or rely on the "
                "optio cache auto-download via ensure_codex_installed."
            )
        raise RuntimeError(
            "codex not found on host and install_if_missing=False; nothing to do."
        )

    install_dir = await _expand_user_path(host, install_dir)
    candidate = f"{install_dir.rstrip('/')}/codex"
    # "|| true" keeps the probe's exit status at 0; the verdict is the "OK"
    # marker on stdout.
    probe = await host.run_command(
        f"[ -x {shlex.quote(candidate)} ] && echo OK || true"
    )
    if "OK" not in (probe.stdout or ""):
        raise RuntimeError(
            f"codex not present at {candidate!r} on host "
            f"(codex_install_dir={install_dir!r})."
        )
    return candidate
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def _resolve_codex_cache_dir(host: "Host", override: str | None) -> str:
    """Resolve the optio-owned codex binary-cache dir as a worker path.

    ``override`` (``config.codex_install_dir``) wins when given. A leading
    ``~``/``~/`` in the override is expanded against the host's home: the
    result is shlex-quoted by every consumer, which would otherwise leave a
    literal ``~`` directory. This matches how ``resolve_codex`` treats the
    same setting.

    Without an override the worker's own environment decides through a
    shell ``printf``: ``OPTIO_CODEX_CACHE_DIR`` if set, else
    ``${XDG_CACHE_HOME:-$HOME/.cache}/optio-codex/bin``. Resolving on the
    host means a RemoteHost yields the remote location. (The ttyd
    ``_resolve_install_dir`` is a separate, home-relative resolver.)

    Returns:
        The cache directory with any trailing ``/`` removed.

    Raises:
        RuntimeError: when the shell resolution fails or prints nothing.
        ValueError: when ``override`` is a ``~user`` path.
    """
    if override is not None:
        expanded = await _expand_user_path(host, override)
        return expanded.rstrip("/")
    # Double quotes let the host shell expand the ${...} defaults while
    # keeping the result a single word.
    r = await host.run_command(f'printf %s "{_CODEX_CACHE_DIR_SHELL_DEFAULT}"')
    path = (r.stdout or "").strip()
    if r.exit_code != 0 or not path:
        raise RuntimeError(
            f"failed to resolve codex cache dir on host "
            f"(exit {r.exit_code}): {(r.stderr or '').strip()[:200]}"
        )
    return path.rstrip("/")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
async def ensure_codex_installed(
    hook_ctx: "HookContextProtocol",
    *,
    install_if_missing: bool = True,
    install_dir: str | None = None,
) -> str:
    """Make ``codex`` available to this task via the optio-owned cache.

    The cache directory comes from ``_resolve_codex_cache_dir`` and sits on
    the worker, outside any task workdir and outside the operator's
    ``~/.codex``. The cache is filled, if needed, in this order:

    - **cache hit** — ``<cache>/codex`` is already executable; nothing to do.
    - **cache miss, host has codex** — the binary found by
      :func:`resolve_codex` (login-shell ``command -v codex``) is copied
      into the cache by ``_install_into_cache_from_host``.
    - **cache miss, no host codex** — the pinned release is fetched by
      ``_download_codex_into_cache``.

    In every case the value returned is the per-task path produced by
    ``_provision_task_home`` (a symlink pointing into the cache). Per the
    original design note, this keeps teardown's anchored pkill scoped to
    this task.

    Raises:
        RuntimeError: when the cache is empty and
            ``install_if_missing=False``, or when filling it fails.
    """
    host = hook_ctx._host
    hook_ctx.report_progress(None, "Locating codex…")

    cache_dir = await _resolve_codex_cache_dir(host, install_dir)
    cached = f"{cache_dir}/codex"

    probe = await host.run_command(
        f"[ -x {shlex.quote(cached)} ] && echo OK || true"
    )
    if "OK" in (probe.stdout or ""):
        _LOG.info("ensure_codex_installed: cache HIT (%s)", cached)
    else:
        if not install_if_missing:
            raise RuntimeError(
                f"codex not present in cache at {cached!r} and "
                f"install_if_missing=False; nothing to do."
            )
        try:
            # install_dir=None forces the PATH lookup; the configured dir is
            # the cache itself and was just found empty.
            source = await resolve_codex(
                host, install_dir=None, install_if_missing=False,
            )
        except RuntimeError:
            # Nothing on the host to seed from: download the pinned release.
            await _download_codex_into_cache(
                hook_ctx, cache_dir=cache_dir, cached=cached,
            )
        else:
            hook_ctx.report_progress(None, "Seeding codex cache…")
            await _install_into_cache_from_host(
                host, source=source, cached=cached, cache_dir=cache_dir,
            )

    return await _provision_task_home(host, shared_codex_path=cached)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
async def _install_into_cache_from_host(
    host: "Host", *, source: str, cached: str, cache_dir: str,
) -> None:
    """Copy a resolved host binary into the cache without exposing a partial file.

    Steps: ``mkdir -p`` the cache dir, ``cp -L`` the source into a private
    per-invocation staging file inside the cache dir, ``chmod +x`` it,
    ``mv -f`` it onto ``cached``, then re-verify that ``cached`` is
    executable. ``-L`` dereferences a symlinked host codex, so the cache
    holds a real copy that does not follow the operator's autoupdater.

    Copying straight onto ``cached`` would let a concurrent task's
    ``[ -x cached ]`` check succeed on a partially written binary. Staging
    plus rename mirrors what ``_download_codex_into_cache`` does for the
    download path.

    Args:
        host: Host to run the commands on.
        source: Path of the existing codex binary on the host.
        cached: Final path of the cached binary.
        cache_dir: Cache directory; created if missing and used for staging.

    Raises:
        RuntimeError: when any step fails or the result is not executable.
    """
    mk = await host.run_command(f"mkdir -p {shlex.quote(cache_dir)}")
    if mk.exit_code != 0:
        raise RuntimeError(
            f"mkdir -p {cache_dir!r} failed (exit {mk.exit_code}): "
            f"{(mk.stderr or '').strip()[:200]}"
        )

    # pid + uuid keeps concurrent seeders off each other's staging file.
    token = f"{os.getpid()}-{uuid.uuid4().hex}"
    staged = f"{cache_dir.rstrip('/')}/.codex-seed.{token}"
    try:
        cp = await host.run_command(
            f"cp -L {shlex.quote(source)} {shlex.quote(staged)}"
        )
        if cp.exit_code != 0:
            raise RuntimeError(
                f"seeding codex cache (cp {source!r} -> {cached!r}) failed "
                f"(exit {cp.exit_code}): {(cp.stderr or '').strip()[:200]}"
            )
        ch = await host.run_command(f"chmod +x {shlex.quote(staged)}")
        if ch.exit_code != 0:
            raise RuntimeError(
                f"chmod +x {staged!r} failed (exit {ch.exit_code}): "
                f"{(ch.stderr or '').strip()[:200]}"
            )
        # The staging file is in the cache dir, so this is a same-directory
        # rename of an already-executable file.
        mv = await host.run_command(
            f"mv -f {shlex.quote(staged)} {shlex.quote(cached)}"
        )
        if mv.exit_code != 0:
            raise RuntimeError(
                f"installing seeded codex into cache ({staged!r} -> "
                f"{cached!r}) failed (exit {mv.exit_code}): "
                f"{(mv.stderr or '').strip()[:200]}"
            )
    finally:
        # Best-effort cleanup; after a successful rename the staging path is
        # already gone and this is a no-op.
        await host.run_command(f"rm -f {shlex.quote(staged)}")

    verify = await host.run_command(
        f"[ -x {shlex.quote(cached)} ] && echo OK || true"
    )
    if "OK" not in (verify.stdout or ""):
        raise RuntimeError(
            f"codex cache seed completed but {cached!r} is still not "
            f"executable on the host. Check the seed source {source!r}."
        )
    _LOG.info("ensure_codex_installed: cache MISS -> seeded from %s", source)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
async def _detect_codex_asset_name(host: "Host") -> str:
    """Pick the codex release asset matching the host's OS and CPU.

    Runs ``uname -s`` and then ``uname -m`` on the host. Only Linux on
    ``x86_64`` or ``aarch64`` is accepted, and the musl tarball for that
    architecture is returned. Per the original note, upstream publishes one
    single-binary tar.gz per target triple and the static musl builds are
    the portable Linux choice.

    Raises:
        RuntimeError: when either ``uname`` call fails or the OS/arch pair
            is unsupported. On macOS, pre-install codex on the worker or
            pre-populate the cache instead.
    """
    supported_arches = ("x86_64", "aarch64")

    r_os = await host.run_command("uname -s")
    os_name = (r_os.stdout or "").strip()
    os_ok = r_os.exit_code == 0 and os_name == "Linux"
    if not os_ok:
        raise RuntimeError(
            f"unsupported host OS {os_name!r} for codex auto-download "
            f"(Linux musl builds only; on macOS pre-install codex or "
            f"pre-populate the cache)."
        )

    r_arch = await host.run_command("uname -m")
    arch = (r_arch.stdout or "").strip()
    arch_ok = r_arch.exit_code == 0 and arch in supported_arches
    if not arch_ok:
        raise RuntimeError(
            f"unsupported host arch {arch!r} for codex auto-download. "
            f"See https://github.com/openai/codex/releases for available "
            f"assets."
        )
    return f"codex-{arch}-unknown-linux-musl.tar.gz"
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
async def _download_codex_into_cache(
    hook_ctx: "HookContextProtocol", *, cache_dir: str, cached: str,
) -> None:
    """Download the pinned codex release and install its binary as ``cached``.

    The release tarball is expected to hold exactly one regular file (the
    static binary, named by target triple). Any other shape — several
    entries, or a single entry that is not a regular file — is refused
    rather than guessed at. All work goes through Host primitives and
    ``hook_ctx.download_file``, so it runs wherever the host is.

    Concurrency: several tasks may fill a cold cache at once. The tarball
    and the extraction scratch dir carry a per-invocation token (pid +
    uuid), so tasks never write into, extract into, or ``rm -rf`` each
    other's private paths. The binary is made executable inside its scratch
    dir and only then renamed onto ``cached``, so a concurrent
    ``[ -x cached ]`` check never sees a half-installed or non-executable
    file. The last writer wins with identical bytes.

    Args:
        hook_ctx: Provides the host, progress reporting and the download.
        cache_dir: Cache directory; created if missing.
        cached: Final path of the cached binary.

    Raises:
        RuntimeError: on an unsupported host, or when mkdir, extraction,
            listing, installation or the final executable check fails.
    """
    host = hook_ctx._host
    asset = await _detect_codex_asset_name(host)
    url = f"{_CODEX_RELEASE_BASE}/{asset}"

    mk = await host.run_command(f"mkdir -p {shlex.quote(cache_dir)}")
    if mk.exit_code != 0:
        raise RuntimeError(
            f"mkdir -p {cache_dir!r} failed (exit {mk.exit_code}): "
            f"{(mk.stderr or '').strip()[:200]}"
        )

    token = f"{os.getpid()}-{uuid.uuid4().hex}"
    tarball = f"{cache_dir}/.codex-download.{token}.tar.gz"
    scratch = f"{cache_dir}/.codex-extract.{token}"
    hook_ctx.report_progress(None, f"Downloading codex {_CODEX_VERSION} ({asset})…")
    try:
        await hook_ctx.download_file(url, tarball)

        r = await host.run_command(
            f"rm -rf {shlex.quote(scratch)} && mkdir -p {shlex.quote(scratch)} "
            f"&& tar -xzf {shlex.quote(tarball)} -C {shlex.quote(scratch)}"
        )
        if r.exit_code != 0:
            raise RuntimeError(
                f"extracting codex release {asset!r} failed "
                f"(exit {r.exit_code}): {(r.stderr or '').strip()[:200]}"
            )
        listing = await host.run_command(
            f"find {shlex.quote(scratch)} -mindepth 1"
        )
        # A failed listing must not be mistaken for an empty or odd tarball.
        if listing.exit_code != 0:
            raise RuntimeError(
                f"listing extracted codex release {asset!r} failed "
                f"(exit {listing.exit_code}): "
                f"{(listing.stderr or '').strip()[:200]}"
            )
        entries = [
            line for line in (listing.stdout or "").splitlines() if line.strip()
        ]
        if len(entries) != 1:
            raise RuntimeError(
                f"codex release {asset!r} must contain exactly one file; "
                f"found {len(entries)} entries: {entries[:5]!r}. Refusing to "
                f"guess which member is the binary."
            )
        member = entries[0]
        # Install atomically: require a regular file (a lone directory would
        # otherwise pass the later -x check), chmod it in its private
        # scratch, THEN rename onto ``cached``.
        mv = await host.run_command(
            f"[ -f {shlex.quote(member)} ] "
            f"&& chmod +x {shlex.quote(member)} "
            f"&& mv -f {shlex.quote(member)} {shlex.quote(cached)}"
        )
        if mv.exit_code != 0:
            raise RuntimeError(
                f"installing codex into cache failed (exit {mv.exit_code}): "
                f"{(mv.stderr or '').strip()[:200]}"
            )
        verify = await host.run_command(
            f"[ -x {shlex.quote(cached)} ] && echo OK || true"
        )
        if "OK" not in (verify.stdout or ""):
            raise RuntimeError(
                f"codex download completed but {cached!r} is still not "
                f"executable on the host."
            )
        _LOG.info(
            "ensure_codex_installed: cache MISS -> downloaded %s", url,
        )
    finally:
        # Always remove this invocation's private tarball and scratch dir.
        await host.run_command(
            f"rm -rf {shlex.quote(tarball)} {shlex.quote(scratch)}"
        )
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def build_host(ssh, taskdir: str) -> "Host":
    """Build the Host for a task: remote when ``ssh`` is given, else local.

    For the local case ``taskdir`` and the host's ``workdir`` are created on
    this machine before the host is returned. The remote case only
    constructs the ``RemoteHost``.
    """
    # Imported lazily, as in the rest of this module's runtime-only uses.
    from optio_host.host import LocalHost, RemoteHost

    if ssh is not None:
        return RemoteHost(ssh_config=ssh, taskdir=taskdir)

    os.makedirs(taskdir, exist_ok=True)
    host: "Host" = LocalHost(taskdir=taskdir)
    os.makedirs(host.workdir, exist_ok=True)
    return host
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _isolation_env(workdir: str) -> dict[str, str]:
    """Return the per-task identity variables rooted at ``<workdir>/home``.

    The mapping holds ``HOME``, ``CODEX_HOME`` and the three XDG base
    directories, in that order. A trailing ``/`` on ``workdir`` is ignored.
    """
    home = f"{workdir.rstrip('/')}/home"
    env: dict[str, str] = {}
    env["HOME"] = home
    env["CODEX_HOME"] = f"{home}/.codex"
    env["XDG_CONFIG_HOME"] = f"{home}/.config"
    env["XDG_DATA_HOME"] = f"{home}/.local/share"
    env["XDG_CACHE_HOME"] = f"{home}/.cache"
    return env
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _codex_isolation_env(host: "Host") -> dict[str, str]:
    """Build the isolation environment for a headless probe.

    The identity variables come from :func:`_isolation_env` applied to
    ``host.workdir``, so the probe uses the same HOME/CODEX_HOME/XDG values
    as the launch and reads the seed's planted ``home/.codex/auth.json``.

    ``PATH`` is added explicitly: the per-task ``.local/bin`` followed by
    the ``PATH`` of this process's environment, or
    ``/usr/local/bin:/usr/bin:/bin`` when unset. Per the original note,
    ``run_command`` replaces rather than merges the child environment, so
    leaving PATH out would break the probe.

    NOTE(review): the base PATH is read from ``os.environ`` here, i.e. from
    the process running this code — confirm that is what a remote worker
    should get.
    """
    iso = _isolation_env(host.workdir)
    base_path = os.environ.get("PATH", "/usr/local/bin:/usr/bin:/bin")
    probe_env = dict(iso)
    probe_env["PATH"] = f"{iso['HOME']}/.local/bin:{base_path}"
    return probe_env
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
async def run_codex_probe(
    host: "Host",
    *,
    codex_executable: str,
    prompt: str,
    timeout_s: float = 180.0,
) -> "tuple[str, int]":
    """Run one headless ``codex exec`` probe and return ``(stdout, exit_code)``.

    The command is ``codex exec --json -s read-only --skip-git-repo-check
    '<prompt>'``, run from the task workdir through ``bash -lc`` under the
    per-task isolation environment. The JSONL events arrive on stdout. Per
    the original note, the caller's verdict is a challenge-answer match on
    stdout and the exit code is for diagnostics only.

    ``-s read-only`` is fixed on purpose: it is the tightest posture for a
    throwaway credential check and is deliberately not derived from the
    task's sandbox settings, which could only loosen it.

    The whole command is bounded by ``timeout_s`` via ``asyncio.wait_for``.
    """
    argv = [
        codex_executable, "exec", "--json", "-s", "read-only",
        "--skip-git-repo-check", prompt,
    ]
    inner = " ".join(map(shlex.quote, argv))
    cmd = f"cd {shlex.quote(host.workdir.rstrip('/'))} && {inner}"

    # Ambient env first, per-task overrides on top. The merge is explicit
    # because run_command is given the complete child environment.
    merged = dict(os.environ)
    merged.update(_codex_isolation_env(host))
    # Drop provider API keys so an ambient OPENAI_API_KEY cannot
    # authenticate the probe and mask a dead ChatGPT-mode seed.
    env = {
        name: value
        for name, value in merged.items()
        if name not in _PROBE_SCRUB_ENV_KEYS
    }

    pending = host.run_command(f"bash -lc {shlex.quote(cmd)}", env=env)
    result = await asyncio.wait_for(pending, timeout=timeout_s)
    return (result.stdout or "", result.exit_code)
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
async def _provision_task_home(host: "Host", *, shared_codex_path: str) -> str:
    """Create the per-task home tree and the per-task codex launch path.

    Two guarantees, carried over from the original design notes:

    - C1: codex must never start with a nonexistent ``$HOME``/``$CODEX_HOME``,
      so ``home/.codex``, ``home/.local/bin``, ``home/.config``,
      ``home/.local/share`` and ``home/.cache`` are created under the
      workdir here.
    - C2: teardown pkills an anchored pattern on the codex path, which is
      only safe when that path is unique per task. The shared binary is
      therefore symlinked to ``<workdir>/home/.local/bin/codex`` and
      launched through that path.

    Args:
        host: Host whose ``workdir`` roots the per-task home.
        shared_codex_path: Path of the shared codex binary to link to.

    Returns:
        The per-task launch path, ``<workdir>/home/.local/bin/codex``.

    Raises:
        RuntimeError: when ``mkdir -p`` or ``ln -sfn`` exits non-zero.
    """
    workdir = host.workdir.rstrip("/")
    home = f"{workdir}/home"
    bin_dir = f"{home}/.local/bin"
    per_task_codex = f"{bin_dir}/codex"
    dirs = [
        f"{home}/.codex",
        bin_dir,
        f"{home}/.config",
        f"{home}/.local/share",
        f"{home}/.cache",
    ]
    quoted = " ".join(shlex.quote(d) for d in dirs)
    r = await host.run_command(f"mkdir -p {quoted}")
    if r.exit_code != 0:
        # stderr may be None; guard it so the real failure is reported
        # instead of an AttributeError.
        raise RuntimeError(
            f"per-task home provisioning (mkdir -p) failed "
            f"(exit {r.exit_code}): {(r.stderr or '').strip()[:200]}"
        )
    # -sfn replaces an existing link in place, so repeated provisioning of
    # the same task keeps working.
    r = await host.run_command(
        f"ln -sfn {shlex.quote(shared_codex_path)} {shlex.quote(per_task_codex)}"
    )
    if r.exit_code != 0:
        raise RuntimeError(
            f"per-task codex symlink failed (exit {r.exit_code}): "
            f"{(r.stderr or '').strip()[:200]}"
        )
    return per_task_codex
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
async def ensure_workdir_trusted(host: "Host") -> None:
    """Make sure ``home/.codex/config.toml`` marks this workdir as trusted.

    Per the original note, codex gates operation on per-directory trust
    stored as ``[projects."<dir>"] trust_level = "trusted"`` in config.toml,
    and a freshly seeded workdir was never trusted by the operator. The
    entry depends on the workdir, so it is added after the seed is merged
    rather than shipped in the seed.

    The edit is minimal and idempotent: the entry is appended only when the
    exact ``[projects."<workdir>"]`` header text is not already in the file,
    and the existing content is never reordered or rewritten. A missing
    config.toml is treated as empty, so the file is created.
    """
    workdir = host.workdir.rstrip("/")
    config_rel = "home/.codex/config.toml"
    config_abs = f"{workdir}/{config_rel}"
    header = f'[projects."{workdir}"]'

    try:
        raw = await host.fetch_bytes_from_host(config_abs)
    except FileNotFoundError:
        current = ""
    else:
        current = raw.decode("utf-8")

    if header not in current:
        entry = f'{header}\ntrust_level = "trusted"\n'
        # Terminate an unterminated last line so the new table header
        # starts on its own line.
        needs_newline = bool(current) and not current.endswith("\n")
        if needs_newline:
            current += "\n"
        # write_text takes a workdir-relative path; the original note says
        # it creates parent directories itself. The file is small, so it is
        # rewritten whole.
        await host.write_text(config_rel, current + entry)
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def _build_codex_shell_command(
    *,
    codex_path: str,
    workdir: str,
    extra_env: dict[str, str] | None,
    codex_flags: list[str],
) -> tuple[list[str], str]:
    """Compose the ``env ... bash -c ...`` command line that launches codex.

    Returns ``(env_assignments, shell_command)``:

    - ``env_assignments`` — ``KEY=VALUE`` strings: the per-task isolation
      variables first, then ``extra_env`` without its ``PATH`` entry.
    - ``shell_command`` — ``env <assignments> bash -c <payload>`` with every
      word shlex-quoted.

    The payload exports PATH, changes into the workdir, runs codex with
    ``codex_flags``, and appends ``DONE`` (exit 0) or
    ``ERROR: codex exited <rc>`` to ``<workdir>/optio.log``.

    PATH is composed inside the payload, on the host, and is never taken
    from the engine's ``os.environ``: the command may run on a remote worker
    whose PATH differs. A ``PATH`` in ``extra_env`` replaces the host's PATH;
    either way the per-task ``.local/bin`` is put first.
    """
    workdir_clean = workdir.rstrip("/")
    iso = _isolation_env(workdir_clean)
    home_dir = iso["HOME"]
    home_local_bin = f"{home_dir}/.local/bin"
    log_path = f"{workdir_clean}/optio.log"

    # Copy before popping so the caller's mapping is left untouched.
    extra = dict(extra_env or {})
    path_override = extra.pop("PATH", None)

    env_assignments: list[str] = [
        f"{k}={v}" for k, v in (*iso.items(), *extra.items())
    ]

    if path_override is None:
        # "$PATH" is left for the host's shell to expand at run time.
        path_expr = f'export PATH={shlex.quote(home_local_bin)}:"$PATH"; '
    else:
        path_expr = (
            f"export PATH={shlex.quote(f'{home_local_bin}:{path_override}')}; "
        )

    codex_argv = " ".join(map(shlex.quote, [codex_path, *codex_flags]))
    bash_payload = (
        f"{path_expr}"
        f"cd {shlex.quote(workdir_clean)} && {codex_argv}; rc=$?; "
        f'if [ "$rc" = 0 ]; then echo DONE >> {shlex.quote(log_path)}; '
        f"else printf 'ERROR: codex exited %s\\n' \"$rc\" >> {shlex.quote(log_path)}; fi"
    )

    words = [*env_assignments, "bash", "-c", bash_payload]
    shell_command = "env " + " ".join(shlex.quote(x) for x in words)
    return env_assignments, shell_command
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def build_codex_flags(
    *,
    model: str | None,
    ask_for_approval: str = "never",
    sandbox_args: list[str],
) -> list[str]:
    """Turn task configuration into the argv flags for interactive ``codex``.

    The result is ``--ask-for-approval <value>``, then ``sandbox_args``
    as given, then ``--model <model>`` when ``model`` is non-empty. Per the
    original note, ``sandbox_args`` arrive pre-rendered from
    ``fs_allowlist.build_sandbox_cli_args`` and this function is the single
    place where the argv is composed.
    """
    flags: list[str] = ["--ask-for-approval", ask_for_approval]
    flags.extend(sandbox_args)
    if not model:
        return flags
    return [*flags, "--model", model]
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
# Default kickoff prompt passed to codex on an auto-started fresh launch.
AUTO_START_PROMPT = "Read AGENTS.md and execute the task it describes"


def build_auto_start_args(
    *, auto_start: bool, resuming: bool = False, prompt: str = AUTO_START_PROMPT,
) -> list[str]:
    """Return the trailing positional prompt for an auto-started fresh launch.

    The result is ``[prompt]`` only when ``auto_start`` is set and the
    launch is not a resume; otherwise it is empty. Per the original note, a
    resumed session continues via ``codex resume <id>``, and repeating the
    kickoff prompt there would queue a duplicate task on top of the resumed
    conversation.
    """
    if resuming or not auto_start:
        return []
    return [prompt]
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def build_resume_notice_args(*, resuming: bool) -> list[str]:
    """Return the trailing positional that tells a resumed session it resumed.

    On resume the result is ``[f"{SYSTEM_MESSAGE_PREFIX}{RESUME_NOTICE}"]``;
    on a fresh launch it is empty. Per the original notes:

    - codex relaunches with the ``resume <id>`` subcommand, so a trailing
      positional is processed as the resumed session's first turn (the same
      idea as claudecode's ``claude --continue '<text>'`` and grok's
      ``grok -c '<text>'``);
    - this is the push half of resume awareness, while ``resume.log`` stays
      the pull-based source of truth;
    - codex is taught the ``System:`` convention in both protocol modes, so
      no ``host_protocol`` gate is needed;
    - it is mutually exclusive with :func:`build_auto_start_args`, which
      fires only on a fresh launch.
    """
    if not resuming:
        return []
    return [f"{SYSTEM_MESSAGE_PREFIX}{RESUME_NOTICE}"]
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
async def _ttyd_present(host: "Host", ttyd_path: str) -> bool:
    """Report whether a working ttyd binary exists at ``ttyd_path``.

    The path must be executable and ``<ttyd_path> --version`` must exit 0
    with the word "ttyd" (any case) somewhere in its stdout or stderr.
    """
    cmd = f"[ -x {shlex.quote(ttyd_path)} ] && {shlex.quote(ttyd_path)} --version"
    result = await host.run_command(cmd)
    if result.exit_code != 0:
        return False
    # Both streams are checked for the version banner.
    blob = (result.stdout or "") + (result.stderr or "")
    return "ttyd" in blob.lower()
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
async def _detect_ttyd_asset_name(host: "Host") -> str:
    """Pick the ttyd release asset name that matches the host.

    Runs ``uname -m`` and ``uname -s`` on the host. Only Linux is supported,
    and only the architectures listed in the mapping below.

    Returns:
        The asset file name, e.g. ``ttyd.x86_64``.

    Raises:
        RuntimeError: either ``uname`` call exits non-zero, the OS is not
            Linux, or the architecture is not supported.
    """
    # ``uname -m`` value -> suffix used by the upstream release assets.
    # The two differ for 32-bit ARM: the kernel reports ``armv7l`` while the
    # published hard-float build is named ``ttyd.armhf`` (there is no
    # ``ttyd.armv7l`` asset, so using the raw value would 404 on download).
    asset_suffix_by_arch = {
        "x86_64": "x86_64",
        "aarch64": "aarch64",
        "armv7l": "armhf",
    }

    r_arch = await host.run_command("uname -m")
    if r_arch.exit_code != 0:
        raise RuntimeError(
            f"uname -m failed on host (exit {r_arch.exit_code}): "
            f"{(r_arch.stderr or '').strip()[:200]}"
        )
    arch = (r_arch.stdout or "").strip()

    r_os = await host.run_command("uname -s")
    if r_os.exit_code != 0:
        raise RuntimeError(
            f"uname -s failed on host (exit {r_os.exit_code}): "
            f"{(r_os.stderr or '').strip()[:200]}"
        )
    os_name = (r_os.stdout or "").strip()

    if os_name != "Linux":
        raise RuntimeError(
            f"unsupported host OS {os_name!r} for ttyd auto-install "
            f"(v1 supports Linux only)."
        )
    suffix = asset_suffix_by_arch.get(arch)
    if suffix is None:
        raise RuntimeError(
            f"unsupported host arch {arch!r} for ttyd auto-install."
        )
    return f"ttyd.{suffix}"
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
async def ensure_ttyd_installed(
    hook_ctx: "HookContextProtocol",
    *,
    install_if_missing: bool = True,
    install_dir: str | None = None,
) -> str:
    """Return the path of a working ttyd binary, downloading it if absent.

    The binary is expected at ``<install dir>/ttyd``, where the install
    directory comes from ``_resolve_install_dir(host, install_dir)``. If the
    probe already succeeds, that path is returned straight away. Otherwise
    the release asset matching the host is fetched through
    ``hook_ctx.download_file``, marked executable and probed once more.

    Raises:
        RuntimeError: ttyd is missing and ``install_if_missing`` is false, a
            ``mkdir``/``chmod`` step exits non-zero, asset detection fails,
            or the binary still fails the probe after installation.
    """
    host = hook_ctx._host

    async def _run_or_raise(command: str, label: str) -> None:
        # Shared "run, then fail loudly with truncated stderr" step.
        outcome = await host.run_command(command)
        if outcome.exit_code != 0:
            raise RuntimeError(
                f"{label} failed (exit {outcome.exit_code}): "
                f"{outcome.stderr.strip()[:200]}"
            )

    target_dir = await _resolve_install_dir(host, install_dir)
    ttyd_path = f"{target_dir}/ttyd"

    hook_ctx.report_progress(None, "Checking ttyd installation…")
    already_installed = await _ttyd_present(host, ttyd_path)
    if already_installed:
        return ttyd_path

    if not install_if_missing:
        raise RuntimeError(
            f"ttyd not present at {ttyd_path!r} on host and "
            f"install_ttyd_if_missing=False; nothing to do."
        )

    hook_ctx.report_progress(None, "Detecting ttyd release asset…")
    asset = await _detect_ttyd_asset_name(host)
    url = f"{_TTYD_RELEASE_BASE}/{asset}"

    await _run_or_raise(
        f"mkdir -p {shlex.quote(target_dir)}",
        f"mkdir -p {target_dir!r}",
    )

    hook_ctx.report_progress(None, f"Downloading ttyd ({asset})…")
    await hook_ctx.download_file(url, ttyd_path)

    await _run_or_raise(
        f"chmod +x {shlex.quote(ttyd_path)}",
        f"chmod +x {ttyd_path!r}",
    )

    # Re-probe so a bad download is reported here rather than at launch time.
    if await _ttyd_present(host, ttyd_path):
        return ttyd_path
    raise RuntimeError(
        f"ttyd install completed but {ttyd_path!r} is still not "
        f"executable on the host."
    )
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
def build_tmux_session_argv(
    *,
    tmux_path: str,
    codex_path: str,
    workdir: str,
    socket_path: str,
    session_name: str,
    extra_env: dict[str, str] | None,
    codex_flags: list[str],
) -> list[str]:
    """Assemble the ``tmux new-session`` argv that starts codex detached.

    The codex launch line is produced by ``_build_codex_shell_command`` (only
    the second element of its result is used) and passed to tmux as the
    session's command. The session is created detached (``-d``) on the
    private socket ``socket_path`` with a fixed 200x50 size.
    """
    _unused, launch_line = _build_codex_shell_command(
        codex_path=codex_path,
        workdir=workdir,
        extra_env=extra_env,
        codex_flags=codex_flags,
    )
    tmux_prefix = [tmux_path, "-S", socket_path]
    new_session = ["new-session", "-d", "-s", session_name]
    geometry = ["-x", "200", "-y", "50"]
    return [*tmux_prefix, *new_session, *geometry, launch_line]
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def build_ttyd_attach_argv(
    *,
    ttyd_path: str,
    tmux_path: str,
    socket_path: str,
    session_name: str,
    bind_iface: str,
    port: int,
) -> list[str]:
    """Assemble the ttyd argv that attaches to an existing tmux session.

    The result is ttyd's own options (interface, port, client option and
    terminal type), then ``--``, then the ``tmux -S <socket> attach -t
    <session>`` command ttyd should run. ``port`` is converted to a string.
    """
    ttyd_options = [
        ttyd_path,
        "-W",
        "-i",
        bind_iface,
        "-p",
        str(port),
        "-t",
        "disableLeaveAlert=true",
        "-T",
        "xterm-256color",
    ]
    attach_command = [tmux_path, "-S", socket_path, "attach", "-t", session_name]
    return ttyd_options + ["--"] + attach_command
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def _tmux_socket_path(host: "Host") -> str:
    """Derive the tmux socket path for a host from its workdir.

    The name embeds the first 16 hex characters of the SHA-256 of
    ``host.workdir`` (UTF-8 encoded), so the same workdir always maps to the
    same ``/tmp/optio-cx-<digest>.sock`` path.
    """
    hasher = hashlib.sha256()
    hasher.update(host.workdir.encode("utf-8"))
    short_digest = hasher.hexdigest()[:16]
    return "/tmp/optio-cx-" + short_digest + ".sock"
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
async def _require_tmux(host: "Host") -> str:
    """Locate tmux on the host or fail with an actionable error.

    Resolves the binary with ``command -v tmux`` inside a login bash shell
    and returns the stripped path.

    Raises:
        RuntimeError: the lookup exits non-zero or prints nothing.
    """
    lookup = await host.run_command("bash -lc 'command -v tmux'")
    located = (lookup.stdout or "").strip()
    if lookup.exit_code == 0 and located:
        return located
    raise RuntimeError(
        "tmux is required on the worker for optio-codex (codex runs inside a "
        "detached tmux session). Install tmux or add it to the worker image."
    )
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
async def _launch_detached_checked(
    host: "Host", cmd: str, *, env_remove: list[str] | None, what: str,
) -> list[str]:
    """Launch ``cmd``, collect its stdout and insist on a zero exit code.

    Every chunk read from the process's stdout is kept as text (bytes are
    decoded as UTF-8 with replacement, anything else goes through ``str``).
    The collected chunks are returned once the process has exited cleanly.

    Raises:
        RuntimeError: the process exits non-zero; the message carries
            ``what``, the exit code and up to 500 characters of output.
    """
    proc = await host.launch_subprocess(cmd, env_remove=env_remove)
    chunks: list[str] = []
    async for piece in proc.stdout:
        if isinstance(piece, bytes):
            chunks.append(piece.decode("utf-8", errors="replace"))
        else:
            chunks.append(str(piece))
    exit_code = await proc_wait(proc)
    if exit_code == 0:
        return chunks
    raise RuntimeError(f"{what} failed (exit {exit_code}): {''.join(chunks).strip()[:500]}")
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
async def launch_ttyd_with_codex(
    host: "Host",
    *,
    ttyd_path: str,
    codex_path: str,
    bind_iface: str,
    extra_env: dict[str, str] | None,
    codex_flags: list[str],
    ready_timeout_s: float = 30.0,
    env_remove: list[str] | None = None,
    session_name: str = "optio",
) -> "tuple[ProcessHandle, str, int, str, str]":
    """Start codex in a detached tmux session and expose it through ttyd.

    Returns ``(ttyd_handle, tmux_path, port, socket_path, session_name)``.

    The tmux session is created first (and must exit 0). ttyd is then
    launched with port ``0`` and the actual port is parsed from its output
    using ``_TTYD_READY_RE``.

    Any failure after the tmux session exists — ttyd failing to launch,
    exiting early, timing out, or the task being cancelled — tears the
    session down again so no detached codex is left behind. The ttyd process
    is terminated as well when it was started.

    Raises:
        RuntimeError: tmux is missing, ``tmux new-session`` fails, or ttyd's
            output ends before a port was seen.
        TimeoutError: no port appeared within ``ready_timeout_s`` seconds.
    """
    tmux_path = await _require_tmux(host)
    socket_path = _tmux_socket_path(host)

    session_argv = build_tmux_session_argv(
        tmux_path=tmux_path,
        codex_path=codex_path,
        workdir=host.workdir,
        socket_path=socket_path,
        session_name=session_name,
        extra_env=extra_env,
        codex_flags=codex_flags,
    )
    await _launch_detached_checked(
        host, shlex.join(session_argv), env_remove=env_remove,
        what="tmux new-session",
    )

    ttyd_argv = build_ttyd_attach_argv(
        ttyd_path=ttyd_path,
        tmux_path=tmux_path,
        socket_path=socket_path,
        session_name=session_name,
        bind_iface=bind_iface,
        port=0,
    )
    command = shlex.join(ttyd_argv)

    async def _read_port(proc: "ProcessHandle") -> int:
        # Scan ttyd's output line by line until the readiness pattern hits.
        async for raw in proc.stdout:
            if isinstance(raw, bytes):
                line = raw.decode("utf-8", errors="replace").rstrip()
            else:
                line = str(raw).rstrip()
            m = _TTYD_READY_RE.search(line)
            if m:
                return int(m.group(1) or m.group(2))
        raise RuntimeError("ttyd exited before printing a listening URL")

    handle = None
    try:
        # The launch itself sits inside the guarded region: the tmux session
        # already exists at this point and must not outlive a failed launch.
        handle = await host.launch_subprocess(command)
        port = await asyncio.wait_for(_read_port(handle), timeout=ready_timeout_s)
    except BaseException as exc:
        try:
            if handle is not None:
                await host.terminate_subprocess(handle, aggressive=True)
        finally:
            # Runs even when terminating ttyd raises.
            await _kill_tmux_session(host, tmux_path, socket_path, session_name)
        if isinstance(exc, asyncio.TimeoutError):
            raise TimeoutError(
                f"ttyd did not print a listening URL within {ready_timeout_s}s"
            ) from exc
        raise
    return handle, tmux_path, port, socket_path, session_name
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
async def _kill_tmux_session(
    host: "Host", tmux_path: str, socket_path: str, session_name: str,
) -> None:
    """Run ``tmux kill-session`` for the given socket and session.

    Best-effort: an exception raised while building or running the command is
    logged with its traceback and swallowed. The command's exit code is not
    inspected.
    """
    try:
        kill_cmd = " ".join(
            [
                shlex.quote(tmux_path),
                "-S",
                shlex.quote(socket_path),
                "kill-session",
                "-t",
                shlex.quote(session_name),
            ]
        )
        await host.run_command(kill_cmd)
    except Exception:  # noqa: BLE001
        _LOG.exception("tmux kill-session failed (socket=%s)", socket_path)
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
def _socket_pkill_pattern(socket_path: str) -> str:
    """Build the ``pkill -f`` pattern for the ttyd bound to ``socket_path``.

    An orphan ttyd carries the socket in its command line
    (``ttyd … -- tmux -S <socket> attach``). The pattern is ``[t]tyd``
    followed by the socket path verbatim: the bracket keeps pkill's own argv
    from matching itself, and the socket path limits the match to this task's
    private socket. A falsy ``socket_path`` is handed back unchanged.
    """
    return f"[t]tyd.*{socket_path}" if socket_path else socket_path
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
async def _kill_ttyd_by_socket(host: "Host", socket_path: str) -> None:
    """Reap a detached orphan ttyd that has no tracked launch handle.

    Normal teardown kills ttyd via ``terminate_subprocess(handle)``; a crash
    orphan's ttyd is re-parented to init with no handle, so it is reaped
    host-side by an anchored ``pkill -f`` on its private socket path.
    Best-effort: pkill exits non-zero when nothing matches, which is ignored.

    An empty ``socket_path`` is a no-op. Without this guard the pattern would
    be empty, and an empty regular expression matches every process, so
    ``pkill -KILL -f ''`` would kill everything the user may signal.
    """
    if not socket_path:
        return
    pattern = _socket_pkill_pattern(socket_path)
    if not pattern:
        # Defensive: never hand pkill an empty (match-all) pattern.
        return
    await host.run_command(f"pkill -KILL -f {shlex.quote(pattern)} || true")
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _codex_pgrep_pattern(codex_path: str) -> str:
    """Build the anchored ``pgrep``/``pkill -f`` pattern for codex.

    The pattern is ``^`` followed by ``codex_path``. When the path ends in
    ``codex`` that suffix is rewritten as ``[c]odex``; otherwise the path is
    used as-is. The path is not regex-escaped.
    """
    suffix = "codex"
    if codex_path.endswith(suffix):
        return "^" + codex_path[: -len(suffix)] + "[c]odex"
    return "^" + codex_path
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
async def kill_codex_processes(
    host: "Host", codex_path: str, *, signal: str = "KILL",
) -> None:
    """Signal every process whose command line matches the codex pattern.

    Runs ``pkill -<signal> -f <pattern>`` on the host, with the pattern from
    ``_codex_pgrep_pattern(codex_path)``. The trailing ``|| true`` makes a
    no-match (non-zero pkill exit) a success; the result is not inspected.
    """
    quoted_pattern = shlex.quote(_codex_pgrep_pattern(codex_path))
    kill_cmd = f"pkill -{signal} -f {quoted_pattern} || true"
    await host.run_command(kill_cmd)
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
async def await_codex_gone(
    host: "Host", codex_path: str, *, timeout_s: float = 15.0, poll_s: float = 1.0,
) -> bool:
    """Poll until no process matches the codex pattern, or give up.

    Each round runs ``pgrep -f`` with the pattern from
    ``_codex_pgrep_pattern``; empty output means codex is gone. Between
    rounds the coroutine sleeps ``poll_s`` seconds. The budget is tracked by
    summing the sleeps, so time spent inside ``pgrep`` is not counted.

    Returns:
        ``True`` once nothing matches; ``False`` (after a warning) when
        processes are still present and the summed sleeps reached
        ``timeout_s``.
    """
    probe_cmd = f"pgrep -f {shlex.quote(_codex_pgrep_pattern(codex_path))} || true"
    elapsed = 0.0
    while True:
        listing = await host.run_command(probe_cmd)
        survivors = (listing.stdout or "").strip()
        if not survivors:
            return True
        if elapsed >= timeout_s:
            _LOG.warning(
                "await_codex_gone: codex still running after %.0fs (path=%s); "
                "proceeding anyway", timeout_s, codex_path,
            )
            return False
        await asyncio.sleep(poll_s)
        elapsed += poll_s
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
async def teardown_session_tree(
    host: "Host",
    *,
    tmux_path: str,
    tmux_socket: str,
    tmux_session: str,
    codex_path: str,
    ttyd_handle: "ProcessHandle | None" = None,
    aggressive: bool,
) -> None:
    """Tear down ttyd, the tmux session and codex, in that order.

    Every step is best-effort: an ``Exception`` is logged with its traceback
    and the next step still runs.

    1. ttyd: terminated through its handle (honouring ``aggressive``) when one
       is given, otherwise reaped by socket path.
    2. The tmux session is killed.
    3. Remaining codex processes are killed.
    4. The function waits for codex to disappear; the outcome is ignored.
    """

    async def _attempt(step, failure_msg: str, *log_args: object) -> None:
        # ``step`` is a zero-argument callable so that errors raised while
        # creating the awaitable are caught as well.
        try:
            await step()
        except Exception:
            _LOG.exception(failure_msg, *log_args)

    if ttyd_handle is None:
        await _attempt(
            lambda: _kill_ttyd_by_socket(host, tmux_socket),
            "orphan ttyd reap failed (socket=%s)",
            tmux_socket,
        )
    else:
        await _attempt(
            lambda: host.terminate_subprocess(ttyd_handle, aggressive=aggressive),
            "terminate_subprocess (ttyd) failed",
        )

    await _attempt(
        lambda: _kill_tmux_session(host, tmux_path, tmux_socket, tmux_session),
        "tmux session teardown failed",
    )
    await _attempt(
        lambda: kill_codex_processes(host, codex_path),
        "kill_codex_processes failed",
    )
    await _attempt(
        lambda: await_codex_gone(host, codex_path),
        "await_codex_gone failed; proceeding",
    )
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
async def tmux_session_alive(
    host: "Host", tmux_path: str, socket_path: str, session_name: str,
) -> bool:
    """Report whether the named tmux session exists on the given socket.

    Runs ``tmux -S <socket> has-session -t <session>`` on the host; the
    session counts as alive exactly when that command exits 0.
    """
    tmux = shlex.quote(tmux_path)
    sock = shlex.quote(socket_path)
    target = shlex.quote(session_name)
    probe = await host.run_command(f"{tmux} -S {sock} has-session -t {target}")
    return probe.exit_code == 0
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
async def send_text_to_codex(
    host: "Host", tmux_path: str, tmux_socket: str, tmux_session: str, text: str,
) -> None:
    """Type ``text`` into the codex session running in tmux and submit it.

    A single shell command chains four steps with ``&&``: load the text into
    the tmux buffer ``optio-feedback``, paste that buffer into the session
    (``-d`` deletes the buffer afterwards), sleep ``_SUBMIT_SETTLE_S``
    seconds, then send ``Enter``.

    Raises:
        RuntimeError: the chained command exits non-zero.
    """
    tmux = f"{shlex.quote(tmux_path)} -S {shlex.quote(tmux_socket)}"
    target = shlex.quote(tmux_session)
    buffer_name = "optio-feedback"
    steps = (
        f"{tmux} set-buffer -b {buffer_name} -- {shlex.quote(text)}",
        f"{tmux} paste-buffer -d -b {buffer_name} -t {target}",
        f"sleep {_SUBMIT_SETTLE_S}",
        f"{tmux} send-keys -t {target} Enter",
    )
    outcome = await host.run_command(" && ".join(steps))
    if outcome.exit_code == 0:
        return
    raise RuntimeError(
        f"send_text_to_codex: tmux injection failed "
        f"(exit {outcome.exit_code}): {outcome.stderr!r}"
    )
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
# --- resume bookkeeping (Stage 2; adapted from optio-grok) ------------------
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
# A codex rollout file is named ``rollout-<timestamp>-<uuid>.jsonl`` and lives
# under ``$CODEX_HOME/sessions/YYYY/MM/DD/``. The trailing UUID is the session
# id that ``codex resume`` takes. Real codex emits UUIDv7, but any
# 8-4-4-4-12 hex shape is accepted here.
_ROLLOUT_UUID_RE = re.compile(
    r"rollout-.*-("
    r"[0-9a-fA-F]{8}"
    r"-[0-9a-fA-F]{4}"
    r"-[0-9a-fA-F]{4}"
    r"-[0-9a-fA-F]{4}"
    r"-[0-9a-fA-F]{12}"
    r")\.jsonl$"
)
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
async def read_latest_session_id(host: "Host") -> str | None:
|
|
987
|
+
"""Session id of the newest rollout under ``<workdir>/home/.codex/sessions``.
|
|
988
|
+
|
|
989
|
+
Newest by FILENAME (lexicographic): rollout names embed an ISO-ordered
|
|
990
|
+
timestamp, so a name sort IS a chronological sort — and unlike mtime it
|
|
991
|
+
survives a workdir tar restore. Returns None when no rollout exists yet
|
|
992
|
+
(codex never persisted a session). The derived sqlite index is
|
|
993
|
+
deliberately not consulted: it is excluded from snapshots (absolute
|
|
994
|
+
rollout paths) and codex rebuilds it from the rollout files.
|
|
995
|
+
"""
|
|
996
|
+
sessions_dir = f"{host.workdir.rstrip('/')}/home/.codex/sessions"
|
|
997
|
+
r = await host.run_command(
|
|
998
|
+
f"find {shlex.quote(sessions_dir)} -type f -name 'rollout-*.jsonl' "
|
|
999
|
+
f"2>/dev/null | sort | tail -n 1"
|
|
1000
|
+
)
|
|
1001
|
+
newest = (r.stdout or "").strip()
|
|
1002
|
+
if not newest:
|
|
1003
|
+
return None
|
|
1004
|
+
m = _ROLLOUT_UUID_RE.search(newest)
|
|
1005
|
+
if m is None:
|
|
1006
|
+
_LOG.warning(
|
|
1007
|
+
"read_latest_session_id: unparseable rollout filename %r", newest,
|
|
1008
|
+
)
|
|
1009
|
+
return None
|
|
1010
|
+
return m.group(1)
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
def build_resume_args(session_id: str | None) -> list[str]:
|
|
1014
|
+
"""Leading argv for relaunching into a recorded session.
|
|
1015
|
+
|
|
1016
|
+
``resume`` is a codex SUBCOMMAND: it and the explicit session id must
|
|
1017
|
+
PRECEDE every flag — ``codex resume <id> [flags]``. Never
|
|
1018
|
+
``resume --last``: it is cwd-filtered and silently starts a NEW session
|
|
1019
|
+
on a miss (design-doc probe), so resume is always by explicit id.
|
|
1020
|
+
Returns ``[]`` when ``session_id`` is None (fresh launch).
|
|
1021
|
+
"""
|
|
1022
|
+
return ["resume", session_id] if session_id else []
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
async def _rotate_optio_log(host: "Host") -> None:
    """Append the restored optio.log to optio.log.old, then truncate it.

    Preserves historical log content across consecutive resumes while
    ensuring the tail driver only sees fresh lines from the resumed run (a
    stale DONE/ERROR carried in the restored log would otherwise be replayed
    and end the session immediately).

    Both files are decoded as UTF-8 with ``errors="replace"``: a log holding
    invalid bytes must not abort the resume with ``UnicodeDecodeError``. Such
    bytes show up as U+FFFD in ``optio.log.old``.

    A missing or empty ``optio.log`` is simply (re)created empty and
    ``optio.log.old`` is left untouched.
    """
    workdir = host.workdir.rstrip("/")

    async def _read_or_empty(abs_path: str) -> str:
        # A file that does not exist yet reads as empty text.
        try:
            raw = await host.fetch_bytes_from_host(abs_path)
        except FileNotFoundError:
            return ""
        return raw.decode("utf-8", errors="replace")

    current = await _read_or_empty(f"{workdir}/optio.log")
    if current:
        existing_old = await _read_or_empty(f"{workdir}/optio.log.old")
        # History is written before truncating, so a failure in between
        # cannot lose the current log.
        await host.write_text("optio.log.old", existing_old + current)
    await host.write_text("optio.log", "")
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
async def _append_resume_log_entry(
|
|
1052
|
+
host: "Host", *, refreshed: list[str] | None = None,
|
|
1053
|
+
) -> None:
|
|
1054
|
+
"""Append one line to ``<workdir>/resume.log``.
|
|
1055
|
+
|
|
1056
|
+
Line format: ``<ISO 8601 UTC timestamp>[ REFRESHED:<comma-separated names>]``.
|
|
1057
|
+
The first line is the original launch; each later line marks a resume.
|
|
1058
|
+
The caller gates this on ``config.supports_resume``.
|
|
1059
|
+
"""
|
|
1060
|
+
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
1061
|
+
line = f"{ts} REFRESHED:{','.join(refreshed)}" if refreshed else ts
|
|
1062
|
+
target = f"{host.workdir.rstrip('/')}/resume.log"
|
|
1063
|
+
result = await host.run_command(
|
|
1064
|
+
f"echo {shlex.quote(line)} >> {shlex.quote(target)}"
|
|
1065
|
+
)
|
|
1066
|
+
if result.exit_code != 0:
|
|
1067
|
+
raise RuntimeError(
|
|
1068
|
+
f"failed to append to resume.log: exit {result.exit_code}: "
|
|
1069
|
+
f"{result.stderr!r}"
|
|
1070
|
+
)
|