optio-opencode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optio_opencode/__init__.py +38 -0
- optio_opencode/host_actions.py +432 -0
- optio_opencode/prompt.py +153 -0
- optio_opencode/session.py +552 -0
- optio_opencode/snapshots.py +101 -0
- optio_opencode/types.py +55 -0
- optio_opencode-0.1.0.dist-info/METADATA +84 -0
- optio_opencode-0.1.0.dist-info/RECORD +10 -0
- optio_opencode-0.1.0.dist-info/WHEEL +5 -0
- optio_opencode-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
"""The state machine that runs one optio-opencode session.
|
|
2
|
+
|
|
3
|
+
Orchestrates a Host (local or remote) through the lifecycle described in
|
|
4
|
+
Section 4 of the design spec. The public entry point is the factory
|
|
5
|
+
``create_opencode_task(...)`` which wraps ``run_opencode_session`` in a
|
|
6
|
+
``TaskInstance`` and sets ``ui_widget="iframe"``.
|
|
7
|
+
|
|
8
|
+
Most of the per-session work is generic log/deliverables protocol
|
|
9
|
+
plumbing (parse ``optio.log``, fetch deliverables, watch for cancel) and
|
|
10
|
+
lives in ``optio_host.protocol.run_log_protocol_session``. This module
|
|
11
|
+
keeps only the opencode-specific work — write AGENTS.md / opencode.json,
|
|
12
|
+
install/launch the opencode binary, set up tunnel and widget, and the
|
|
13
|
+
resume/snapshot brackets around the protocol session.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import base64
|
|
20
|
+
import json
|
|
21
|
+
import logging
|
|
22
|
+
import os
|
|
23
|
+
import secrets
|
|
24
|
+
import shlex
|
|
25
|
+
import tempfile
|
|
26
|
+
from datetime import datetime, timezone
|
|
27
|
+
from typing import AsyncIterator, Callable
|
|
28
|
+
|
|
29
|
+
from optio_core.context import ProcessContext
|
|
30
|
+
from optio_core.models import BasicAuth, TaskInstance
|
|
31
|
+
|
|
32
|
+
from optio_host.context import HookContext
|
|
33
|
+
from optio_host.host import Host, LocalHost, ProcessHandle, RemoteHost
|
|
34
|
+
from optio_host.paths import task_dir
|
|
35
|
+
from optio_host.protocol.session import _SessionFailed, run_log_protocol_session
|
|
36
|
+
from optio_opencode import host_actions
|
|
37
|
+
from optio_opencode.prompt import compose_agents_md
|
|
38
|
+
from optio_opencode.snapshots import (
|
|
39
|
+
insert_snapshot,
|
|
40
|
+
load_latest_snapshot,
|
|
41
|
+
prune_snapshots,
|
|
42
|
+
)
|
|
43
|
+
from optio_opencode.types import OpencodeTaskConfig
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# Passed as ``ready_timeout_s`` to ``host_actions.launch_opencode``
# (presumably seconds, per the ``_S`` suffix — confirm against host_actions).
READY_TIMEOUT_S = 30.0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _build_host(config: OpencodeTaskConfig, process_id: str) -> Host:
    """Return the Host implementation matching ``config.ssh``.

    With an SSH config a ``RemoteHost`` is returned. Without one, the task
    directory and the host's workdir are created on the local filesystem
    and a ``LocalHost`` is returned.

    Kept as a separate function so tests can monkeypatch
    ``optio_opencode.session._build_host`` and inject a fake host instead
    of launching real subprocesses or SSH.
    """
    ssh_config = config.ssh
    task_root = task_dir(
        ssh=ssh_config, process_id=process_id, consumer_name="optio-opencode",
    )

    # Remote case first: nothing to create on the local filesystem.
    if ssh_config is not None:
        return RemoteHost(ssh_config=ssh_config, taskdir=task_root)

    os.makedirs(task_root, exist_ok=True)
    local_host: Host = LocalHost(taskdir=task_root)
    os.makedirs(local_host.workdir, exist_ok=True)
    return local_host
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def run_opencode_session(ctx: ProcessContext, config: OpencodeTaskConfig) -> None:
    """Execute function body for one optio-opencode task instance.

    Steps, in order:

    1. Build the host, connect, set up the workdir and make sure opencode
       is installed.
    2. If ``ctx.resume`` is truthy and a snapshot exists, restore the
       workdir and the opencode session from it. A restore failure falls
       back to a fresh start, except when the ``session_blob_decrypt``
       hook itself raised: that error propagates.
    3. Run ``run_log_protocol_session`` with an opencode-specific body
       that launches opencode, opens the tunnel and registers the widget.
    4. Always (``finally``): terminate the launched subprocess, capture a
       snapshot when ``config.supports_resume`` is set and a session id
       is known, clean up the task dir and disconnect the host.

    Args:
        ctx: Process context of the running task instance.
        config: Opencode task configuration (hooks, install options,
            resume support, ...).

    Raises:
        RuntimeError: when the protocol driver raises ``_SessionFailed``.
        Exception: whatever the ``session_blob_decrypt`` hook raised during
            resume; any other exception from the protocol session is
            re-raised unchanged.
    """
    # --- per-task filesystem layout ---------------------------------------
    host: Host = _build_host(config, ctx.process_id)
    taskdir = task_dir(
        ssh=config.ssh, process_id=ctx.process_id, consumer_name="optio-opencode",
    )
    opencode_db = f"{taskdir}/opencode.db"

    password = secrets.token_urlsafe(32)
    cancelled = False
    launched_handle: ProcessHandle | None = None
    opencode_exec: str = "opencode"
    session_id: str | None = None
    preserved_session_id: str | None = None

    # --- resume decision (BEFORE the protocol session starts) -------------
    resume_requested = bool(getattr(ctx, "resume", False))
    snapshot: dict | None = None
    if resume_requested:
        snapshot = await load_latest_snapshot(
            ctx._db, prefix=ctx._prefix, process_id=ctx.process_id,
        )

    # Connect + install BEFORE deciding fresh vs resume. The resume path
    # needs ``opencode import`` to replay the saved session DB, which
    # requires opencode to be installed on the host and resolved to an
    # absolute path. Hoisting also lets the fresh path skip the redundant
    # ``host.connect()`` later. ``setup_workdir`` is idempotent (mkdir -p)
    # and the protocol driver still calls it again for the fresh path —
    # harmless. Install progress reports through ``ctx``, so the
    # dashboard sees activity from the very first step.
    # NOTE(review): a failure between here and the ``try`` further down
    # skips the ``finally`` cleanup (no ``host.disconnect()``) — confirm
    # whether that is acceptable.
    await host.connect()
    await host.setup_workdir()
    opencode_exec = await host_actions.ensure_opencode_installed(
        HookContext(ctx, host),
        install_if_missing=config.install_if_missing,
        install_dir=config.opencode_install_dir,
    )

    # Resume restore must run BEFORE the protocol session begins, so the
    # driver's tail_task does not subscribe to the restored stale optio.log
    # (which contains last run's DONE / ERROR events). The body below sees
    # ``resuming`` already decided.
    resuming = snapshot is not None
    if resuming:
        await host.remove_file(opencode_db)
        # Set only when the consumer's decrypt hook itself raised. An
        # explicit flag is used instead of inspecting the exception text,
        # which would miss hook errors whose repr lacks specific words.
        decrypt_hook_failed = False
        try:
            await host.restore_workdir(_stream_blob(ctx, snapshot["workdirBlobId"]))
            session_bytes_raw = await _read_blob_bytes(ctx, snapshot["sessionBlobId"])
            decrypt = config.session_blob_decrypt or (lambda b: b)
            try:
                session_bytes = decrypt(session_bytes_raw)
            except Exception:
                decrypt_hook_failed = True
                raise
            await host_actions.opencode_import(
                host, opencode_db, session_bytes,
                opencode_executable=opencode_exec,
            )
            # Move the restored log channel out of the way BEFORE the
            # protocol driver subscribes its tail. The snapshot tar
            # includes optio.log from the previous run; without rotation,
            # ``tail -F -n +1`` would re-emit every old DELIVERABLE /
            # DONE / ERROR line and the resumed process would terminate
            # within seconds of launch. Preserve the historical content
            # by appending it to optio.log.old.
            await _rotate_optio_log(host)
            preserved_session_id = snapshot["sessionId"]
        except Exception:
            # If the failure was the session-blob decrypt hook raising,
            # this indicates the snapshot was tampered with or the
            # consumer's keypair changed. Fail loud — silently dropping
            # to fresh-start would mask the security-relevant signal.
            if decrypt_hook_failed:
                _LOG.error(
                    "resume restore failed inside session_blob_decrypt; "
                    "refusing to fall through to fresh-start. Operator must "
                    "investigate the snapshot blob.",
                )
                raise
            _LOG.exception(
                "resume restore failed; falling back to fresh-start path "
                "(Mongo blob preserved for inspection)",
            )
            await host.remove_file(opencode_db)
            resuming = False
            preserved_session_id = None

    async def _opencode_body(host: Host, hook_ctx: HookContext) -> None:
        """Opencode-specific body that runs inside the protocol driver.

        Captures launch state via nonlocal so the outer ``finally`` can
        terminate the subprocess and capture the snapshot.
        """
        nonlocal launched_handle, opencode_exec, session_id, preserved_session_id

        if not resuming:
            # Fresh start: the protocol driver has already created the
            # workdir, deliverables/ subdir, and empty optio.log. Ensure
            # any stale opencode db from a prior crashed run is gone, then
            # write the fresh AGENTS.md and opencode.json that the agent
            # consumes.
            await host.remove_file(opencode_db)
            await host.write_text(
                "AGENTS.md",
                compose_agents_md(
                    config.consumer_instructions,
                    workdir_exclude=config.workdir_exclude,
                    supports_resume=config.supports_resume,
                ),
            )
            await host.write_text(
                "opencode.json", json.dumps(config.opencode_config, indent=2),
            )
            # Note: do NOT call ctx.clear_has_saved_state() here. The spec
            # described it as "belt-and-braces", but in practice it makes
            # `hasSavedState` track the live session rather than the durable
            # snapshot collection. A worker crash mid-Restart would then
            # leave hasSavedState=false even though perfectly good prior
            # snapshots are still in Mongo, hiding the Resume affordance
            # from the UI. The flag is now only ever flipped true by
            # mark_has_saved_state at terminal capture; resume's stale-flag
            # self-healing (snapshot lookup returns None → fresh-start
            # fallback) handles the rare case where the flag is true but
            # no snapshot exists.

        if config.supports_resume:
            await _append_resume_log_entry(host)

        # opencode is already installed by run_opencode_session before
        # this body runs (so resume restore can call opencode_import
        # against a known-good absolute path). ``opencode_exec`` is set
        # on the enclosing closure.

        # --- before_execute hook ----------------------------------------
        # Fires after the binary is in place and before opencode launches,
        # so consumer hooks can ship per-task files via hook_ctx.copy_file
        # and run setup commands via hook_ctx.run_on_host.
        if config.before_execute is not None:
            await config.before_execute(hook_ctx)

        # --- launch ------------------------------------------------------
        version = await host_actions.opencode_version(
            host, opencode_executable=opencode_exec,
        )
        version_suffix = f" {version}" if version else ""
        ctx.report_progress(None, f"Launching opencode{version_suffix}…")
        handle, opencode_port = await host_actions.launch_opencode(
            host, password,
            ready_timeout_s=READY_TIMEOUT_S,
            opencode_executable=opencode_exec,
        )
        launched_handle = handle

        # --- tunnel + widget registration --------------------------------
        worker_port = await host.establish_tunnel(opencode_port)

        if preserved_session_id is not None:
            session_id = preserved_session_id
        else:
            # Pre-create a single opencode session for this task instance.
            # All dashboards that embed this widget navigate to the same
            # session ID via the iframe URL, so concurrent viewers share
            # live state (events over SSE) rather than each creating a fresh
            # isolated session on load. Matches optio's mental model: one
            # background process, N observers.
            session_id = await _create_opencode_session(
                worker_port, password, host.workdir,
            )

        await ctx.set_widget_upstream(
            f"http://127.0.0.1:{worker_port}",
            inner_auth=BasicAuth(username="opencode", password=password),
        )
        # Point the iframe directly at the pre-created session so viewers
        # skip both the project picker and the "new session" default.
        # opencode's SPA expects the :dir router param to be a URL-safe
        # base64 encoding of the directory path (see
        # packages/app/src/utils/base64.ts in opencode) — NOT percent-
        # encoding. The {widgetProxyUrl} token is resolved by the iframe
        # widget at mount time.
        _workdir_b64 = (
            base64.urlsafe_b64encode(host.workdir.encode("utf-8"))
            .decode("ascii").rstrip("=")
        )
        await ctx.set_widget_data({
            "iframeSrc": f"{{widgetProxyUrl}}{_workdir_b64}/session/{session_id}",
            "localStorageOverrides": {
                "opencode.settings.dat:defaultServerUrl": "{widgetProxyUrl}",
            },
        })
        ctx.report_progress(None, "opencode is live")

        # --- await opencode subprocess exit -----------------------------
        # The protocol driver runs this body alongside the tail dispatcher
        # and a cancel watcher. When the user cancels, the driver cancels
        # this body's task; when the agent emits DONE/ERROR, the driver
        # returns / raises and again cancels this body. In either case the
        # await below is interrupted via CancelledError before proc exits.
        # If, however, opencode exits on its own without emitting DONE
        # first, the body returns normally and the driver detects this as
        # "premature body exit" and raises _SessionFailed.
        proc = launched_handle.pid_like
        await proc.wait()  # type: ignore[union-attr]

    # --- run the protocol session -----------------------------------------
    # host.connect() already happened up-front (before install + resume).
    session_error: BaseException | None = None
    try:
        # before_execute is wired manually inside _opencode_body (after
        # install, before launch) per opencode's documented timing.
        # after_execute is left to the protocol driver — it fires after
        # the body terminates and before the outer finally runs the
        # snapshot capture, matching the documented contract.
        await run_log_protocol_session(
            host, ctx,
            body=_opencode_body,
            on_deliverable=config.on_deliverable,
            after_execute=config.after_execute,
        )
    except _SessionFailed as fail:
        session_error = fail
        raise RuntimeError(str(fail)) from None
    except BaseException as exc:
        session_error = exc
        raise

    finally:
        # Cancellation detection. The protocol driver swallows cancellation
        # cleanly and returns; we observe it here via the ProcessContext
        # flag. ``aggressive=True`` triggers SIGKILL behaviour for a
        # cancelled session vs. a clean SIGTERM for a normal exit.
        if not ctx.should_continue():
            cancelled = True

        if launched_handle is not None:
            try:
                await host.terminate_subprocess(launched_handle, aggressive=cancelled)
            except Exception:  # noqa: BLE001
                _LOG.exception("terminate_subprocess failed")

        if config.supports_resume and session_id is not None:
            # Record how the run actually ended. Cancellation wins over an
            # error; a session that raised is "failed", not "done".
            if cancelled:
                end_state = "cancelled"
            elif session_error is not None:
                end_state = "failed"
            else:
                end_state = "done"
            try:
                await _capture_snapshot(
                    ctx, host,
                    session_id=preserved_session_id or session_id,
                    opencode_db=opencode_db,
                    end_state=end_state,
                    workdir_exclude=config.workdir_exclude,
                    opencode_executable=opencode_exec,
                    session_blob_encrypt=config.session_blob_encrypt,
                )
            except Exception:  # noqa: BLE001
                _LOG.exception(
                    "snapshot capture failed; proceeding with workdir wipe",
                )

        # Best-effort teardown: each step logs and continues so a failure
        # in one does not prevent the next.
        try:
            await host.cleanup_taskdir(aggressive=cancelled)
        except Exception:  # noqa: BLE001
            _LOG.exception("cleanup_taskdir failed")
        try:
            await host.disconnect()
        except Exception:  # noqa: BLE001
            _LOG.exception("host.disconnect failed")
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# --- helpers ---------------------------------------------------------------
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
async def _stream_blob(ctx: ProcessContext, blob_id) -> "AsyncIterator[bytes]":
    """Yield the contents of blob ``blob_id`` chunk by chunk.

    Opens the blob through ``ctx.load_blob`` and requests ``1 << 20`` bytes
    per read, stopping at the first empty read.
    """
    read_size = 1 << 20
    async with ctx.load_blob(blob_id) as reader:
        piece = await reader.read(read_size)
        while piece:
            yield piece
            piece = await reader.read(read_size)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
async def _read_blob_bytes(ctx: ProcessContext, blob_id) -> bytes:
    """Read blob ``blob_id`` completely and return it as one ``bytes`` object.

    Reads through ``ctx.load_blob`` in requests of ``1 << 20`` bytes until
    an empty read signals the end.
    """
    read_size = 1 << 20
    pieces = []
    async with ctx.load_blob(blob_id) as reader:
        piece = await reader.read(read_size)
        while piece:
            pieces.append(piece)
            piece = await reader.read(read_size)
    return b"".join(pieces)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
async def _capture_snapshot(
    ctx: ProcessContext,
    host: Host,
    *,
    session_id: str,
    opencode_db: str,
    end_state: str,
    workdir_exclude: list[str] | None,
    opencode_executable: str = "opencode",
    session_blob_encrypt: "Callable[[bytes], bytes] | None" = None,
) -> None:
    """Export the session, archive the workdir and record both as a snapshot.

    Order of operations: export the opencode session, optionally pass it
    through ``session_blob_encrypt``, store the workdir archive and the
    session payload as blobs, insert the snapshot document, prune older
    snapshots (deleting their blobs best-effort), and finally call
    ``ctx.mark_has_saved_state()``.

    Raises:
        RuntimeError: if the session blob writer reports a ``_position``
            that differs from the payload length.
    """
    exported_session = await host_actions.opencode_export(
        host, opencode_db, session_id,
        opencode_executable=opencode_executable,
    )
    plain_size = len(exported_session)
    _LOG.info(
        "snapshot capture: session_json plaintext bytes=%d session_id=%s",
        plain_size, session_id,
    )

    # Without an encrypt hook the exported bytes are stored as-is.
    if session_blob_encrypt:
        payload = session_blob_encrypt(exported_session)
    else:
        payload = exported_session
    payload_size = len(payload)

    async with ctx.store_blob("workdir") as workdir_writer:
        async for archive_chunk in host.archive_workdir(workdir_exclude):
            await workdir_writer.write(archive_chunk)
        workdir_blob_id = workdir_writer.file_id

    async with ctx.store_blob("session") as session_writer:
        await session_writer.write(payload)
        session_blob_id = session_writer.file_id
        # Belt-and-braces: GridIn._position is the byte count actually
        # written so far. It is compared with the length of the stored
        # payload (after the encrypt hook), not the plaintext length — a
        # mismatch means a short write.
        written = getattr(session_writer, "_position", None)
        if written is not None and written != payload_size:
            raise RuntimeError(
                f"snapshot session blob short-write: expected "
                f"{payload_size} bytes, GridIn._position is {written}"
            )

    await insert_snapshot(
        ctx._db,
        prefix=ctx._prefix,
        process_id=ctx.process_id,
        end_state=end_state,
        session_id=session_id,
        session_blob_id=session_blob_id,
        workdir_blob_id=workdir_blob_id,
        deliverables_emitted=[],
    )

    removed = await prune_snapshots(
        ctx._db, prefix=ctx._prefix, process_id=ctx.process_id,
    )
    # Blob deletion is best-effort: a failure is logged and the remaining
    # blobs are still attempted.
    for stale in removed:
        try:
            await ctx.delete_blob(stale["sessionBlobId"])
        except Exception:  # noqa: BLE001
            _LOG.exception("delete_blob(session) failed")
        try:
            await ctx.delete_blob(stale["workdirBlobId"])
        except Exception:  # noqa: BLE001
            _LOG.exception("delete_blob(workdir) failed")

    await ctx.mark_has_saved_state()
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
async def _rotate_optio_log(host: Host) -> None:
    """Move the restored optio.log content into optio.log.old and empty optio.log.

    The current log text is appended to whatever optio.log.old already
    holds, so history survives consecutive resumes, while `tail -F -n +1`
    on optio.log only sees lines written by the resumed run. When there
    is nothing to rotate, optio.log is still (re)written empty so the
    tail process has a file to follow.
    """
    base = host.workdir.rstrip("/")

    async def _text_or_empty(abs_path: str) -> str:
        # A missing file is treated the same as an empty one.
        try:
            return (await host.fetch_bytes_from_host(abs_path)).decode("utf-8")
        except FileNotFoundError:
            return ""

    fresh_content = await _text_or_empty(f"{base}/optio.log")
    if fresh_content:
        archived = await _text_or_empty(f"{base}/optio.log.old")
        await host.write_text("optio.log.old", archived + fresh_content)
    await host.write_text("optio.log", "")
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
async def _append_resume_log_entry(host) -> None:
    """Add a UTC timestamp line (``YYYY-MM-DDTHH:MM:SSZ``) to <workdir>/resume.log.

    The line is appended with a shell ``echo ... >>`` run through
    ``host.run_command``, which also creates the file when it does not
    exist yet. Gating on config.supports_resume is the caller's job.

    Raises:
        RuntimeError: if the command exits with a non-zero code.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    quoted_stamp = shlex.quote(stamp)
    quoted_path = shlex.quote(f"{host.workdir}/resume.log")
    result = await host.run_command(f"echo {quoted_stamp} >> {quoted_path}")
    if result.exit_code == 0:
        return
    raise RuntimeError(
        f"failed to append to resume.log: exit {result.exit_code}: "
        f"{result.stderr!r}"
    )
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _pick_local_workdir() -> str:
    """Create a new temporary directory prefixed ``optio-opencode-`` and return its path."""
    created_dir = tempfile.mkdtemp(prefix="optio-opencode-")
    return created_dir
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _create_opencode_session_sync(port: int, password: str, directory: str) -> str:
    """Blocking HTTP POST to opencode's /session route. Returns the new session id.

    Called via an executor from :func:`_create_opencode_session` so the main
    event loop isn't blocked on the synchronous urllib call.

    Retries on transient connect/read errors because over a freshly-opened
    SSH local forward the first request occasionally drops (asyncssh needs
    a moment before the channel is wired up).

    Args:
        port: Local TCP port on 127.0.0.1 where opencode is reachable.
        password: Basic-auth password; the username is always ``opencode``.
        directory: Directory sent percent-encoded as the ``directory``
            query parameter.

    Raises:
        RuntimeError: if all four attempts fail with an ``OSError``
            (``URLError`` and connection errors are subclasses), or if the
            response JSON is not an object carrying a non-empty string
            ``id``.
    """
    import base64 as _b64
    import time
    import urllib.parse
    import urllib.request

    auth_token = _b64.b64encode(f"opencode:{password}".encode("utf-8")).decode("ascii")
    url = (
        f"http://127.0.0.1:{port}/session"
        f"?directory={urllib.parse.quote(directory, safe='')}"
    )
    headers = {
        "content-type": "application/json",
        "authorization": f"Basic {auth_token}",
    }

    last_exc: Exception | None = None
    for attempt in range(4):
        if attempt > 0:
            # Linear back-off before each retry: 0.15s, 0.30s, 0.45s.
            time.sleep(0.15 * attempt)
        req = urllib.request.Request(url, method="POST", data=b"{}", headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                body = resp.read().decode("utf-8")
            break
        except OSError as exc:
            # URLError (incl. HTTPError) and ConnectionError are all
            # OSError subclasses, so this single clause covers them.
            last_exc = exc
            continue
    else:
        raise RuntimeError(
            f"opencode /session failed after retries: {last_exc!r}"
        )

    data = json.loads(body)
    # A JSON array/scalar has no ``.get``; treat it like a missing id so the
    # caller gets the same descriptive RuntimeError instead of AttributeError.
    session_id = data.get("id") if isinstance(data, dict) else None
    if not isinstance(session_id, str) or not session_id:
        raise RuntimeError(
            f"opencode /session response has no string 'id' field: {body!r}"
        )
    return session_id
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
async def _create_opencode_session(port: int, password: str, directory: str) -> str:
    """Create an opencode session without blocking the event loop.

    Runs the blocking :func:`_create_opencode_session_sync` in the running
    loop's default executor and returns the session id it produces.
    Exceptions raised by the synchronous helper propagate to the caller.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is
    # the documented way to obtain it.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, _create_opencode_session_sync, port, password, directory
    )
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def create_opencode_task(
    process_id: str,
    name: str,
    config: OpencodeTaskConfig,
    description: str | None = None,
) -> TaskInstance:
    """Build the TaskInstance that runs one opencode web session.

    The task's ``execute`` callable forwards to ``run_opencode_session``
    with the given ``config``. The instance uses the ``"iframe"`` UI
    widget and takes ``supports_resume`` from ``config``.
    """

    async def _execute(ctx: ProcessContext) -> None:
        await run_opencode_session(ctx, config)

    task_fields = {
        "execute": _execute,
        "process_id": process_id,
        "name": name,
        "description": description,
        "ui_widget": "iframe",
        "supports_resume": config.supports_resume,
    }
    return TaskInstance(**task_fields)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""MongoDB `{prefix}_opencode_session_snapshots` collection helpers.
|
|
2
|
+
|
|
3
|
+
One document per terminal run per process_id. Layout:
|
|
4
|
+
|
|
5
|
+
{
|
|
6
|
+
_id: ObjectId,
|
|
7
|
+
processId: str,
|
|
8
|
+
capturedAt: datetime,
|
|
9
|
+
endState: str, # "done" | "failed" | "cancelled"
|
|
10
|
+
sessionId: str, # opencode session id (preserved across export→import)
|
|
11
|
+
sessionBlobId: ObjectId, # GridFS file id for the session JSON
|
|
12
|
+
workdirBlobId: ObjectId, # GridFS file id for the workdir tar.gz
|
|
13
|
+
deliverablesEmitted: list, # audit metadata only; not replayed
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
Retention: keep the latest `SNAPSHOT_RETENTION` per processId. Older rows
|
|
17
|
+
are deleted by `prune_snapshots` and their GridFS blobs are expected to be
|
|
18
|
+
deleted by the caller using the ids returned.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from bson import ObjectId
|
|
25
|
+
from motor.motor_asyncio import AsyncIOMotorDatabase
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Appended to the caller-supplied prefix to form the snapshot collection name.
SESSION_SNAPSHOT_COLLECTION_SUFFIX = "_opencode_session_snapshots"
# How many of the newest snapshots per processId ``prune_snapshots`` keeps.
SNAPSHOT_RETENTION = 5
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _collection(db: AsyncIOMotorDatabase, prefix: str):
    """Return the snapshot collection of ``db`` for the given ``prefix``."""
    collection_name = f"{prefix}{SESSION_SNAPSHOT_COLLECTION_SUFFIX}"
    return db[collection_name]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def ensure_indexes(db: AsyncIOMotorDatabase, prefix: str) -> None:
    """Create the (processId asc, capturedAt desc) index on the snapshot collection.

    Invoked by ``insert_snapshot`` before each insert.
    """
    index_keys = [("processId", 1), ("capturedAt", -1)]
    snapshots = _collection(db, prefix)
    await snapshots.create_index(index_keys, name="by_processId_capturedAt_desc")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def insert_snapshot(
    db: AsyncIOMotorDatabase,
    *,
    prefix: str,
    process_id: str,
    end_state: str,
    session_id: str,
    session_blob_id: ObjectId,
    workdir_blob_id: ObjectId,
    deliverables_emitted: list,
) -> dict:
    """Insert one snapshot document and return it, including its ``_id``.

    Ensures the collection index exists first. ``capturedAt`` is set to
    the current UTC time at insertion.
    """
    await ensure_indexes(db, prefix)

    snapshot_doc = dict(
        processId=process_id,
        capturedAt=datetime.now(timezone.utc),
        endState=end_state,
        sessionId=session_id,
        sessionBlobId=session_blob_id,
        workdirBlobId=workdir_blob_id,
        deliverablesEmitted=deliverables_emitted,
    )
    outcome = await _collection(db, prefix).insert_one(snapshot_doc)
    snapshot_doc["_id"] = outcome.inserted_id
    return snapshot_doc
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def load_latest_snapshot(
    db: AsyncIOMotorDatabase, *, prefix: str, process_id: str,
) -> dict | None:
    """Return the snapshot with the newest ``capturedAt`` for ``process_id``.

    Returns whatever ``find_one`` yields — ``None`` when nothing matches.
    """
    query = {"processId": process_id}
    newest_first = [("capturedAt", -1)]
    return await _collection(db, prefix).find_one(query, sort=newest_first)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
async def prune_snapshots(
    db: AsyncIOMotorDatabase, *, prefix: str, process_id: str,
) -> list[dict]:
    """Delete all but the newest SNAPSHOT_RETENTION snapshots of ``process_id``.

    Returns one `{sessionBlobId, workdirBlobId}` dict per deleted
    snapshot, so the caller can remove the matching GridFS blobs. The
    list is empty when nothing had to be deleted.
    """
    snapshots = _collection(db, prefix)
    cursor = snapshots.find(
        {"processId": process_id},
        projection={"sessionBlobId": 1, "workdirBlobId": 1, "capturedAt": 1},
        sort=[("capturedAt", -1)],
    )
    newest_first = await cursor.to_list(None)

    # Everything past the retention window is stale.
    expired = newest_first[SNAPSHOT_RETENTION:]
    if expired:
        expired_ids = [doc["_id"] for doc in expired]
        await snapshots.delete_many({"_id": {"$in": expired_ids}})

    blob_refs = []
    for doc in expired:
        blob_refs.append(
            {"sessionBlobId": doc["sessionBlobId"], "workdirBlobId": doc["workdirBlobId"]}
        )
    return blob_refs
|