optio-opencode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,552 @@
1
+ """The state machine that runs one optio-opencode session.
2
+
3
+ Orchestrates a Host (local or remote) through the lifecycle described in
4
+ Section 4 of the design spec. The public entry point is the factory
5
+ ``create_opencode_task(...)`` which wraps ``run_opencode_session`` in a
6
+ ``TaskInstance`` and sets ``ui_widget="iframe"``.
7
+
8
+ Most of the per-session work is generic log/deliverables protocol
9
+ plumbing (parse ``optio.log``, fetch deliverables, watch for cancel) and
10
+ lives in ``optio_host.protocol.run_log_protocol_session``. This module
11
+ keeps only the opencode-specific work — write AGENTS.md / opencode.json,
12
+ install/launch the opencode binary, set up tunnel and widget, and the
13
+ resume/snapshot brackets around the protocol session.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import base64
20
+ import json
21
+ import logging
22
+ import os
23
+ import secrets
24
+ import shlex
25
+ import tempfile
26
+ from datetime import datetime, timezone
27
+ from typing import AsyncIterator, Callable
28
+
29
+ from optio_core.context import ProcessContext
30
+ from optio_core.models import BasicAuth, TaskInstance
31
+
32
+ from optio_host.context import HookContext
33
+ from optio_host.host import Host, LocalHost, ProcessHandle, RemoteHost
34
+ from optio_host.paths import task_dir
35
+ from optio_host.protocol.session import _SessionFailed, run_log_protocol_session
36
+ from optio_opencode import host_actions
37
+ from optio_opencode.prompt import compose_agents_md
38
+ from optio_opencode.snapshots import (
39
+ insert_snapshot,
40
+ load_latest_snapshot,
41
+ prune_snapshots,
42
+ )
43
+ from optio_opencode.types import OpencodeTaskConfig
44
+
45
+
46
# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)


# Passed as ``ready_timeout_s`` to ``host_actions.launch_opencode``.
# NOTE(review): presumably the number of seconds to wait for the opencode
# server to become ready — confirm against host_actions.
READY_TIMEOUT_S = 30.0
50
+
51
+
52
def _build_host(config: OpencodeTaskConfig, process_id: str) -> Host:
    """Return the Host this task runs on: remote when ``config.ssh`` is set, local otherwise.

    Kept as its own function so tests can monkeypatch
    ``optio_opencode.session._build_host`` and inject a fake host instead
    of launching real subprocesses or SSH.
    """
    root = task_dir(
        ssh=config.ssh, process_id=process_id, consumer_name="optio-opencode",
    )
    if config.ssh is not None:
        return RemoteHost(ssh_config=config.ssh, taskdir=root)

    # Local run: create the task directory and the host's workdir up front.
    os.makedirs(root, exist_ok=True)
    local_host: Host = LocalHost(taskdir=root)
    os.makedirs(local_host.workdir, exist_ok=True)
    return local_host
68
+
69
+
70
async def run_opencode_session(ctx: ProcessContext, config: OpencodeTaskConfig) -> None:
    """Execute function body for one optio-opencode task instance.

    Steps, in order: build the host, look up the latest snapshot when the
    context requests a resume, connect, install opencode, restore the
    snapshot (if any), run the log-protocol session with the
    opencode-specific body, and finally — on every exit path after a
    successful ``host.connect()`` — terminate the launched subprocess,
    capture a snapshot (when ``config.supports_resume`` and a session id
    exists), clean up the task dir and disconnect.

    Raises:
        RuntimeError: when the protocol driver raises ``_SessionFailed``.
        Exception: whatever ``config.session_blob_decrypt`` raises during a
            resume restore; that failure is never downgraded to a
            fresh start. Any other exception from the protocol session,
            install or connect is propagated unchanged.
    """
    # --- per-task filesystem layout ---------------------------------------
    host: Host = _build_host(config, ctx.process_id)
    taskdir = task_dir(
        ssh=config.ssh, process_id=ctx.process_id, consumer_name="optio-opencode",
    )
    opencode_db = f"{taskdir}/opencode.db"

    password = secrets.token_urlsafe(32)
    cancelled = False
    launched_handle: ProcessHandle | None = None
    opencode_exec: str = "opencode"
    session_id: str | None = None
    preserved_session_id: str | None = None
    resuming = False
    session_error: BaseException | None = None

    # --- resume decision (BEFORE the protocol session starts) -------------
    resume_requested = bool(getattr(ctx, "resume", False))
    snapshot: dict | None = None
    if resume_requested:
        snapshot = await load_latest_snapshot(
            ctx._db, prefix=ctx._prefix, process_id=ctx.process_id,
        )

    # Connect + install BEFORE deciding fresh vs resume. The resume path
    # needs ``opencode import`` to replay the saved session DB, which
    # requires opencode to be installed on the host and resolved to an
    # absolute path. Hoisting also lets the fresh path skip the redundant
    # ``host.connect()`` later. ``setup_workdir`` is idempotent (mkdir -p)
    # and the protocol driver still calls it again for the fresh path —
    # harmless. Install progress reports through ``ctx``, so the
    # dashboard sees activity from the very first step.
    await host.connect()
    try:
        # Everything after a successful connect runs under the cleanup
        # ``finally`` below, so a failing install or resume restore no
        # longer leaves the host connected.
        await host.setup_workdir()
        opencode_exec = await host_actions.ensure_opencode_installed(
            HookContext(ctx, host),
            install_if_missing=config.install_if_missing,
            install_dir=config.opencode_install_dir,
        )

        # Resume restore must run BEFORE the protocol session begins, so the
        # driver's tail_task does not subscribe to the restored stale
        # optio.log (which contains last run's DONE / ERROR events). The
        # body below sees ``resuming`` already decided.
        resuming = snapshot is not None
        if resuming:
            await host.remove_file(opencode_db)
            # True only while the consumer's decrypt hook is executing, so
            # the handler below can tell a decrypt failure apart from any
            # other restore failure without inspecting exception text.
            decrypting = False
            try:
                await host.restore_workdir(_stream_blob(ctx, snapshot["workdirBlobId"]))
                session_bytes_raw = await _read_blob_bytes(ctx, snapshot["sessionBlobId"])
                decrypt = config.session_blob_decrypt or (lambda b: b)
                decrypting = True
                session_bytes = decrypt(session_bytes_raw)
                decrypting = False
                await host_actions.opencode_import(
                    host, opencode_db, session_bytes,
                    opencode_executable=opencode_exec,
                )
                # Move the restored log channel out of the way BEFORE the
                # protocol driver subscribes its tail. The snapshot tar
                # includes optio.log from the previous run; without
                # rotation, ``tail -F -n +1`` would re-emit every old
                # DELIVERABLE / DONE / ERROR line and the resumed process
                # would terminate within seconds of launch. Preserve the
                # historical content by appending it to optio.log.old.
                await _rotate_optio_log(host)
                preserved_session_id = snapshot["sessionId"]
            except Exception:
                # If the failure was the session-blob decrypt hook raising,
                # this indicates the snapshot was tampered with or the
                # consumer's keypair changed. Fail loud — silently dropping
                # to fresh-start would mask the security-relevant signal.
                if decrypting:
                    _LOG.error(
                        "resume restore failed inside session_blob_decrypt; "
                        "refusing to fall through to fresh-start. Operator must "
                        "investigate the snapshot blob.",
                    )
                    raise
                _LOG.exception(
                    "resume restore failed; falling back to fresh-start path "
                    "(Mongo blob preserved for inspection)",
                )
                await host.remove_file(opencode_db)
                resuming = False
                preserved_session_id = None

        async def _opencode_body(host: Host, hook_ctx: HookContext) -> None:
            """Opencode-specific body that runs inside the protocol driver.

            Captures launch state via nonlocal so the outer ``finally`` can
            terminate the subprocess and capture the snapshot.
            """
            nonlocal launched_handle, opencode_exec, session_id, preserved_session_id

            if not resuming:
                # Fresh start: the protocol driver has already created the
                # workdir, deliverables/ subdir, and empty optio.log. Ensure
                # any stale opencode db from a prior crashed run is gone,
                # then write the fresh AGENTS.md and opencode.json that the
                # agent consumes.
                await host.remove_file(opencode_db)
                await host.write_text(
                    "AGENTS.md",
                    compose_agents_md(
                        config.consumer_instructions,
                        workdir_exclude=config.workdir_exclude,
                        supports_resume=config.supports_resume,
                    ),
                )
                await host.write_text(
                    "opencode.json", json.dumps(config.opencode_config, indent=2),
                )
                # Note: do NOT call ctx.clear_has_saved_state() here. The spec
                # described it as "belt-and-braces", but in practice it makes
                # `hasSavedState` track the live session rather than the
                # durable snapshot collection. A worker crash mid-Restart
                # would then leave hasSavedState=false even though perfectly
                # good prior snapshots are still in Mongo, hiding the Resume
                # affordance from the UI. The flag is now only ever flipped
                # true by mark_has_saved_state at terminal capture; resume's
                # stale-flag self-healing (snapshot lookup returns None →
                # fresh-start fallback) handles the rare case where the flag
                # is true but no snapshot exists.

            if config.supports_resume:
                await _append_resume_log_entry(host)

            # opencode is already installed by run_opencode_session before
            # this body runs (so resume restore can call opencode_import
            # against a known-good absolute path). ``opencode_exec`` is set
            # on the enclosing closure.

            # --- before_execute hook ------------------------------------
            # Fires after the binary is in place and before opencode
            # launches, so consumer hooks can ship per-task files via
            # hook_ctx.copy_file and run setup commands via
            # hook_ctx.run_on_host.
            if config.before_execute is not None:
                await config.before_execute(hook_ctx)

            # --- launch -------------------------------------------------
            version = await host_actions.opencode_version(
                host, opencode_executable=opencode_exec,
            )
            version_suffix = f" {version}" if version else ""
            ctx.report_progress(None, f"Launching opencode{version_suffix}…")
            handle, opencode_port = await host_actions.launch_opencode(
                host, password,
                ready_timeout_s=READY_TIMEOUT_S,
                opencode_executable=opencode_exec,
            )
            launched_handle = handle

            # --- tunnel + widget registration ---------------------------
            worker_port = await host.establish_tunnel(opencode_port)

            if preserved_session_id is not None:
                session_id = preserved_session_id
            else:
                # Pre-create a single opencode session for this task
                # instance. All dashboards that embed this widget navigate
                # to the same session ID via the iframe URL, so concurrent
                # viewers share live state (events over SSE) rather than
                # each creating a fresh isolated session on load. Matches
                # optio's mental model: one background process, N observers.
                session_id = await _create_opencode_session(
                    worker_port, password, host.workdir,
                )

            await ctx.set_widget_upstream(
                f"http://127.0.0.1:{worker_port}",
                inner_auth=BasicAuth(username="opencode", password=password),
            )
            # Point the iframe directly at the pre-created session so viewers
            # skip both the project picker and the "new session" default.
            # opencode's SPA expects the :dir router param to be a URL-safe
            # base64 encoding of the directory path (see
            # packages/app/src/utils/base64.ts in opencode) — NOT percent-
            # encoding. The {widgetProxyUrl} token is resolved by the iframe
            # widget at mount time.
            _workdir_b64 = (
                base64.urlsafe_b64encode(host.workdir.encode("utf-8"))
                .decode("ascii").rstrip("=")
            )
            await ctx.set_widget_data({
                "iframeSrc": f"{{widgetProxyUrl}}{_workdir_b64}/session/{session_id}",
                "localStorageOverrides": {
                    "opencode.settings.dat:defaultServerUrl": "{widgetProxyUrl}",
                },
            })
            ctx.report_progress(None, "opencode is live")

            # --- await opencode subprocess exit -------------------------
            # The protocol driver runs this body alongside the tail
            # dispatcher and a cancel watcher. When the user cancels, the
            # driver cancels this body's task; when the agent emits
            # DONE/ERROR, the driver returns / raises and again cancels this
            # body. In either case the await below is interrupted via
            # CancelledError before proc exits. If, however, opencode exits
            # on its own without emitting DONE first, the body returns
            # normally and the driver detects this as "premature body exit"
            # and raises _SessionFailed.
            proc = launched_handle.pid_like
            await proc.wait()  # type: ignore[union-attr]

        # --- run the protocol session -------------------------------------
        # host.connect() already happened up-front (before install + resume).
        try:
            # before_execute is wired manually inside _opencode_body (after
            # install, before launch) per opencode's documented timing.
            # after_execute is left to the protocol driver — it fires after
            # the body terminates and before the outer finally runs the
            # snapshot capture, matching the documented contract.
            await run_log_protocol_session(
                host, ctx,
                body=_opencode_body,
                on_deliverable=config.on_deliverable,
                after_execute=config.after_execute,
            )
        except _SessionFailed as fail:
            session_error = fail
            raise RuntimeError(str(fail)) from None
        except BaseException as exc:
            session_error = exc
            raise

    finally:
        # Cancellation detection. The protocol driver swallows cancellation
        # cleanly and returns; we observe it here via the ProcessContext
        # flag. ``aggressive=True`` triggers SIGKILL behaviour for a
        # cancelled session vs. a clean SIGTERM for a normal exit.
        if not ctx.should_continue():
            cancelled = True

        if launched_handle is not None:
            try:
                await host.terminate_subprocess(launched_handle, aggressive=cancelled)
            except Exception:  # noqa: BLE001
                _LOG.exception("terminate_subprocess failed")

        if config.supports_resume and session_id is not None:
            # Record how the run actually ended: cancellation wins, then a
            # recorded session error, otherwise a clean finish.
            if cancelled:
                end_state = "cancelled"
            elif session_error is not None:
                end_state = "failed"
            else:
                end_state = "done"
            try:
                await _capture_snapshot(
                    ctx, host,
                    session_id=preserved_session_id or session_id,
                    opencode_db=opencode_db,
                    end_state=end_state,
                    workdir_exclude=config.workdir_exclude,
                    opencode_executable=opencode_exec,
                    session_blob_encrypt=config.session_blob_encrypt,
                )
            except Exception:  # noqa: BLE001
                _LOG.exception(
                    "snapshot capture failed; proceeding with workdir wipe",
                )

        try:
            await host.cleanup_taskdir(aggressive=cancelled)
        except Exception:  # noqa: BLE001
            _LOG.exception("cleanup_taskdir failed")
        try:
            await host.disconnect()
        except Exception:  # noqa: BLE001
            _LOG.exception("host.disconnect failed")
332
+
333
+
334
+ # --- helpers ---------------------------------------------------------------
335
+
336
+
337
async def _stream_blob(ctx: ProcessContext, blob_id) -> "AsyncIterator[bytes]":
    """Yield the contents of blob ``blob_id``, read via ``ctx.load_blob`` in requests of 1 MiB.

    Iteration stops at the first empty read.
    """
    piece_size = 1 << 20
    async with ctx.load_blob(blob_id) as source:
        while piece := await source.read(piece_size):
            yield piece
344
+
345
+
346
async def _read_blob_bytes(ctx: ProcessContext, blob_id) -> bytes:
    """Read blob ``blob_id`` via ``ctx.load_blob`` completely into memory and return it as ``bytes``.

    The blob is read in requests of 1 MiB until an empty read is returned.
    """
    pieces = []
    async with ctx.load_blob(blob_id) as source:
        while piece := await source.read(1 << 20):
            pieces.append(piece)
    return b"".join(pieces)
355
+
356
+
357
async def _capture_snapshot(
    ctx: ProcessContext,
    host: Host,
    *,
    session_id: str,
    opencode_db: str,
    end_state: str,
    workdir_exclude: list[str] | None,
    opencode_executable: str = "opencode",
    session_blob_encrypt: "Callable[[bytes], bytes] | None" = None,
) -> None:
    """Export the session, store it and the workdir archive as blobs, and record a snapshot.

    Order of operations: export the session via
    ``host_actions.opencode_export``; run it through
    ``session_blob_encrypt`` when one is given; store the workdir archive
    under the blob name ``"workdir"`` and the session payload under
    ``"session"``; insert the snapshot document; prune old snapshots and
    delete their blobs (best-effort, failures are only logged); finally
    call ``ctx.mark_has_saved_state()``.

    Raises:
        RuntimeError: when the session blob writer exposes ``_position``
            and it differs from the payload length.
    """
    plaintext = await host_actions.opencode_export(
        host, opencode_db, session_id,
        opencode_executable=opencode_executable,
    )
    plaintext_len = len(plaintext)
    _LOG.info(
        "snapshot capture: session_json plaintext bytes=%d session_id=%s",
        plaintext_len, session_id,
    )

    # No hook means the plaintext export is stored as-is.
    payload = session_blob_encrypt(plaintext) if session_blob_encrypt else plaintext
    payload_len = len(payload)

    async with ctx.store_blob("workdir") as workdir_sink:
        async for piece in host.archive_workdir(workdir_exclude):
            await workdir_sink.write(piece)
    workdir_blob_id = workdir_sink.file_id

    async with ctx.store_blob("session") as session_sink:
        await session_sink.write(payload)
        session_blob_id = session_sink.file_id
        # Belt-and-braces: GridIn._position is the byte count actually
        # written so far. Compare against the encrypted payload length
        # (NOT the plaintext length) — short-write would be a real failure.
        bytes_written = getattr(session_sink, "_position", None)
        if bytes_written is not None and bytes_written != payload_len:
            raise RuntimeError(
                f"snapshot session blob short-write: expected "
                f"{payload_len} bytes, GridIn._position is {bytes_written}"
            )

    await insert_snapshot(
        ctx._db,
        prefix=ctx._prefix,
        process_id=ctx.process_id,
        end_state=end_state,
        session_id=session_id,
        session_blob_id=session_blob_id,
        workdir_blob_id=workdir_blob_id,
        deliverables_emitted=[],
    )
    expired = await prune_snapshots(
        ctx._db, prefix=ctx._prefix, process_id=ctx.process_id,
    )
    blob_fields = (
        ("sessionBlobId", "delete_blob(session) failed"),
        ("workdirBlobId", "delete_blob(workdir) failed"),
    )
    for stale in expired:
        # Each blob is deleted independently so one failure does not stop
        # the other from being removed.
        for field, failure_message in blob_fields:
            try:
                await ctx.delete_blob(stale[field])
            except Exception:  # noqa: BLE001
                _LOG.exception(failure_message)

    await ctx.mark_has_saved_state()
424
+
425
+
426
async def _rotate_optio_log(host: Host) -> None:
    """Append the restored optio.log to optio.log.old, then truncate optio.log.

    Preserves the historical log content across consecutive resumes
    (rather than discarding it) while ensuring `tail -F -n +1` only sees
    fresh lines emitted in the resumed run.

    Both files are decoded as UTF-8 with undecodable bytes replaced by
    U+FFFD, so a log containing an invalid or truncated byte sequence is
    still rotated instead of raising ``UnicodeDecodeError``. A missing file
    (``FileNotFoundError`` from ``host.fetch_bytes_from_host``) is treated
    as empty.
    """
    workdir = host.workdir.rstrip("/")

    async def _read_text(abs_path: str) -> str:
        # Return the file's text, or "" when the host reports it missing.
        try:
            raw = await host.fetch_bytes_from_host(abs_path)
        except FileNotFoundError:
            return ""
        return raw.decode("utf-8", errors="replace")

    current = await _read_text(f"{workdir}/optio.log")
    if not current:
        # Nothing to rotate. Still ensure optio.log exists empty so the
        # tail process has something to follow.
        await host.write_text("optio.log", "")
        return

    existing_old = await _read_text(f"{workdir}/optio.log.old")
    await host.write_text("optio.log.old", existing_old + current)
    await host.write_text("optio.log", "")
451
+
452
+
453
async def _append_resume_log_entry(host) -> None:
    """Append one ISO 8601 UTC timestamp line to <workdir>/resume.log.

    The append is done with a shell ``echo ... >>`` run through
    ``host.run_command``, which also creates the file when it is missing.
    The caller decides whether to call this (gate on
    ``config.supports_resume``).

    Raises:
        RuntimeError: when the command's ``exit_code`` is non-zero.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    log_path = f"{host.workdir}/resume.log"
    command = f"echo {shlex.quote(stamp)} >> {shlex.quote(log_path)}"
    outcome = await host.run_command(command)
    if outcome.exit_code == 0:
        return
    raise RuntimeError(
        f"failed to append to resume.log: exit {outcome.exit_code}: "
        f"{outcome.stderr!r}"
    )
469
+
470
+
471
def _pick_local_workdir() -> str:
    """Create a new temporary directory whose name starts with ``optio-opencode-`` and return its path."""
    created_dir = tempfile.mkdtemp(prefix="optio-opencode-")
    return created_dir
473
+
474
+
475
def _create_opencode_session_sync(port: int, password: str, directory: str) -> str:
    """Blocking HTTP POST to opencode's /session route. Returns the new session id.

    Called via an executor from :func:`_create_opencode_session` so the main
    event loop isn't blocked on the synchronous urllib call.

    Retries on transient connect/read errors because over a freshly-opened
    SSH local forward the first request occasionally drops (asyncssh needs
    a moment before the channel is wired up).

    Args:
        port: Local TCP port on 127.0.0.1 to send the request to.
        password: Password for HTTP basic auth; the username is ``opencode``.
        directory: Sent percent-encoded as the ``directory`` query parameter.

    Raises:
        RuntimeError: when every attempt fails with an ``OSError``, when the
            response body is not valid JSON, or when it is not a JSON object
            with a non-empty string ``id``.
    """
    import time
    import urllib.parse
    import urllib.request

    credentials = base64.b64encode(
        f"opencode:{password}".encode("utf-8")
    ).decode("ascii")
    url = (
        f"http://127.0.0.1:{port}/session"
        f"?directory={urllib.parse.quote(directory, safe='')}"
    )
    headers = {
        "content-type": "application/json",
        "authorization": f"Basic {credentials}",
    }
    # The target is always loopback; an empty ProxyHandler keeps urllib from
    # routing the request through an http_proxy taken from the environment.
    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))

    last_exc: Exception | None = None
    for attempt in range(4):
        if attempt > 0:
            time.sleep(0.15 * attempt)
        req = urllib.request.Request(url, method="POST", data=b"{}", headers=headers)
        try:
            with opener.open(req, timeout=15) as resp:
                body = resp.read().decode("utf-8")
            break
        except OSError as exc:
            # URLError (including HTTPError) and ConnectionError are both
            # OSError subclasses, so this covers everything retried before.
            last_exc = exc
    else:
        raise RuntimeError(
            f"opencode /session failed after retries: {last_exc!r}"
        )

    try:
        data = json.loads(body)
    except ValueError as exc:
        raise RuntimeError(
            f"opencode /session response is not valid JSON: {body!r}"
        ) from exc
    # Anything other than a JSON object cannot carry an "id" field.
    session_id = data.get("id") if isinstance(data, dict) else None
    if not isinstance(session_id, str) or not session_id:
        raise RuntimeError(
            f"opencode /session response has no string 'id' field: {body!r}"
        )
    return session_id
525
+
526
+
527
async def _create_opencode_session(port: int, password: str, directory: str) -> str:
    """Run :func:`_create_opencode_session_sync` in the loop's default executor and return its result.

    Keeps the blocking HTTP call off the event loop.
    """
    running_loop = asyncio.get_running_loop()
    pending = running_loop.run_in_executor(
        None, _create_opencode_session_sync, port, password, directory
    )
    return await pending
532
+
533
+
534
def create_opencode_task(
    process_id: str,
    name: str,
    config: OpencodeTaskConfig,
    description: str | None = None,
) -> TaskInstance:
    """Return a TaskInstance that runs one opencode web session.

    The task's ``execute`` callable runs ``run_opencode_session`` with the
    given ``config``; the widget is ``"iframe"`` and ``supports_resume`` is
    copied from ``config``.
    """

    async def _execute(ctx: ProcessContext) -> None:
        # Bind ``config`` so the task only needs the ProcessContext.
        await run_opencode_session(ctx, config)

    task = TaskInstance(
        execute=_execute,
        process_id=process_id,
        name=name,
        description=description,
        ui_widget="iframe",
        supports_resume=config.supports_resume,
    )
    return task
@@ -0,0 +1,101 @@
1
+ """MongoDB `{prefix}_opencode_session_snapshots` collection helpers.
2
+
3
+ One document per terminal run per process_id. Layout:
4
+
5
+ {
6
+ _id: ObjectId,
7
+ processId: str,
8
+ capturedAt: datetime,
9
+ endState: str, # "done" | "failed" | "cancelled"
10
+ sessionId: str, # opencode session id (preserved across export→import)
11
+ sessionBlobId: ObjectId, # GridFS file id for the session JSON
12
+ workdirBlobId: ObjectId, # GridFS file id for the workdir tar.gz
13
+ deliverablesEmitted: list, # audit metadata only; not replayed
14
+ }
15
+
16
+ Retention: keep the latest `SNAPSHOT_RETENTION` per processId. Older rows
17
+ are deleted by `prune_snapshots` and their GridFS blobs are expected to be
18
+ deleted by the caller using the ids returned.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from datetime import datetime, timezone
24
+ from bson import ObjectId
25
+ from motor.motor_asyncio import AsyncIOMotorDatabase
26
+
27
+
28
# Appended to the caller-supplied prefix to form the collection name.
SESSION_SNAPSHOT_COLLECTION_SUFFIX = "_opencode_session_snapshots"
# Number of newest snapshots per processId that prune_snapshots keeps.
SNAPSHOT_RETENTION = 5
30
+
31
+
32
def _collection(db: AsyncIOMotorDatabase, prefix: str):
    """Return the snapshots collection of ``db`` for the given ``prefix``."""
    collection_name = f"{prefix}{SESSION_SNAPSHOT_COLLECTION_SUFFIX}"
    return db[collection_name]
34
+
35
+
36
async def ensure_indexes(db: AsyncIOMotorDatabase, prefix: str) -> None:
    """Idempotent index creation — called lazily by insert_snapshot.

    Creates the compound index on ``processId`` ascending and
    ``capturedAt`` descending.
    """
    index_keys = [("processId", 1), ("capturedAt", -1)]
    snapshots = _collection(db, prefix)
    await snapshots.create_index(index_keys, name="by_processId_capturedAt_desc")
42
+
43
+
44
async def insert_snapshot(
    db: AsyncIOMotorDatabase,
    *,
    prefix: str,
    process_id: str,
    end_state: str,
    session_id: str,
    session_blob_id: ObjectId,
    workdir_blob_id: ObjectId,
    deliverables_emitted: list,
) -> dict:
    """Insert one snapshot document and return it, including its ``_id``.

    Ensures the collection's index exists first. ``capturedAt`` is set to
    the current UTC time.
    """
    await ensure_indexes(db, prefix)
    captured_at = datetime.now(timezone.utc)
    record = {
        "processId": process_id,
        "capturedAt": captured_at,
        "endState": end_state,
        "sessionId": session_id,
        "sessionBlobId": session_blob_id,
        "workdirBlobId": workdir_blob_id,
        "deliverablesEmitted": deliverables_emitted,
    }
    snapshots = _collection(db, prefix)
    outcome = await snapshots.insert_one(record)
    record["_id"] = outcome.inserted_id
    return record
68
+
69
+
70
async def load_latest_snapshot(
    db: AsyncIOMotorDatabase, *, prefix: str, process_id: str,
) -> dict | None:
    """Return the snapshot with the newest ``capturedAt`` for ``process_id``, or ``None`` when there is none."""
    query = {"processId": process_id}
    newest_first = [("capturedAt", -1)]
    snapshots = _collection(db, prefix)
    return await snapshots.find_one(query, sort=newest_first)
76
+
77
+
78
async def prune_snapshots(
    db: AsyncIOMotorDatabase, *, prefix: str, process_id: str,
) -> list[dict]:
    """Keep the latest SNAPSHOT_RETENTION; delete the rest.

    Returns a list of `{sessionBlobId, workdirBlobId}` dicts for the
    deleted snapshots so the caller can remove the corresponding GridFS
    blobs. The list is empty when nothing was deleted.
    """
    snapshots = _collection(db, prefix)
    cursor = snapshots.find(
        {"processId": process_id},
        projection={"sessionBlobId": 1, "workdirBlobId": 1, "capturedAt": 1},
        sort=[("capturedAt", -1)],
    )
    newest_first = await cursor.to_list(None)
    # Everything past the retention window is expired.
    expired = newest_first[SNAPSHOT_RETENTION:]
    if expired:
        expired_ids = [doc["_id"] for doc in expired]
        await snapshots.delete_many({"_id": {"$in": expired_ids}})
    return [
        {"sessionBlobId": doc["sessionBlobId"], "workdirBlobId": doc["workdirBlobId"]}
        for doc in expired
    ]