cc-plugin-codex 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cc_plugin_codex/__init__.py +5 -0
- cc_plugin_codex/claude.py +284 -0
- cc_plugin_codex/cli_contract.py +122 -0
- cc_plugin_codex/config.py +172 -0
- cc_plugin_codex/context.py +210 -0
- cc_plugin_codex/jobs.py +561 -0
- cc_plugin_codex/normalize.py +243 -0
- cc_plugin_codex/preflight.py +94 -0
- cc_plugin_codex/py.typed +0 -0
- cc_plugin_codex/schemas.py +344 -0
- cc_plugin_codex/server.py +1656 -0
- cc_plugin_codex-0.1.4.dist-info/METADATA +223 -0
- cc_plugin_codex-0.1.4.dist-info/RECORD +16 -0
- cc_plugin_codex-0.1.4.dist-info/WHEEL +4 -0
- cc_plugin_codex-0.1.4.dist-info/entry_points.txt +2 -0
- cc_plugin_codex-0.1.4.dist-info/licenses/LICENSE +21 -0
cc_plugin_codex/jobs.py
ADDED
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
"""Detached background jobs for long Claude reviews.
|
|
2
|
+
|
|
3
|
+
This server drives a one-shot ``claude -p --output-format json`` call, so a job's
|
|
4
|
+
terminal output is a single JSON envelope written to ``result.json`` — completion
|
|
5
|
+
is "the process exited and the envelope is present", with NO interactive-log or
|
|
6
|
+
TUI scraping. That makes background mode far simpler and more robust here than in
|
|
7
|
+
a harness that tails an interactive CLI.
|
|
8
|
+
|
|
9
|
+
State lives on disk (keyed by workspace), so status/result/cancel keep working
|
|
10
|
+
across MCP server restarts. There is no daemon: single-job lifecycle calls refresh
|
|
11
|
+
and TTL-clean the requested job, list calls clean the workspace, and the count cap
|
|
12
|
+
is enforced when jobs start. ``--max-budget-usd`` still applies its best-effort
|
|
13
|
+
spend stop threshold (not a hard cap) even for a job nobody polls.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import contextlib
|
|
19
|
+
import hashlib
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import signal
|
|
23
|
+
import subprocess
|
|
24
|
+
import threading
|
|
25
|
+
import time
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from datetime import UTC, datetime
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import cast
|
|
30
|
+
from uuid import uuid4
|
|
31
|
+
|
|
32
|
+
from cc_plugin_codex.claude import contract_changed_error
|
|
33
|
+
from cc_plugin_codex.cli_contract import is_contract_drift
|
|
34
|
+
from cc_plugin_codex.normalize import apply_cost_usage, normalize_envelope
|
|
35
|
+
from cc_plugin_codex.schemas import (
|
|
36
|
+
FINGERPRINT,
|
|
37
|
+
ContextSummary,
|
|
38
|
+
ErrorCode,
|
|
39
|
+
ErrorInfo,
|
|
40
|
+
ErrorResult,
|
|
41
|
+
Meta,
|
|
42
|
+
workspace_warning_for,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Names of the environment variables that tune job storage and limits.
STATE_ENV = "CC_PLUGIN_CODEX_STATE_DIR"
TTL_ENV = "CC_PLUGIN_CODEX_JOB_TTL"
MAX_SECONDS_ENV = "CC_PLUGIN_CODEX_JOB_MAX_SECONDS"
MAX_COUNT_ENV = "CC_PLUGIN_CODEX_JOB_MAX_COUNT"

# Defaults used when the matching environment variable is unset or invalid.
DEFAULT_TTL = 24 * 60 * 60  # seconds; terminal job records are deleted after 24h
DEFAULT_MAX_SECONDS = 30 * 60  # seconds; wall-clock cap, a poll past this reaps the job
DEFAULT_MAX_COUNT = 50  # retained jobs per workspace; oldest terminal ones are evicted

# Statuses after which a job never changes state again.
_TERMINAL = {"done", "failed", "cancelled", "timeout"}
# Re-entrant lock taken by the public lifecycle calls in this module.
_JOBS_LOCK = threading.RLock()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _int_env(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset, empty, or is not
    something ``int()`` accepts.
    """
    raw = os.environ.get(name, "")
    try:
        parsed = int(raw)
    except (TypeError, ValueError):
        return default
    return parsed
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def max_seconds() -> int:
    """Return the job wall-clock cap in seconds.

    Read from the MAX_SECONDS_ENV environment variable on every call, with
    DEFAULT_MAX_SECONDS as the fallback.
    """
    cap = _int_env(MAX_SECONDS_ENV, DEFAULT_MAX_SECONDS)
    return cap
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def ttl_seconds() -> int:
    """Return how long, in seconds, a finished job record is retained.

    Read from the TTL_ENV environment variable on every call, with DEFAULT_TTL
    as the fallback.
    """
    ttl = _int_env(TTL_ENV, DEFAULT_TTL)
    return ttl
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def poll_after_ms() -> int:
    """Return the polling interval, in milliseconds, reported in job status payloads."""
    interval_ms = 1000
    return interval_ms
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _state_root() -> Path:
    """Return the directory under which all workspaces keep their job records.

    A non-empty STATE_ENV environment variable overrides the default location
    of ``~/.cache/cc-plugin-codex/jobs``.
    """
    override = os.environ.get(STATE_ENV)
    if not override:
        return Path.home() / ".cache" / "cc-plugin-codex" / "jobs"
    return Path(override)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _ws_dir(cwd: str) -> Path:
    """Return the per-workspace state directory for *cwd*.

    The directory name is ``<sanitized basename>-<12 hex chars>``, where the hex
    part is the start of the SHA-256 of the resolved (realpath) workspace path.
    """
    real = os.path.realpath(cwd)
    short_hash = hashlib.sha256(real.encode()).hexdigest()[:12]
    # os.path.basename on the realpath string keeps the dir-name derivation stable
    # (and matches the digest input); Path.name differs on trailing-slash handling.
    leaf = os.path.basename(real.rstrip("/")) or "workspace"  # noqa: PTH119
    # Keep alphanumerics plus "._-"; every other character becomes "-".
    cleaned = []
    for ch in leaf:
        cleaned.append(ch if (ch.isalnum() or ch in "._-") else "-")
    slug = "".join(cleaned)[:40] or "ws"
    return _state_root() / f"{slug}-{short_hash}"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _job_dir(cwd: str, job_id: str) -> Path:
    """Return the directory holding the record of job *job_id* in workspace *cwd*."""
    workspace = _ws_dir(cwd)
    return workspace / job_id
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _pid_alive(pid: int | None) -> bool:
|
|
99
|
+
if not pid:
|
|
100
|
+
return False
|
|
101
|
+
try:
|
|
102
|
+
os.kill(pid, 0)
|
|
103
|
+
except ProcessLookupError:
|
|
104
|
+
return False
|
|
105
|
+
except PermissionError:
|
|
106
|
+
return True
|
|
107
|
+
return True
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _is_running(pid: int | None) -> bool:
|
|
111
|
+
"""Whether the job process is still running.
|
|
112
|
+
|
|
113
|
+
The job is launched detached but is still our child until it exits, so we
|
|
114
|
+
must reap it with waitpid — otherwise it lingers as a zombie that kill(0)
|
|
115
|
+
reports as 'alive' forever. waitpid(WNOHANG) returns (pid, _) once it exits
|
|
116
|
+
(reaping it), (0, 0) while it runs, and raises ChildProcessError if it is not
|
|
117
|
+
our child (e.g. after a server restart), where we fall back to a kill(0)
|
|
118
|
+
liveness probe."""
|
|
119
|
+
if not pid:
|
|
120
|
+
return False
|
|
121
|
+
try:
|
|
122
|
+
reaped, _ = os.waitpid(pid, os.WNOHANG)
|
|
123
|
+
if reaped == pid:
|
|
124
|
+
return False
|
|
125
|
+
if reaped == 0:
|
|
126
|
+
return True
|
|
127
|
+
except ChildProcessError:
|
|
128
|
+
pass # not our child — use the liveness probe below
|
|
129
|
+
except OSError:
|
|
130
|
+
return False
|
|
131
|
+
return _pid_alive(pid)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _kill_pid_tree(pid: int | None) -> None:
|
|
135
|
+
"""Kill the detached job's process group (it is its own session leader), then
|
|
136
|
+
reap it if it was our child so it does not linger as a zombie."""
|
|
137
|
+
if not pid:
|
|
138
|
+
return
|
|
139
|
+
try:
|
|
140
|
+
if hasattr(os, "killpg"):
|
|
141
|
+
os.killpg(os.getpgid(pid), signal.SIGKILL)
|
|
142
|
+
else: # pragma: no cover - non-POSIX fallback
|
|
143
|
+
os.kill(pid, signal.SIGKILL)
|
|
144
|
+
except (ProcessLookupError, PermissionError, OSError):
|
|
145
|
+
pass
|
|
146
|
+
with contextlib.suppress(ChildProcessError, OSError):
|
|
147
|
+
os.waitpid(pid, 0)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _read_meta(jd: Path) -> dict | None:
|
|
151
|
+
try:
|
|
152
|
+
return json.loads((jd / "meta.json").read_text())
|
|
153
|
+
except (OSError, json.JSONDecodeError):
|
|
154
|
+
return None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _write_meta(jd: Path, meta: dict) -> None:
    """Persist *meta* as JSON to ``jd/meta.json`` atomically.

    The record is written to a temporary sibling file and then renamed over
    ``meta.json``, so a reader (possibly another server process) sees either
    the previous record or the new one, never a truncated file. An interrupted
    write cannot corrupt the existing record.

    Raises:
        OSError: if the record cannot be written; the temporary file is
            removed before the error propagates.
    """
    # Serialize first so a non-serializable record fails before touching disk.
    payload = json.dumps(meta)
    target = jd / "meta.json"
    tmp = jd / f"meta.json.{os.getpid()}.tmp"
    try:
        tmp.write_text(payload, encoding="utf-8")
        tmp.replace(target)
    except OSError:
        with contextlib.suppress(OSError):
            tmp.unlink()
        raise
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _read_envelope(jd: Path) -> dict | None:
|
|
162
|
+
"""Parse the claude JSON envelope from result.json, or None if absent/partial."""
|
|
163
|
+
try:
|
|
164
|
+
text = (jd / "result.json").read_text()
|
|
165
|
+
except OSError:
|
|
166
|
+
return None
|
|
167
|
+
text = text.strip()
|
|
168
|
+
if not text:
|
|
169
|
+
return None
|
|
170
|
+
try:
|
|
171
|
+
env = json.loads(text)
|
|
172
|
+
except json.JSONDecodeError:
|
|
173
|
+
return None
|
|
174
|
+
return env if isinstance(env, dict) else None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass
class JobConfig:
    """Settings of one background job, persisted into its record by start_job.

    ``kind`` is stored at the top level of the record; ``context_summary`` is
    stored via its ``model_dump()`` (or None); every other field is stored
    under the record's ``config`` key.
    """

    kind: str
    config_mode: str
    access: str
    scope: str | None
    base: str | None
    detail: str
    timeout_seconds: int
    workspace_source: str | None
    context_summary: ContextSummary | None
    # Optional settings; a missing redacted_paths is stored as an empty list.
    requested_max_budget_usd: float | None = None
    redacted_paths: list[str] | None = None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def start_job(cmd: list[str], cwd: str, cfg: JobConfig) -> tuple[str, str]:
    """Spawn the claude command detached and persist its record.

    The process runs in its own session with stdout redirected to
    ``result.json`` and stderr to ``stderr.log`` inside a fresh job directory.
    The job record (pid, timestamps, deadline, config) is then written to
    ``meta.json`` and the per-workspace retention cap is enforced.

    Args:
        cmd: Command line to execute.
        cwd: Workspace directory; used as the process working directory and as
            the key of the on-disk state.
        cfg: Job settings persisted alongside the process information.

    Returns:
        ``(job_id, started_at_iso)``.

    Raises:
        Whatever ``subprocess.Popen`` raises when the command cannot be
        started (e.g. OSError). The half-created job directory is removed
        first, so a failed spawn leaves no orphan record behind.
    """
    job_id = uuid4().hex
    jd = _job_dir(cwd, job_id)
    jd.mkdir(parents=True, exist_ok=True)
    # Best-effort: results contain the diff; keep the workspace tree user-only.
    with contextlib.suppress(OSError):
        _ws_dir(cwd).chmod(0o700)
    started = time.time()
    result_path = jd / "result.json"
    stderr_path = jd / "stderr.log"
    try:
        with result_path.open("w") as rf, stderr_path.open("w") as ef:
            proc = subprocess.Popen(
                cmd, cwd=cwd, stdout=rf, stderr=ef, text=True, start_new_session=True
            )
    except Exception:
        # No process was started: drop the empty job directory, then re-raise.
        _rmtree(jd)
        raise
    summary = cfg.context_summary.model_dump() if cfg.context_summary else None
    meta = {
        "job_id": job_id,
        "kind": cfg.kind,
        "pid": proc.pid,
        "started_epoch": started,
        "started_at": datetime.now(UTC).isoformat(),
        "deadline_epoch": started + max_seconds(),
        "completed_epoch": None,
        "terminal_status": None,  # set by cancel/deadline reap
        "config": {
            "config_mode": cfg.config_mode,
            "access": cfg.access,
            "scope": cfg.scope,
            "base": cfg.base,
            "detail": cfg.detail,
            "timeout_seconds": cfg.timeout_seconds,
            "workspace_source": cfg.workspace_source,
            "cwd": cwd,
            "requested_max_budget_usd": cfg.requested_max_budget_usd,
            "redacted_paths": cfg.redacted_paths or [],
        },
        "context_summary": summary,
    }
    # Cap enforcement refreshes and deletes other jobs' records, so it takes the
    # same (re-entrant) lock as status/result/cancel/list.
    with _JOBS_LOCK:
        _write_meta(jd, meta)
        _enforce_count_cap(cwd)
    return job_id, meta["started_at"]
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _status_of(jd: Path, meta: dict) -> str:
    """Compute the live status of a job, reaping it if it overran its deadline.

    * A recorded ``terminal_status`` (set by cancel or a deadline reap) wins.
    * A still-running process is ``running``, unless the current time is past
      ``deadline_epoch``: then it is killed, marked ``timeout`` and the record
      is rewritten.
    * Once the process is gone the job is ``done`` if a parseable envelope
      exists, else ``failed``. ``completed_epoch`` is stamped and persisted the
      first time this is observed.

    *meta* is updated in place whenever the record is rewritten.
    """
    recorded = meta.get("terminal_status")
    if recorded:
        return recorded

    if not _is_running(meta.get("pid")):
        # Process gone: done if it left a parseable envelope, else it crashed.
        if meta.get("completed_epoch") is None:
            meta["completed_epoch"] = time.time()
            _write_meta(jd, meta)
        return "failed" if _read_envelope(jd) is None else "done"

    overdue = time.time() > meta.get("deadline_epoch", float("inf"))
    if not overdue:
        return "running"

    _kill_pid_tree(meta.get("pid"))
    meta["terminal_status"] = "timeout"
    meta["completed_epoch"] = time.time()
    _write_meta(jd, meta)
    return "timeout"
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _elapsed_ms(meta: dict) -> int:
    """Return the job's run time in whole milliseconds, never negative.

    Measured from ``started_epoch`` to ``completed_epoch``, or to now while the
    job has no completion time. A record without ``started_epoch`` yields 0.
    """
    finish = meta.get("completed_epoch") or time.time()
    begin = meta.get("started_epoch", finish)
    millis = int((finish - begin) * 1000)
    return millis if millis > 0 else 0
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _deadline_seconds(meta: dict) -> int:
    """Return the wall-clock window, in seconds, the job was STARTED with.

    Computed as deadline minus start from the record rather than from the
    current environment value, so status stays consistent if the environment
    later changes. Falls back to max_seconds() when the record lacks either
    timestamp.
    """
    begin = meta.get("started_epoch")
    limit = meta.get("deadline_epoch")
    if begin is None or limit is None:
        return max_seconds()
    return max(0, round(limit - begin))
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _expires_at(meta: dict) -> str | None:
|
|
274
|
+
completed = meta.get("completed_epoch")
|
|
275
|
+
if completed is None:
|
|
276
|
+
return None
|
|
277
|
+
return datetime.fromtimestamp(completed + ttl_seconds(), UTC).isoformat()
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _reap_workspace(cwd: str) -> None:
    """Lazy maintenance for one workspace.

    Refreshes the status of every job that has a readable record and deletes
    the terminal ones whose age exceeds the TTL. Age is measured from
    ``completed_epoch``, falling back to ``started_epoch``, then to "now".
    Entries that are not directories or have no readable record are skipped.
    """
    workspace = _ws_dir(cwd)
    if workspace.is_dir():
        lifetime = ttl_seconds()
        checked_at = time.time()
        for job_path in workspace.iterdir():
            record = _read_meta(job_path) if job_path.is_dir() else None
            if record is None:
                continue
            if _status_of(job_path, record) not in _TERMINAL:
                continue
            finished = (
                record.get("completed_epoch") or record.get("started_epoch") or checked_at
            )
            if checked_at - finished > lifetime:
                _rmtree(job_path)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _expired(meta: dict) -> bool:
    """Return True when the job completed more than the TTL ago.

    A record without a completion time is never expired.
    """
    finished = meta.get("completed_epoch")
    return finished is not None and time.time() - finished > ttl_seconds()
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _read_live_job(cwd: str, job_id: str) -> tuple[Path, dict, str] | None:
|
|
308
|
+
"""Read and refresh a single job record.
|
|
309
|
+
|
|
310
|
+
Status/result/cancel are commonly called in tight polling loops. Refreshing
|
|
311
|
+
only the requested record avoids unrelated jobs causing latency or waitpid
|
|
312
|
+
races while still preserving the TTL contract for that record.
|
|
313
|
+
"""
|
|
314
|
+
jd = _job_dir(cwd, job_id)
|
|
315
|
+
meta = _read_meta(jd)
|
|
316
|
+
if meta is None:
|
|
317
|
+
return None
|
|
318
|
+
state = _status_of(jd, meta)
|
|
319
|
+
if state in _TERMINAL and _expired(meta):
|
|
320
|
+
_rmtree(jd)
|
|
321
|
+
return None
|
|
322
|
+
return jd, meta, state
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _enforce_count_cap(cwd: str) -> None:
    """Trim the workspace down towards the retained-jobs cap.

    The cap comes from MAX_COUNT_ENV (default DEFAULT_MAX_COUNT). When the
    workspace holds more job directories than that, the surplus is taken from
    the front of a "terminal first, then oldest start" ordering and only the
    terminal entries among them are deleted. A still-running job is never
    killed to fit, so the workspace may stay above the cap.
    """
    workspace = _ws_dir(cwd)
    limit = _int_env(MAX_COUNT_ENV, DEFAULT_MAX_COUNT)
    job_dirs = [p for p in workspace.iterdir() if p.is_dir()] if workspace.is_dir() else []
    surplus = len(job_dirs) - limit
    if surplus <= 0:
        return

    # Refresh every job before deleting anything.
    ranked = []
    for path in job_dirs:
        record = _read_meta(path) or {}
        finished = _status_of(path, record) in _TERMINAL
        ranked.append((finished, record.get("started_epoch", 0.0), path))
    ranked.sort(key=lambda entry: (not entry[0], entry[1]))  # terminal first, then oldest

    for finished, _started, path in ranked[:surplus]:
        if finished:
            _rmtree(path)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _rmtree(jd: Path) -> None:
    """Best-effort removal of a job directory.

    Unlinks each direct child of *jd*, then removes *jd* itself. Any OSError
    (missing directory, a child that is itself a directory, permissions) stops
    the removal silently, leaving whatever remains in place.
    """
    with contextlib.suppress(OSError):
        for entry in jd.iterdir():
            entry.unlink(missing_ok=True)
        jd.rmdir()
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _build_meta(meta: dict) -> Meta:
    """Build the response ``Meta`` model from a persisted job record.

    Values come from the record's ``config`` section; absent keys fall back to
    ``""`` for cwd, ``"inherit"`` for config_mode, ``"toolless"`` for access,
    the current max_seconds() for timeout_seconds and an empty list for
    redacted_paths. ``elapsed_ms`` and ``job_id`` come from the record itself.
    """
    config = meta.get("config", {})
    workspace = config.get("cwd", "")
    origin = config.get("workspace_source")
    return Meta(
        cwd=workspace,
        workspace_source=origin,
        workspace_warning=workspace_warning_for(origin, workspace),
        config_mode=config.get("config_mode", "inherit"),
        access=config.get("access", "toolless"),
        scope=config.get("scope"),
        base=config.get("base"),
        timeout_seconds=config.get("timeout_seconds", max_seconds()),
        requested_max_budget_usd=config.get("requested_max_budget_usd"),
        redacted_paths=config.get("redacted_paths") or [],
        elapsed_ms=_elapsed_ms(meta),
        job_id=meta.get("job_id"),
    )
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _terminal_cost(jd: Path, state: str) -> float | None:
    """Return the spend recorded by a terminal job, or None.

    A cancelled/timeout job can still leave a parseable (possibly partial)
    envelope that recorded cost, so cost is surfaced for ANY terminal state,
    not just ``done``. This matches the result path (_job_error) and the
    JobStatus.cost_usd contract ('terminal jobs that spent'). The value is the
    envelope's numeric ``total_cost_usd``; non-terminal jobs and envelopes
    without a numeric cost give None.
    """
    if state in _TERMINAL:
        envelope = _read_envelope(jd) or {}
        spent = envelope.get("total_cost_usd")
        if isinstance(spent, (int, float)):
            return float(spent)
    return None
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def status(cwd: str, job_id: str) -> dict | None:
    """Return a JobStatus dict for the job, or None if the job does not exist.

    The record is refreshed (and TTL-cleaned) under the module lock first.
    """
    with _JOBS_LOCK:
        live = _read_live_job(cwd, job_id)
        if live is not None:
            job_path, record, state = live
            return _status_dict(job_path, record, state)
        return None
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _status_dict(jd: Path, meta: dict, state: str) -> dict:
    """Assemble the JobStatus payload for a job in status *state*.

    ``cost_usd`` is filled for terminal jobs whose envelope recorded a cost,
    ``detail`` carries the stderr tail for failed jobs only, and
    ``result_available`` is true only for ``done``.
    """
    cost = _terminal_cost(jd, state)
    detail = _stderr_tail(jd) if state == "failed" else None
    payload = {
        "ok": True,
        "job_id": meta.get("job_id", jd.name),
        "kind": meta.get("kind", ""),
        "status": state,
        "started_at": meta.get("started_at", ""),
        "elapsed_ms": _elapsed_ms(meta),
        "deadline_seconds": _deadline_seconds(meta),
        "poll_after_ms": poll_after_ms(),
        "ttl_seconds": ttl_seconds(),
        "expires_at": _expires_at(meta),
        "result_available": state == "done",
        "cost_usd": cost,
        "detail": detail,
        "fingerprint": FINGERPRINT,
    }
    return payload
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def list_jobs(cwd: str) -> dict:
    """Return a JobListResult dict of the workspace's known jobs, newest first.

    Reaps first (like the other lifecycle calls), so listing can refresh
    statuses and delete expired records; it is not strictly read-only.
    Directories without a readable record are left out of the listing.
    """
    with _JOBS_LOCK:
        _reap_workspace(cwd)
        workspace = _ws_dir(cwd)
        # (start epoch, summary) pairs; the epoch is only the sort key.
        entries = []
        if workspace.is_dir():
            for job_path in workspace.iterdir():
                if not job_path.is_dir():
                    continue
                record = _read_meta(job_path)
                if record is None:
                    continue
                state = _status_of(job_path, record)
                summary = {
                    "job_id": record.get("job_id", job_path.name),
                    "kind": record.get("kind", ""),
                    "status": state,
                    "started_at": record.get("started_at", ""),
                    "elapsed_ms": _elapsed_ms(record),
                    "result_available": state == "done",
                    "expires_at": _expires_at(record),
                    "cost_usd": _terminal_cost(job_path, state),
                }
                entries.append((record.get("started_epoch", 0.0), summary))
        entries.sort(key=lambda pair: pair[0], reverse=True)  # newest first
        jobs = [summary for _epoch, summary in entries]
        return {"ok": True, "jobs": jobs, "fingerprint": FINGERPRINT}
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _stderr_tail(jd: Path, limit: int = 200) -> str | None:
|
|
456
|
+
try:
|
|
457
|
+
text = (jd / "stderr.log").read_text().strip()
|
|
458
|
+
except OSError:
|
|
459
|
+
return None
|
|
460
|
+
return text[-limit:] or None
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def result(cwd: str, job_id: str, consume: bool = False):
    """Return ``(payload, found)`` for a job.

    ``found`` is False (with a None payload) when no such job exists. For a
    ``done`` job the payload is the envelope from ``result.json`` run through
    normalize_envelope; with ``consume=True`` the job record is deleted after
    normalizing. Every other state maps to an error envelope so the contract
    stays ok-discriminated.
    """
    with _JOBS_LOCK:
        live = _read_live_job(cwd, job_id)
        if live is None:
            return None, False
        job_path, record, state = live

        if state != "done":
            # Non-done states map to an error envelope.
            return _job_error(record, state, job_path), True

        raw_envelope = (job_path / "result.json").read_text()
        stored_summary = record.get("context_summary")
        ctx_summary = ContextSummary(**stored_summary) if stored_summary else None
        payload = normalize_envelope(
            record.get("kind", "claude_review_changes"),
            raw_envelope,
            _build_meta(record),
            detail=record.get("config", {}).get("detail", "summary"),
            context_summary=ctx_summary,
        )
        if consume:
            _rmtree(job_path)
        return payload, True
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
# Maps a non-failed, non-done job status to (error code, message, repair hint).
_STATE_TO_ERROR = dict(
    running=(
        "job_running",
        "The job is still running.",
        "Poll claude_job_status; call claude_job_result once status=done.",
    ),
    cancelled=(
        "job_cancelled",
        "The job was cancelled.",
        "Start a new job; a cancelled run cannot be resumed.",
    ),
    timeout=(
        "job_timeout",
        "The job exceeded its wall-clock deadline and was stopped.",
        "Narrow the scope or raise CC_PLUGIN_CODEX_JOB_MAX_SECONDS, then start a new job.",
    ),
)
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _job_error(meta: dict, state: str, jd: Path) -> dict:
    """Build the ErrorResult payload for a job that has no result to return.

    * ``failed`` with a contract-drift signature in stderr: reported with the
      fields of contract_changed_error(), the async twin of the sync
      cli_contract_changed path, so async callers get the same actionable error.
    * other ``failed``: a retryable ``job_failed`` carrying the stderr tail.
    * any other state: looked up in _STATE_TO_ERROR (generic ``job_failed`` if
      unknown); only ``running`` is retryable.

    Any spend recorded by a (possibly partial) envelope is applied to the meta.
    """
    if state != "failed":
        code, message, repair = _STATE_TO_ERROR.get(
            state, ("job_failed", "The job did not complete.", "Start a new job.")
        )
        retryable = state == "running"
    else:
        tail = _stderr_tail(jd)
        if is_contract_drift(tail):
            info = contract_changed_error()
            code = info.code
            message = info.message
            repair = info.repair
            retryable = info.retryable
        else:
            code = "job_failed"
            message = f"The job failed without producing a result. {tail or ''}".strip()
            repair = "Run claude_status to check the CLI is installed and authenticated, then retry."
            retryable = True

    response_meta = _build_meta(meta)
    # Surface any spend the (possibly partial) envelope recorded.
    envelope = _read_envelope(jd)
    if envelope:
        apply_cost_usage(response_meta, envelope)
    error = ErrorInfo(
        code=cast("ErrorCode", code), message=message, repair=repair, retryable=retryable
    )
    return ErrorResult(error=error, meta=response_meta).model_dump(
        mode="json", exclude_none=True
    )
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def cancel(cwd: str, job_id: str) -> dict | None:
    """Kill a running job and mark it cancelled.

    Returns the job's JobStatus dict, or None when the job does not exist. A
    job that is already terminal is left untouched and its current status is
    returned.
    """
    with _JOBS_LOCK:
        live = _read_live_job(cwd, job_id)
        if live is None:
            return None
        job_path, record, state = live
        if state in _TERMINAL:
            return _status_dict(job_path, record, state)

        _kill_pid_tree(record.get("pid"))
        record["terminal_status"] = "cancelled"
        record["completed_epoch"] = time.time()
        _write_meta(job_path, record)
        return _status_dict(job_path, record, "cancelled")
|