ophar 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/client.py +137 -0
- cli/commands/metrics.py +59 -0
- cli/commands/settings.py +25 -0
- cli/commands/system.py +76 -0
- cli/commands/tasks.py +104 -0
- cli/display/formatting.py +29 -0
- cli/main.py +19 -0
- ophar/__init__.py +7 -0
- ophar/_bundle/AGENTS.md +30 -0
- ophar/_bundle/CLAUDE.md +38 -0
- ophar/_bundle/harness/checkpoint.sh +106 -0
- ophar/_bundle/harness/dispatch.sh +194 -0
- ophar/_bundle/harness/ground-truth.sh +121 -0
- ophar/_bundle/harness/iterate.sh +137 -0
- ophar/_bundle/harness/land.sh +47 -0
- ophar/_bundle/harness/ledger.sh +39 -0
- ophar/_bundle/harness/lib/adapt-report.sh +37 -0
- ophar/_bundle/harness/lib/log-metrics.sh +71 -0
- ophar/_bundle/harness/lib/log-opus.sh +48 -0
- ophar/_bundle/harness/lib/mock-claude.sh +36 -0
- ophar/_bundle/harness/lib/mock-cursor-agent.sh +170 -0
- ophar/_bundle/harness/mcp_server.py +462 -0
- ophar/_bundle/harness/metrics-report.sh +175 -0
- ophar/_bundle/harness/orchestrate.sh +221 -0
- ophar/_bundle/harness/reconcile.sh +109 -0
- ophar/_bundle/harness/route-report.sh +111 -0
- ophar/_bundle/harness/run.sh +75 -0
- ophar/_bundle/harness/verdict.sh +91 -0
- ophar/_bundle/harness/verify-heldout.sh +126 -0
- ophar/_bundle/heldout/T-0002/manifest.json +8 -0
- ophar/_bundle/heldout/T-0002/test_heldout_signals.py +39 -0
- ophar/_bundle/heldout/T-1001/manifest.json +8 -0
- ophar/_bundle/heldout/T-1001/test_heldout_signals.py +55 -0
- ophar/_bundle/heldout/T-RESERVE-DEMO/manifest.json +12 -0
- ophar/_bundle/heldout/T-RESERVE-DEMO/test_place.py +15 -0
- ophar/_bundle/heldout/T-RESERVE-DEMO/test_reserve.py +16 -0
- ophar/_bundle/orchestrator-pipeline-plan.md +513 -0
- ophar/_bundle/state/STATE.md +77 -0
- ophar/_bundle/tasks/T-0001.json +12 -0
- ophar/_bundle/tasks/T-0002.json +13 -0
- ophar/_bundle/tasks/T-1002.json +13 -0
- ophar/bootstrap.py +84 -0
- ophar/mcp_entry.py +33 -0
- ophar/paths.py +51 -0
- ophar/setup_cmd.py +99 -0
- ophar-0.1.0.dist-info/METADATA +394 -0
- ophar-0.1.0.dist-info/RECORD +68 -0
- ophar-0.1.0.dist-info/WHEEL +5 -0
- ophar-0.1.0.dist-info/entry_points.txt +4 -0
- ophar-0.1.0.dist-info/licenses/LICENSE +21 -0
- ophar-0.1.0.dist-info/top_level.txt +3 -0
- server/__init__.py +0 -0
- server/config.py +83 -0
- server/main.py +59 -0
- server/models/__init__.py +85 -0
- server/routers/__init__.py +0 -0
- server/routers/ledger.py +36 -0
- server/routers/metrics.py +29 -0
- server/routers/settings.py +28 -0
- server/routers/state.py +21 -0
- server/routers/tasks.py +141 -0
- server/services/__init__.py +0 -0
- server/services/dispatch.py +175 -0
- server/services/metrics.py +85 -0
- server/services/registry.py +88 -0
- server/services/state.py +40 -0
- server/ws/__init__.py +0 -0
- server/ws/events.py +75 -0
cli/client.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""HTTP/WS client with auto-spawn of the local API server (OpenCode-style).
|
|
2
|
+
|
|
3
|
+
Discovery: state/server/server.json lockfile {pid, port, started_at}.
|
|
4
|
+
If the server is alive (/health) -> connect.
|
|
5
|
+
Otherwise -> spawn uvicorn in the background, wait for /health, write lockfile.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import shlex
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
import time
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
import websockets
|
|
20
|
+
|
|
21
|
+
from ophar.paths import get_root
|
|
22
|
+
|
|
23
|
+
# Project root directory, obtained from ophar.paths.get_root().
ROOT = get_root()
|
|
24
|
+
# Directory holding the local API server's lockfile and log.
STATE_DIR = ROOT / "state" / "server"
|
|
25
|
+
# Lockfile recording the spawned server ({pid, port, started_at}).
LOCKFILE = STATE_DIR / "server.json"
|
|
26
|
+
# Interpreter used to launch uvicorn for the background server.
PYTHON = sys.executable
|
|
27
|
+
# Port used when the OPUS_PORT environment variable is not set.
DEFAULT_PORT = 8001
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _ensure_state_dir() -> None:
    """Create the server state directory, including missing parents, if absent."""
    target = STATE_DIR
    target.mkdir(exist_ok=True, parents=True)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _read_lockfile() -> dict | None:
    """Load the server lockfile.

    Returns:
        The parsed lockfile mapping, or ``None`` when the file is missing,
        unreadable, not valid JSON, or valid JSON that is not an object.
    """
    try:
        payload = json.loads(LOCKFILE.read_text())
    except (OSError, ValueError):
        # OSError: missing or unreadable file (no separate exists() probe, so
        # there is no window between the check and the read).
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Callers index the result by key; a JSON scalar or list would crash them.
    return payload if isinstance(payload, dict) else None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _write_lockfile(data: dict) -> None:
    """Persist *data* as the server lockfile.

    The JSON is written to a sibling temporary file and then moved into place
    with ``os.replace``, so a concurrent reader never observes a half-written
    lockfile.

    Args:
        data: JSON-serialisable mapping to store.
    """
    _ensure_state_dir()
    # Include the pid in the staging name so two CLI processes do not share it.
    staging = LOCKFILE.with_name(f"{LOCKFILE.name}.{os.getpid()}.tmp")
    staging.write_text(json.dumps(data, indent=2))
    os.replace(staging, LOCKFILE)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _is_alive(port: int) -> bool:
    """Report whether 127.0.0.1:<port> answers /health with status "ok"."""
    health_url = f"http://127.0.0.1:{port}/health"
    try:
        reply = httpx.get(health_url, timeout=2)
        if reply.status_code != 200:
            return False
        return reply.json().get("status") == "ok"
    except Exception:
        # Best-effort probe: any failure at all means "not alive".
        return False
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _pid_alive(pid: int) -> bool:
    """Return True when a process with id *pid* currently exists.

    Uses the "signal 0" probe: no signal is delivered, only the existence and
    permission checks are performed.

    Args:
        pid: Process id to probe. Anything that is not a positive integer is
            reported as not alive.
    """
    # Non-positive ids address process groups in kill(), so they can never
    # identify a single server process; the pid may also come from a
    # hand-edited lockfile and not be an int at all.
    if not isinstance(pid, int) or pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return True
    except OSError:
        return False
    return True
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_base_url() -> str:
    """Get the base URL of the API server, spawning it if necessary.

    A lockfile that names a live pid whose /health endpoint answers is reused.
    Otherwise uvicorn is started in the background on ``OPUS_PORT`` (default
    ``DEFAULT_PORT``), the lockfile is rewritten, and /health is polled for up
    to 15 seconds.

    Returns:
        The base URL (e.g. 'http://127.0.0.1:8001').

    Raises:
        RuntimeError: If the spawned server exits early or does not become
            healthy in time.
    """
    lock = _read_lockfile()
    if lock:
        # Use .get(): a truncated or hand-edited lockfile must not crash the CLI.
        port = lock.get("port")
        pid = lock.get("pid")
        if (
            isinstance(port, int)
            and isinstance(pid, int)
            and _pid_alive(pid)
            and _is_alive(port)
        ):
            return f"http://127.0.0.1:{port}"

    # Spawn new server
    port = int(os.environ.get("OPUS_PORT", str(DEFAULT_PORT)))
    log_path = STATE_DIR / "server.log"
    # The log lives in STATE_DIR, so the directory must exist before open().
    _ensure_state_dir()

    env = {**os.environ}
    cmd = [
        PYTHON, "-m", "uvicorn", "server.main:app",
        "--host", "127.0.0.1",
        "--port", str(port),
        "--log-level", "warning",
    ]
    with open(log_path, "a") as fh:
        proc = subprocess.Popen(
            cmd,
            stdout=fh,
            stderr=subprocess.STDOUT,
            env=env,
            # Same effect as preexec_fn=os.setsid (own session, survives the
            # CLI), without running Python code between fork and exec.
            start_new_session=True,
        )

    _write_lockfile({
        "pid": proc.pid,
        "port": port,
        "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    })

    # Wait for /health (up to 15s)
    base = f"http://127.0.0.1:{port}"
    for _ in range(30):
        if _is_alive(port):
            return base
        if proc.poll() is not None:
            # The child already exited (port in use, import error, ...): the
            # lockfile now names a dead pid, so drop it and stop waiting.
            LOCKFILE.unlink(missing_ok=True)
            break
        time.sleep(0.5)

    raise RuntimeError(f"Server did not start on port {port}. Check {log_path} for errors.")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def get_ws_url(path: str) -> str:
    """Build the WebSocket URL for *path* on the local API server."""
    http_base = get_base_url()
    return f"{http_base.replace('http://', 'ws://')}{path}"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def api_get(path: str, **params) -> dict | list:
    """GET *path* from the API server and return the decoded JSON body.

    Keyword arguments are sent as query parameters. An HTTP error status
    raises through ``raise_for_status``.
    """
    url = f"{get_base_url()}{path}"
    response = httpx.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def api_post(path: str, body: dict | None = None) -> dict:
    """POST *body* as JSON to *path* and return the decoded JSON reply.

    A missing or empty body is sent as ``{}``. An HTTP error status raises
    through ``raise_for_status``.
    """
    url = f"{get_base_url()}{path}"
    payload = body if body else {}
    response = httpx.post(url, json=payload, timeout=120)
    response.raise_for_status()
    return response.json()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def api_put(path: str, body: dict) -> dict:
    """PUT *body* as JSON to *path* and return the decoded JSON reply.

    An HTTP error status raises through ``raise_for_status``.
    """
    url = f"{get_base_url()}{path}"
    response = httpx.put(url, json=body, timeout=30)
    response.raise_for_status()
    return response.json()
|
cli/commands/metrics.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Metrics commands."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.live import Live
|
|
8
|
+
from rich.table import Table
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
|
|
11
|
+
from ..client import api_get
|
|
12
|
+
|
|
13
|
+
app = typer.Typer()
|
|
14
|
+
console = Console()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _render_metrics_table(data: dict) -> Table:
    """Build the "Metrics Snapshot" table from an /api/metrics payload.

    Args:
        data: Response mapping; its "raw" entry holds the metric values.

    Returns:
        A rich Table with one row per metric. The wall-clock and Opus rows
        are added only when those sections are present and non-empty.
    """
    raw = data["raw"]

    def _pct(key: str) -> str:
        # A missing or null rate is shown as 0.0%.
        return f"{(raw.get(key, 0) or 0) * 100:.1f}%"

    table = Table(title="Metrics Snapshot")
    table.add_column("Metric")
    table.add_column("Value")
    table.add_row("[bold]Runs[/]", str(raw.get("runs", "?")))
    table.add_row("[bold]Work OK Rate[/]", _pct("work_ok_rate"))
    table.add_row("[bold]Overclaim Rate[/]", _pct("overclaim_rate"))
    table.add_row("[bold]Composer Tokens[/]", str(raw.get("composer_tokens_total", "?")))

    # `or {}` also covers an explicit null, which .get()'s default does not.
    q = raw.get("quantiles") or {}
    wc = q.get("wall_clock_s") or {}
    if wc:
        table.add_row("[bold]Wall (p50/p95)[/]", f"{wc.get('p50', '?')} / {wc.get('p95', '?')}s")

    opus = raw.get("opus") or {}
    if opus:
        table.add_row("[bold]Opus Tokens Total[/]", str(opus.get("opus_tokens_total", "?")))
    return table
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@app.command()
def show(
    json_: bool = typer.Option(False, "--json", help="Raw JSON output"),
    watch: bool = typer.Option(False, "--watch", help="Live refreshing dashboard"),
    classes: bool = typer.Option(False, "--classes", help="Breakdown by class"),
):
    """View metrics."""
    # Precedence of the flags: --classes, then --watch, then --json.
    if classes:
        console.print_json(data=api_get("/api/metrics/classes"))
    elif watch:
        # Re-fetch and redraw every 3 seconds; the loop has no normal exit.
        with Live(refresh_per_second=0.3) as live:
            while True:
                live.update(_render_metrics_table(api_get("/api/metrics")))
                time.sleep(3)
    else:
        snapshot = api_get("/api/metrics")
        if json_:
            console.print_json(data=snapshot)
        else:
            console.print(_render_metrics_table(snapshot))
|
cli/commands/settings.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Settings commands."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from ..client import api_get, api_put
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def settings_get():
    """Show current settings."""
    current = api_get("/api/settings")
    # One "name = value" line per setting, ordered by name.
    for name in sorted(current):
        console.print(f"[bold]{name}[/] = {current[name]}")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def settings_set(key: str = typer.Argument(...), value: str = typer.Argument(...)):
    """Set a setting (e.g. opctl settings-set MAX_ITERATIONS 5)."""
    # Values that parse as integers are sent as numbers; anything else stays text.
    try:
        payload = {key: int(value)}
    except ValueError:
        payload = {key: value}
    updated = api_put("/api/settings", payload)
    console.print(f"[green]{key}[/] → {updated.get(key)}")
|
cli/commands/system.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""System commands: serve, stop, status, reconcile.
|
|
2
|
+
|
|
3
|
+
The orchestrator is NOT a CLI command - it is reached through the `ophar`
|
|
4
|
+
MCP server (instructions + pipeline:// resources + tools). Run `claude` and the
|
|
5
|
+
registered MCP server makes the session an orchestrator. There is no `opctl chat`.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import signal
|
|
10
|
+
|
|
11
|
+
import typer
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.table import Table
|
|
14
|
+
|
|
15
|
+
from ..client import get_base_url, api_get, api_post
|
|
16
|
+
|
|
17
|
+
app = typer.Typer()
|
|
18
|
+
console = Console()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.command()
def serve():
    """Start the API server explicitly (normally auto-spawned)."""
    # get_base_url() reuses a healthy server or spawns one before returning.
    console.print(f"[green]Server running at {get_base_url()}[/]")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@app.command()
def stop():
    """Stop the API server.

    Sends SIGTERM to the pid recorded in the lockfile and removes the
    lockfile. The lockfile is kept when the signal is refused, because the
    server is then still running.
    """
    from ..client import _read_lockfile, LOCKFILE

    lock = _read_lockfile()
    if not lock:
        console.print("[dim]No server running[/]")
        return

    pid = lock.get("pid") if isinstance(lock, dict) else None
    if not isinstance(pid, int) or pid <= 0:
        # Nothing safe to signal: pid <= 0 would address a whole process group.
        console.print("[dim]Lockfile has no usable pid; removing it[/]")
        LOCKFILE.unlink(missing_ok=True)
        return

    try:
        os.kill(pid, signal.SIGTERM)
    except PermissionError:
        # The process exists but is not ours; keep the lockfile so later
        # commands still find the running server.
        console.print(f"[red]Not permitted to stop server (pid {pid})[/]")
        return
    except OSError:
        console.print("[dim]Server already stopped[/]")
    else:
        console.print(f"[yellow]Stopped server (pid {pid})[/]")
    LOCKFILE.unlink(missing_ok=True)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@app.command()
def status():
    """Show pipeline status.

    Prints a table with the API server URL, the number of queued or running
    tasks, and headline figures from /api/metrics.
    """
    base = get_base_url()
    metrics = api_get("/api/metrics")
    tasks_data = api_get("/api/tasks")
    raw = metrics["raw"]
    active_count = sum(1 for t in tasks_data if t["status"] in ("queued", "running"))

    def _pct(key: str) -> str:
        # A missing or null rate is shown as 0.0%.
        return f"{(raw.get(key, 0) or 0) * 100:.1f}%"

    def _count(section: dict, key: str) -> str:
        # A null total would make the "," format spec raise TypeError, so
        # missing and null both render as 0, like the rate rows.
        value = section.get(key) or 0
        return f"{value:,}"

    table = Table(title="Ophar status")
    table.add_column("Key")
    table.add_column("Value")
    table.add_row("[bold]API server[/]", f"[green]running ({base})[/]")
    table.add_row("[bold]Active tasks[/]", f"{active_count}")
    table.add_row("[bold]Total runs[/]", str(raw.get("runs", "?")))
    table.add_row("[bold]Work OK[/]", _pct("work_ok_rate"))
    table.add_row("[bold]Overclaim[/]", _pct("overclaim_rate"))
    table.add_row("[bold]Composer tokens[/]", _count(raw, "composer_tokens_total"))
    opus = raw.get("opus", {}) or {}
    if opus:
        table.add_row("[bold]Opus tokens[/]", _count(opus, "opus_tokens_total"))
    console.print(table)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@app.command()
def reconcile():
    """Run reconcile.sh against STATE.md claims."""
    report = api_post("/api/state/reconcile")
    checked = report.get("checked", "?")
    mismatches = report.get("discrepancies", "?")
    # Green only for an explicit zero; a missing count ("?") is shown in red.
    if mismatches == 0:
        color = "green"
    else:
        color = "red"
    console.print(f"[{color}]Checked {checked} claims, {mismatches} discrepancies[/]")
|
|
76
|
+
|
cli/commands/tasks.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Task commands: submit, list, show, cancel, logs."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from ..client import api_get, api_post
|
|
10
|
+
from ..display.formatting import status_icon, status_color
|
|
11
|
+
|
|
12
|
+
app = typer.Typer()
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@app.command()
def submit(spec_file: str = typer.Argument(..., help="Path to task spec JSON")):
    """Submit a task specification."""
    # The path itself is sent; the file is not read on the CLI side.
    payload = {"spec_file": spec_file}
    reply = api_post("/api/tasks", payload)
    task_id, state = reply["task_id"], reply["status"]
    console.print(f"[green]Submitted[/] {task_id} → {state}")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@app.command()
def list(
    status: str = typer.Option(None, help="Filter by status"),
    class_: str = typer.Option(None, "--class", help="Filter by class"),
):
    """List tasks."""
    # Only filters that were actually supplied become query parameters.
    filters = {
        name: value
        for name, value in (("status", status), ("class_", class_))
        if value
    }
    rows = api_get("/api/tasks", **filters)

    table = Table(title="Tasks")
    for heading in ("Task ID", "Status", "Submitted"):
        table.add_column(heading)
    for task in rows:
        state = task["status"]
        label = f"[{status_color(state)}]{status_icon(state)} {state}[/]"
        table.add_row(task["task_id"], label, task.get("submitted_at", "?"))
    console.print(table)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@app.command()
def show(task_id: str = typer.Argument(...), diff: bool = typer.Option(False, "--diff")):
    """Show task details.

    Prints the task id, status, optional verdict and landed SHA, and the
    iteration count. With ``--diff`` the task's diff is printed as well.
    """
    data = api_get(f"/api/tasks/{task_id}")
    console.print(f"[bold]Task:[/] {data['task_id']}")
    console.print(f"[bold]Status:[/] [{status_color(data['status'])}]{data['status']}[/]")
    if data.get("verdict"):
        console.print(f"[bold]Verdict:[/] {data['verdict']}")
    if data.get("landed_sha"):
        console.print(f"[bold]Landed:[/] {data['landed_sha']}")
    # A task with no recorded iterations (missing or null) counts as 0.
    console.print(f"[bold]Iterations:[/] {len(data.get('iterations') or [])}")
    if diff and data.get("diff"):
        console.print("\n[bold cyan]Diff:[/]")
        # Diff text is arbitrary source code: print it verbatim so bracketed
        # text such as "[/x]" is not parsed (or rejected) as rich markup.
        console.print(data["diff"], markup=False)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@app.command()
def cancel(task_id: str = typer.Argument(...)):
    """Cancel a running or queued task."""
    endpoint = f"/api/tasks/{task_id}/cancel"
    reply = api_post(endpoint)
    # Echo the id the server reports back, not the one that was typed.
    cancelled_id = reply["task_id"]
    console.print(f"[yellow]Cancelled[/] {cancelled_id}")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@app.command()
def logs(task_id: str = typer.Argument(...), follow: bool = typer.Option(False, "-f", help="Follow live (WebSocket)")):
    """View per-task log.

    Without ``-f`` the task's ``<task_id>.log`` file under ``LOGS_DIR`` is
    printed. With ``-f`` the log is streamed over the server's WebSocket until
    a JSON object with a truthy ``done`` field arrives.
    """
    import asyncio
    from ..client import get_ws_url

    def _emit(text: str, end: str = "\n") -> None:
        # Log text is arbitrary program output: disable markup so stray
        # brackets print verbatim instead of being parsed by rich.
        console.print(text, end=end, markup=False)

    if follow:
        import websockets

        async def _follow():
            url = get_ws_url(f"/ws/tasks/{task_id}")
            async with websockets.connect(url) as ws:
                async for msg in ws:
                    if isinstance(msg, bytes):
                        # Never abort the stream on a bad byte sequence.
                        _emit(msg.decode("utf-8", errors="replace"), end="")
                    elif isinstance(msg, str):
                        try:
                            data = json.loads(msg)
                        except json.JSONDecodeError:
                            _emit(msg, end="")
                            continue
                        if not isinstance(data, dict):
                            # A log line that happens to be valid JSON
                            # (e.g. "42") is still log text, not a control message.
                            _emit(msg, end="")
                        elif data.get("done"):
                            console.print(f"\n[bold]Task finished: {data.get('status', '?')}[/]")
                            break
        asyncio.run(_follow())
    else:
        # Read the log file directly
        from server.config import LOGS_DIR
        log_path = LOGS_DIR / f"{task_id}.log"
        if log_path.exists():
            _emit(log_path.read_text(errors="replace"))
        else:
            console.print(f"[dim]No log for {task_id}[/]")
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Display formatting helpers."""
|
|
2
|
+
|
|
3
|
+
# One row per task status: (status, icon glyph, rich color name).
_STATUS_STYLES = (
    ("queued", "◌", "dim"),
    ("running", "●", "blue"),
    ("accepted", "✓", "green"),
    ("rejected", "✗", "red"),
    ("blocked", "⊘", "yellow"),
    ("cancelled", "○", "dim"),
    ("infra_error", "⚠", "red"),
)

# Status -> icon glyph.
STATUS_ICONS = {status: icon for status, icon, _ in _STATUS_STYLES}

# Status -> rich color name.
STATUS_COLORS = {status: color for status, _, color in _STATUS_STYLES}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def status_icon(status: str) -> str:
    """Return the glyph for *status*, or "?" when the status is not in STATUS_ICONS."""
    try:
        return STATUS_ICONS[status]
    except KeyError:
        return "?"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def status_color(status: str) -> str:
    """Return the color for *status*, or "white" when the status is not in STATUS_COLORS."""
    try:
        return STATUS_COLORS[status]
    except KeyError:
        return "white"
|
cli/main.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""CLI for Ophar — opctl."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from .commands import tasks, metrics, system, settings
|
|
6
|
+
|
|
7
|
+
app = typer.Typer(help="Ophar CLI (opctl)")

# Command groups, mounted in this order.
for _group, _group_name, _group_help in (
    (tasks, "tasks", "Submit and manage tasks"),
    (metrics, "metrics", "Metrics dashboard"),
    (system, "system", "Server lifecycle and reconcile"),
):
    app.add_typer(_group.app, name=_group_name, help=_group_help)

# Flat commands
app.command()(settings.settings_get)
for _command_name, _command in (
    ("settings-set", settings.settings_set),
    ("serve", system.serve),
    ("stop", system.stop),
):
    app.command(name=_command_name)(_command)

if __name__ == "__main__":
    app()
|
ophar/__init__.py
ADDED
ophar/_bundle/AGENTS.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Executor boundaries (Composer)
|
|
2
|
+
|
|
3
|
+
You are the **executor**. You run headless inside a git worktree of the target repo and
|
|
4
|
+
your output is **untrusted** — an independent verifier re-derives ground truth (git diff,
|
|
5
|
+
tests, typecheck/lint, hidden checks) and decides accept/iterate. Honesty and staying in
|
|
6
|
+
bounds are what make you useful; gaming the visible signal is always caught and counts
|
|
7
|
+
against you.
|
|
8
|
+
|
|
9
|
+
## Scope
|
|
10
|
+
- Modify **only** files inside the task's allowed scope. Touching anything outside it fails
|
|
11
|
+
verification (the change is rejected and logged as a scope violation).
|
|
12
|
+
- Stay inside the worktree. Do not reach outside the workspace.
|
|
13
|
+
|
|
14
|
+
## How to make tests pass
|
|
15
|
+
- Fix the **real cause**. Do **not** delete, skip, comment out, or weaken tests/assertions,
|
|
16
|
+
and do not special-case the exact inputs shown in the visible tests.
|
|
17
|
+
- Correctness is judged on **general behavior**, from a different angle than the visible
|
|
18
|
+
tests. A solution tuned to the shown cases fails the hidden checks.
|
|
19
|
+
- Do not search for, read, or try to reconstruct hidden/held-out checks. They are not in
|
|
20
|
+
your workspace during the run; looking for them is wasted effort and a trust red flag.
|
|
21
|
+
|
|
22
|
+
## Forbidden / destructive
|
|
23
|
+
- No `rm -rf` outside the workspace, no `git push`, no `git reset --hard` on shared refs,
|
|
24
|
+
no rewriting git history, no network exfiltration, no editing CI/secrets.
|
|
25
|
+
- Prefer the smallest diff that satisfies the acceptance criterion.
|
|
26
|
+
|
|
27
|
+
## Reporting
|
|
28
|
+
- Commit your work (or leave it staged) — the orchestrator snapshots the tree.
|
|
29
|
+
- Report **honestly**. If you are blocked or unsure, say so. A false "done" is detected by
|
|
30
|
+
independent verification and lowers your trust score; an honest "blocked" does not.
|
ophar/_bundle/CLAUDE.md
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Orchestrator delegation discipline (Opus)
|
|
2
|
+
|
|
3
|
+
You are **Opus, the orchestrator** of the Opus→Composer pipeline. Your job is to plan,
|
|
4
|
+
delegate, and verify — **not** to write product code yourself. The whole economic case for
|
|
5
|
+
this pipeline depends on your context staying thin and the dirty work going to the cheap
|
|
6
|
+
executor. Read `orchestrator-pipeline-plan.md` for the full design; this file is the
|
|
7
|
+
behavioral layer (the routine rules), and `state/STATE.md` is the live state.
|
|
8
|
+
|
|
9
|
+
## Session start (before trusting anything)
|
|
10
|
+
- Run `harness/reconcile.sh` FIRST. It checks `state/STATE.md`'s machine-checkable claims
|
|
11
|
+
against git/tests/files/ledger. Until it reports 0 discrepancies, treat the prose as a
|
|
12
|
+
hint, not truth.
|
|
13
|
+
|
|
14
|
+
## Delegate, don't code
|
|
15
|
+
- Do not edit product code in the target repo yourself. Write a task spec and dispatch the
|
|
16
|
+
executor. Your edits are limited to the harness, specs, and `state/`.
|
|
17
|
+
- Every task spec states **machine-checkable acceptance criteria** ("done" = tests/typecheck/
|
|
18
|
+
lint/held-out green + scope clean), never prose like "make it nice".
|
|
19
|
+
|
|
20
|
+
## Trust ground truth, never the report
|
|
21
|
+
- Decisions come from `ground-truth.sh` (git diff, tests, typecheck/lint, held-out, scope) —
|
|
22
|
+
never from the executor's `summary`/`status`/`claimed_success`. If you catch yourself
|
|
23
|
+
accepting based on the executor's narrative, that is the trust leak this project exists to
|
|
24
|
+
prevent.
|
|
25
|
+
|
|
26
|
+
## Keep your context thin
|
|
27
|
+
- Look at diffs + test-log tails, not whole repos. Do not read files wholesale.
|
|
28
|
+
- At a logical checkpoint or when context approaches the window, write `state/STATE.md` and
|
|
29
|
+
start a fresh session that rehydrates from disk + reconcile.
|
|
30
|
+
|
|
31
|
+
## State authorship
|
|
32
|
+
- You are the sole author of `state/STATE.md` and the ledger. Keep **volatile** state OUT of
|
|
33
|
+
`CLAUDE.md` (this file loads into every session); put it in `state/`.
|
|
34
|
+
|
|
35
|
+
## Held-out (anti-overfit)
|
|
36
|
+
- Held-out checks are authored trusted-side only and never shown to the executor. On a
|
|
37
|
+
held-out failure, give a **generalized** hint ("require general correctness"), never the
|
|
38
|
+
held-out assertion itself — leaking it converts a hidden check into a visible test.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# checkpoint.sh
|
|
4
|
+
#
|
|
5
|
+
# The DETERMINISTIC checkpoint step (§4 / §8.2) — the half the plan audit found missing.
|
|
6
|
+
# STATE.md is the soft state Opus AUTHORS (prose: plan, decisions, why). This is the glue
|
|
7
|
+
# half: a zero-Opus-token, machine-written checkpoint of what git/ledger/metrics already
|
|
8
|
+
# know, plus the §8.2 critical-point TRIGGERS held as data instead of "by feel". It is meant
|
|
9
|
+
# to run BEFORE a compaction / session switch, so a fresh session rehydrates from
|
|
10
|
+
# state/checkpoint.json + state/reconcile.json + STATE.md.
|
|
11
|
+
#
|
|
12
|
+
# It NEVER rewrites STATE.md (that is Opus's authored prose). It only emits a structured
|
|
13
|
+
# checkpoint and a recommendation.
|
|
14
|
+
#
|
|
15
|
+
# §8.2 triggers (any -> checkpoint + fresh session):
|
|
16
|
+
# - context near threshold (proxy: max Opus brief_chars from opus-metrics.jsonl)
|
|
17
|
+
# - logical milestone closed (last ledger event is accept/reject/block)
|
|
18
|
+
# - quality degradation (a task hit >= DEGRADE_ITERS iterate rounds)
|
|
19
|
+
# - several iterate/reject in a row (trailing run >= CONSECUTIVE_LIMIT — context may be dirty)
|
|
20
|
+
#
|
|
21
|
+
# Emits state/checkpoint.json. Exit 10 if a checkpoint+fresh-session is recommended, else 0.
|
|
22
|
+
#
|
|
23
|
+
# Env:
|
|
24
|
+
# LEDGER, OPUS_METRICS_LOG, CHECKPOINT_OUT — override the input/output paths (tests use this).
|
|
25
|
+
# CONTEXT_PROXY_LIMIT (24000), DEGRADE_ITERS (3), CONSECUTIVE_LIMIT (2) — trigger thresholds.
|
|
26
|
+
#
|
|
27
|
+
set -uo pipefail
HARNESS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$HARNESS_DIR/.." && pwd)"

# Input/output paths and trigger thresholds; each can be overridden from the environment.
LEDGER="${LEDGER:-$ROOT/state/ledger.jsonl}"
OPUS_METRICS="${OPUS_METRICS_LOG:-$ROOT/runs/opus-metrics.jsonl}"
OUT="${CHECKPOINT_OUT:-$ROOT/state/checkpoint.json}"
CONTEXT_PROXY_LIMIT="${CONTEXT_PROXY_LIMIT:-24000}"
DEGRADE_ITERS="${DEGRADE_ITERS:-3}"
CONSECUTIVE_LIMIT="${CONSECUTIVE_LIMIT:-2}"

# Create the output directory first: the error path below writes to $OUT as well.
mkdir -p "$(dirname "$OUT")"

[[ -f "$LEDGER" ]] || { echo "checkpoint: ledger not found: $LEDGER" >&2; jq -n '{error:"no ledger"}' | tee "$OUT" >/dev/null; exit 2; }

# /context-growth proxy: the largest brief we ever fed Opus (§3). A real /context number
# would be better; this is the deterministic stand-in until that is wired.
CTX=0
if [[ -f "$OPUS_METRICS" ]]; then
  CTX="$(jq -s 'map(.brief_chars // 0) | (max // 0)' "$OPUS_METRICS" 2>/dev/null || echo 0)"
fi
# Anything that is not a plain non-negative integer (jq error text, float) counts as 0.
[[ "$CTX" =~ ^[0-9]+$ ]] || CTX=0

# Slurp the ledger (one JSON event per line) into an array and derive the checkpoint.
jq -s \
  --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  --argjson ctx "$CTX" \
  --argjson ctxlim "$CONTEXT_PROXY_LIMIT" \
  --argjson degit "$DEGRADE_ITERS" \
  --argjson conlim "$CONSECUTIVE_LIMIT" '
  . as $ev
  | ($ev | length) as $n
  | (($ev[-1].event) // "") as $last
  | ($ev | map(select(.event=="iterate")) | group_by(.task_id)
      | map({task:.[0].task_id, iters:length})) as $iters
  | ($ev | group_by(.task_id)
      | map({task:.[0].task_id, latest:(sort_by(.ts)|last.event), events:length})) as $tasks
  # trailing run of iterate/reject (count from the end until a non-{iterate,reject} event)
  | ([ ($ev|reverse)[].event ]) as $rev
  | (reduce range(0; ($rev|length)) as $i ({stop:false,c:0};
      if .stop then .
      elif ($rev[$i]=="iterate" or $rev[$i]=="reject") then {stop:false,c:(.c+1)}
      else {stop:true,c:.c} end) | .c) as $consec
  | (($iters | map(.iters) | max) // 0) as $maxit
  | {
      ts: $ts,
      ledger_events: $n,
      last_event: $last,
      tasks: $tasks,
      iterations_by_task: $iters,
      ledger_tail: ($ev[-8:] | map({ts, event, task_id})),
      signals: {
        context_proxy_chars: $ctx,
        context_proxy_limit: $ctxlim,
        context_near_threshold: ($ctx >= $ctxlim),
        milestone_closed: ($last=="accept" or $last=="reject" or $last=="block"),
        consecutive_iterate_reject: $consec,
        consecutive_limit: $conlim,
        consecutive_trip: ($consec >= $conlim),
        max_iterations_on_a_task: $maxit,
        degrade_iters_limit: $degit,
        quality_degradation: ($maxit >= $degit)
      }
    }
  | .checkpoint_recommended = (
      .signals.context_near_threshold or .signals.milestone_closed
      or .signals.consecutive_trip or .signals.quality_degradation)
  | .reasons = [
      (if .signals.context_near_threshold then "context proxy \(.signals.context_proxy_chars) >= \(.signals.context_proxy_limit) chars (near window threshold)" else empty end),
      (if .signals.milestone_closed then "logical milestone closed (last event: \(.last_event))" else empty end),
      (if .signals.consecutive_trip then "\(.signals.consecutive_iterate_reject) consecutive iterate/reject (context may be polluted)" else empty end),
      (if .signals.quality_degradation then "a task reached \(.signals.max_iterations_on_a_task) iterate rounds (quality degradation)" else empty end)
    ]
' "$LEDGER" | tee "$OUT"
BUILD_RC=$?

# With pipefail, a jq failure (malformed ledger line, non-JSON threshold value) surfaces
# here. Without this check the empty $OUT would fall through to "continue current session".
if [[ "$BUILD_RC" -ne 0 ]]; then
  echo "checkpoint: failed to build checkpoint from $LEDGER (exit $BUILD_RC)" >&2
  jq -n '{error:"checkpoint build failed"}' > "$OUT"
  exit 2
fi

REC="$(jq -r '.checkpoint_recommended' "$OUT" 2>/dev/null)"
if [[ "$REC" == "true" ]]; then
  echo "checkpoint: RECOMMENDED — $(jq -rc '.reasons' "$OUT")" >&2
  exit 10
fi
echo "checkpoint: no critical-point trigger; continue current session" >&2
exit 0
|