avatar-runtime 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avatar/__init__.py +45 -0
- avatar/api/__init__.py +10 -0
- avatar/api/app.py +106 -0
- avatar/api/ratelimit.py +36 -0
- avatar/api/routes.py +350 -0
- avatar/cli.py +194 -0
- avatar/config.py +119 -0
- avatar/dashboard/index.html +301 -0
- avatar/dashboard/landing.html +352 -0
- avatar/demo.py +168 -0
- avatar/engine/__init__.py +33 -0
- avatar/engine/_tool_runner.py +44 -0
- avatar/engine/budget.py +23 -0
- avatar/engine/db.py +98 -0
- avatar/engine/idempotency.py +42 -0
- avatar/engine/models.py +180 -0
- avatar/engine/policy.py +42 -0
- avatar/engine/registry.py +144 -0
- avatar/engine/replay.py +121 -0
- avatar/engine/runtime.py +411 -0
- avatar/engine/schema.sql +75 -0
- avatar/engine/tools.py +126 -0
- avatar/engine/worker.py +237 -0
- avatar/sdk/__init__.py +193 -0
- avatar_runtime-0.1.0.dist-info/METADATA +373 -0
- avatar_runtime-0.1.0.dist-info/RECORD +31 -0
- avatar_runtime-0.1.0.dist-info/WHEEL +5 -0
- avatar_runtime-0.1.0.dist-info/entry_points.txt +2 -0
- avatar_runtime-0.1.0.dist-info/licenses/LICENSE +201 -0
- avatar_runtime-0.1.0.dist-info/licenses/NOTICE +7 -0
- avatar_runtime-0.1.0.dist-info/top_level.txt +1 -0
avatar/__init__.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Copyright 2026 Avatar Runtime Authors
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Avatar — a durable execution engine for AI agents.
|
|
5
|
+
|
|
6
|
+
"Temporal for AI agents." A crash-safe, append-only, replayable state machine
|
|
7
|
+
for an LLM agent's ``plan → tool → observe → commit`` loop, backed entirely by
|
|
8
|
+
Postgres. A worker can die at any point; another resumes from the ledger and no
|
|
9
|
+
tool side effect is dispatched twice from Avatar's side.
|
|
10
|
+
|
|
11
|
+
The developer-facing surface lives here::
|
|
12
|
+
|
|
13
|
+
from avatar import Avatar, agent, tool, Plan, State, ToolCall
|
|
14
|
+
|
|
15
|
+
See ``avatar.sdk`` for the client and decorators, ``avatar.engine`` for the
|
|
16
|
+
durable core, and ``avatar.api`` for the control API.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
__version__ = "0.1.0"
|
|
22
|
+
|
|
23
|
+
# Re-export the full documented SDK surface so the README/sdk examples
|
|
24
|
+
# (``from avatar import Avatar, tool, Plan, ToolCall``) work off the top-level
|
|
25
|
+
# package, not just ``avatar.sdk``.
|
|
26
|
+
from avatar.sdk import ( # noqa: E402
|
|
27
|
+
Avatar,
|
|
28
|
+
Plan,
|
|
29
|
+
State,
|
|
30
|
+
ToolCall,
|
|
31
|
+
agent,
|
|
32
|
+
current_idempotency_key,
|
|
33
|
+
tool,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"Avatar",
|
|
38
|
+
"agent",
|
|
39
|
+
"tool",
|
|
40
|
+
"Plan",
|
|
41
|
+
"State",
|
|
42
|
+
"ToolCall",
|
|
43
|
+
"current_idempotency_key",
|
|
44
|
+
"__version__",
|
|
45
|
+
]
|
avatar/api/__init__.py
ADDED
avatar/api/app.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Copyright 2026 Avatar Runtime Authors
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""FastAPI application factory: lifespan (DB), single-key auth, routes, dashboard."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import contextlib
|
|
9
|
+
import hmac
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from fastapi import FastAPI, Header, HTTPException, Request
|
|
13
|
+
from fastapi.responses import HTMLResponse
|
|
14
|
+
from fastapi.staticfiles import StaticFiles
|
|
15
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
16
|
+
|
|
17
|
+
from avatar.api.ratelimit import TokenBucket
|
|
18
|
+
from avatar.config import Settings, check_startup_safety, load_settings
|
|
19
|
+
from avatar.engine.db import create_engine, create_session_factory, init_db
|
|
20
|
+
|
|
21
|
+
# Ships inside the package (avatar/dashboard/) so it is present on a `pip
|
|
22
|
+
# install` as well as in the Docker image — not just in a source checkout.
|
|
23
|
+
_DASHBOARD_DIR = Path(__file__).resolve().parent.parent / "dashboard"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_app(settings: Settings | None = None) -> FastAPI:
    """Build the FastAPI application: lifespan, API routes and dashboard pages.

    Args:
        settings: Configuration to use. When omitted (or falsy) it is obtained
            from ``load_settings()``.

    Returns:
        The configured ``FastAPI`` instance.

    Anything raised by ``check_startup_safety`` propagates to the caller.
    """
    import logging

    settings = settings or load_settings()
    # Fail fast on an insecure production configuration (default API key, etc.).
    check_startup_safety(settings)

    @contextlib.asynccontextmanager
    async def lifespan(app: FastAPI):
        engine = create_engine(settings.database_url, settings)
        try:
            if settings.is_sqlite:
                await init_db(engine)
            app.state.engine = engine
            app.state.session_factory = create_session_factory(engine)
            app.state.settings = settings
            app.state.rate_limiter = TokenBucket(
                settings.rate_limit_per_second, settings.rate_limit_burst
            )
            # Load developer agents/tools if configured. This stays best-effort
            # (startup continues on failure), but the failure is logged rather
            # than silently discarded.
            try:
                from avatar.engine.registry import load_app

                load_app()
            except Exception:  # noqa: BLE001
                logging.getLogger(__name__).warning(
                    "loading developer agents/tools failed; continuing without them",
                    exc_info=True,
                )
            yield
        finally:
            # Dispose the engine even when startup or shutdown raised.
            await engine.dispose()

    app = FastAPI(title="Avatar", version="0.1.0", lifespan=lifespan)

    # Imported here rather than at module top: avatar.api.routes itself imports
    # from this module.
    from avatar.api.routes import router

    app.include_router(router)

    # Marketing landing at `/` (static), the developer dashboard at `/app`.
    if _DASHBOARD_DIR.exists():

        @app.get("/", response_class=HTMLResponse)
        async def landing() -> str:
            landing_file = _DASHBOARD_DIR / "landing.html"
            if landing_file.exists():
                # Explicit encoding: do not depend on the host locale.
                return landing_file.read_text(encoding="utf-8")
            return _dashboard_html(settings)

        @app.get("/app", response_class=HTMLResponse)
        async def dashboard_index() -> str:
            return _dashboard_html(settings)

        app.mount(
            "/static", StaticFiles(directory=str(_DASHBOARD_DIR)), name="static"
        )

    return app
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _dashboard_html(settings: Settings) -> str:
    """Render the dashboard. The static API key is injected ONLY in dev mode;
    in production the page ships with no key and prompts the operator for one
    (kept in localStorage), so the key is never embedded in served HTML.

    Args:
        settings: Supplies ``dev_mode`` and ``api_key``.

    Returns:
        The contents of ``index.html`` with every ``__AVATAR_API_KEY__``
        placeholder replaced by the key (dev mode) or an empty string.
    """
    # Explicit encoding so the template decodes the same way on every host
    # instead of following the locale default.
    html = (_DASHBOARD_DIR / "index.html").read_text(encoding="utf-8")
    injected = settings.api_key if settings.dev_mode else ""
    return html.replace("__AVATAR_API_KEY__", injected)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# --- shared dependencies -----------------------------------------------------
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
async def require_auth(request: Request, authorization: str = Header(default="")) -> None:
    """Single static API key. ``Authorization: Bearer <key>``. Nothing else.

    Uses a constant-time comparison to avoid leaking the key via timing.

    Args:
        request: Incoming request; the key is read from
            ``request.app.state.settings.api_key``.
        authorization: Value of the ``Authorization`` header ("" when absent).

    Raises:
        HTTPException: 401 when the header does not match exactly.
    """
    settings: Settings = request.app.state.settings
    expected = f"Bearer {settings.api_key}"
    # hmac.compare_digest() raises TypeError when given a str containing
    # non-ASCII characters. Comparing encoded bytes keeps the check
    # constant-time and makes such a header a normal 401.
    supplied_bytes = authorization.encode("utf-8")
    expected_bytes = expected.encode("utf-8")
    if not hmac.compare_digest(supplied_bytes, expected_bytes):
        raise HTTPException(status_code=401, detail="invalid or missing API key")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
async def get_session(request: Request) -> AsyncSession:
    """Yield one database session for the duration of a request.

    The session comes from ``request.app.state.session_factory`` and is used as
    an async context manager, so it is closed once the consumer is done with it.
    The function is an async generator even though the annotation names the
    yielded type.
    """
    make_session = request.app.state.session_factory
    async with make_session() as db_session:
        yield db_session
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Re-export for routes module convenience.
|
|
106
|
+
__all__ = ["create_app", "require_auth", "get_session"]
|
avatar/api/ratelimit.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Copyright 2026 Avatar Runtime Authors
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""A tiny in-process token-bucket rate limiter for the control API.
|
|
5
|
+
|
|
6
|
+
Single static key ⇒ a single global bucket is sufficient (per-key == global).
|
|
7
|
+
This guards the write path (enqueue) against a client flooding the queue. It is
|
|
8
|
+
intentionally process-local: with multiple API replicas, set the limit per
|
|
9
|
+
replica or front the API with a gateway. For per-tenant limits, see the
|
|
10
|
+
Avatar Cloud roadmap.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TokenBucket:
    """Process-local token bucket.

    ``rate`` tokens are added per second up to ``capacity``; the bucket starts
    full. Elapsed time is measured with ``time.monotonic()``.
    """

    def __init__(self, rate_per_second: float, burst: int) -> None:
        """Create a full bucket.

        Args:
            rate_per_second: Refill rate; negative values are clamped to 0.
            burst: Bucket capacity; values below 1 are clamped to 1.
        """
        self.rate = max(0.0, rate_per_second)
        self.capacity = max(1, burst)
        self.updated = time.monotonic()
        self.tokens = float(self.capacity)

    def allow(self, cost: float = 1.0) -> bool:
        """Try to spend ``cost`` tokens.

        Returns:
            True when the tokens were taken, or when the rate is non-positive
            (limiting disabled). False when the bucket holds fewer than
            ``cost`` tokens after refilling.
        """
        if self.rate <= 0:
            return True

        # Refill for the time elapsed since the previous call, capped at capacity.
        moment = time.monotonic()
        topped_up = self.tokens + (moment - self.updated) * self.rate
        self.tokens = min(self.capacity, topped_up)
        self.updated = moment

        granted = self.tokens >= cost
        if granted:
            self.tokens -= cost
        return granted
|
avatar/api/routes.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
# Copyright 2026 Avatar Runtime Authors
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Control API endpoints (§ control surface).
|
|
5
|
+
|
|
6
|
+
All ``/v1`` routes require the single static API key. SSE streams the ledger as
|
|
7
|
+
steps commit. The dashboard and SDK are both pure clients of these routes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import json
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from fastapi import APIRouter, Depends, HTTPException, Request
|
|
17
|
+
from fastapi.responses import PlainTextResponse, StreamingResponse
|
|
18
|
+
from pydantic import BaseModel, Field
|
|
19
|
+
from sqlalchemy import func, select
|
|
20
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
21
|
+
|
|
22
|
+
from avatar.api.app import get_session, require_auth
|
|
23
|
+
from avatar.engine.models import (
|
|
24
|
+
RUN_STATUSES,
|
|
25
|
+
TERMINAL_STATUSES,
|
|
26
|
+
AgentRun,
|
|
27
|
+
AgentRunStep,
|
|
28
|
+
Approval,
|
|
29
|
+
utcnow,
|
|
30
|
+
)
|
|
31
|
+
from avatar.engine.replay import fork_run
|
|
32
|
+
|
|
33
|
+
router = APIRouter()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def rate_limit(request: Request) -> None:
    """Dependency that throttles the write path.

    Reads ``rate_limiter`` from ``request.app.state``. When it is absent, or it
    allows the call, the request proceeds.

    Raises:
        HTTPException: 429 with ``Retry-After: 1`` when the limiter refuses.
    """
    bucket = getattr(request.app.state, "rate_limiter", None)
    if bucket is None:
        return
    if bucket.allow():
        return
    raise HTTPException(
        status_code=429,
        detail="rate limit exceeded",
        headers={"Retry-After": "1"},
    )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# --- request/response models -------------------------------------------------
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class CreateRun(BaseModel):
    """Request body accepted by ``POST /v1/runs`` (see ``create_run``)."""

    # Copied onto the new run's ``agent_ref``.
    agent_ref: str
    # Stored as the run's ``input``; defaults to a fresh empty dict.
    input: dict[str, Any] = Field(default_factory=dict)
    # Optional; passed through unchanged as the run's ``budget_cap_cents``.
    # NOTE(review): presumably ``None`` means "no cap" — confirm against the engine.
    budget_cap_cents: int | None = None
    # Optional; when set and a run with this key already exists, ``create_run``
    # returns that run instead of enqueuing a new one.
    idempotency_key: str | None = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ReplayReq(BaseModel):
    """Request body accepted by ``POST /v1/runs/{run_id}/replay``."""

    # Handed to ``fork_run`` together with the source run, and echoed back as
    # ``fork_seq`` in the response.
    from_seq: int
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _run_summary(r: AgentRun) -> dict:
    """Return the JSON-ready summary of a run.

    Plain attributes are copied as-is; ``created_at`` / ``updated_at`` are
    rendered with ``isoformat()`` or left as ``None`` when unset.
    """
    copied = (
        "id",
        "agent_ref",
        "status",
        "attempt",
        "cursor_seq",
        "budget_cap_cents",
        "budget_used_cents",
        "error_class",
        "output",
        "forked_from",
        "fork_seq",
    )
    summary = {field: getattr(r, field) for field in copied}
    for stamp in ("created_at", "updated_at"):
        value = getattr(r, stamp)
        summary[stamp] = value.isoformat() if value else None
    return summary
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _step_dict(s: AgentRunStep) -> dict:
    """Return the JSON-ready form of one ledger step.

    Plain attributes are copied as-is; ``created_at`` is rendered with
    ``isoformat()`` or left as ``None`` when unset.
    """
    copied = (
        "seq",
        "type",
        "payload",
        "tool_call_id",
        "idempotency_key",
        "cost_cents",
        "worker_id",
        "attempt",
    )
    step = {field: getattr(s, field) for field in copied}
    created = s.created_at
    step["created_at"] = created.isoformat() if created else None
    return step
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def _get_run(db: AsyncSession, run_id: str) -> AgentRun:
    """Load the run with the given id.

    Raises:
        HTTPException: 404 when no such run exists.
    """
    lookup = select(AgentRun).where(AgentRun.id == run_id)
    found = (await db.execute(lookup)).scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="run not found")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# --- endpoints ---------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@router.post(
    "/v1/runs",
    status_code=202,
    dependencies=[Depends(require_auth), Depends(rate_limit)],
)
async def create_run(
    body: CreateRun, request: Request, db: AsyncSession = Depends(get_session)
) -> dict:
    """Enqueue a new run, or return the existing one for a repeated idempotency key.

    Args:
        body: Agent reference, input, optional budget cap and idempotency key.
        request: Used to read ``max_queue_depth`` from the app settings.
        db: Request-scoped database session.

    Returns:
        ``{"id": ..., "status": ...}`` of the new (or pre-existing) run.

    Raises:
        HTTPException: 429 with ``Retry-After: 5`` when ``max_queue_depth`` is
            positive and at least that many runs are already queued.
    """
    from sqlalchemy.exc import IntegrityError

    async def _find_by_key() -> AgentRun | None:
        # Look up the run already registered under the caller's idempotency key.
        return (
            await db.execute(
                select(AgentRun).where(AgentRun.idempotency_key == body.idempotency_key)
            )
        ).scalar_one_or_none()

    if body.idempotency_key:
        existing = await _find_by_key()
        if existing is not None:
            return {"id": existing.id, "status": existing.status}
    # Backpressure: refuse new work when the queue is already saturated.
    cap = request.app.state.settings.max_queue_depth
    if cap > 0:
        queued = (
            await db.execute(
                select(func.count())
                .select_from(AgentRun)
                .where(AgentRun.status == "queued")
            )
        ).scalar_one()
        if queued >= cap:
            raise HTTPException(
                status_code=429,
                detail=f"queue is full ({queued} queued, cap {cap})",
                headers={"Retry-After": "5"},
            )
    run = AgentRun(
        agent_ref=body.agent_ref,
        input=body.input,
        budget_cap_cents=body.budget_cap_cents,
        idempotency_key=body.idempotency_key,
        status="queued",
    )
    db.add(run)
    try:
        await db.commit()
    except IntegrityError:
        # The lookup above and this insert are not atomic: a concurrent request
        # carrying the same key may have committed in between.
        # NOTE(review): assumes idempotency_key has a unique constraint — confirm
        # in the schema. Any other integrity failure is re-raised unchanged.
        await db.rollback()
        if not body.idempotency_key:
            raise
        winner = await _find_by_key()
        if winner is None:
            raise
        return {"id": winner.id, "status": winner.status}
    return {"id": run.id, "status": run.status}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@router.get("/v1/runs", dependencies=[Depends(require_auth)])
async def list_runs(
    status: str | None = None,
    limit: int = 50,
    db: AsyncSession = Depends(get_session),
) -> dict:
    """List runs, newest first.

    Args:
        status: When given, only runs with exactly this status are returned.
        limit: Maximum number of rows; clamped to the range 0..200.
        db: Request-scoped database session.

    Returns:
        ``{"runs": [...]}`` with one summary dict per run.
    """
    # Clamp at both ends: a negative LIMIT is an error on some databases and
    # means "no limit" on others, which would defeat the 200-row ceiling.
    page_size = max(0, min(limit, 200))
    q = select(AgentRun).order_by(AgentRun.created_at.desc()).limit(page_size)
    if status:
        q = q.where(AgentRun.status == status)
    rows = (await db.execute(q)).scalars().all()
    return {"runs": [_run_summary(r) for r in rows]}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@router.get("/v1/runs/{run_id}", dependencies=[Depends(require_auth)])
async def get_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Return the summary of one run; ``_get_run`` raises 404 when it is missing."""
    found = await _get_run(db, run_id)
    return _run_summary(found)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@router.get("/v1/runs/{run_id}/steps", dependencies=[Depends(require_auth)])
async def get_steps(run_id: str, db: AsyncSession = Depends(get_session)) -> list[dict]:
    """Return every ledger step of a run, ordered by ``seq``.

    The run is looked up first so an unknown id yields the 404 from ``_get_run``.
    """
    await _get_run(db, run_id)
    ordered_steps = (
        select(AgentRunStep)
        .where(AgentRunStep.run_id == run_id)
        .order_by(AgentRunStep.seq)
    )
    result = await db.execute(ordered_steps)
    return [_step_dict(step) for step in result.scalars().all()]
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@router.post("/v1/runs/{run_id}/cancel", dependencies=[Depends(require_auth)])
async def cancel_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Request cancellation of a run and return its summary.

    A run already in a terminal status is returned untouched. Otherwise
    ``cancel_requested`` is set; a run that is still queued is additionally
    marked ``failed`` with error class ``cancelled`` right away.
    """
    target = await _get_run(db, run_id)
    if target.status not in TERMINAL_STATUSES:
        target.cancel_requested = True
        never_started = target.status == "queued"
        if never_started:
            # Nothing has picked it up yet, so finish the cancellation here.
            target.status = "failed"
            target.error_class = "cancelled"
            target.output = {"error": "cancelled before start"}
        await db.commit()
    return _run_summary(target)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@router.post("/v1/runs/{run_id}/approve", dependencies=[Depends(require_auth)])
async def approve_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Resolve the run's pending approval as ``approved`` and return its summary."""
    summary = await _resolve_approval(db, run_id, "approved")
    return summary
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@router.post("/v1/runs/{run_id}/reject", dependencies=[Depends(require_auth)])
async def reject_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Resolve the run's pending approval as ``rejected`` and return its summary."""
    summary = await _resolve_approval(db, run_id, "rejected")
    return summary
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
async def _resolve_approval(db: AsyncSession, run_id: str, decision: str) -> dict:
    """Record a decision on the run's newest pending approval and re-queue the run.

    Args:
        db: Database session.
        run_id: Run whose approval is being resolved.
        decision: Status written to the approval row; the callers in this
            module pass ``"approved"`` or ``"rejected"``.

    Returns:
        Summary of the re-queued run.

    Raises:
        HTTPException: 404 for an unknown run (via ``_get_run``); 409 when the
            run is not in ``approval_wait`` or has no pending approval.
    """
    run = await _get_run(db, run_id)
    if run.status != "approval_wait":
        raise HTTPException(status_code=409, detail="run is not awaiting approval")

    newest_pending = (
        select(Approval)
        .where(Approval.run_id == run_id, Approval.status == "pending")
        .order_by(Approval.created_at.desc())
    )
    pending = (await db.execute(newest_pending)).scalars().first()
    if pending is None:
        raise HTTPException(status_code=409, detail="no pending approval")

    pending.status = decision
    pending.decided_at = utcnow()

    # Re-queue so a worker resumes and either dispatches or records the rejection.
    run.status = "queued"
    run.lease_owner = None
    run.lease_expires_at = None

    await db.commit()
    return _run_summary(run)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
@router.post("/v1/runs/{run_id}/replay", dependencies=[Depends(require_auth)])
async def replay_run(
    run_id: str, body: ReplayReq, db: AsyncSession = Depends(get_session)
) -> dict:
    """Fork a run at ``body.from_seq`` via ``fork_run`` and describe the new run.

    Returns:
        The new run's id and status, plus the source run id and fork sequence.
    """
    origin = await _get_run(db, run_id)
    forked = await fork_run(db, origin, body.from_seq)
    reply = {"id": forked.id, "status": forked.status}
    reply["forked_from"] = run_id
    reply["fork_seq"] = body.from_seq
    return reply
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@router.get("/v1/runs/{run_id}/stream", dependencies=[Depends(require_auth)])
async def stream_run(run_id: str, request: Request) -> StreamingResponse:
    """Stream a run's ledger as server-sent events.

    Each poll opens a fresh session, emits every step with a ``seq`` above the
    last one sent, then sleeps 0.3 s. The stream ends when the client
    disconnects, when the run does not exist (after one ``error`` event), or
    when the run is in a terminal status or ``approval_wait`` (after one
    ``status`` event).
    """
    session_factory = request.app.state.session_factory

    async def event_gen():
        cursor = 0
        # The first pass replays existing steps; later passes tail new ones.
        while not await request.is_disconnected():
            async with session_factory() as db:
                run_row = (
                    await db.execute(select(AgentRun).where(AgentRun.id == run_id))
                ).scalar_one_or_none()
                if run_row is None:
                    yield _sse({"event": "error", "detail": "run not found"})
                    return

                unseen = (
                    select(AgentRunStep)
                    .where(AgentRunStep.run_id == run_id, AgentRunStep.seq > cursor)
                    .order_by(AgentRunStep.seq)
                )
                for step in (await db.execute(unseen)).scalars().all():
                    cursor = step.seq
                    yield _sse({"event": "step", **_step_dict(step)})

                settled = (
                    run_row.status in TERMINAL_STATUSES
                    or run_row.status == "approval_wait"
                )
                if settled:
                    yield _sse({"event": "status", "status": run_row.status,
                                "output": run_row.output})
                    return
            # Sleep only after the session has been closed.
            await asyncio.sleep(0.3)

    return StreamingResponse(event_gen(), media_type="text/event-stream")
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _sse(obj: dict) -> str:
    """Frame ``obj`` as one server-sent-event ``data:`` record.

    Values that JSON cannot encode natively are converted with ``str``.
    """
    encoded = json.dumps(obj, default=str)
    return "data: " + encoded + "\n\n"
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@router.get("/healthz")
async def healthz() -> dict:
    """Liveness probe: always answers ``{"ok": True}`` without touching the database."""
    alive = {"ok": True}
    return alive
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@router.get("/readyz")
async def readyz(request: Request) -> dict:
    """Readiness probe: succeeds when a trivial query against the runs table works.

    Returns:
        ``{"ready": True}`` when the query succeeds.

    Raises:
        HTTPException: 503 when opening a session or running the query fails.
    """
    import logging

    factory = request.app.state.session_factory
    try:
        async with factory() as db:
            await db.execute(select(AgentRun.id).limit(1))
    except Exception as exc:  # noqa: BLE001
        # This route has no auth dependency, so the raw driver/SQL error text
        # is kept out of the response. The full traceback goes to the log and
        # the caller only learns the exception class.
        logging.getLogger(__name__).warning("readiness check failed", exc_info=True)
        raise HTTPException(
            status_code=503, detail=f"db not ready: {type(exc).__name__}"
        ) from exc
    return {"ready": True}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
async def _fleet_stats(db: AsyncSession) -> dict:
    """Operational snapshot: runs by status, dead count, oldest-queued age.

    Args:
        db: Database session.

    Returns:
        A dict with ``by_status`` (every status in ``RUN_STATUSES``, zero-filled,
        plus any other status found in the table), ``queue_depth``, ``running``
        (leased + running), ``dead`` and ``oldest_queued_age_seconds`` (rounded
        to 0.1 s; 0.0 when nothing is queued).
    """
    from datetime import timezone

    rows = (
        await db.execute(
            select(AgentRun.status, func.count()).group_by(AgentRun.status)
        )
    ).all()
    by_status = {s: 0 for s in RUN_STATUSES}
    by_status.update({status: n for status, n in rows})
    oldest_queued = (
        await db.execute(
            select(func.min(AgentRun.created_at)).where(AgentRun.status == "queued")
        )
    ).scalar_one_or_none()
    age = 0.0
    if oldest_queued:
        now = utcnow()
        # Subtracting a naive datetime from an aware one raises TypeError, and
        # some database drivers hand timestamps back without tzinfo. Align the
        # two values before subtracting.
        # NOTE(review): assumes naive values on either side are UTC — confirm
        # against utcnow() and the column definition.
        if oldest_queued.tzinfo is None and now.tzinfo is not None:
            oldest_queued = oldest_queued.replace(tzinfo=now.tzinfo)
        elif oldest_queued.tzinfo is not None and now.tzinfo is None:
            oldest_queued = oldest_queued.astimezone(timezone.utc).replace(tzinfo=None)
        # Clock skew between API host and database must not yield a negative age.
        age = max(0.0, (now - oldest_queued).total_seconds())
    return {
        "by_status": by_status,
        "queue_depth": by_status.get("queued", 0),
        "running": by_status.get("leased", 0) + by_status.get("running", 0),
        "dead": by_status.get("dead", 0),
        "oldest_queued_age_seconds": round(age, 1),
    }
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
@router.get("/v1/stats", dependencies=[Depends(require_auth)])
async def stats(db: AsyncSession = Depends(get_session)) -> dict:
    """Return the ``_fleet_stats`` snapshot as JSON (authenticated)."""
    snapshot = await _fleet_stats(db)
    return snapshot
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
@router.get("/metrics", response_class=PlainTextResponse)
async def metrics(request: Request) -> str:
    """Prometheus text exposition of the ``_fleet_stats`` snapshot.

    Unauthenticated by convention so a scraper can reach it; put it behind your
    network policy / reverse proxy.
    """
    session_factory = request.app.state.session_factory
    async with session_factory() as db:
        snap = await _fleet_stats(db)

    # One gauge sample per status, followed by the scalar gauges.
    exposition = [
        "# HELP avatar_runs Total runs by status.",
        "# TYPE avatar_runs gauge",
        *(
            f'avatar_runs{{status="{status}"}} {n}'
            for status, n in snap["by_status"].items()
        ),
        "# HELP avatar_queue_depth Runs currently queued.",
        "# TYPE avatar_queue_depth gauge",
        f"avatar_queue_depth {snap['queue_depth']}",
        "# HELP avatar_runs_running Runs currently leased/running.",
        "# TYPE avatar_runs_running gauge",
        f"avatar_runs_running {snap['running']}",
        "# HELP avatar_runs_dead Dead-lettered (poison) runs.",
        "# TYPE avatar_runs_dead gauge",
        f"avatar_runs_dead {snap['dead']}",
        "# HELP avatar_oldest_queued_age_seconds Age of the oldest queued run.",
        "# TYPE avatar_oldest_queued_age_seconds gauge",
        f"avatar_oldest_queued_age_seconds {snap['oldest_queued_age_seconds']}",
    ]
    return "\n".join(exposition) + "\n"
|