avatar-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avatar/__init__.py ADDED
@@ -0,0 +1,45 @@
1
+ # Copyright 2026 Avatar Runtime Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Avatar — a durable execution engine for AI agents.
5
+
6
+ "Temporal for AI agents." A crash-safe, append-only, replayable state machine
7
+ for an LLM agent's ``plan → tool → observe → commit`` loop, backed entirely by
8
+ Postgres. A worker can die at any point; another resumes from the ledger and no
9
+ tool side effect is dispatched twice from Avatar's side.
10
+
11
+ The developer-facing surface lives here::
12
+
13
+ from avatar import Avatar, agent, tool, Plan, State, ToolCall
14
+
15
+ See ``avatar.sdk`` for the client and decorators, ``avatar.engine`` for the
16
+ durable core, and ``avatar.api`` for the control API.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ __version__ = "0.1.0"
22
+
23
+ # Re-export the full documented SDK surface so the README/sdk examples
24
+ # (``from avatar import Avatar, tool, Plan, ToolCall``) work off the top-level
25
+ # package, not just ``avatar.sdk``.
26
+ from avatar.sdk import ( # noqa: E402
27
+ Avatar,
28
+ Plan,
29
+ State,
30
+ ToolCall,
31
+ agent,
32
+ current_idempotency_key,
33
+ tool,
34
+ )
35
+
36
+ __all__ = [
37
+ "Avatar",
38
+ "agent",
39
+ "tool",
40
+ "Plan",
41
+ "State",
42
+ "ToolCall",
43
+ "current_idempotency_key",
44
+ "__version__",
45
+ ]
avatar/api/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # Copyright 2026 Avatar Runtime Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Avatar control API (FastAPI) — single-key auth + SSE."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from avatar.api.app import create_app
9
+
10
+ __all__ = ["create_app"]
avatar/api/app.py ADDED
@@ -0,0 +1,106 @@
1
+ # Copyright 2026 Avatar Runtime Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """FastAPI application factory: lifespan (DB), single-key auth, routes, dashboard."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import contextlib
9
+ import hmac
10
+ from pathlib import Path
11
+
12
+ from fastapi import FastAPI, Header, HTTPException, Request
13
+ from fastapi.responses import HTMLResponse
14
+ from fastapi.staticfiles import StaticFiles
15
+ from sqlalchemy.ext.asyncio import AsyncSession
16
+
17
+ from avatar.api.ratelimit import TokenBucket
18
+ from avatar.config import Settings, check_startup_safety, load_settings
19
+ from avatar.engine.db import create_engine, create_session_factory, init_db
20
+
21
+ # Ships inside the package (avatar/dashboard/) so it is present on a `pip
22
+ # install` as well as in the Docker image — not just in a source checkout.
23
+ _DASHBOARD_DIR = Path(__file__).resolve().parent.parent / "dashboard"
24
+
25
+
26
def create_app(settings: Settings | None = None) -> FastAPI:
    """Build the FastAPI application: lifespan, API router and dashboard routes.

    Args:
        settings: Explicit settings. When omitted (or falsy) they are obtained
            from ``load_settings()``.

    Returns:
        The configured ``FastAPI`` instance. The landing page, ``/app``
        dashboard and ``/static`` mount are only registered when the packaged
        dashboard directory exists.

    Raises:
        Whatever ``check_startup_safety`` raises for an unsafe configuration
        (defined elsewhere; not visible here).
    """
    import logging

    logger = logging.getLogger(__name__)

    settings = settings or load_settings()
    # Fail fast on an insecure production configuration (default API key, etc.).
    check_startup_safety(settings)

    @contextlib.asynccontextmanager
    async def lifespan(app: FastAPI):
        engine = create_engine(settings.database_url, settings)
        try:
            if settings.is_sqlite:
                await init_db(engine)
            app.state.engine = engine
            app.state.session_factory = create_session_factory(engine)
            app.state.settings = settings
            app.state.rate_limiter = TokenBucket(
                settings.rate_limit_per_second, settings.rate_limit_burst
            )
            # Load developer agents/tools if configured. This stays best-effort
            # (a failure must not prevent the API from starting), but the
            # failure is logged instead of being silently discarded.
            try:
                from avatar.engine.registry import load_app

                load_app()
            except Exception:  # noqa: BLE001 - deliberate best-effort
                logger.warning("could not load developer agents/tools", exc_info=True)
            yield
        finally:
            # Dispose even when startup fails half-way or the lifespan is
            # unwound by an exception, so pooled connections are not leaked.
            await engine.dispose()

    app = FastAPI(title="Avatar", version="0.1.0", lifespan=lifespan)

    # Imported here rather than at module top: the routes module imports
    # ``require_auth``/``get_session`` from this module.
    from avatar.api.routes import router

    app.include_router(router)

    # Marketing landing at `/` (static), the developer dashboard at `/app`.
    if _DASHBOARD_DIR.exists():

        @app.get("/", response_class=HTMLResponse)
        async def landing() -> str:
            landing_file = _DASHBOARD_DIR / "landing.html"
            if landing_file.exists():
                # Explicit encoding: the default is the platform locale, which
                # may not be UTF-8.
                return landing_file.read_text(encoding="utf-8")
            return _dashboard_html(settings)

        @app.get("/app", response_class=HTMLResponse)
        async def dashboard_index() -> str:
            return _dashboard_html(settings)

        app.mount(
            "/static", StaticFiles(directory=str(_DASHBOARD_DIR)), name="static"
        )

    return app
74
+
75
+
76
def _dashboard_html(settings: Settings) -> str:
    """Render the dashboard page from ``index.html`` in the dashboard directory.

    The static API key is injected ONLY in dev mode; in production the
    ``__AVATAR_API_KEY__`` placeholder is replaced with an empty string, so the
    key is never embedded in served HTML.

    Args:
        settings: Supplies ``dev_mode`` and ``api_key``.

    Returns:
        The HTML text with the placeholder substituted.
    """
    # Decode explicitly as UTF-8; the default would be the platform locale
    # encoding, which can mangle non-ASCII characters in the page.
    html = (_DASHBOARD_DIR / "index.html").read_text(encoding="utf-8")
    injected = settings.api_key if settings.dev_mode else ""
    return html.replace("__AVATAR_API_KEY__", injected)
83
+
84
+
85
+ # --- shared dependencies -----------------------------------------------------
86
+
87
+
88
async def require_auth(request: Request, authorization: str = Header(default="")) -> None:
    """Single static API key. ``Authorization: Bearer <key>``. Nothing else.

    Uses a constant-time comparison to avoid leaking the key via timing.

    Args:
        request: Used to reach ``app.state.settings`` for the expected key.
        authorization: Raw ``Authorization`` header value (empty when absent).

    Raises:
        HTTPException: 401 when the header does not match ``Bearer <api_key>``.
    """
    settings: Settings = request.app.state.settings
    expected = f"Bearer {settings.api_key}"
    # hmac.compare_digest only accepts ASCII-only ``str`` arguments and raises
    # TypeError otherwise; a header carrying non-ASCII characters would then
    # surface as a 500 instead of a 401. Comparing the encoded bytes keeps the
    # constant-time property and accepts any text.
    supplied = authorization.encode("utf-8")
    if not hmac.compare_digest(supplied, expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="invalid or missing API key")
97
+
98
+
99
async def get_session(request: Request) -> AsyncSession:
    """Yield one database session built from ``app.state.session_factory``.

    This is an async generator: the session is yielded from inside the
    factory's ``async with`` block, so the context exits once the consumer
    is finished with it.
    """
    open_session = request.app.state.session_factory
    async with open_session() as db:
        yield db
103
+
104
+
105
+ # Re-export for routes module convenience.
106
+ __all__ = ["create_app", "require_auth", "get_session"]
avatar/api/ratelimit.py ADDED
@@ -0,0 +1,36 @@
1
+ # Copyright 2026 Avatar Runtime Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """A tiny in-process token-bucket rate limiter for the control API.
5
+
6
+ Single static key ⇒ a single global bucket is sufficient (per-key == global).
7
+ This guards the write path (enqueue) against a client flooding the queue. It is
8
+ intentionally process-local: with multiple API replicas, set the limit per
9
+ replica or front the API with a gateway. For per-tenant limits, see the
10
+ Avatar Cloud roadmap.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import time
16
+
17
+
18
class TokenBucket:
    """Token bucket refilled continuously at ``rate`` tokens per second.

    The bucket starts full. ``capacity`` is at least 1 and ``rate`` is never
    negative; a rate of zero turns limiting off entirely.
    """

    def __init__(self, rate_per_second: float, burst: int) -> None:
        self.rate = max(0.0, rate_per_second)
        self.capacity = max(1, burst)
        # Start with a full bucket so an initial burst is admitted.
        self.tokens = float(self.capacity)
        self.updated = time.monotonic()

    def allow(self, cost: float = 1.0) -> bool:
        """Try to spend ``cost`` tokens.

        Returns True when the tokens were available (and deducts them), False
        when the caller should be throttled. With a non-positive rate the
        limiter is disabled and every call is allowed.
        """
        if self.rate <= 0:
            return True

        # Top up for the time elapsed since the previous call, capped at capacity.
        now = time.monotonic()
        elapsed = now - self.updated
        self.updated = now
        self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)

        granted = self.tokens >= cost
        if granted:
            self.tokens -= cost
        return granted
avatar/api/routes.py ADDED
@@ -0,0 +1,350 @@
1
+ # Copyright 2026 Avatar Runtime Authors
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Control API endpoints (§ control surface).
5
+
6
+ All ``/v1`` routes require the single static API key. SSE streams the ledger as
7
+ steps commit. The dashboard and SDK are both pure clients of these routes.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import json
14
+ from typing import Any
15
+
16
+ from fastapi import APIRouter, Depends, HTTPException, Request
17
+ from fastapi.responses import PlainTextResponse, StreamingResponse
18
+ from pydantic import BaseModel, Field
19
+ from sqlalchemy import func, select
20
+ from sqlalchemy.ext.asyncio import AsyncSession
21
+
22
+ from avatar.api.app import get_session, require_auth
23
+ from avatar.engine.models import (
24
+ RUN_STATUSES,
25
+ TERMINAL_STATUSES,
26
+ AgentRun,
27
+ AgentRunStep,
28
+ Approval,
29
+ utcnow,
30
+ )
31
+ from avatar.engine.replay import fork_run
32
+
33
+ router = APIRouter()
34
+
35
+
36
async def rate_limit(request: Request) -> None:
    """Dependency guarding the write path against a flood of requests.

    Looks up ``app.state.rate_limiter``; when one is present and it refuses
    the request, a 429 with ``Retry-After: 1`` is raised. No limiter means no
    throttling.
    """
    limiter = getattr(request.app.state, "rate_limiter", None)
    if limiter is None or limiter.allow():
        return
    raise HTTPException(
        status_code=429,
        detail="rate limit exceeded",
        headers={"Retry-After": "1"},
    )
45
+
46
+
47
+ # --- request/response models -------------------------------------------------
48
+
49
+
50
class CreateRun(BaseModel):
    """Request body for ``POST /v1/runs``."""

    # Stored on the new run as ``AgentRun.agent_ref``.
    agent_ref: str
    # Input payload stored on the run; defaults to an empty dict.
    input: dict[str, Any] = Field(default_factory=dict)
    # Optional budget cap stored on the run; ``None`` when not supplied.
    budget_cap_cents: int | None = None
    # Optional key: when a run with the same key already exists, the endpoint
    # returns that run instead of creating a new one.
    idempotency_key: str | None = None
55
+
56
+
57
class ReplayReq(BaseModel):
    """Request body for ``POST /v1/runs/{run_id}/replay``."""

    # Sequence number handed to ``fork_run`` and echoed back as ``fork_seq``.
    from_seq: int
59
+
60
+
61
+ def _run_summary(r: AgentRun) -> dict:
62
+ return {
63
+ "id": r.id,
64
+ "agent_ref": r.agent_ref,
65
+ "status": r.status,
66
+ "attempt": r.attempt,
67
+ "cursor_seq": r.cursor_seq,
68
+ "budget_cap_cents": r.budget_cap_cents,
69
+ "budget_used_cents": r.budget_used_cents,
70
+ "error_class": r.error_class,
71
+ "output": r.output,
72
+ "forked_from": r.forked_from,
73
+ "fork_seq": r.fork_seq,
74
+ "created_at": r.created_at.isoformat() if r.created_at else None,
75
+ "updated_at": r.updated_at.isoformat() if r.updated_at else None,
76
+ }
77
+
78
+
79
+ def _step_dict(s: AgentRunStep) -> dict:
80
+ return {
81
+ "seq": s.seq,
82
+ "type": s.type,
83
+ "payload": s.payload,
84
+ "tool_call_id": s.tool_call_id,
85
+ "idempotency_key": s.idempotency_key,
86
+ "cost_cents": s.cost_cents,
87
+ "worker_id": s.worker_id,
88
+ "attempt": s.attempt,
89
+ "created_at": s.created_at.isoformat() if s.created_at else None,
90
+ }
91
+
92
+
93
async def _get_run(db: AsyncSession, run_id: str) -> AgentRun:
    """Fetch the run with the given id.

    Raises:
        HTTPException: 404 when no such run exists.
    """
    result = await db.execute(select(AgentRun).where(AgentRun.id == run_id))
    found = result.scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="run not found")
100
+
101
+
102
+ # --- endpoints ---------------------------------------------------------------
103
+
104
+
105
@router.post(
    "/v1/runs",
    status_code=202,
    dependencies=[Depends(require_auth), Depends(rate_limit)],
)
async def create_run(
    body: CreateRun, request: Request, db: AsyncSession = Depends(get_session)
) -> dict:
    """Enqueue a new run, or return the existing one for a repeated idempotency key.

    Args:
        body: Agent reference, input payload, optional budget cap and
            optional idempotency key.
        request: Used to read ``settings.max_queue_depth``.
        db: Request-scoped database session.

    Returns:
        ``{"id": ..., "status": ...}`` for the new (or already existing) run.

    Raises:
        HTTPException: 429 with ``Retry-After: 5`` when the number of queued
            runs has reached the configured cap.
    """
    # Local import: only this handler needs it.
    from sqlalchemy.exc import IntegrityError

    async def find_by_key() -> AgentRun | None:
        result = await db.execute(
            select(AgentRun).where(AgentRun.idempotency_key == body.idempotency_key)
        )
        return result.scalar_one_or_none()

    if body.idempotency_key:
        existing = await find_by_key()
        if existing is not None:
            return {"id": existing.id, "status": existing.status}
    # Backpressure: refuse new work when the queue is already saturated.
    cap = request.app.state.settings.max_queue_depth
    if cap > 0:
        queued = (
            await db.execute(
                select(func.count())
                .select_from(AgentRun)
                .where(AgentRun.status == "queued")
            )
        ).scalar_one()
        if queued >= cap:
            raise HTTPException(
                status_code=429,
                detail=f"queue is full ({queued} queued, cap {cap})",
                headers={"Retry-After": "5"},
            )
    run = AgentRun(
        agent_ref=body.agent_ref,
        input=body.input,
        budget_cap_cents=body.budget_cap_cents,
        idempotency_key=body.idempotency_key,
        status="queued",
    )
    db.add(run)
    try:
        await db.commit()
    except IntegrityError:
        # Two requests with the same idempotency key can both pass the lookup
        # above; the loser's insert then violates the key's uniqueness
        # (NOTE(review): assumes a unique constraint on idempotency_key in the
        # model, which is not visible here). Return the winner's run instead
        # of surfacing a 500. Any other integrity failure is re-raised.
        await db.rollback()
        if not body.idempotency_key:
            raise
        winner = await find_by_key()
        if winner is None:
            raise
        return {"id": winner.id, "status": winner.status}
    return {"id": run.id, "status": run.status}
147
+
148
+
149
@router.get("/v1/runs", dependencies=[Depends(require_auth)])
async def list_runs(
    status: str | None = None,
    limit: int = 50,
    db: AsyncSession = Depends(get_session),
) -> dict:
    """List runs, newest first.

    Args:
        status: Optional exact-match filter on the run status.
        limit: Maximum number of rows; clamped to the range 0..200.
        db: Request-scoped database session.

    Returns:
        ``{"runs": [...]}`` with one summary dict per run.
    """
    # Clamp at both ends: a negative LIMIT is an error on some backends and
    # means "no limit" on others, which would bypass the 200-row ceiling.
    bounded = max(0, min(limit, 200))
    q = select(AgentRun).order_by(AgentRun.created_at.desc()).limit(bounded)
    if status:
        q = q.where(AgentRun.status == status)
    rows = (await db.execute(q)).scalars().all()
    return {"runs": [_run_summary(r) for r in rows]}
160
+
161
+
162
@router.get("/v1/runs/{run_id}", dependencies=[Depends(require_auth)])
async def get_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Return the summary of a single run (404 when it does not exist)."""
    run = await _get_run(db, run_id)
    return _run_summary(run)
165
+
166
+
167
@router.get("/v1/runs/{run_id}/steps", dependencies=[Depends(require_auth)])
async def get_steps(run_id: str, db: AsyncSession = Depends(get_session)) -> list[dict]:
    """Return every ledger step of a run, ordered by ``seq``.

    The run is looked up first so that an unknown id yields a 404 rather than
    an empty list.
    """
    await _get_run(db, run_id)
    ordered_steps = (
        select(AgentRunStep)
        .where(AgentRunStep.run_id == run_id)
        .order_by(AgentRunStep.seq)
    )
    result = await db.execute(ordered_steps)
    return [_step_dict(step) for step in result.scalars().all()]
178
+
179
+
180
@router.post("/v1/runs/{run_id}/cancel", dependencies=[Depends(require_auth)])
async def cancel_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Request cancellation of a run and return its summary.

    A run already in a terminal status is returned untouched. Otherwise
    ``cancel_requested`` is set, and a run that is still queued is failed on
    the spot with error class ``cancelled``.
    """
    run = await _get_run(db, run_id)
    if run.status not in TERMINAL_STATUSES:
        run.cancel_requested = True
        never_started = run.status == "queued"
        if never_started:
            # Nothing has picked it up yet, so finish it here.
            run.status = "failed"
            run.error_class = "cancelled"
            run.output = {"error": "cancelled before start"}
        await db.commit()
    return _run_summary(run)
193
+
194
+
195
@router.post("/v1/runs/{run_id}/approve", dependencies=[Depends(require_auth)])
async def approve_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Mark the run's pending approval as ``approved`` and re-queue the run."""
    summary = await _resolve_approval(db, run_id, "approved")
    return summary
198
+
199
+
200
@router.post("/v1/runs/{run_id}/reject", dependencies=[Depends(require_auth)])
async def reject_run(run_id: str, db: AsyncSession = Depends(get_session)) -> dict:
    """Mark the run's pending approval as ``rejected`` and re-queue the run."""
    summary = await _resolve_approval(db, run_id, "rejected")
    return summary
203
+
204
+
205
async def _resolve_approval(db: AsyncSession, run_id: str, decision: str) -> dict:
    """Record ``decision`` on the newest pending approval and re-queue the run.

    Args:
        db: Database session.
        run_id: Id of the run awaiting approval.
        decision: Status written onto the approval row.

    Returns:
        The run summary after the change has been committed.

    Raises:
        HTTPException: 404 for an unknown run; 409 when the run is not in
            ``approval_wait`` or has no pending approval.
    """
    run = await _get_run(db, run_id)
    if run.status != "approval_wait":
        raise HTTPException(status_code=409, detail="run is not awaiting approval")

    newest_pending = (
        select(Approval)
        .where(Approval.run_id == run_id, Approval.status == "pending")
        .order_by(Approval.created_at.desc())
    )
    pending = (await db.execute(newest_pending)).scalars().first()
    if pending is None:
        raise HTTPException(status_code=409, detail="no pending approval")

    pending.status = decision
    pending.decided_at = utcnow()

    # Re-queue so a worker resumes and either dispatches or records the rejection.
    run.status = "queued"
    run.lease_owner = None
    run.lease_expires_at = None
    await db.commit()
    return _run_summary(run)
226
+
227
+
228
@router.post("/v1/runs/{run_id}/replay", dependencies=[Depends(require_auth)])
async def replay_run(
    run_id: str, body: ReplayReq, db: AsyncSession = Depends(get_session)
) -> dict:
    """Fork a run via ``fork_run`` at ``body.from_seq`` and describe the new run.

    Returns the new run's id and status together with the source run id and
    the requested fork sequence number. An unknown source run yields a 404.
    """
    original = await _get_run(db, run_id)
    forked = await fork_run(db, original, body.from_seq)
    response = {
        "id": forked.id,
        "status": forked.status,
        "forked_from": run_id,
        "fork_seq": body.from_seq,
    }
    return response
236
+
237
+
238
@router.get("/v1/runs/{run_id}/stream", dependencies=[Depends(require_auth)])
async def stream_run(run_id: str, request: Request) -> StreamingResponse:
    """Stream a run's ledger as server-sent events.

    Existing steps are replayed first, then new ones are tailed by polling
    every 0.3 s. The stream ends after a final ``status`` event once the run
    is in a terminal status or in ``approval_wait``, after an ``error`` event
    when the run does not exist, or when the client disconnects.

    Args:
        run_id: Id of the run to follow.
        request: Supplies the session factory and the disconnect check.

    Returns:
        A ``text/event-stream`` streaming response.
    """
    factory = request.app.state.session_factory

    async def event_gen():
        last_seq = 0
        # Replay existing steps first, then tail new ones.
        while True:
            if await request.is_disconnected():
                return
            # Build this poll's frames while the session is open, then release
            # the session BEFORE yielding: a slow or stalled client must not
            # keep a database connection checked out while it drains frames.
            frames: list[str] = []
            finished = False
            async with factory() as db:
                run = (
                    await db.execute(select(AgentRun).where(AgentRun.id == run_id))
                ).scalar_one_or_none()
                if run is None:
                    frames.append(_sse({"event": "error", "detail": "run not found"}))
                    finished = True
                else:
                    steps = (
                        await db.execute(
                            select(AgentRunStep)
                            .where(
                                AgentRunStep.run_id == run_id,
                                AgentRunStep.seq > last_seq,
                            )
                            .order_by(AgentRunStep.seq)
                        )
                    ).scalars().all()
                    for s in steps:
                        last_seq = s.seq
                        frames.append(_sse({"event": "step", **_step_dict(s)}))
                    if run.status in TERMINAL_STATUSES or run.status == "approval_wait":
                        frames.append(
                            _sse({"event": "status", "status": run.status,
                                  "output": run.output})
                        )
                        finished = True
            for frame in frames:
                yield frame
            if finished:
                return
            await asyncio.sleep(0.3)

    return StreamingResponse(event_gen(), media_type="text/event-stream")
272
+
273
+
274
+ def _sse(obj: dict) -> str:
275
+ return f"data: {json.dumps(obj, default=str)}\n\n"
276
+
277
+
278
@router.get("/healthz")
async def healthz() -> dict:
    """Liveness probe: always answers ``{"ok": True}`` without touching the DB."""
    alive = {"ok": True}
    return alive
281
+
282
+
283
@router.get("/readyz")
async def readyz(request: Request) -> dict:
    """Readiness probe: succeeds when a trivial query against the runs table works.

    Returns:
        ``{"ready": True}`` when the database answered.

    Raises:
        HTTPException: 503 when opening a session or running the query fails.
    """
    import logging

    factory = request.app.state.session_factory
    try:
        async with factory() as db:
            await db.execute(select(AgentRun.id).limit(1))
    except Exception as exc:  # noqa: BLE001
        # This route is unauthenticated, and driver error text can carry
        # connection details (host, user, database name). Keep the full error
        # in the server log and expose only the exception type to the caller.
        logging.getLogger(__name__).warning("readiness check failed", exc_info=True)
        raise HTTPException(
            status_code=503, detail=f"db not ready: {type(exc).__name__}"
        ) from exc
    return {"ready": True}
292
+
293
+
294
async def _fleet_stats(db: AsyncSession) -> dict:
    """Operational snapshot: runs by status, dead count, oldest-queued age.

    Args:
        db: Database session used for the two aggregate queries.

    Returns:
        A dict with ``by_status`` (every status in ``RUN_STATUSES``, zero
        filled, plus any other status found in the table), ``queue_depth``,
        ``running`` (leased + running), ``dead`` and
        ``oldest_queued_age_seconds`` (rounded to 0.1 s, never negative).
    """
    from datetime import timezone

    rows = (
        await db.execute(
            select(AgentRun.status, func.count()).group_by(AgentRun.status)
        )
    ).all()
    by_status = dict.fromkeys(RUN_STATUSES, 0)
    by_status.update({status: n for status, n in rows})
    oldest_queued = (
        await db.execute(
            select(func.min(AgentRun.created_at)).where(AgentRun.status == "queued")
        )
    ).scalar_one_or_none()

    age = 0.0
    if oldest_queued:
        now = utcnow()
        # Some backends (e.g. SQLite) hand back naive datetimes even for
        # timezone-aware columns; subtracting naive from aware raises
        # TypeError. When exactly one side is aware, compare both as naive
        # UTC. NOTE(review): this assumes naive values are UTC - confirm
        # against ``utcnow`` and the column definition.
        if (oldest_queued.tzinfo is None) != (now.tzinfo is None):
            if oldest_queued.tzinfo is not None:
                oldest_queued = oldest_queued.astimezone(timezone.utc).replace(tzinfo=None)
            if now.tzinfo is not None:
                now = now.astimezone(timezone.utc).replace(tzinfo=None)
        # Clamp: clock skew between API host and DB must not yield a negative age.
        age = max(0.0, (now - oldest_queued).total_seconds())

    return {
        "by_status": by_status,
        "queue_depth": by_status.get("queued", 0),
        "running": by_status.get("leased", 0) + by_status.get("running", 0),
        "dead": by_status.get("dead", 0),
        "oldest_queued_age_seconds": round(age, 1),
    }
316
+
317
+
318
@router.get("/v1/stats", dependencies=[Depends(require_auth)])
async def stats(db: AsyncSession = Depends(get_session)) -> dict:
    """Return the fleet snapshot computed by ``_fleet_stats`` as JSON."""
    snapshot = await _fleet_stats(db)
    return snapshot
321
+
322
+
323
@router.get("/metrics", response_class=PlainTextResponse)
async def metrics(request: Request) -> str:
    """Prometheus text exposition. Unauthenticated by convention so a scraper
    can reach it; put it behind your network policy / reverse proxy.

    Emits one ``avatar_runs`` sample per status, followed by the queue depth,
    running count, dead count and oldest-queued age gauges.
    """
    async with request.app.state.session_factory() as db:
        snapshot = await _fleet_stats(db)

    per_status = [
        f'avatar_runs{{status="{status}"}} {n}'
        for status, n in snapshot["by_status"].items()
    ]
    exposition = [
        "# HELP avatar_runs Total runs by status.",
        "# TYPE avatar_runs gauge",
        *per_status,
        "# HELP avatar_queue_depth Runs currently queued.",
        "# TYPE avatar_queue_depth gauge",
        f"avatar_queue_depth {snapshot['queue_depth']}",
        "# HELP avatar_runs_running Runs currently leased/running.",
        "# TYPE avatar_runs_running gauge",
        f"avatar_runs_running {snapshot['running']}",
        "# HELP avatar_runs_dead Dead-lettered (poison) runs.",
        "# TYPE avatar_runs_dead gauge",
        f"avatar_runs_dead {snapshot['dead']}",
        "# HELP avatar_oldest_queued_age_seconds Age of the oldest queued run.",
        "# TYPE avatar_oldest_queued_age_seconds gauge",
        f"avatar_oldest_queued_age_seconds {snapshot['oldest_queued_age_seconds']}",
    ]
    return "\n".join(exposition) + "\n"