generic-ml-cache-daemon 0.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. generic_ml_cache_daemon-0.13.0/.gitignore +18 -0
  2. generic_ml_cache_daemon-0.13.0/PKG-INFO +36 -0
  3. generic_ml_cache_daemon-0.13.0/README.md +165 -0
  4. generic_ml_cache_daemon-0.13.0/pyproject.toml +64 -0
  5. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/__init__.py +3 -0
  6. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/__main__.py +27 -0
  7. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/app.py +63 -0
  8. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/jobs.py +86 -0
  9. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/metrics.py +17 -0
  10. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/__init__.py +2 -0
  11. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/execution.py +59 -0
  12. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/gateway.py +39 -0
  13. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/health.py +25 -0
  14. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/job.py +28 -0
  15. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/run.py +27 -0
  16. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/session.py +43 -0
  17. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/py.typed +0 -0
  18. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/__init__.py +2 -0
  19. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/executions.py +102 -0
  20. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/gateway.py +119 -0
  21. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/health.py +75 -0
  22. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/jobs.py +91 -0
  23. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/run.py +114 -0
  24. generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/sessions.py +122 -0
  25. generic_ml_cache_daemon-0.13.0/tests/conftest.py +19 -0
  26. generic_ml_cache_daemon-0.13.0/tests/test_app.py +46 -0
  27. generic_ml_cache_daemon-0.13.0/tests/test_executions.py +100 -0
  28. generic_ml_cache_daemon-0.13.0/tests/test_gateway.py +159 -0
  29. generic_ml_cache_daemon-0.13.0/tests/test_health.py +135 -0
  30. generic_ml_cache_daemon-0.13.0/tests/test_jobs.py +190 -0
  31. generic_ml_cache_daemon-0.13.0/tests/test_main.py +55 -0
  32. generic_ml_cache_daemon-0.13.0/tests/test_metrics.py +18 -0
  33. generic_ml_cache_daemon-0.13.0/tests/test_run.py +146 -0
  34. generic_ml_cache_daemon-0.13.0/tests/test_sessions.py +216 -0
@@ -0,0 +1,18 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ dist/
5
+ build/
6
+ *.egg-info/
7
+ .coverage
8
+ coverage.xml
9
+ .pytest_cache/
10
+ .ruff_cache/
11
+
12
+ # editors / IDEs
13
+ .idea/
14
+ .vscode/
15
+
16
+ # machine-local repo tooling (not project content)
17
+ secret-audit.sh
18
+ setup-repo.sh
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: generic-ml-cache-daemon
3
+ Version: 0.13.0
4
+ Summary: Local HTTP daemon for generic-ml-cache: REST API, gateway proxy, and session transport. A thin inbound driver over generic-ml-cache-core.
5
+ Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
6
+ Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
7
+ Project-URL: Issues, https://github.com/danielslobozian/generic-ml-cache/issues
8
+ Project-URL: Changelog, https://github.com/danielslobozian/generic-ml-cache/blob/main/CHANGELOG.md
9
+ Author: Daniel Slobozian
10
+ License-Expression: Apache-2.0
11
+ Keywords: ai,cache,daemon,fastapi,gateway,http,llm,proxy
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Utilities
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: fastapi>=0.115
25
+ Requires-Dist: generic-ml-cache-core>=0.12.0
26
+ Requires-Dist: sse-starlette>=2.0
27
+ Requires-Dist: uvicorn>=0.30
28
+ Provides-Extra: dev
29
+ Requires-Dist: coverage>=7; extra == 'dev'
30
+ Requires-Dist: httpx>=0.27; extra == 'dev'
31
+ Requires-Dist: prometheus-client>=0.20; extra == 'dev'
32
+ Requires-Dist: pytest-cov; extra == 'dev'
33
+ Requires-Dist: pytest>=7; extra == 'dev'
34
+ Requires-Dist: ruff>=0.15; extra == 'dev'
35
+ Provides-Extra: metrics
36
+ Requires-Dist: prometheus-client>=0.20; extra == 'metrics'
@@ -0,0 +1,165 @@
1
+ # generic-ml-cache-daemon
2
+
3
+ The HTTP daemon for [generic-ml-cache](https://github.com/danielslobozian/generic-ml-cache).
4
+ Exposes the cache store and all session/execution functionality as a local REST API with
5
+ server-sent event (SSE) streaming, plus a caching proxy gateway for the Anthropic Messages API.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install generic-ml-cache-daemon # runtime only
11
+ pip install "generic-ml-cache-daemon[metrics]" # + Prometheus /metrics endpoint
12
+ ```
13
+
14
+ ## Starting the daemon
15
+
16
+ ### Via the CLI (recommended)
17
+
18
+ ```bash
19
+ gmlcache daemon start # foreground, default 127.0.0.1:8765
20
+ gmlcache daemon start --port 9000 # custom port
21
+ gmlcache daemon start --metrics # enable /metrics endpoint
22
+ gmlcache daemon start --session abc # bind to session "abc"
23
+ ```
24
+
25
+ Check status or stop:
26
+
27
+ ```bash
28
+ gmlcache daemon status
29
+ gmlcache daemon stop
30
+ ```
31
+
32
+ ### Direct launch
33
+
34
+ ```bash
35
+ python -m generic_ml_cache_daemon # uses defaults
36
+ GMLCACHE_STORE=/path/to/store python -m generic_ml_cache_daemon
37
+ GMLCACHE_SESSION=abc GMLCACHE_METRICS=1 python -m generic_ml_cache_daemon
38
+ ```
39
+
40
+ Environment variables:
41
+
42
+ | Variable | Default | Description |
43
+ |---|---|---|
44
+ | `GMLCACHE_STORE` | `~/.gmlcache` | Path to the cache store directory |
45
+ | `GMLCACHE_SESSION` | *(none)* | Bind all intercepted calls to a session |
46
+ | `GMLCACHE_METRICS` | `0` | Set to `1`, `true`, or `yes` (case-insensitive) to enable the Prometheus `/metrics` endpoint |
47
+
48
+ ## HTTP API
49
+
50
+ The daemon listens on `http://127.0.0.1:8765` by default.
51
+ Interactive API docs are available at `/docs` (Swagger UI) and `/redoc`.
52
+
53
+ ### Observability
54
+
55
+ | Method | Path | Description |
56
+ |---|---|---|
57
+ | `GET` | `/health` | Liveness: `{"status":"ok"}` |
58
+ | `GET` | `/ready` | Readiness: probes the store; 503 if inaccessible |
59
+ | `GET` | `/info` | Version, store path, adapters, bound session |
60
+ | `GET` | `/metrics` | Prometheus text (requires `[metrics]` extra + `--metrics`) |
61
+
62
+ ### Sessions
63
+
64
+ | Method | Path | Description |
65
+ |---|---|---|
66
+ | `GET` | `/sessions` | List all session IDs |
67
+ | `POST` | `/sessions` | Create a session (body: `{tags, spec}`) |
68
+ | `GET` | `/sessions/{id}` | Get session tags and spec (404 if unknown) |
69
+ | `GET` | `/sessions/{id}/stats` | Calls, hits, hit rate |
70
+ | `PUT` | `/sessions/{id}/spec` | Set or replace execution spec |
71
+ | `DELETE` | `/sessions/{id}/spec` | Remove execution spec |
72
+ | `POST` | `/sessions/{id}/tags` | Add a tag |
73
+ | `DELETE` | `/sessions/{id}/tags/{tag}` | Remove a tag |
74
+
75
+ ### Executions & Global Stats
76
+
77
+ | Method | Path | Description |
78
+ |---|---|---|
79
+ | `GET` | `/executions` | List all current (servable) executions |
80
+ | `GET` | `/executions/{key}` | Inspect by exact key or prefix (409 on ambiguous prefix) |
81
+ | `GET` | `/stats` | Global execution count + event counts |
82
+ | `POST` | `/purge` | Purge by scope: `all`, `key`, `tag`, `session`, `session_tag` |
83
+
84
+ **Purge body examples:**
85
+
86
+ ```json
87
+ {"by": "all"}
88
+ {"by": "key", "target": "deadbeef"}
89
+ {"by": "session", "target": "abc123"}
90
+ ```
91
+
92
+ ### Run (synchronous or SSE)
93
+
94
+ ```
95
+ POST /run
96
+ {
97
+ "client": "anthropic",
98
+ "model": "claude-opus-4-8",
99
+ "prompt": "Summarise the paper.",
100
+ "effort": "medium",
101
+ "session_id": "abc"
102
+ }
103
+ ```
104
+
105
+ - `Accept: application/json` (default) — blocks and returns `{execution_key, state, cache_hit, stdout, stderr}`
106
+ - `Accept: text/event-stream` — SSE: `{"type":"accepted"}` immediately, then `{"type":"complete", ...}` on finish
107
+
108
+ ### Jobs (detached / async)
109
+
110
+ | Method | Path | Description |
111
+ |---|---|---|
112
+ | `POST` | `/jobs` | Submit a background execution; returns `{job_id, state}` with 202 |
113
+ | `GET` | `/jobs` | List all job IDs |
114
+ | `GET` | `/jobs/{id}` | Poll state: `pending`, `running`, `done`, `error` |
115
+ | `GET` | `/jobs/{id}/stream` | SSE: periodic `status` events, then `complete` or `error` |
116
+
117
+ ### Claude Gateway
118
+
119
+ ```
120
+ POST /gateway/claude/v1/messages
121
+ ```
122
+
123
+ A cache-transparent proxy for the Anthropic Messages API. Requests that hit the
124
+ cache are returned without a network call to Anthropic. The response shape matches
125
+ the Anthropic Messages API exactly, with one extra field: `x_cache_hit: bool`.
126
+
127
+ **Limitations (0.13.0):** single-turn conversations only (one `role: user` message,
128
+ no prior assistant turns). Multi-turn support is planned.
129
+
130
+ **Example:**
131
+
132
+ ```bash
133
+ curl http://127.0.0.1:8765/gateway/claude/v1/messages \
134
+ -H "Content-Type: application/json" \
135
+ -d '{
136
+ "model": "claude-opus-4-8",
137
+ "messages": [{"role": "user", "content": "Hello, world!"}],
138
+ "max_tokens": 256
139
+ }'
140
+ ```
141
+
142
+ Point any Anthropic SDK client at the gateway by overriding the base URL:
143
+
144
+ ```python
145
+ import anthropic
146
+
147
+ client = anthropic.Anthropic(
148
+ api_key="...",
149
+ base_url="http://127.0.0.1:8765/gateway/claude",
150
+ )
151
+ ```
152
+
153
+ ## Architecture
154
+
155
+ The daemon is a thin FastAPI layer over the `generic-ml-cache-core` hexagonal
156
+ architecture. It does not own any persistent state — all persistence goes through the
157
+ existing `JournalMetrics` (SQLite registry) and `SqliteExecutionRepository`
158
+ that the core library manages.
159
+
160
+ Background jobs run in a `ThreadPoolExecutor` inside an in-process
161
+ `JobRegistry`; job state is not persisted across daemon restarts.
162
+
163
+ ## License
164
+
165
+ Apache-2.0
@@ -0,0 +1,64 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.18"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "generic-ml-cache-daemon"
7
+ version = "0.13.0"
8
+ description = "Local HTTP daemon for generic-ml-cache: REST API, gateway proxy, and session transport. A thin inbound driver over generic-ml-cache-core."
9
+ requires-python = ">=3.9"
10
+ license = "Apache-2.0"
11
+ license-files = ["LICENSE", "NOTICE"]
12
+ authors = [{ name = "Daniel Slobozian" }]
13
+ keywords = ["cache", "llm", "ai", "daemon", "http", "fastapi", "gateway", "proxy"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: Apache Software License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Utilities",
26
+ ]
27
+ dependencies = [
28
+ "generic-ml-cache-core>=0.12.0",
29
+ "fastapi>=0.115",
30
+ "uvicorn>=0.30",
31
+ "sse-starlette>=2.0",
32
+ ]
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/danielslobozian/generic-ml-cache"
36
+ Repository = "https://github.com/danielslobozian/generic-ml-cache"
37
+ Issues = "https://github.com/danielslobozian/generic-ml-cache/issues"
38
+ Changelog = "https://github.com/danielslobozian/generic-ml-cache/blob/main/CHANGELOG.md"
39
+
40
+ [project.optional-dependencies]
41
+ # Optional Prometheus /metrics endpoint. Off by default.
42
+ metrics = ["prometheus-client>=0.20"]
43
+ dev = [
44
+ "pytest>=7",
45
+ "pytest-cov",
46
+ "coverage>=7",
47
+ "ruff>=0.15",
48
+ "httpx>=0.27",
49
+ "prometheus-client>=0.20",
50
+ ]
51
+
52
+ [tool.hatch.build.targets.wheel]
53
+ packages = ["src/generic_ml_cache_daemon"]
54
+
55
+ [tool.pytest.ini_options]
56
+ testpaths = ["tests"]
57
+ addopts = "-ra"
58
+
59
+ [tool.coverage.run]
60
+ omit = ["*/generic_ml_cache_daemon/__main__.py"]
61
+
62
+ [tool.ruff]
63
+ line-length = 100
64
+ target-version = "py39"
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ __version__ = "0.13.0"
@@ -0,0 +1,27 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Entry point: run the daemon via ``python -m generic_ml_cache_daemon``."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+ import uvicorn
11
+
12
+ from generic_ml_cache_daemon.app import create_app
13
+
14
+ _DEFAULT_HOST = "127.0.0.1"
15
+ _DEFAULT_PORT = 8765
16
+
17
+
18
def main() -> None:
    """Run the daemon under uvicorn, configured from environment variables.

    Environment:
        GMLCACHE_STORE: store directory. Unset or empty falls back to
            ``~/.gmlcache``.
        GMLCACHE_SESSION: session id passed to ``create_app``. Unset or empty
            means no session.
        GMLCACHE_METRICS: ``1``, ``true`` or ``yes`` (case-insensitive) sets
            ``enable_metrics``; anything else leaves it off.

    The server always binds to ``_DEFAULT_HOST`` and ``_DEFAULT_PORT``.
    """
    # Treat an empty value like an unset one: Path("") would silently resolve
    # to the current working directory. The home directory is only looked up
    # when it is actually needed, so an explicit store path keeps working in
    # environments where Path.home() cannot be resolved.
    configured_store = os.environ.get("GMLCACHE_STORE")
    store_root = Path(configured_store) if configured_store else Path.home() / ".gmlcache"
    session_id = os.environ.get("GMLCACHE_SESSION") or None
    enable_metrics = os.environ.get("GMLCACHE_METRICS", "").lower() in ("1", "true", "yes")
    application = create_app(store_root, session_id=session_id, enable_metrics=enable_metrics)
    uvicorn.run(application, host=_DEFAULT_HOST, port=_DEFAULT_PORT)
24
+
25
+
26
+ if __name__ == "__main__":
27
+ main()
@@ -0,0 +1,63 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """FastAPI application factory for the generic-ml-cache daemon."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ from fastapi import FastAPI
11
+
12
+ from generic_ml_cache_core.adapter.inbound.composition import build_use_cases
13
+
14
+ from generic_ml_cache_daemon import __version__
15
+
16
+
17
def create_app(
    store_root: Path,
    *,
    session_id: Optional[str] = None,
    enable_metrics: bool = False,
) -> FastAPI:
    """Build the daemon's FastAPI application with its state and routes attached.

    Args:
        store_root: path to the gmlcache store directory; passed to
            ``build_use_cases`` and kept on ``app.state.store_root``.
        session_id: optional session all intercepted calls are recorded under.
        enable_metrics: expose the Prometheus /metrics endpoint.

    Returns:
        A fully wired FastAPI application. All routers are included here;
        callers should not mount additional routes after construction.
    """
    application = FastAPI(
        title="generic-ml-cache daemon",
        version=__version__,
        docs_url="/docs",
        redoc_url="/redoc",
    )

    # Everything the route handlers need is published on application.state.
    state = application.state
    state.wired = build_use_cases(store_root)
    state.store_root = store_root
    state.session_id = session_id
    state.enable_metrics = enable_metrics

    # The registry and route modules are imported at call time, not module load.
    from generic_ml_cache_daemon.jobs import JobRegistry
    from generic_ml_cache_daemon.routes.executions import router as executions_router
    from generic_ml_cache_daemon.routes.gateway import router as gateway_router
    from generic_ml_cache_daemon.routes.health import router as health_router
    from generic_ml_cache_daemon.routes.jobs import router as jobs_router
    from generic_ml_cache_daemon.routes.run import router as run_router
    from generic_ml_cache_daemon.routes.sessions import router as sessions_router

    state.job_registry = JobRegistry()

    # Inclusion order is preserved: health, sessions, executions, run, jobs, gateway.
    routers_in_order = (
        health_router,
        sessions_router,
        executions_router,
        run_router,
        jobs_router,
        gateway_router,
    )
    for router in routers_in_order:
        application.include_router(router)

    return application
@@ -0,0 +1,86 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """In-process job registry for detached background executions.
4
+
5
+ Each POST /jobs submission gets a unique job_id. The execution runs in a
6
+ background thread; callers poll GET /jobs/{id} or stream GET /jobs/{id}/stream.
7
+ The registry is in-process memory only — jobs are not persisted across restarts.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import concurrent.futures
13
+ import secrets
14
+ import threading
15
+ from enum import Enum
16
+ from typing import Dict, Optional
17
+
18
+ from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
19
+
20
+
21
# Lifecycle states of a background job. Because the enum also subclasses
# ``str``, each member compares equal to its plain string value.
+ class JobState(str, Enum):
22
# Initial state assigned in Job.__init__.
+ PENDING = "pending"
23
+ RUNNING = "running"
24
# Terminal states: set by Job.mark_done / Job.mark_error respectively.
+ DONE = "done"
25
+ ERROR = "error"
26
+
27
+
28
class Job:
    """State holder for a single background execution.

    A job starts out ``PENDING``. ``mark_done`` and ``mark_error`` record the
    outcome and release every caller blocked in ``wait``.
    """

    def __init__(self, job_id: str) -> None:
        self._done_event = threading.Event()
        self.job_id = job_id
        self.state = JobState.PENDING
        # Filled in by mark_done / mark_error; both stay None until then.
        self.execution: Optional[MlExecution] = None
        self.error: Optional[str] = None

    def wait(self, timeout: Optional[float] = None) -> bool:
        """Block until the job has finished or ``timeout`` seconds elapse.

        Returns:
            True if the job finished (done or error), False on timeout.
        """
        finished = self._done_event.wait(timeout=timeout)
        return finished

    def mark_running(self) -> None:
        """Flag the job as running."""
        self.state = JobState.RUNNING

    def mark_done(self, execution: MlExecution) -> None:
        """Store the resulting execution and finish the job as ``DONE``."""
        self.execution = execution
        self._finish(JobState.DONE)

    def mark_error(self, error: str) -> None:
        """Store the error text and finish the job as ``ERROR``."""
        self.error = error
        self._finish(JobState.ERROR)

    def _finish(self, final_state: JobState) -> None:
        """Set the terminal state, then wake up waiters."""
        # The payload is assigned by the caller before the state flips, so a
        # reader that sees the terminal state also sees the payload.
        self.state = final_state
        self._done_event.set()
51
+
52
+
53
class JobRegistry:
    """Thread-safe in-memory registry of submitted jobs.

    Jobs run on a thread pool of four workers. Entries are never removed, so
    the registry keeps every job for the lifetime of the process.
    """

    def __init__(self) -> None:
        self._jobs: Dict[str, Job] = {}
        self._lock = threading.Lock()
        self._executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=4, thread_name_prefix="gmlc-job"
        )

    def submit(self, fn, *args) -> Job:
        """Register a new job and schedule ``fn(*args)`` on the worker pool.

        Args:
            fn: callable producing the execution result for the job.
            *args: positional arguments forwarded to ``fn``.

        Returns:
            The newly registered job, still ``PENDING`` or already picked up
            by a worker.

        The job always reaches a terminal state: the return value of ``fn``
        is stored via ``mark_done`` and any raised exception via
        ``mark_error``.
        """
        job_id = secrets.token_hex(8)
        job = Job(job_id)
        with self._lock:
            self._jobs[job_id] = job

        def _run() -> None:
            job.mark_running()
            try:
                execution = fn(*args)
            except BaseException as exc:
                # Exceptions raised without a message (e.g. ``KeyError()``)
                # stringify to "", which would leave the job with a blank
                # error; fall back to the exception class name.
                job.mark_error(str(exc) or type(exc).__name__)
                # Non-Exception errors (SystemExit, KeyboardInterrupt) are
                # recorded too, so waiters are released, and then propagated.
                if not isinstance(exc, Exception):
                    raise
            else:
                job.mark_done(execution)

        self._executor.submit(_run)
        return job

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job registered under ``job_id``, or None if unknown."""
        with self._lock:
            return self._jobs.get(job_id)

    def list_ids(self) -> list:
        """Return a snapshot list of all registered job ids."""
        with self._lock:
            return list(self._jobs)
@@ -0,0 +1,17 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Prometheus metrics setup for the daemon. Requires the optional [metrics] extra."""
4
+
5
+ from __future__ import annotations
6
+
7
# Import probe for the optional dependency: _AVAILABLE records whether
# ``prometheus_client`` could be imported when this module was loaded.
+ try:
8
+ import prometheus_client # type: ignore[import-untyped] # noqa: F401
9
+
10
+ _AVAILABLE = True
11
+ except ImportError: # pragma: no cover
12
+ _AVAILABLE = False
13
+
14
+
15
# Accessor for the module-level _AVAILABLE flag; the import is attempted once
# at module load, not on every call.
+ def is_prometheus_available() -> bool:
16
+ """Return True when the prometheus-client extra is installed."""
17
+ return _AVAILABLE
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,59 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Executions HTTP API and global stats/purge."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Dict, List, Literal, Union
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
# One entry of an execution listing (element type of
# ExecutionListResponse.executions). All four fields are required strings.
+ class ExecutionSummaryResponse(BaseModel):
13
+ execution_key: str
14
+ kind: str
15
+ client: str
16
+ model: str
17
+
18
+
19
# Wrapper around a list of execution summaries.
# NOTE(review): `total` is presumably len(executions) — confirm in routes/executions.py.
+ class ExecutionListResponse(BaseModel):
20
+ executions: List[ExecutionSummaryResponse]
21
+ total: int
22
+
23
+
24
# Global statistics: an execution count plus a mapping of event name -> count
# (the README describes /stats as "global execution count + event counts").
+ class GlobalStatsResponse(BaseModel):
25
+ executions: int
26
+ event_counts: Dict[str, int]
27
+
28
+
29
# Purge variant that takes no target: {"by": "all"}.
+ class PurgeByAll(BaseModel):
30
+ by: Literal["all"]
31
+
32
+
33
# Purge variant selected by {"by": "key"}; `target` is required.
# NOTE(review): presumably `target` is an execution key — confirm whether prefixes are accepted.
+ class PurgeByKey(BaseModel):
34
+ by: Literal["key"]
35
+ target: str
36
+
37
+
38
# Purge variant selected by {"by": "tag"}; `target` is required
# (presumably the tag name — verify against the purge route).
+ class PurgeByTag(BaseModel):
39
+ by: Literal["tag"]
40
+ target: str
41
+
42
+
43
# Purge variant selected by {"by": "session"}; `target` is required
# (the README example passes a session id such as "abc123").
+ class PurgeBySession(BaseModel):
44
+ by: Literal["session"]
45
+ target: str
46
+
47
+
48
# Purge variant selected by {"by": "session_tag"}; `target` is required.
# NOTE(review): the expected format of `target` is not visible here — confirm in the purge route.
+ class PurgeBySessionTag(BaseModel):
49
+ by: Literal["session_tag"]
50
+ target: str
51
+
52
+
53
# Purge request body: exactly one of the five variants, told apart by the
# literal value of their `by` field.
+ PurgeBody = Union[PurgeByAll, PurgeByKey, PurgeByTag, PurgeBySession, PurgeBySessionTag]
54
+
55
+
56
# Counters reported after a purge: executions removed, bytes freed, blobs removed.
+ class PurgeResponse(BaseModel):
57
+ executions_removed: int
58
+ bytes_freed: int
59
+ blobs_removed: int
@@ -0,0 +1,39 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Claude gateway (/gateway/claude/v1/messages)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
# One conversation message of a gateway request.
# NOTE(review): `content` accepts a plain string only; a client that sends a
# list of content blocks would fail validation — confirm this is intended.
+ class MessageParam(BaseModel):
13
+ role: str
14
+ content: str
15
+
16
+
17
# Request body for the Claude gateway. `model` and `messages` are required;
# `max_tokens` falls back to 8192 when omitted.
# NOTE(review): `session_id` looks like a daemon-side extension rather than an
# upstream API field — confirm how routes/gateway.py uses it.
+ class MessagesRequest(BaseModel):
18
+ model: str
19
+ messages: List[MessageParam]
20
+ max_tokens: int = 8192
21
+ system: Optional[str] = None
22
+ session_id: Optional[str] = None
23
+
24
+
25
# One block of response content; `type` defaults to "text".
+ class ContentBlock(BaseModel):
26
+ type: str = "text"
27
+ text: str
28
+
29
+
30
# Response body of the Claude gateway. `type`, `role` and `stop_reason` carry
# fixed defaults; `usage` is a free-form mapping.
+ class MessagesResponse(BaseModel):
31
+ id: str
32
+ type: str = "message"
33
+ role: str = "assistant"
34
+ content: List[ContentBlock]
35
+ model: str
36
+ stop_reason: str = "end_turn"
37
+ stop_sequence: Optional[str] = None
38
+ usage: Dict[str, Any]
39
# Extra field on top of the Anthropic Messages shape (documented in the README).
+ x_cache_hit: bool = False
@@ -0,0 +1,25 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic response models for /health, /ready, and /info."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
# Liveness payload; the README shows it as {"status": "ok"}.
+ class HealthResponse(BaseModel):
13
+ status: str
14
+
15
+
16
# Readiness payload: a status string plus an optional detail message.
# NOTE(review): presumably `detail` explains a failed store probe — confirm in routes/health.py.
+ class ReadyResponse(BaseModel):
17
+ status: str
18
+ detail: Optional[str] = None
19
+
20
+
21
# Payload for the info endpoint: daemon version, store path, bound session
# (None when no session is bound) and the list of adapter names.
+ class InfoResponse(BaseModel):
22
+ version: str
23
+ store_root: str
24
+ session_id: Optional[str] = None
25
+ adapters: List[str]
@@ -0,0 +1,28 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Jobs HTTP API (detached background executions)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
# Submission body for a background job. Only `client` and `model` are
# required; the text fields default to "" and `session_id` to None.
+ class JobSubmitBody(BaseModel):
13
+ client: str
14
+ model: str
15
+ effort: str = ""
16
+ prompt: str = ""
17
+ context: str = ""
18
# A literal [] default is fine on a pydantic model: defaults are copied per instance.
+ tags: List[str] = []
19
+ session_id: Optional[str] = None
20
+
21
+
22
# Status payload for a job. Only `job_id` and `state` are always present;
# the result and error fields default to None.
# NOTE(review): `state` presumably carries a JobState value
# ("pending", "running", "done", "error") — confirm in routes/jobs.py.
+ class JobResponse(BaseModel):
23
+ job_id: str
24
+ state: str
25
+ execution_key: Optional[str] = None
26
+ stdout: Optional[str] = None
27
+ stderr: Optional[str] = None
28
+ error: Optional[str] = None
@@ -0,0 +1,27 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the /run endpoint."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
# Request body for /run. Only `client` and `model` are required; the field
# set mirrors JobSubmitBody.
+ class RunBody(BaseModel):
13
+ client: str
14
+ model: str
15
+ effort: str = ""
16
+ prompt: str = ""
17
+ context: str = ""
18
# A literal [] default is fine on a pydantic model: defaults are copied per instance.
+ tags: List[str] = []
19
+ session_id: Optional[str] = None
20
+
21
+
22
# Result payload for /run: the execution key, its state, whether the result
# came from the cache, and the optional captured stdout/stderr.
+ class RunResponse(BaseModel):
23
+ execution_key: str
24
+ state: str
25
+ cache_hit: bool
26
+ stdout: Optional[str] = None
27
+ stderr: Optional[str] = None