generic-ml-cache-daemon 0.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- generic_ml_cache_daemon-0.13.0/.gitignore +18 -0
- generic_ml_cache_daemon-0.13.0/PKG-INFO +36 -0
- generic_ml_cache_daemon-0.13.0/README.md +165 -0
- generic_ml_cache_daemon-0.13.0/pyproject.toml +64 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/__init__.py +3 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/__main__.py +27 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/app.py +63 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/jobs.py +86 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/metrics.py +17 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/__init__.py +2 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/execution.py +59 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/gateway.py +39 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/health.py +25 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/job.py +28 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/run.py +27 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/models/session.py +43 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/py.typed +0 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/__init__.py +2 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/executions.py +102 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/gateway.py +119 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/health.py +75 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/jobs.py +91 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/run.py +114 -0
- generic_ml_cache_daemon-0.13.0/src/generic_ml_cache_daemon/routes/sessions.py +122 -0
- generic_ml_cache_daemon-0.13.0/tests/conftest.py +19 -0
- generic_ml_cache_daemon-0.13.0/tests/test_app.py +46 -0
- generic_ml_cache_daemon-0.13.0/tests/test_executions.py +100 -0
- generic_ml_cache_daemon-0.13.0/tests/test_gateway.py +159 -0
- generic_ml_cache_daemon-0.13.0/tests/test_health.py +135 -0
- generic_ml_cache_daemon-0.13.0/tests/test_jobs.py +190 -0
- generic_ml_cache_daemon-0.13.0/tests/test_main.py +55 -0
- generic_ml_cache_daemon-0.13.0/tests/test_metrics.py +18 -0
- generic_ml_cache_daemon-0.13.0/tests/test_run.py +146 -0
- generic_ml_cache_daemon-0.13.0/tests/test_sessions.py +216 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
.venv/
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.pyc
|
|
4
|
+
dist/
|
|
5
|
+
build/
|
|
6
|
+
*.egg-info/
|
|
7
|
+
.coverage
|
|
8
|
+
coverage.xml
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.ruff_cache/
|
|
11
|
+
|
|
12
|
+
# editors / IDEs
|
|
13
|
+
.idea/
|
|
14
|
+
.vscode/
|
|
15
|
+
|
|
16
|
+
# machine-local repo tooling (not project content)
|
|
17
|
+
secret-audit.sh
|
|
18
|
+
setup-repo.sh
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: generic-ml-cache-daemon
|
|
3
|
+
Version: 0.13.0
|
|
4
|
+
Summary: Local HTTP daemon for generic-ml-cache: REST API, gateway proxy, and session transport. A thin inbound driver over generic-ml-cache-core.
|
|
5
|
+
Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
|
|
6
|
+
Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
|
|
7
|
+
Project-URL: Issues, https://github.com/danielslobozian/generic-ml-cache/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/danielslobozian/generic-ml-cache/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Daniel Slobozian
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
Keywords: ai,cache,daemon,fastapi,gateway,http,llm,proxy
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: fastapi>=0.115
|
|
25
|
+
Requires-Dist: generic-ml-cache-core>=0.12.0
|
|
26
|
+
Requires-Dist: sse-starlette>=2.0
|
|
27
|
+
Requires-Dist: uvicorn>=0.30
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: coverage>=7; extra == 'dev'
|
|
30
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
31
|
+
Requires-Dist: prometheus-client>=0.20; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff>=0.15; extra == 'dev'
|
|
35
|
+
Provides-Extra: metrics
|
|
36
|
+
Requires-Dist: prometheus-client>=0.20; extra == 'metrics'
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# generic-ml-cache-daemon
|
|
2
|
+
|
|
3
|
+
The HTTP daemon for [generic-ml-cache](https://github.com/danielslobozian/generic-ml-cache).
|
|
4
|
+
Exposes the cache store and all session/execution functionality as a local REST API with
|
|
5
|
+
server-sent event (SSE) streaming, plus a caching proxy gateway for the Anthropic Messages API.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install generic-ml-cache-daemon # runtime only
|
|
11
|
+
pip install "generic-ml-cache-daemon[metrics]" # + Prometheus /metrics endpoint
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Starting the daemon
|
|
15
|
+
|
|
16
|
+
### Via the CLI (recommended)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
gmlcache daemon start # foreground, default 127.0.0.1:8765
|
|
20
|
+
gmlcache daemon start --port 9000 # custom port
|
|
21
|
+
gmlcache daemon start --metrics # enable /metrics endpoint
|
|
22
|
+
gmlcache daemon start --session abc # bind to session "abc"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Check status or stop:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
gmlcache daemon status
|
|
29
|
+
gmlcache daemon stop
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Direct launch
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
python -m generic_ml_cache_daemon # uses defaults
|
|
36
|
+
GMLCACHE_STORE=/path/to/store python -m generic_ml_cache_daemon
|
|
37
|
+
GMLCACHE_SESSION=abc GMLCACHE_METRICS=1 python -m generic_ml_cache_daemon
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Environment variables:
|
|
41
|
+
|
|
42
|
+
| Variable | Default | Description |
|
|
43
|
+
|---|---|---|
|
|
44
|
+
| `GMLCACHE_STORE` | `~/.gmlcache` | Path to the cache store directory |
|
|
45
|
+
| `GMLCACHE_SESSION` | *(none)* | Bind all intercepted calls to a session |
|
|
46
|
+
| `GMLCACHE_METRICS` | `0` | Set to `1` (or `true` / `yes`, case-insensitive) to enable the Prometheus `/metrics` endpoint |
|
|
47
|
+
|
|
48
|
+
## HTTP API
|
|
49
|
+
|
|
50
|
+
The daemon listens on `http://127.0.0.1:8765` by default.
|
|
51
|
+
Interactive API docs are available at `/docs` (Swagger UI) and `/redoc`.
|
|
52
|
+
|
|
53
|
+
### Observability
|
|
54
|
+
|
|
55
|
+
| Method | Path | Description |
|
|
56
|
+
|---|---|---|
|
|
57
|
+
| `GET` | `/health` | Liveness: `{"status":"ok"}` |
|
|
58
|
+
| `GET` | `/ready` | Readiness: probes the store; 503 if inaccessible |
|
|
59
|
+
| `GET` | `/info` | Version, store path, adapters, bound session |
|
|
60
|
+
| `GET` | `/metrics` | Prometheus text (requires `[metrics]` extra + `--metrics`) |
|
|
61
|
+
|
|
62
|
+
### Sessions
|
|
63
|
+
|
|
64
|
+
| Method | Path | Description |
|
|
65
|
+
|---|---|---|
|
|
66
|
+
| `GET` | `/sessions` | List all session IDs |
|
|
67
|
+
| `POST` | `/sessions` | Create a session (body: `{tags, spec}`) |
|
|
68
|
+
| `GET` | `/sessions/{id}` | Get session tags and spec (404 if unknown) |
|
|
69
|
+
| `GET` | `/sessions/{id}/stats` | Calls, hits, hit rate |
|
|
70
|
+
| `PUT` | `/sessions/{id}/spec` | Set or replace execution spec |
|
|
71
|
+
| `DELETE` | `/sessions/{id}/spec` | Remove execution spec |
|
|
72
|
+
| `POST` | `/sessions/{id}/tags` | Add a tag |
|
|
73
|
+
| `DELETE` | `/sessions/{id}/tags/{tag}` | Remove a tag |
|
|
74
|
+
|
|
75
|
+
### Executions & Global Stats
|
|
76
|
+
|
|
77
|
+
| Method | Path | Description |
|
|
78
|
+
|---|---|---|
|
|
79
|
+
| `GET` | `/executions` | List all current (servable) executions |
|
|
80
|
+
| `GET` | `/executions/{key}` | Inspect by exact key or prefix (409 on ambiguous prefix) |
|
|
81
|
+
| `GET` | `/stats` | Global execution count + event counts |
|
|
82
|
+
| `POST` | `/purge` | Purge by scope: `all`, `key`, `tag`, `session`, `session_tag` |
|
|
83
|
+
|
|
84
|
+
**Purge body examples:**
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{"by": "all"}
|
|
88
|
+
{"by": "key", "target": "deadbeef"}
|
|
89
|
+
{"by": "session", "target": "abc123"}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Run (synchronous or SSE)
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
POST /run
|
|
96
|
+
{
|
|
97
|
+
"client": "anthropic",
|
|
98
|
+
"model": "claude-opus-4-8",
|
|
99
|
+
"prompt": "Summarise the paper.",
|
|
100
|
+
"effort": "medium",
|
|
101
|
+
"session_id": "abc"
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
- `Accept: application/json` (default) — blocks and returns `{execution_key, state, cache_hit, stdout, stderr}`
|
|
106
|
+
- `Accept: text/event-stream` — SSE: `{"type":"accepted"}` immediately, then `{"type":"complete", ...}` on finish
|
|
107
|
+
|
|
108
|
+
### Jobs (detached / async)
|
|
109
|
+
|
|
110
|
+
| Method | Path | Description |
|
|
111
|
+
|---|---|---|
|
|
112
|
+
| `POST` | `/jobs` | Submit a background execution; returns `{job_id, state}` with 202 |
|
|
113
|
+
| `GET` | `/jobs` | List all job IDs |
|
|
114
|
+
| `GET` | `/jobs/{id}` | Poll state: `pending`, `running`, `done`, `error` |
|
|
115
|
+
| `GET` | `/jobs/{id}/stream` | SSE: periodic `status` events, then `complete` or `error` |
|
|
116
|
+
|
|
117
|
+
### Claude Gateway
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
POST /gateway/claude/v1/messages
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
A cache-transparent proxy for the Anthropic Messages API. Requests that hit the
|
|
124
|
+
cache are returned without a network call to Anthropic. The response shape matches
|
|
125
|
+
the Anthropic Messages API exactly, with one extra field: `x_cache_hit: bool`.
|
|
126
|
+
|
|
127
|
+
**Limitations (0.13.0):** single-turn conversations only (one `role: user` message,
|
|
128
|
+
no prior assistant turns). Multi-turn support is planned.
|
|
129
|
+
|
|
130
|
+
**Example:**
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
curl http://127.0.0.1:8765/gateway/claude/v1/messages \
|
|
134
|
+
-H "Content-Type: application/json" \
|
|
135
|
+
-d '{
|
|
136
|
+
"model": "claude-opus-4-8",
|
|
137
|
+
"messages": [{"role": "user", "content": "Hello, world!"}],
|
|
138
|
+
"max_tokens": 256
|
|
139
|
+
}'
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Point any Anthropic SDK client at the gateway by overriding the base URL:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
import anthropic
|
|
146
|
+
|
|
147
|
+
client = anthropic.Anthropic(
|
|
148
|
+
api_key="...",
|
|
149
|
+
base_url="http://127.0.0.1:8765/gateway/claude",
|
|
150
|
+
)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Architecture
|
|
154
|
+
|
|
155
|
+
The daemon is a thin FastAPI layer over the `generic-ml-cache-core` hexagonal
|
|
156
|
+
architecture. It does not own any state — all persistence goes through the
|
|
157
|
+
existing `JournalMetrics` (SQLite registry) and `SqliteExecutionRepository`
|
|
158
|
+
that the core library manages.
|
|
159
|
+
|
|
160
|
+
Background jobs run in a `ThreadPoolExecutor` inside an in-process
|
|
161
|
+
`JobRegistry`; job state is not persisted across daemon restarts.
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
|
|
165
|
+
Apache-2.0
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.18"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "generic-ml-cache-daemon"
|
|
7
|
+
version = "0.13.0"
|
|
8
|
+
description = "Local HTTP daemon for generic-ml-cache: REST API, gateway proxy, and session transport. A thin inbound driver over generic-ml-cache-core."
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
license-files = ["LICENSE", "NOTICE"]
|
|
12
|
+
authors = [{ name = "Daniel Slobozian" }]
|
|
13
|
+
keywords = ["cache", "llm", "ai", "daemon", "http", "fastapi", "gateway", "proxy"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.9",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
"Topic :: Utilities",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"generic-ml-cache-core>=0.12.0",
|
|
29
|
+
"fastapi>=0.115",
|
|
30
|
+
"uvicorn>=0.30",
|
|
31
|
+
"sse-starlette>=2.0",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/danielslobozian/generic-ml-cache"
|
|
36
|
+
Repository = "https://github.com/danielslobozian/generic-ml-cache"
|
|
37
|
+
Issues = "https://github.com/danielslobozian/generic-ml-cache/issues"
|
|
38
|
+
Changelog = "https://github.com/danielslobozian/generic-ml-cache/blob/main/CHANGELOG.md"
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
# Optional Prometheus /metrics endpoint. Off by default.
|
|
42
|
+
metrics = ["prometheus-client>=0.20"]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=7",
|
|
45
|
+
"pytest-cov",
|
|
46
|
+
"coverage>=7",
|
|
47
|
+
"ruff>=0.15",
|
|
48
|
+
"httpx>=0.27",
|
|
49
|
+
"prometheus-client>=0.20",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tool.hatch.build.targets.wheel]
|
|
53
|
+
packages = ["src/generic_ml_cache_daemon"]
|
|
54
|
+
|
|
55
|
+
[tool.pytest.ini_options]
|
|
56
|
+
testpaths = ["tests"]
|
|
57
|
+
addopts = "-ra"
|
|
58
|
+
|
|
59
|
+
[tool.coverage.run]
|
|
60
|
+
omit = ["*/generic_ml_cache_daemon/__main__.py"]
|
|
61
|
+
|
|
62
|
+
[tool.ruff]
|
|
63
|
+
line-length = 100
|
|
64
|
+
target-version = "py39"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Entry point: run the daemon via ``python -m generic_ml_cache_daemon``."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import uvicorn
|
|
11
|
+
|
|
12
|
+
from generic_ml_cache_daemon.app import create_app
|
|
13
|
+
|
|
14
|
+
# Bind address and port used by the direct ``python -m`` launch.
_DEFAULT_HOST = "127.0.0.1"
_DEFAULT_PORT = 8765


def main() -> None:
    """Configure the daemon from environment variables and serve it with uvicorn.

    Environment variables read:
        GMLCACHE_STORE: store directory; ``~/.gmlcache`` when unset or empty.
        GMLCACHE_SESSION: session ID passed to ``create_app``; unset or empty
            means no session.
        GMLCACHE_METRICS: ``1``, ``true`` or ``yes`` (case-insensitive,
            surrounding whitespace ignored) enables metrics.

    The call to ``uvicorn.run`` blocks until the server stops.
    """
    # ``or`` instead of a ``.get`` default: an empty GMLCACHE_STORE would
    # otherwise become Path(""), i.e. the current working directory.
    store_root = Path(os.environ.get("GMLCACHE_STORE") or Path.home() / ".gmlcache")
    session_id = os.environ.get("GMLCACHE_SESSION") or None
    metrics_flag = os.environ.get("GMLCACHE_METRICS", "").strip().lower()
    enable_metrics = metrics_flag in ("1", "true", "yes")
    application = create_app(store_root, session_id=session_id, enable_metrics=enable_metrics)
    uvicorn.run(application, host=_DEFAULT_HOST, port=_DEFAULT_PORT)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""FastAPI application factory for the generic-ml-cache daemon."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from fastapi import FastAPI
|
|
11
|
+
|
|
12
|
+
from generic_ml_cache_core.adapter.inbound.composition import build_use_cases
|
|
13
|
+
|
|
14
|
+
from generic_ml_cache_daemon import __version__
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def create_app(
    store_root: Path,
    *,
    session_id: Optional[str] = None,
    enable_metrics: bool = False,
) -> FastAPI:
    """Build the daemon's FastAPI application with all routers attached.

    Args:
        store_root: path to the gmlcache store directory; handed to
            ``build_use_cases`` and kept on ``app.state.store_root``.
        session_id: optional session all intercepted calls are recorded under;
            kept on ``app.state.session_id``.
        enable_metrics: whether to expose the Prometheus /metrics endpoint;
            kept on ``app.state.enable_metrics``.

    Returns:
        The fully wired application. All routes are mounted here; callers
        should not mount additional routes after construction.
    """
    app = FastAPI(
        title="generic-ml-cache daemon",
        version=__version__,
        docs_url="/docs",
        redoc_url="/redoc",
    )

    # Shared per-application objects that the route handlers read back.
    state = app.state
    state.wired = build_use_cases(store_root)
    state.store_root = store_root
    state.session_id = session_id
    state.enable_metrics = enable_metrics

    # Function-level imports, kept where the original placed them.
    # NOTE(review): presumably this avoids an import cycle — confirm.
    from generic_ml_cache_daemon.jobs import JobRegistry
    from generic_ml_cache_daemon.routes.executions import router as executions_router
    from generic_ml_cache_daemon.routes.gateway import router as gateway_router
    from generic_ml_cache_daemon.routes.health import router as health_router
    from generic_ml_cache_daemon.routes.jobs import router as jobs_router
    from generic_ml_cache_daemon.routes.run import router as run_router
    from generic_ml_cache_daemon.routes.sessions import router as sessions_router

    state.job_registry = JobRegistry()

    # Registration order is preserved from the original implementation.
    ordered_routers = (
        health_router,
        sessions_router,
        executions_router,
        run_router,
        jobs_router,
        gateway_router,
    )
    for router in ordered_routers:
        app.include_router(router)

    return app
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""In-process job registry for detached background executions.
|
|
4
|
+
|
|
5
|
+
Each POST /jobs submission gets a unique job_id. The execution runs in a
|
|
6
|
+
background thread; callers poll GET /jobs/{id} or stream GET /jobs/{id}/stream.
|
|
7
|
+
The registry is in-process memory only — jobs are not persisted across restarts.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import concurrent.futures
|
|
13
|
+
import secrets
|
|
14
|
+
import threading
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import Dict, Optional
|
|
17
|
+
|
|
18
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class JobState(str, Enum):
    """Lifecycle states of a background job.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    PENDING = "pending"  # initial state set by Job.__init__
    RUNNING = "running"  # set by Job.mark_running
    DONE = "done"  # set by Job.mark_done
    ERROR = "error"  # set by Job.mark_error
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Job:
    """State holder for one submitted background execution.

    Attributes:
        job_id: identifier given at construction.
        state: current ``JobState``; starts as ``PENDING``.
        execution: value passed to ``mark_done``; ``None`` until then.
        error: message passed to ``mark_error``; ``None`` until then.
    """

    def __init__(self, job_id: str) -> None:
        self._done_event = threading.Event()
        self.job_id = job_id
        self.state = JobState.PENDING
        self.execution: Optional["MlExecution"] = None
        self.error: Optional[str] = None

    def wait(self, timeout: Optional[float] = None) -> bool:
        """Block until the job is done or errored, or until ``timeout`` elapses.

        Returns:
            ``True`` if the job reached a terminal state, ``False`` on timeout.
        """
        finished = self._done_event.wait(timeout=timeout)
        return finished

    def mark_running(self) -> None:
        """Record that the job has started executing."""
        self.state = JobState.RUNNING

    def mark_done(self, execution: "MlExecution") -> None:
        """Store the result, switch to ``DONE`` and wake any waiters."""
        self.execution = execution
        self._finish(JobState.DONE)

    def mark_error(self, error: str) -> None:
        """Store the error message, switch to ``ERROR`` and wake any waiters."""
        self.error = error
        self._finish(JobState.ERROR)

    def _finish(self, terminal_state: JobState) -> None:
        # The payload is written by the caller first, then the state, then the
        # event, so a woken waiter sees both.
        self.state = terminal_state
        self._done_event.set()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class JobRegistry:
    """Thread-safe in-memory registry of submitted jobs.

    Jobs are kept in a dict guarded by a lock and executed on a
    ``ThreadPoolExecutor`` with four workers. Entries are never removed after
    a successful submission.
    """

    def __init__(self) -> None:
        self._jobs: Dict[str, Job] = {}
        self._lock = threading.Lock()
        self._executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=4, thread_name_prefix="gmlc-job"
        )

    def submit(self, fn, *args) -> Job:
        """Register a new job and schedule ``fn(*args)`` on the worker pool.

        Args:
            fn: callable whose return value becomes ``job.execution``.
            *args: positional arguments forwarded to ``fn``.

        Returns:
            The newly created ``Job``.

        Raises:
            Exception: whatever the executor raises when it cannot accept the
                task; the job is unregistered before the error propagates.
        """
        job_id = secrets.token_hex(8)
        job = Job(job_id)
        with self._lock:
            self._jobs[job_id] = job

        def _run() -> None:
            job.mark_running()
            try:
                execution = fn(*args)
            except Exception as exc:
                # Some exceptions stringify to "" (e.g. ``KeyError()``); fall
                # back to the class name so the error is never blank.
                job.mark_error(str(exc) or type(exc).__name__)
            else:
                job.mark_done(execution)

        try:
            self._executor.submit(_run)
        except Exception:
            # The task was never scheduled, so drop the job rather than leave
            # an entry that would stay pending forever.
            with self._lock:
                self._jobs.pop(job_id, None)
            raise
        return job

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job registered under ``job_id``, or ``None`` if unknown."""
        with self._lock:
            return self._jobs.get(job_id)

    def list_ids(self) -> list:
        """Return a snapshot list of all registered job IDs."""
        with self._lock:
            return list(self._jobs)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Prometheus metrics setup for the daemon. Requires the optional [metrics] extra."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
# Probe once at import time for the optional prometheus-client dependency.
try:
    import prometheus_client  # type: ignore[import-untyped]  # noqa: F401
except ImportError:  # pragma: no cover
    _AVAILABLE = False
else:
    _AVAILABLE = True


def is_prometheus_available() -> bool:
    """Report whether ``prometheus_client`` could be imported at module load."""
    return _AVAILABLE
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Executions HTTP API and global stats/purge."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Dict, List, Literal, Union
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ExecutionSummaryResponse(BaseModel):
    # One entry of ``ExecutionListResponse.executions``; all four fields are
    # required strings.
    # NOTE(review): field meanings are inferred from their names only —
    # confirm against the executions route.
    execution_key: str
    kind: str
    client: str
    model: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ExecutionListResponse(BaseModel):
    # List of execution summaries plus an integer ``total``.
    # NOTE(review): presumably ``total == len(executions)`` — verify in the route.
    executions: List[ExecutionSummaryResponse]
    total: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class GlobalStatsResponse(BaseModel):
    # Global statistics: an execution count and a map of integer counts keyed
    # by string.
    # NOTE(review): presumably the ``GET /stats`` response the README describes
    # as "Global execution count + event counts" — confirm in the route.
    executions: int
    event_counts: Dict[str, int]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PurgeByAll(BaseModel):
    # Purge request variant selected by ``{"by": "all"}``; it has no target.
    by: Literal["all"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class PurgeByKey(BaseModel):
    # Purge request variant selected by ``"by": "key"``; ``target`` is required.
    # README example: {"by": "key", "target": "deadbeef"}.
    by: Literal["key"]
    target: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PurgeByTag(BaseModel):
    # Purge request variant selected by ``"by": "tag"``; ``target`` is required.
    by: Literal["tag"]
    target: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class PurgeBySession(BaseModel):
    # Purge request variant selected by ``"by": "session"``; ``target`` is required.
    # README example: {"by": "session", "target": "abc123"}.
    by: Literal["session"]
    target: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class PurgeBySessionTag(BaseModel):
    # Purge request variant selected by ``"by": "session_tag"``; ``target`` is required.
    # NOTE(review): what ``target`` names for this scope is not visible here — confirm.
    by: Literal["session_tag"]
    target: str
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Union of the purge request variants above; each variant pins a distinct
# ``by`` literal.
PurgeBody = Union[PurgeByAll, PurgeByKey, PurgeByTag, PurgeBySession, PurgeBySessionTag]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class PurgeResponse(BaseModel):
    # Result of a purge: three required integer counters.
    # NOTE(review): exact semantics of each counter come from the core
    # library — confirm units for ``bytes_freed``.
    executions_removed: int
    bytes_freed: int
    blobs_removed: int
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Claude gateway (/gateway/claude/v1/messages)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MessageParam(BaseModel):
    # One conversation message in a gateway request.
    # ``content`` is typed as a plain string only.
    role: str
    content: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MessagesRequest(BaseModel):
    # Request body for the Claude gateway (/gateway/claude/v1/messages).
    # The README limits 0.13.0 to single-turn conversations; this model does
    # not itself restrict the length of ``messages``.
    model: str
    messages: List[MessageParam]
    max_tokens: int = 8192  # used when the caller omits max_tokens
    system: Optional[str] = None
    session_id: Optional[str] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ContentBlock(BaseModel):
    # One element of ``MessagesResponse.content``; ``type`` defaults to "text".
    type: str = "text"
    text: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MessagesResponse(BaseModel):
    # Gateway response body. Per the README it mirrors the Anthropic Messages
    # API response with one extra field, ``x_cache_hit``.
    id: str
    type: str = "message"
    role: str = "assistant"
    content: List[ContentBlock]
    model: str
    stop_reason: str = "end_turn"
    stop_sequence: Optional[str] = None
    usage: Dict[str, Any]  # free-form mapping; keys are not constrained here
    x_cache_hit: bool = False  # gateway-specific extension field
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic response models for /health, /ready, and /info."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HealthResponse(BaseModel):
    # Liveness payload; the README shows {"status": "ok"} for GET /health.
    status: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ReadyResponse(BaseModel):
    # Readiness payload: a status string plus optional detail text.
    # NOTE(review): presumably ``detail`` carries the failure reason when the
    # store probe fails — confirm in the health route.
    status: str
    detail: Optional[str] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class InfoResponse(BaseModel):
    # Payload for GET /info; the README lists it as "Version, store path,
    # adapters, bound session".
    version: str
    store_root: str
    session_id: Optional[str] = None  # None when no session is bound
    adapters: List[str]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Jobs HTTP API (detached background executions)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JobSubmitBody(BaseModel):
    # Request body for submitting a background job (README: POST /jobs).
    # ``client`` and ``model`` are required; every other field has a default.
    # Same field set as ``RunBody`` in models/run.py.
    client: str
    model: str
    effort: str = ""
    prompt: str = ""
    context: str = ""
    # NOTE(review): relies on pydantic copying the list default per instance —
    # confirm for the pydantic version in use.
    tags: List[str] = []
    session_id: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JobResponse(BaseModel):
    # Job status payload. ``job_id`` and ``state`` are required; the remaining
    # fields default to None.
    # README lists the states as pending, running, done, error.
    job_id: str
    state: str
    execution_key: Optional[str] = None
    stdout: Optional[str] = None
    stderr: Optional[str] = None
    error: Optional[str] = None
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the /run endpoint."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RunBody(BaseModel):
    # Request body for POST /run.
    # ``client`` and ``model`` are required; every other field has a default.
    client: str
    model: str
    effort: str = ""
    prompt: str = ""
    context: str = ""
    # NOTE(review): relies on pydantic copying the list default per instance —
    # confirm for the pydantic version in use.
    tags: List[str] = []
    session_id: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RunResponse(BaseModel):
    # Response of the blocking POST /run call; the README lists the shape as
    # {execution_key, state, cache_hit, stdout, stderr}.
    execution_key: str
    state: str
    cache_hit: bool
    stdout: Optional[str] = None
    stderr: Optional[str] = None
|