generic-ml-cache-daemon 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- generic_ml_cache_daemon/__init__.py +3 -0
- generic_ml_cache_daemon/__main__.py +27 -0
- generic_ml_cache_daemon/app.py +63 -0
- generic_ml_cache_daemon/jobs.py +86 -0
- generic_ml_cache_daemon/metrics.py +17 -0
- generic_ml_cache_daemon/models/__init__.py +2 -0
- generic_ml_cache_daemon/models/execution.py +59 -0
- generic_ml_cache_daemon/models/gateway.py +39 -0
- generic_ml_cache_daemon/models/health.py +25 -0
- generic_ml_cache_daemon/models/job.py +28 -0
- generic_ml_cache_daemon/models/run.py +27 -0
- generic_ml_cache_daemon/models/session.py +43 -0
- generic_ml_cache_daemon/py.typed +0 -0
- generic_ml_cache_daemon/routes/__init__.py +2 -0
- generic_ml_cache_daemon/routes/executions.py +102 -0
- generic_ml_cache_daemon/routes/gateway.py +119 -0
- generic_ml_cache_daemon/routes/health.py +75 -0
- generic_ml_cache_daemon/routes/jobs.py +91 -0
- generic_ml_cache_daemon/routes/run.py +114 -0
- generic_ml_cache_daemon/routes/sessions.py +122 -0
- generic_ml_cache_daemon-0.13.0.dist-info/METADATA +36 -0
- generic_ml_cache_daemon-0.13.0.dist-info/RECORD +23 -0
- generic_ml_cache_daemon-0.13.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Entry point: run the daemon via ``python -m generic_ml_cache_daemon``."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import uvicorn
|
|
11
|
+
|
|
12
|
+
from generic_ml_cache_daemon.app import create_app
|
|
13
|
+
|
|
14
|
+
# The daemon only listens on loopback at a fixed port.
_DEFAULT_HOST = "127.0.0.1"
_DEFAULT_PORT = 8765


def main() -> None:
    """Build the daemon application from environment variables and serve it.

    Environment variables read:
        GMLCACHE_STORE: store directory. Unset *or empty* falls back to
            ``~/.gmlcache`` (an empty value used to resolve to the current
            working directory, which is almost never what was intended).
        GMLCACHE_SESSION: optional session id; empty is treated as unset.
        GMLCACHE_METRICS: ``1``/``true``/``yes`` (case-insensitive, surrounding
            whitespace ignored) enables the ``/metrics`` endpoint.

    Blocks until the uvicorn server exits.
    """
    # ``or`` makes empty behave like unset, matching GMLCACHE_SESSION below,
    # and only evaluates Path.home() when the default is actually needed.
    store_root = Path(os.environ.get("GMLCACHE_STORE") or Path.home() / ".gmlcache")
    session_id = os.environ.get("GMLCACHE_SESSION") or None
    metrics_flag = os.environ.get("GMLCACHE_METRICS", "").strip().lower()
    enable_metrics = metrics_flag in ("1", "true", "yes")
    application = create_app(store_root, session_id=session_id, enable_metrics=enable_metrics)
    uvicorn.run(application, host=_DEFAULT_HOST, port=_DEFAULT_PORT)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""FastAPI application factory for the generic-ml-cache daemon."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from fastapi import FastAPI
|
|
11
|
+
|
|
12
|
+
from generic_ml_cache_core.adapter.inbound.composition import build_use_cases
|
|
13
|
+
|
|
14
|
+
from generic_ml_cache_daemon import __version__
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def create_app(
    store_root: Path,
    *,
    session_id: Optional[str] = None,
    enable_metrics: bool = False,
) -> FastAPI:
    """Create and configure the daemon FastAPI application.

    Args:
        store_root: path to the gmlcache store directory (the injected data source).
        session_id: optional session all intercepted calls are recorded under.
        enable_metrics: expose the Prometheus /metrics endpoint.

    Returns:
        A fully wired FastAPI application. Routes are mounted by this function;
        callers should not mount additional routes after construction.
    """
    daemon = FastAPI(
        title="generic-ml-cache daemon",
        version=__version__,
        docs_url="/docs",
        redoc_url="/redoc",
    )

    # Everything the route handlers need is published on ``app.state``.
    state = daemon.state
    state.wired = build_use_cases(store_root)
    state.store_root = store_root
    state.session_id = session_id
    state.enable_metrics = enable_metrics

    # Function-scope imports, kept exactly where the original had them
    # (presumably to avoid import cycles with the route modules - TODO confirm).
    from generic_ml_cache_daemon.jobs import JobRegistry
    from generic_ml_cache_daemon.routes.executions import router as executions_router
    from generic_ml_cache_daemon.routes.gateway import router as gateway_router
    from generic_ml_cache_daemon.routes.health import router as health_router
    from generic_ml_cache_daemon.routes.jobs import router as jobs_router
    from generic_ml_cache_daemon.routes.run import router as run_router
    from generic_ml_cache_daemon.routes.sessions import router as sessions_router

    state.job_registry = JobRegistry()

    # Mount order is preserved from the original.
    mount_order = (
        health_router,
        sessions_router,
        executions_router,
        run_router,
        jobs_router,
        gateway_router,
    )
    for route_group in mount_order:
        daemon.include_router(route_group)

    return daemon
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""In-process job registry for detached background executions.
|
|
4
|
+
|
|
5
|
+
Each POST /jobs submission gets a unique job_id. The execution runs in a
|
|
6
|
+
background thread; callers poll GET /jobs/{id} or stream GET /jobs/{id}/stream.
|
|
7
|
+
The registry is in-process memory only — jobs are not persisted across restarts.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import concurrent.futures
|
|
13
|
+
import secrets
|
|
14
|
+
import threading
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import Dict, Optional
|
|
17
|
+
|
|
18
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class JobState(str, Enum):
    """Lifecycle state of a background job.

    Members subclass ``str``, so each one compares equal to its lowercase
    string value.
    """

    PENDING = "pending"  # initial state of a newly created Job
    RUNNING = "running"  # set by Job.mark_running()
    DONE = "done"  # set by Job.mark_done()
    ERROR = "error"  # set by Job.mark_error()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Job:
    """Mutable status record for one submitted background execution.

    Attributes are written by the worker through the ``mark_*`` methods and
    read by whoever polls the job. ``wait`` blocks on an internal event that is
    set once the job reaches ``DONE`` or ``ERROR``.
    """

    def __init__(self, job_id: str) -> None:
        self.job_id = job_id
        self.state = JobState.PENDING
        self.execution: Optional[MlExecution] = None  # set by mark_done()
        self.error: Optional[str] = None  # set by mark_error()
        self._done_event = threading.Event()

    def wait(self, timeout: Optional[float] = None) -> bool:
        """Block until the job finishes or ``timeout`` seconds pass.

        Returns:
            True if the job has finished, False if the wait timed out.
        """
        finished = self._done_event.wait(timeout=timeout)
        return finished

    def mark_running(self) -> None:
        """Record that a worker has started executing the job."""
        self.state = JobState.RUNNING

    def mark_done(self, execution: MlExecution) -> None:
        """Store the result and move the job to ``DONE``."""
        self.execution = execution
        self._finish(JobState.DONE)

    def mark_error(self, error: str) -> None:
        """Store the error message and move the job to ``ERROR``."""
        self.error = error
        self._finish(JobState.ERROR)

    def _finish(self, terminal_state: JobState) -> None:
        """Set the terminal state, then wake every waiter."""
        # The payload is written before the state, and the state before the
        # event, so a woken waiter always sees a complete record.
        self.state = terminal_state
        self._done_event.set()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class JobRegistry:
    """Thread-safe in-memory registry of submitted jobs.

    Jobs run on a pool of four worker threads. Entries live in a plain dict
    guarded by a lock and are never evicted, so they last as long as the
    registry does.
    """

    def __init__(self) -> None:
        self._jobs: Dict[str, Job] = {}
        self._lock = threading.Lock()
        self._executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=4, thread_name_prefix="gmlc-job"
        )

    def submit(self, fn, *args, **kwargs) -> Job:
        """Register a new job and schedule ``fn(*args, **kwargs)`` on the pool.

        Args:
            fn: callable producing the execution result.
            *args: positional arguments forwarded to ``fn``.
            **kwargs: keyword arguments forwarded to ``fn``.

        Returns:
            The newly registered ``Job``. It is returned immediately; callers
            observe progress through its ``state`` or ``wait``.
        """
        job_id = secrets.token_hex(8)
        job = Job(job_id)
        with self._lock:
            self._jobs[job_id] = job

        def _run() -> None:
            job.mark_running()
            try:
                execution = fn(*args, **kwargs)
            except Exception as exc:
                # Some exceptions (e.g. ``TimeoutError()``) stringify to "".
                # Fall back to the type name so a failed job never reports an
                # empty, falsy error message.
                job.mark_error(str(exc) or type(exc).__name__)
            else:
                job.mark_done(execution)

        self._executor.submit(_run)
        return job

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job registered under ``job_id``, or None if unknown."""
        with self._lock:
            return self._jobs.get(job_id)

    def list_ids(self) -> list[str]:
        """Return a snapshot list of all registered job ids."""
        with self._lock:
            return list(self._jobs)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Prometheus metrics setup for the daemon. Requires the optional [metrics] extra."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
# Probe for the optional dependency once, at import time.
try:
    import prometheus_client  # type: ignore[import-untyped]  # noqa: F401
except ImportError:  # pragma: no cover
    _AVAILABLE = False
else:
    _AVAILABLE = True


def is_prometheus_available() -> bool:
    """Report whether ``prometheus_client`` could be imported."""
    return _AVAILABLE
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Executions HTTP API and global stats/purge."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Dict, List, Literal, Union
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ExecutionSummaryResponse(BaseModel):
    """Summary of a single execution: its key, kind, client and model."""

    execution_key: str
    kind: str
    client: str
    model: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ExecutionListResponse(BaseModel):
    """A list of execution summaries together with their count."""

    executions: List[ExecutionSummaryResponse]
    total: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class GlobalStatsResponse(BaseModel):
    """Store-wide statistics: an execution count and per-event counters."""

    executions: int
    event_counts: Dict[str, int]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Purge request bodies form a tagged union: the literal ``by`` field selects
# the scope, and every scope except "all" carries a ``target`` string.


class PurgeByAll(BaseModel):
    """Purge request with scope ``"all"``; takes no target."""

    by: Literal["all"]


class PurgeByKey(BaseModel):
    """Purge request with scope ``"key"``."""

    by: Literal["key"]
    target: str


class PurgeByTag(BaseModel):
    """Purge request with scope ``"tag"``."""

    by: Literal["tag"]
    target: str


class PurgeBySession(BaseModel):
    """Purge request with scope ``"session"``."""

    by: Literal["session"]
    target: str


class PurgeBySessionTag(BaseModel):
    """Purge request with scope ``"session_tag"``."""

    by: Literal["session_tag"]
    target: str


# Union of every accepted purge body shape.
PurgeBody = Union[PurgeByAll, PurgeByKey, PurgeByTag, PurgeBySession, PurgeBySessionTag]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class PurgeResponse(BaseModel):
    """Outcome of a purge: executions removed, bytes freed, blobs removed."""

    executions_removed: int
    bytes_freed: int
    blobs_removed: int
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Claude gateway (/gateway/claude/v1/messages)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MessageParam(BaseModel):
    """One conversation message: a role and plain-string content."""

    role: str
    content: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MessagesRequest(BaseModel):
    """Request body accepted by the Claude gateway messages endpoint."""

    model: str
    messages: List[MessageParam]
    max_tokens: int = 8192  # default applied when the client omits it
    system: Optional[str] = None  # optional system prompt
    session_id: Optional[str] = None  # optional session for this call
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ContentBlock(BaseModel):
    """A response content block; ``type`` defaults to ``"text"``."""

    type: str = "text"
    text: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MessagesResponse(BaseModel):
    """Response body returned by the Claude gateway messages endpoint."""

    id: str
    type: str = "message"
    role: str = "assistant"
    content: List[ContentBlock]
    model: str
    stop_reason: str = "end_turn"
    stop_sequence: Optional[str] = None
    usage: Dict[str, Any]  # token counters keyed by name
    x_cache_hit: bool = False  # gateway-specific cache-hit flag
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic response models for /health, /ready, and /info."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HealthResponse(BaseModel):
    """Liveness payload carrying a single status string."""

    status: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ReadyResponse(BaseModel):
    """Readiness payload: a status string plus an optional detail message."""

    status: str
    detail: Optional[str] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class InfoResponse(BaseModel):
    """Daemon description: version, store path, bound session and adapters."""

    version: str
    store_root: str
    session_id: Optional[str] = None
    adapters: List[str]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Jobs HTTP API (detached background executions)."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JobSubmitBody(BaseModel):
    """Request body for submitting a background job.

    Only ``client`` and ``model`` are required; the rest have defaults.
    """

    client: str
    model: str
    effort: str = ""
    prompt: str = ""
    context: str = ""
    tags: List[str] = []
    session_id: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JobResponse(BaseModel):
    """Status of a job; the result and error fields are optional."""

    job_id: str
    state: str
    execution_key: Optional[str] = None
    stdout: Optional[str] = None
    stderr: Optional[str] = None
    error: Optional[str] = None
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the /run endpoint."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RunBody(BaseModel):
    """Request body for the /run endpoint.

    Only ``client`` and ``model`` are required; the rest have defaults.
    """

    client: str
    model: str
    effort: str = ""
    prompt: str = ""
    context: str = ""
    tags: List[str] = []
    session_id: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RunResponse(BaseModel):
    """Result of a /run call: key, state, cache-hit flag and optional output."""

    execution_key: str
    state: str
    cache_hit: bool
    stdout: Optional[str] = None
    stderr: Optional[str] = None
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Pydantic models for the Sessions HTTP API."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SpecBody(BaseModel):
    """A client / model / effort triple; all three fields are required."""

    client: str
    model: str
    effort: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SessionCreateBody(BaseModel):
    """Request body for creating a session; both fields are optional."""

    tags: List[str] = []
    spec: Optional[SpecBody] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SessionResponse(BaseModel):
    """A session: its id, tags and optional spec."""

    session_id: str
    tags: List[str]
    spec: Optional[SpecBody] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SessionStatsResponse(BaseModel):
    """A session plus its call count, hit count and hit rate."""

    session_id: str
    tags: List[str]
    spec: Optional[SpecBody] = None
    calls: int
    hits: int
    hit_rate: float
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TagBody(BaseModel):
    """Request body carrying a single tag string."""

    tag: str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class SessionListResponse(BaseModel):
    """Response body listing session ids."""

    session_ids: List[str]
|
|
File without changes
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Routes: /executions, /stats, /purge."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, Body, HTTPException, Request # noqa: F401
|
|
10
|
+
|
|
11
|
+
from generic_ml_cache_daemon.models.execution import (
|
|
12
|
+
ExecutionListResponse,
|
|
13
|
+
ExecutionSummaryResponse,
|
|
14
|
+
GlobalStatsResponse,
|
|
15
|
+
PurgeBody,
|
|
16
|
+
PurgeByAll,
|
|
17
|
+
PurgeByKey,
|
|
18
|
+
PurgeBySession,
|
|
19
|
+
PurgeBySessionTag,
|
|
20
|
+
PurgeByTag,
|
|
21
|
+
PurgeResponse,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
router = APIRouter()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@router.get("/executions")
def list_executions(request: Request) -> ExecutionListResponse:
    """Return all current (servable) executions."""
    repository = request.app.state.wired.repository
    # Copy each summary field by field into the HTTP response model.
    rows = [
        ExecutionSummaryResponse(
            execution_key=summary.execution_key,
            kind=summary.kind,
            client=summary.client,
            model=summary.model,
        )
        for summary in repository.current_execution_summaries()
    ]
    return ExecutionListResponse(executions=rows, total=len(rows))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@router.get(
    "/executions/{key}",
    responses={
        404: {"description": "Execution not found"},
        409: {"description": "Ambiguous key prefix matches multiple executions"},
    },
)
def get_execution(key: str, request: Request) -> ExecutionSummaryResponse:
    """Return the execution whose key equals or starts with ``key``.

    An exact key match always wins. Otherwise ``key`` is treated as a prefix,
    which must identify exactly one execution.

    Raises:
        HTTPException: 404 when nothing matches, 409 when the prefix is
            ambiguous.
    """
    summaries = request.app.state.wired.repository.current_execution_summaries()
    # Every exact match is also a prefix match, so one filtered list is enough.
    candidates = [s for s in summaries if s.execution_key.startswith(key)]
    chosen = next((s for s in candidates if s.execution_key == key), None)
    if chosen is None:
        if not candidates:
            raise HTTPException(status_code=404, detail=f"execution {key!r} not found")
        if len(candidates) > 1:
            raise HTTPException(
                status_code=409,
                detail=f"ambiguous key prefix {key!r} matches {len(candidates)} executions",
            )
        chosen = candidates[0]
    return ExecutionSummaryResponse(
        execution_key=chosen.execution_key,
        kind=chosen.kind,
        client=chosen.client,
        model=chosen.model,
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@router.get("/stats")
def get_stats(request: Request) -> GlobalStatsResponse:
    """Return global store statistics."""
    use_cases = request.app.state.wired
    # Count first, then read the counters - same call order as before.
    execution_count = len(use_cases.repository.current_execution_summaries())
    counters = use_cases.metrics.event_counts()
    return GlobalStatsResponse(executions=execution_count, event_counts=counters)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@router.post("/purge", responses={422: {"description": "Unsupported purge scope"}})
def purge(body: Annotated[PurgeBody, Body(discriminator="by")], request: Request) -> PurgeResponse:
    """Purge (soft-delete) executions by scope.

    The concrete body model (selected by its ``by`` discriminator) decides
    which purge-service method runs.

    Raises:
        HTTPException: 422 if the body matches none of the known scopes.
    """
    service = request.app.state.wired.purge
    if isinstance(body, PurgeByAll):
        # The only scope that takes no target.
        outcome = service.purge_all()
    else:
        targeted_scopes = (
            (PurgeByKey, service.purge_one),
            (PurgeByTag, service.purge_by_tag),
            (PurgeBySession, service.purge_by_session),
            (PurgeBySessionTag, service.purge_by_session_tag),
        )
        for scope_model, handler in targeted_scopes:
            if isinstance(body, scope_model):
                outcome = handler(body.target)
                break
        else:  # pragma: no cover
            raise HTTPException(status_code=422, detail="unsupported purge scope")
    return PurgeResponse(
        executions_removed=outcome.executions_removed,
        bytes_freed=outcome.bytes_freed,
        blobs_removed=outcome.blobs_removed,
    )
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Route: POST /gateway/claude/v1/messages — Anthropic Messages API caching proxy.
|
|
4
|
+
|
|
5
|
+
Scope for 0.13.0: single-user-turn conversations only (one role=user message in
|
|
6
|
+
the messages array). Multi-turn support requires thread-aware context handling and
|
|
7
|
+
is deferred to a future element.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import secrets
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
17
|
+
|
|
18
|
+
from generic_ml_cache_core.adapter.inbound.composition import resolve_execution_kind
|
|
19
|
+
from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
|
|
20
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_state import (
|
|
21
|
+
ExecutionState,
|
|
22
|
+
)
|
|
23
|
+
from generic_ml_cache_core.application.port.inbound.run_ml_execution_command import (
|
|
24
|
+
RunMlExecutionCommand,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from generic_ml_cache_daemon.models.gateway import (
|
|
28
|
+
ContentBlock,
|
|
29
|
+
MessagesRequest,
|
|
30
|
+
MessagesResponse,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
router = APIRouter(prefix="/gateway/claude")
|
|
34
|
+
|
|
35
|
+
_STDOUT = ArtifactType.STDOUT
|
|
36
|
+
_CLIENT = "anthropic"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _extract_stdout(execution: Any) -> str:
    """Return the text of the first STDOUT artifact that has content.

    The bytes are decoded as UTF-8 with undecodable sequences replaced. An
    empty string is returned when there is no such artifact or decoding
    fails for any reason.
    """
    stdout_artifact = next(
        (
            candidate
            for candidate in execution.artifacts
            if candidate.artifact_type is _STDOUT and candidate.content is not None
        ),
        None,
    )
    if stdout_artifact is None:
        return ""
    try:
        return stdout_artifact.content.decode("utf-8", errors="replace")
    except Exception:  # pragma: no cover
        return ""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _build_usage(execution: Any) -> dict:
    """Build the ``usage`` mapping of the gateway response.

    With no token usage on the execution only zeroed input/output counters are
    returned. Otherwise four counters are returned, and any missing or falsy
    value is reported as 0.
    """
    usage = execution.token_usage
    if usage is None:
        return {"input_tokens": 0, "output_tokens": 0}

    # The cache counters are read defensively with getattr, as in the original.
    cache_read = getattr(usage, "cache_read_tokens", None)
    cache_write = getattr(usage, "cache_write_tokens", None)
    return {
        "input_tokens": usage.input_tokens or 0,
        "output_tokens": usage.output_tokens or 0,
        "cache_read_input_tokens": cache_read or 0,
        "cache_creation_input_tokens": cache_write or 0,
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@router.post(
    "/v1/messages",
    responses={
        422: {"description": "Multi-turn request (only single-turn supported in 0.13.0)"},
        502: {"description": "Upstream Anthropic call failed"},
        503: {"description": "Anthropic adapter not available"},
    },
)
async def proxy_messages(body: MessagesRequest, request: Request) -> MessagesResponse:
    """Cache-aware proxy for POST https://api.anthropic.com/v1/messages.

    Only single-turn conversations (one user message) are supported in 0.13.0.
    Multi-turn requests (messages with more than one entry) return HTTP 422.

    Args:
        body: the parsed messages request.
        request: the incoming request, used to reach the application state.

    Returns:
        A ``MessagesResponse`` whose single text block is the execution's
        stdout.

    Raises:
        HTTPException: 422 for anything other than exactly one user message,
            503 when the execution kind for the client cannot be resolved,
            502 when the execution ends in the FAILED state.
    """
    user_messages = [m for m in body.messages if m.role == "user"]
    if len(user_messages) != 1 or len(body.messages) > 1:
        raise HTTPException(
            status_code=422,
            detail=(
                "The gateway currently supports single-turn requests only "
                "(one role=user message, no prior assistant turns). "
                "Multi-turn support is planned."
            ),
        )

    try:
        kind = resolve_execution_kind(_CLIENT)
    except Exception as exc:  # pragma: no cover
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    # A per-request session wins. Otherwise fall back to the session the daemon
    # was started with, so it is no longer silently ignored by the gateway.
    session_id = body.session_id or getattr(request.app.state, "session_id", None)

    # NOTE(review): body.max_tokens is accepted but not forwarded here -
    # confirm whether RunMlExecutionCommand can carry it.
    command = RunMlExecutionCommand(
        execution_kind=kind,
        client=_CLIENT,
        model=body.model,
        prompt=user_messages[0].content,
        user_system_prompt=body.system,
        session_id=session_id,
    )

    wired = request.app.state.wired
    # Inside a coroutine the running loop is the documented way to reach the
    # loop; get_event_loop() is discouraged here.
    loop = asyncio.get_running_loop()
    # The blocking use case runs on the default executor, off the event loop.
    execution = await loop.run_in_executor(None, wired.run_ml.execute, command)

    if execution.execution_state is ExecutionState.FAILED:
        raise HTTPException(
            status_code=502,
            detail="upstream Anthropic call failed",
        )

    stdout = _extract_stdout(execution)
    # NOTE(review): this is true for every successful run that produced output,
    # not only for runs served from cache - confirm the intended meaning.
    cache_hit = execution.execution_state is ExecutionState.SUCCESS and bool(stdout)

    return MessagesResponse(
        id=f"msg_{secrets.token_hex(12)}",
        content=[ContentBlock(text=stdout)],
        model=body.model,
        usage=_build_usage(execution),
        x_cache_hit=cache_hit,
    )
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Routes: /health, /ready, /info, /metrics."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, Request, Response
|
|
10
|
+
from fastapi.responses import JSONResponse, PlainTextResponse
|
|
11
|
+
|
|
12
|
+
from generic_ml_cache_core.adapter.out.api.api_registry import registered_api_names
|
|
13
|
+
from generic_ml_cache_core.adapter.out.client.registry import registered_names
|
|
14
|
+
|
|
15
|
+
from generic_ml_cache_daemon import __version__
|
|
16
|
+
from generic_ml_cache_daemon.metrics import is_prometheus_available
|
|
17
|
+
from generic_ml_cache_daemon.models.health import HealthResponse, InfoResponse, ReadyResponse
|
|
18
|
+
|
|
19
|
+
router = APIRouter()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@router.get("/health")
def get_health() -> HealthResponse:
    """Liveness probe: always answers with status ``ok``."""
    alive = HealthResponse(status="ok")
    return alive
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@router.get("/ready", response_model=ReadyResponse)
def get_ready(request: Request) -> Response:
    """Readiness probe: answers 200 if the store responds, 503 otherwise.

    Readiness is tested by asking the metrics component for its event counts;
    any exception from that call is reported as "store not accessible".
    """
    wired = request.app.state.wired
    try:
        wired.metrics.event_counts()
        status_code = 200
        payload = ReadyResponse(status="ready").model_dump()
    except Exception:
        status_code = 503
        payload = ReadyResponse(status="not ready", detail="store not accessible").model_dump()
    return JSONResponse(status_code=status_code, content=payload)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@router.get("/info", response_model=InfoResponse)
def get_info(request: Request) -> InfoResponse:
    """Return daemon version, store path, active adapters, and bound session."""
    state = request.app.state
    # Merge both adapter registries, drop duplicates and sort the names.
    adapter_names: List[str] = sorted({*registered_names(), *registered_api_names()})
    return InfoResponse(
        version=__version__,
        store_root=str(state.store_root),
        session_id=state.session_id,
        adapters=adapter_names,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@router.get("/metrics")
def get_metrics(request: Request) -> Response:
    """Prometheus metrics. Requires the [metrics] extra and enable_metrics=True.

    Answers 503 when metrics are disabled and 501 when ``prometheus_client``
    is not installed; otherwise returns the latest metrics as text.
    """
    metrics_enabled = request.app.state.enable_metrics
    if not metrics_enabled:
        # NOTE(review): the message mentions a --metrics flag, while the entry
        # point visible in this package enables metrics via GMLCACHE_METRICS -
        # confirm which wording is correct.
        disabled = {"detail": "metrics endpoint not enabled (start daemon with --metrics)"}
        return JSONResponse(status_code=503, content=disabled)
    if not is_prometheus_available():  # pragma: no cover
        missing = {"detail": "prometheus-client extra not installed"}
        return JSONResponse(status_code=501, content=missing)

    # Imported only once availability has been confirmed.
    import prometheus_client  # type: ignore[import-untyped]

    exposition = prometheus_client.generate_latest().decode("utf-8")
    return PlainTextResponse(
        content=exposition,
        media_type=prometheus_client.CONTENT_TYPE_LATEST,
    )
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Routes: /jobs — submit detached background executions and stream/poll their status."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any, AsyncIterator, Dict, Optional
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
12
|
+
|
|
13
|
+
from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
|
|
14
|
+
|
|
15
|
+
from generic_ml_cache_daemon.jobs import Job, JobState
|
|
16
|
+
from generic_ml_cache_daemon.models.job import JobResponse, JobSubmitBody
|
|
17
|
+
from generic_ml_cache_daemon.routes.run import _build_command, _extract_artifact
|
|
18
|
+
|
|
19
|
+
router = APIRouter(prefix="/jobs")
|
|
20
|
+
|
|
21
|
+
_STDOUT = ArtifactType.STDOUT
|
|
22
|
+
_STDERR = ArtifactType.STDERR
|
|
23
|
+
_SSE_POLL_INTERVAL = 0.1
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _job_to_response(job: Job) -> JobResponse:
    """Convert a registry ``Job`` into its HTTP response model.

    The execution key, stdout and stderr are filled in only once the job
    carries an execution; until then they stay ``None``.
    """
    result_fields: Dict[str, Optional[str]] = {
        "execution_key": None,
        "stdout": None,
        "stderr": None,
    }
    if job.execution is not None:
        result_fields["execution_key"] = job.execution.call_identity.generate_key()
        result_fields["stdout"] = _extract_artifact(job.execution, _STDOUT)
        result_fields["stderr"] = _extract_artifact(job.execution, _STDERR)
    return JobResponse(
        job_id=job.job_id,
        state=job.state.value,
        error=job.error,
        **result_fields,
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@router.post("", status_code=202)
def submit_job(body: JobSubmitBody, request: Request) -> JobResponse:
    """Submit an execution to run in the background.

    Responds with 202 and the job as it looks right after registration.
    """
    state = request.app.state
    # The same command builder as /run is used for the job body.
    command = _build_command(body)  # type: ignore[arg-type]
    use_cases = state.wired
    submitted = state.job_registry.submit(use_cases.run_ml.execute, command)
    return _job_to_response(submitted)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@router.get("")
def list_jobs(request: Request) -> Dict[str, Any]:
    """Return the ids of every job in the registry under ``job_ids``."""
    known_ids = request.app.state.job_registry.list_ids()
    return {"job_ids": known_ids}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@router.get("/{job_id}", responses={404: {"description": "Job not found"}})
def get_job(job_id: str, request: Request) -> JobResponse:
    """Return the current status of a job.

    Raises:
        HTTPException: 404 when no job with ``job_id`` is registered.
    """
    found = request.app.state.job_registry.get(job_id)
    if found is not None:
        return _job_to_response(found)
    raise HTTPException(status_code=404, detail=f"job {job_id!r} not found")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@router.get("/{job_id}/stream", responses={404: {"description": "Job not found"}})
async def stream_job(job_id: str, request: Request) -> Any:
    """SSE stream for a job.

    Emits a 'status' event every ``_SSE_POLL_INTERVAL`` seconds until the job
    reaches DONE or ERROR, then one final 'complete' or 'error' event that
    carries the full job response.

    Raises:
        HTTPException: 404 when no job with ``job_id`` is registered.
    """
    from sse_starlette.sse import EventSourceResponse

    job = request.app.state.job_registry.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"job {job_id!r} not found")

    terminal_states = (JobState.DONE, JobState.ERROR)

    async def generator() -> AsyncIterator[Dict[str, str]]:
        # Poll the job, reporting its state until it becomes terminal.
        while job.state not in terminal_states:  # pragma: no cover
            progress = {"type": "status", "state": job.state.value}
            yield {"data": json.dumps(progress)}
            await asyncio.sleep(_SSE_POLL_INTERVAL)
        # The response is built before the event type is chosen, as before.
        final_payload = _job_to_response(job).model_dump()
        if job.state is JobState.DONE:
            event_type = "complete"
        else:
            event_type = "error"
        yield {"data": json.dumps({"type": event_type, **final_payload})}

    return EventSourceResponse(generator())
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Route: POST /run — synchronous execution or SSE stream, content-negotiated."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any, AsyncIterator, Dict, Optional
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
12
|
+
from fastapi.responses import JSONResponse
|
|
13
|
+
from sse_starlette.sse import EventSourceResponse
|
|
14
|
+
|
|
15
|
+
from generic_ml_cache_core.adapter.inbound.composition import resolve_execution_kind
|
|
16
|
+
from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
|
|
17
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_state import (
|
|
18
|
+
ExecutionState,
|
|
19
|
+
)
|
|
20
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
21
|
+
from generic_ml_cache_core.application.port.inbound.run_ml_execution_command import (
|
|
22
|
+
RunMlExecutionCommand,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from generic_ml_cache_daemon.models.run import RunBody, RunResponse
|
|
26
|
+
|
|
27
|
+
# Router that carries the POST /run endpoint defined in this module.
router = APIRouter()

# Short aliases for the two artifact types this module reads from an execution.
_STDOUT = ArtifactType.STDOUT
_STDERR = ArtifactType.STDERR
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _build_command(body: RunBody) -> RunMlExecutionCommand:
    """Translate a request body into a ``RunMlExecutionCommand``.

    The execution kind is resolved from ``body.client``; the remaining fields
    are copied from the body unchanged.

    Raises:
        HTTPException: status 400 when ``resolve_execution_kind`` raises; the
            message of the underlying exception is used as the detail.
    """
    try:
        execution_kind = resolve_execution_kind(body.client)
    except Exception as err:
        # Every resolution failure is reported to the caller as a bad request.
        raise HTTPException(status_code=400, detail=str(err)) from err

    # Fields forwarded verbatim from the request body to the command.
    passthrough = ("client", "model", "effort", "prompt", "context", "tags", "session_id")
    copied = {name: getattr(body, name) for name in passthrough}
    return RunMlExecutionCommand(execution_kind=execution_kind, **copied)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _extract_artifact(execution: MlExecution, artifact_type: ArtifactType) -> Optional[str]:
    """Return the decoded content of the first matching artifact, if any.

    An artifact matches when its ``artifact_type`` is the requested one (by
    identity) and its ``content`` is not ``None``. The content is decoded as
    UTF-8 with undecodable bytes replaced. ``None`` is returned when nothing
    matches or when decoding raises.
    """
    candidates = (
        item
        for item in execution.artifacts
        if item.artifact_type is artifact_type and item.content is not None
    )
    first = next(candidates, None)
    if first is None:
        return None
    try:
        return first.content.decode("utf-8", errors="replace")
    except Exception:  # pragma: no cover
        # Only the first match is tried; later artifacts are not consulted.
        return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _was_cache_hit(execution: MlExecution) -> bool:
    """Return True when the execution succeeded and carries a STDOUT artifact.

    NOTE(review): the check only looks at the final state and the presence of
    a STDOUT artifact; confirm this is enough to tell a cache hit from a
    fresh run.
    """
    if execution.execution_state is not ExecutionState.SUCCESS:
        return False
    for artifact in execution.artifacts:
        if artifact.artifact_type is _STDOUT:
            return True
    return False
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _to_response(execution: MlExecution, cache_hit: bool) -> RunResponse:
    """Build the ``RunResponse`` for a finished execution.

    The execution key comes from ``execution.call_identity.generate_key()``;
    stdout and stderr are taken from the execution's artifacts and are
    ``None`` when no usable artifact exists.
    """
    execution_key = execution.call_identity.generate_key()
    state_value = execution.execution_state.value
    captured_out = _extract_artifact(execution, _STDOUT)
    captured_err = _extract_artifact(execution, _STDERR)
    return RunResponse(
        execution_key=execution_key,
        state=state_value,
        cache_hit=cache_hit,
        stdout=captured_out,
        stderr=captured_err,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _to_dict(response: RunResponse) -> Dict[str, Any]:
    """Return the response as the dict produced by its ``model_dump()``."""
    payload = response.model_dump()
    return payload
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
async def _run_in_thread(wired: Any, command: RunMlExecutionCommand) -> MlExecution:
    """Run ``wired.run_ml.execute(command)`` in the loop's default executor.

    Args:
        wired: Object exposing ``run_ml.execute``.
        command: The command passed as the sole argument to ``execute``.

    Returns:
        Whatever ``execute`` returns.

    Raises:
        Any exception raised by ``execute`` propagates to the awaiting caller.
    """
    # Inside a coroutine the running loop is always available;
    # get_running_loop() is the form the asyncio docs prefer over
    # get_event_loop() here.
    loop = asyncio.get_running_loop()
    # ``None`` selects the event loop's default executor.
    return await loop.run_in_executor(None, wired.run_ml.execute, command)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
async def _sse_generator(
    wired: Any, command: RunMlExecutionCommand
) -> AsyncIterator[Dict[str, str]]:
    """Yield the SSE events for one execution.

    Two events are produced, each a dict with a JSON-encoded ``data`` field:
    an ``accepted`` event before the execution starts, then a ``complete``
    event carrying the serialised response once it has finished.
    """
    accepted = json.dumps({"type": "accepted"})
    yield {"data": accepted}

    execution = await _run_in_thread(wired, command)
    result = _to_response(execution, _was_cache_hit(execution))

    # Response fields are merged after "type", matching dict-unpacking order.
    body = {"type": "complete"}
    body.update(_to_dict(result))
    yield {"data": json.dumps(body)}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@router.post("/run", responses={400: {"description": "Unknown or unsupported client"}})
async def run(body: RunBody, request: Request) -> Any:
    """Execute an ML call synchronously (JSON) or as a server-sent event stream (SSE).

    Content negotiation:
    - ``Accept: text/event-stream`` → SSE: an ``accepted`` event followed by a
      ``complete`` event when the execution finishes.
    - Any other ``Accept`` → JSON: blocks until the execution completes.
    """
    # Raises HTTPException(400) before any work starts if the client is unknown.
    command = _build_command(body)
    wired = request.app.state.wired

    # Media types are case-insensitive, so normalise the header value before
    # looking for the SSE media type.
    accept = request.headers.get("accept", "").lower()
    if "text/event-stream" in accept:
        return EventSourceResponse(_sse_generator(wired, command))

    # JSON path: the blocking execution runs off the event loop thread.
    execution = await _run_in_thread(wired, command)
    cache_hit = _was_cache_hit(execution)
    response = _to_response(execution, cache_hit)
    return JSONResponse(content=_to_dict(response))
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Routes: /sessions — CRUD, stats, spec, and tags."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import secrets
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
10
|
+
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.session.session_spec import SessionSpec
|
|
12
|
+
|
|
13
|
+
from generic_ml_cache_daemon.models.session import (
|
|
14
|
+
SessionCreateBody,
|
|
15
|
+
SessionListResponse,
|
|
16
|
+
SessionResponse,
|
|
17
|
+
SessionStatsResponse,
|
|
18
|
+
SpecBody,
|
|
19
|
+
TagBody,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Every route in this module is served under the /sessions prefix.
router = APIRouter(prefix="/sessions")

# Keys looked up in the per-session event counts.
_HIT = "hit"
_MISS = "miss"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _spec_to_body(spec: SessionSpec | None) -> SpecBody | None:
    """Convert a ``SessionSpec`` into a ``SpecBody``; ``None`` passes through."""
    if spec is not None:
        return SpecBody(client=spec.client, model=spec.model, effort=spec.effort)
    return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _session_response(metrics, session_id: str) -> SessionResponse:
    """Build a ``SessionResponse`` from the tags and spec held by ``metrics``."""
    current_tags = metrics.session_tags(session_id)
    current_spec = metrics.session_spec(session_id)
    return SessionResponse(
        session_id=session_id,
        tags=current_tags,
        spec=_spec_to_body(current_spec),
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@router.get("")
def list_sessions(request: Request) -> SessionListResponse:
    """Return all known session IDs."""
    session_ids = request.app.state.wired.metrics.list_session_ids()
    return SessionListResponse(session_ids=session_ids)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@router.post("", status_code=201)
def create_session(body: SessionCreateBody, request: Request) -> SessionResponse:
    """Create a new session, optionally seeding it with tags and/or a spec."""
    # 8 random bytes rendered as 16 hex characters.
    session_id = secrets.token_hex(8)
    metrics = request.app.state.wired.metrics

    for tag in body.tags:
        metrics.add_session_tag(session_id, tag)

    requested = body.spec
    if requested is not None:
        spec = SessionSpec(
            client=requested.client,
            model=requested.model,
            effort=requested.effort,
        )
        metrics.set_session_spec(session_id, spec)

    # Read the stored state back so the response reflects what metrics holds.
    return _session_response(metrics, session_id)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@router.get("/{session_id}", responses={404: {"description": "Session not found"}})
def get_session(session_id: str, request: Request) -> SessionResponse:
    """Return tags and spec for a session."""
    metrics = request.app.state.wired.metrics
    tags = metrics.session_tags(session_id)
    spec = metrics.session_spec(session_id)
    # A session with neither tags nor a spec is reported as not found.
    if tags or spec is not None:
        return SessionResponse(session_id=session_id, tags=tags, spec=_spec_to_body(spec))
    raise HTTPException(status_code=404, detail=f"session {session_id!r} not found")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.get("/{session_id}/stats")
def get_session_stats(session_id: str, request: Request) -> SessionStatsResponse:
    """Return call/hit statistics for a session."""
    metrics = request.app.state.wired.metrics
    counts = metrics.session_event_counts(session_id)
    hits = counts.get(_HIT, 0)
    calls = hits + counts.get(_MISS, 0)

    # Avoid dividing by zero for a session with no recorded calls.
    if calls > 0:
        hit_rate = round(hits / calls, 4)
    else:
        hit_rate = 0.0

    tags = metrics.session_tags(session_id)
    spec = _spec_to_body(metrics.session_spec(session_id))
    return SessionStatsResponse(
        session_id=session_id,
        tags=tags,
        spec=spec,
        calls=calls,
        hits=hits,
        hit_rate=hit_rate,
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@router.put("/{session_id}/spec", status_code=200)
def set_session_spec(session_id: str, body: SpecBody, request: Request) -> SessionResponse:
    """Attach or replace the execution spec for a session."""
    metrics = request.app.state.wired.metrics
    spec = SessionSpec(client=body.client, model=body.model, effort=body.effort)
    metrics.set_session_spec(session_id, spec)
    return _session_response(metrics, session_id)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@router.delete("/{session_id}/spec", status_code=204)
def clear_session_spec(session_id: str, request: Request) -> None:
    """Remove the execution spec for a session (no-op if absent)."""
    metrics = request.app.state.wired.metrics
    metrics.clear_session_spec(session_id)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@router.post("/{session_id}/tags", status_code=201)
def add_session_tag(session_id: str, body: TagBody, request: Request) -> SessionResponse:
    """Add a tag to a session."""
    metrics = request.app.state.wired.metrics
    new_tag = body.tag
    metrics.add_session_tag(session_id, new_tag)
    # Respond with the session's tags and spec as stored after the addition.
    return _session_response(metrics, session_id)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@router.delete("/{session_id}/tags/{tag}", status_code=204)
def remove_session_tag(session_id: str, tag: str, request: Request) -> None:
    """Remove a tag from a session (no-op if tag is absent)."""
    metrics = request.app.state.wired.metrics
    metrics.remove_session_tag(session_id, tag)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: generic-ml-cache-daemon
|
|
3
|
+
Version: 0.13.0
|
|
4
|
+
Summary: Local HTTP daemon for generic-ml-cache: REST API, gateway proxy, and session transport. A thin inbound driver over generic-ml-cache-core.
|
|
5
|
+
Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
|
|
6
|
+
Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
|
|
7
|
+
Project-URL: Issues, https://github.com/danielslobozian/generic-ml-cache/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/danielslobozian/generic-ml-cache/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Daniel Slobozian
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
Keywords: ai,cache,daemon,fastapi,gateway,http,llm,proxy
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: fastapi>=0.115
|
|
25
|
+
Requires-Dist: generic-ml-cache-core>=0.12.0
|
|
26
|
+
Requires-Dist: sse-starlette>=2.0
|
|
27
|
+
Requires-Dist: uvicorn>=0.30
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: coverage>=7; extra == 'dev'
|
|
30
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
31
|
+
Requires-Dist: prometheus-client>=0.20; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff>=0.15; extra == 'dev'
|
|
35
|
+
Provides-Extra: metrics
|
|
36
|
+
Requires-Dist: prometheus-client>=0.20; extra == 'metrics'
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
generic_ml_cache_daemon/__init__.py,sha256=AXxNWYeI4Ago5Q0i6K_9XF7N7mhRGikxniniOL-olsE,109
|
|
2
|
+
generic_ml_cache_daemon/__main__.py,sha256=bK_4qHAvsSeI82iKWycVa7l_V66pfQ7UrxYrEWYZtII,819
|
|
3
|
+
generic_ml_cache_daemon/app.py,sha256=ao_O2G99wYqPXXH_y-x5HL3sDioORuzXDbM3iS6-Ess,2290
|
|
4
|
+
generic_ml_cache_daemon/jobs.py,sha256=PERCq9uk_aKsasUi4ulKY_CT7a2GujkMzKVPRLn7zRw,2504
|
|
5
|
+
generic_ml_cache_daemon/metrics.py,sha256=z5Go7NLznbZdMLIBr_ifTzUl6_qplQX8U5X0xRlabVk,507
|
|
6
|
+
generic_ml_cache_daemon/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
generic_ml_cache_daemon/models/__init__.py,sha256=Btv93JzW7tt7nDwSgwfnpysoCcAjHee0up8CJAOyKvQ,86
|
|
8
|
+
generic_ml_cache_daemon/models/execution.py,sha256=tQDhTwQadJ8T5ODJMF4xYZiMXRa9APlpBIhn44YzS44,1139
|
|
9
|
+
generic_ml_cache_daemon/models/gateway.py,sha256=hKxMZ5wK04CjtwNlnBQVhnNUqIgUG05BrwqQAw6fhrI,871
|
|
10
|
+
generic_ml_cache_daemon/models/health.py,sha256=abnyljTynlCkaTLJuY8UK01vzMLm7OVCmF5TYW93FLI,517
|
|
11
|
+
generic_ml_cache_daemon/models/job.py,sha256=LzTqr3TNImTEYcH4pQuQVrFjTJJMkV4QqPJ4CFbJcCA,659
|
|
12
|
+
generic_ml_cache_daemon/models/run.py,sha256=pq6tA9o7U8edFwdTsPiPUkFDuhcpK-qN6lF4iRObvuQ,575
|
|
13
|
+
generic_ml_cache_daemon/models/session.py,sha256=zrsuM5YTzYzbqJnUJOAUnBgSOkIumy430Erg-qS1n3M,799
|
|
14
|
+
generic_ml_cache_daemon/routes/__init__.py,sha256=Btv93JzW7tt7nDwSgwfnpysoCcAjHee0up8CJAOyKvQ,86
|
|
15
|
+
generic_ml_cache_daemon/routes/executions.py,sha256=Is8aK4lxyChdenzewJGowZI37c-vA1geK7TIpOc5DIw,3659
|
|
16
|
+
generic_ml_cache_daemon/routes/gateway.py,sha256=8b33uDkDsmKVF2VeqPkiml1FijkpqZ-FjaKHcr3kUto,4054
|
|
17
|
+
generic_ml_cache_daemon/routes/health.py,sha256=lnytyIPedyQig07k73YsMwqat2Z8Q-UdEECjld1T2Ck,2773
|
|
18
|
+
generic_ml_cache_daemon/routes/jobs.py,sha256=I_lSVDK9JxTS35o22W5czoIUXUes2KhV_cp3TG3Jf8g,3429
|
|
19
|
+
generic_ml_cache_daemon/routes/run.py,sha256=D2MdhGgBEanzhzgMUCtWyAyeDxw2sMzp9M6BnHT02Uw,4080
|
|
20
|
+
generic_ml_cache_daemon/routes/sessions.py,sha256=FfDi0TlPdQreY1ODzhzKqi4vqPdXwIyu1uRAV0Jlh7Q,4367
|
|
21
|
+
generic_ml_cache_daemon-0.13.0.dist-info/METADATA,sha256=--ru-jyhRlUpgENQJbkEwHwRYrjYGt_MTpxweBw8mQs,1720
|
|
22
|
+
generic_ml_cache_daemon-0.13.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
23
|
+
generic_ml_cache_daemon-0.13.0.dist-info/RECORD,,
|