generic-ml-cache-daemon 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ __version__ = "0.13.0"
@@ -0,0 +1,27 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Entry point: run the daemon via ``python -m generic_ml_cache_daemon``."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+ import uvicorn
11
+
12
+ from generic_ml_cache_daemon.app import create_app
13
+
14
+ _DEFAULT_HOST = "127.0.0.1"
15
+ _DEFAULT_PORT = 8765
16
+
17
+
18
def main() -> None:
    """Read configuration from the environment and serve the daemon with uvicorn.

    Environment variables:
        GMLCACHE_STORE: store directory; ``~/.gmlcache`` when unset or empty.
        GMLCACHE_SESSION: optional session id; unset or empty means ``None``.
        GMLCACHE_METRICS: ``1``, ``true`` or ``yes`` (case-insensitive) sets
            ``enable_metrics``; anything else leaves it off.

    The server is bound to ``_DEFAULT_HOST`` / ``_DEFAULT_PORT``.
    """
    # An empty GMLCACHE_STORE is treated like an unset one, matching the
    # session variable below; Path("") would otherwise silently mean the
    # current working directory.
    store_root = Path(os.environ.get("GMLCACHE_STORE") or Path.home() / ".gmlcache")
    session_id = os.environ.get("GMLCACHE_SESSION") or None
    enable_metrics = os.environ.get("GMLCACHE_METRICS", "").lower() in ("1", "true", "yes")
    application = create_app(store_root, session_id=session_id, enable_metrics=enable_metrics)
    uvicorn.run(application, host=_DEFAULT_HOST, port=_DEFAULT_PORT)
24
+
25
+
26
+ if __name__ == "__main__":
27
+ main()
@@ -0,0 +1,63 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """FastAPI application factory for the generic-ml-cache daemon."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ from fastapi import FastAPI
11
+
12
+ from generic_ml_cache_core.adapter.inbound.composition import build_use_cases
13
+
14
+ from generic_ml_cache_daemon import __version__
15
+
16
+
17
def create_app(
    store_root: Path,
    *,
    session_id: Optional[str] = None,
    enable_metrics: bool = False,
) -> FastAPI:
    """Build the daemon's FastAPI application with all routers attached.

    Args:
        store_root: directory passed to ``build_use_cases`` and kept on
            ``app.state.store_root``.
        session_id: optional session identifier kept on ``app.state.session_id``.
        enable_metrics: kept on ``app.state.enable_metrics``; the ``/metrics``
            route checks it before serving Prometheus output.

    Returns:
        The configured application. Its ``state`` carries the wired use cases
        (``wired``), the three settings above and a fresh ``JobRegistry``
        (``job_registry``).
    """
    app = FastAPI(
        title="generic-ml-cache daemon",
        version=__version__,
        docs_url="/docs",
        redoc_url="/redoc",
    )

    state = app.state
    state.wired = build_use_cases(store_root)
    state.store_root = store_root
    state.session_id = session_id
    state.enable_metrics = enable_metrics

    # These imports are deliberately function-local, as in the original layout
    # (presumably to avoid import cycles with the route modules — TODO confirm).
    from generic_ml_cache_daemon.jobs import JobRegistry
    from generic_ml_cache_daemon.routes.executions import router as executions_router
    from generic_ml_cache_daemon.routes.gateway import router as gateway_router
    from generic_ml_cache_daemon.routes.health import router as health_router
    from generic_ml_cache_daemon.routes.jobs import router as jobs_router
    from generic_ml_cache_daemon.routes.run import router as run_router
    from generic_ml_cache_daemon.routes.sessions import router as sessions_router

    state.job_registry = JobRegistry()

    # Registration order is preserved: health, sessions, executions, run, jobs, gateway.
    routers_in_order = (
        health_router,
        sessions_router,
        executions_router,
        run_router,
        jobs_router,
        gateway_router,
    )
    for route_group in routers_in_order:
        app.include_router(route_group)

    return app
@@ -0,0 +1,86 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """In-process job registry for detached background executions.
4
+
5
+ Each POST /jobs submission gets a unique job_id. The execution runs in a
6
+ background thread; callers poll GET /jobs/{id} or stream GET /jobs/{id}/stream.
7
+ The registry is in-process memory only — jobs are not persisted across restarts.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import concurrent.futures
13
+ import secrets
14
+ import threading
15
+ from enum import Enum
16
+ from typing import Dict, Optional
17
+
18
+ from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
19
+
20
+
21
class JobState(str, Enum):
    """Lifecycle of a background job; the values are the strings sent to clients."""

    PENDING = "pending"
    RUNNING = "running"
    DONE = "done"
    ERROR = "error"


class Job:
    """A single submitted background execution and its eventual outcome.

    Attributes are written by the worker thread through the ``mark_*`` methods
    and read by request handlers; ``wait`` blocks until a terminal state
    (``DONE`` or ``ERROR``) has been recorded.
    """

    def __init__(self, job_id: str) -> None:
        self.job_id = job_id
        self.state = JobState.PENDING
        self.execution: Optional[MlExecution] = None
        self.error: Optional[str] = None
        self._done_event = threading.Event()

    def wait(self, timeout: Optional[float] = None) -> bool:
        """Block until the job finishes; return False if ``timeout`` elapsed first."""
        return self._done_event.wait(timeout=timeout)

    def mark_running(self) -> None:
        """Record that a worker has started on the job."""
        self.state = JobState.RUNNING

    def mark_done(self, execution: MlExecution) -> None:
        """Store the result, switch to ``DONE`` and release waiters."""
        # Result first, state second: a poller that sees DONE can rely on
        # ``execution`` already being populated.
        self.execution = execution
        self.state = JobState.DONE
        self._done_event.set()

    def mark_error(self, error: str) -> None:
        """Store the failure text, switch to ``ERROR`` and release waiters."""
        self.error = error
        self.state = JobState.ERROR
        self._done_event.set()


class JobRegistry:
    """In-memory registry of submitted jobs, run on a 4-worker thread pool.

    The id -> job mapping is guarded by a lock. Jobs are never evicted, so the
    mapping grows for the life of the process.
    """

    def __init__(self) -> None:
        self._jobs: Dict[str, Job] = {}
        self._lock = threading.Lock()
        self._executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=4, thread_name_prefix="gmlc-job"
        )

    def submit(self, fn, *args) -> Job:
        """Register a new job and schedule ``fn(*args)`` on the worker pool.

        Args:
            fn: callable to run; its return value becomes ``job.execution``.
            *args: positional arguments forwarded to ``fn``.

        Returns:
            The new ``Job`` (still ``PENDING`` unless a worker has already
            picked it up). An ``Exception`` raised by ``fn`` is captured on the
            job as ``ERROR`` rather than propagated.
        """
        job_id = secrets.token_hex(8)
        job = Job(job_id)
        with self._lock:
            self._jobs[job_id] = job

        def _run() -> None:
            job.mark_running()
            try:
                execution = fn(*args)
            except Exception as exc:
                # str(exc) is "" for message-less exceptions (e.g. ValueError());
                # fall back to repr so a failed job never reports an empty error.
                job.mark_error(str(exc) or repr(exc))
            else:
                job.mark_done(execution)

        self._executor.submit(_run)
        return job

    def get(self, job_id: str) -> Optional[Job]:
        """Return the job registered under ``job_id``, or ``None`` if unknown."""
        with self._lock:
            return self._jobs.get(job_id)

    def list_ids(self) -> list[str]:
        """Return a snapshot list of every registered job id."""
        with self._lock:
            return list(self._jobs)
@@ -0,0 +1,17 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Prometheus metrics setup for the daemon. Requires the optional [metrics] extra."""
4
+
5
+ from __future__ import annotations
6
+
7
# Probe once, at import time, whether the optional dependency is present.
try:
    import prometheus_client  # type: ignore[import-untyped]  # noqa: F401
except ImportError:  # pragma: no cover
    _AVAILABLE = False
else:
    _AVAILABLE = True


def is_prometheus_available() -> bool:
    """Report whether ``prometheus_client`` could be imported when this module loaded."""
    return _AVAILABLE
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,59 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Executions HTTP API and global stats/purge."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Dict, List, Literal, Union
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class ExecutionSummaryResponse(BaseModel):
    # One execution as returned by /executions and /executions/{key}; each
    # field is copied from the same-named attribute of a repository summary.
    execution_key: str
    kind: str
    client: str
    model: str
17
+
18
+
19
class ExecutionListResponse(BaseModel):
    # Body of GET /executions; the route sets ``total`` to len(executions).
    executions: List[ExecutionSummaryResponse]
    total: int
22
+
23
+
24
class GlobalStatsResponse(BaseModel):
    # Body of GET /stats: number of current execution summaries plus the
    # mapping returned by the metrics service's event_counts().
    executions: int
    event_counts: Dict[str, int]
27
+
28
+
29
class PurgeByAll(BaseModel):
    # POST /purge variant selected by by="all"; takes no target.
    by: Literal["all"]
31
+
32
+
33
class PurgeByKey(BaseModel):
    # POST /purge variant selected by by="key"; ``target`` is handed to purge_one().
    by: Literal["key"]
    target: str
36
+
37
+
38
class PurgeByTag(BaseModel):
    # POST /purge variant selected by by="tag"; ``target`` is handed to purge_by_tag().
    by: Literal["tag"]
    target: str
41
+
42
+
43
class PurgeBySession(BaseModel):
    # POST /purge variant selected by by="session"; ``target`` is handed to
    # purge_by_session().
    by: Literal["session"]
    target: str
46
+
47
+
48
class PurgeBySessionTag(BaseModel):
    # POST /purge variant selected by by="session_tag"; ``target`` is handed to
    # purge_by_session_tag().
    by: Literal["session_tag"]
    target: str
51
+
52
+
53
+ PurgeBody = Union[PurgeByAll, PurgeByKey, PurgeByTag, PurgeBySession, PurgeBySessionTag]
54
+
55
+
56
class PurgeResponse(BaseModel):
    # Body of POST /purge; each field mirrors the same-named attribute of the
    # report returned by the purge service.
    executions_removed: int
    bytes_freed: int
    blobs_removed: int
@@ -0,0 +1,39 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Claude gateway (/gateway/claude/v1/messages)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class MessageParam(BaseModel):
    # One entry of the request's ``messages`` array. Content is plain text
    # only; the gateway route uses the content of the single role="user" entry
    # as the prompt.
    role: str
    content: str
15
+
16
+
17
class MessagesRequest(BaseModel):
    # Request body of POST /gateway/claude/v1/messages.
    model: str
    messages: List[MessageParam]
    # NOTE(review): accepted and defaulted here, but the visible gateway route
    # does not pass it on to the execution command — confirm intended.
    max_tokens: int = 8192
    # Forwarded as the command's ``user_system_prompt``.
    system: Optional[str] = None
    # Forwarded as the command's ``session_id``.
    session_id: Optional[str] = None
23
+
24
+
25
class ContentBlock(BaseModel):
    # One block of response content; the gateway emits a single text block.
    type: str = "text"
    text: str
28
+
29
+
30
class MessagesResponse(BaseModel):
    # Response body of POST /gateway/claude/v1/messages.
    # The route generates ``id`` as "msg_" + 24 random hex characters.
    id: str
    type: str = "message"
    role: str = "assistant"
    content: List[ContentBlock]
    # Echo of the requested model name.
    model: str
    stop_reason: str = "end_turn"
    stop_sequence: Optional[str] = None
    # Token counters built by the route from the execution's token usage.
    usage: Dict[str, Any]
    # Extension field set by the gateway route.
    x_cache_hit: bool = False
@@ -0,0 +1,25 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic response models for /health, /ready, and /info."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class HealthResponse(BaseModel):
    # Body of GET /health; the route always answers status="ok".
    status: str
14
+
15
+
16
class ReadyResponse(BaseModel):
    # Body of GET /ready: status is "ready" or "not ready"; ``detail`` is only
    # filled in for the not-ready case.
    status: str
    detail: Optional[str] = None
19
+
20
+
21
class InfoResponse(BaseModel):
    # Body of GET /info.
    version: str
    # String form of the store path held on app.state.
    store_root: str
    session_id: Optional[str] = None
    # Sorted union of the registered client and API adapter names.
    adapters: List[str]
@@ -0,0 +1,28 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Jobs HTTP API (detached background executions)."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class JobSubmitBody(BaseModel):
    # Request body of POST /jobs. Same field set as the /run request body; the
    # jobs route converts it to a command with the /run helper.
    client: str
    model: str
    effort: str = ""
    prompt: str = ""
    context: str = ""
    tags: List[str] = []
    session_id: Optional[str] = None
20
+
21
+
22
class JobResponse(BaseModel):
    # Status of a background job as returned by the /jobs routes.
    job_id: str
    # One of the JobState string values.
    state: str
    # The next three stay None until the job has an execution attached.
    execution_key: Optional[str] = None
    stdout: Optional[str] = None
    stderr: Optional[str] = None
    # Failure text recorded on the job, if any.
    error: Optional[str] = None
@@ -0,0 +1,27 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the /run endpoint."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class RunBody(BaseModel):
    # Request body of POST /run; every field is forwarded to the execution
    # command, and ``client`` also selects the execution kind.
    client: str
    model: str
    effort: str = ""
    prompt: str = ""
    context: str = ""
    tags: List[str] = []
    session_id: Optional[str] = None
20
+
21
+
22
class RunResponse(BaseModel):
    # Result of POST /run (also the payload of the SSE "complete" event).
    # Key generated from the execution's call identity.
    execution_key: str
    # String value of the execution state.
    state: str
    cache_hit: bool
    # Decoded artifact text; None when no such artifact has content.
    stdout: Optional[str] = None
    stderr: Optional[str] = None
@@ -0,0 +1,43 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Pydantic models for the Sessions HTTP API."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List, Optional
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class SpecBody(BaseModel):
    # HTTP form of a session spec; converted to/from the core SessionSpec
    # field-for-field by the sessions routes.
    client: str
    model: str
    effort: str
16
+
17
+
18
class SessionCreateBody(BaseModel):
    # Request body of POST /sessions; both fields are optional seeds.
    tags: List[str] = []
    spec: Optional[SpecBody] = None
21
+
22
+
23
class SessionResponse(BaseModel):
    # A session with its tags and spec, as read back from the metrics service.
    session_id: str
    tags: List[str]
    spec: Optional[SpecBody] = None
27
+
28
+
29
class SessionStatsResponse(BaseModel):
    # Session details plus call statistics.
    # NOTE(review): the route that fills this is outside the visible source;
    # presumably hit_rate is hits / calls — confirm against the handler.
    session_id: str
    tags: List[str]
    spec: Optional[SpecBody] = None
    calls: int
    hits: int
    hit_rate: float
36
+
37
+
38
class TagBody(BaseModel):
    # Request body carrying a single tag.
    tag: str
40
+
41
+
42
class SessionListResponse(BaseModel):
    # Body of GET /sessions: ids as returned by the metrics service.
    session_ids: List[str]
File without changes
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,102 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Routes: /executions, /stats, /purge."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Annotated
8
+
9
+ from fastapi import APIRouter, Body, HTTPException, Request # noqa: F401
10
+
11
+ from generic_ml_cache_daemon.models.execution import (
12
+ ExecutionListResponse,
13
+ ExecutionSummaryResponse,
14
+ GlobalStatsResponse,
15
+ PurgeBody,
16
+ PurgeByAll,
17
+ PurgeByKey,
18
+ PurgeBySession,
19
+ PurgeBySessionTag,
20
+ PurgeByTag,
21
+ PurgeResponse,
22
+ )
23
+
24
+ router = APIRouter()
25
+
26
+
27
@router.get("/executions")
def list_executions(request: Request) -> ExecutionListResponse:
    """Return all current (servable) executions."""
    repository = request.app.state.wired.repository
    rows = []
    # Copy each repository summary into its HTTP representation.
    for summary in repository.current_execution_summaries():
        rows.append(
            ExecutionSummaryResponse(
                execution_key=summary.execution_key,
                kind=summary.kind,
                client=summary.client,
                model=summary.model,
            )
        )
    return ExecutionListResponse(executions=rows, total=len(rows))
38
+
39
+
40
@router.get(
    "/executions/{key}",
    responses={
        404: {"description": "Execution not found"},
        409: {"description": "Ambiguous key prefix matches multiple executions"},
    },
)
def get_execution(key: str, request: Request) -> ExecutionSummaryResponse:
    """Return the execution whose key equals or starts with ``key``."""
    summaries = request.app.state.wired.repository.current_execution_summaries()

    # An exact key always wins, even when it is also a prefix of other keys.
    chosen = next((item for item in summaries if item.execution_key == key), None)

    if chosen is None:
        # Fall back to prefix lookup, which must resolve to exactly one execution.
        candidates = [item for item in summaries if item.execution_key.startswith(key)]
        if not candidates:
            raise HTTPException(status_code=404, detail=f"execution {key!r} not found")
        if len(candidates) > 1:
            raise HTTPException(
                status_code=409,
                detail=f"ambiguous key prefix {key!r} matches {len(candidates)} executions",
            )
        chosen = candidates[0]

    return ExecutionSummaryResponse(
        execution_key=chosen.execution_key,
        kind=chosen.kind,
        client=chosen.client,
        model=chosen.model,
    )
69
+
70
+
71
@router.get("/stats")
def get_stats(request: Request) -> GlobalStatsResponse:
    """Return global store statistics."""
    use_cases = request.app.state.wired
    # Executions are counted from the current summaries; event counts come
    # straight from the metrics service.
    execution_total = len(use_cases.repository.current_execution_summaries())
    counts = use_cases.metrics.event_counts()
    return GlobalStatsResponse(executions=execution_total, event_counts=counts)
80
+
81
+
82
@router.post("/purge", responses={422: {"description": "Unsupported purge scope"}})
def purge(body: Annotated[PurgeBody, Body(discriminator="by")], request: Request) -> PurgeResponse:
    """Purge (soft-delete) executions by scope."""
    service = request.app.state.wired.purge

    if isinstance(body, PurgeByAll):
        # The only scope without a target.
        outcome = service.purge_all()
    else:
        # Every other scope calls one service method with ``body.target``.
        # Methods are looked up by name so only the selected one is touched.
        targeted_scopes = (
            (PurgeByKey, "purge_one"),
            (PurgeByTag, "purge_by_tag"),
            (PurgeBySession, "purge_by_session"),
            (PurgeBySessionTag, "purge_by_session_tag"),
        )
        method_name = next(
            (name for model_cls, name in targeted_scopes if isinstance(body, model_cls)),
            None,
        )
        if method_name is None:  # pragma: no cover
            raise HTTPException(status_code=422, detail="unsupported purge scope")
        outcome = getattr(service, method_name)(body.target)

    return PurgeResponse(
        executions_removed=outcome.executions_removed,
        bytes_freed=outcome.bytes_freed,
        blobs_removed=outcome.blobs_removed,
    )
@@ -0,0 +1,119 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Route: POST /gateway/claude/v1/messages — Anthropic Messages API caching proxy.
4
+
5
+ Scope for 0.13.0: single-user-turn conversations only (one role=user message in
6
+ the messages array). Multi-turn support requires thread-aware context handling and
7
+ is deferred to a future element.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import secrets
14
+ from typing import Any
15
+
16
+ from fastapi import APIRouter, HTTPException, Request
17
+
18
+ from generic_ml_cache_core.adapter.inbound.composition import resolve_execution_kind
19
+ from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
20
+ from generic_ml_cache_core.application.domain.model.execution.execution_state import (
21
+ ExecutionState,
22
+ )
23
+ from generic_ml_cache_core.application.port.inbound.run_ml_execution_command import (
24
+ RunMlExecutionCommand,
25
+ )
26
+
27
+ from generic_ml_cache_daemon.models.gateway import (
28
+ ContentBlock,
29
+ MessagesRequest,
30
+ MessagesResponse,
31
+ )
32
+
33
+ router = APIRouter(prefix="/gateway/claude")
34
+
35
+ _STDOUT = ArtifactType.STDOUT
36
+ _CLIENT = "anthropic"
37
+
38
+
39
def _extract_stdout(execution: Any) -> str:
    """Return the first STDOUT artifact that has content, decoded as UTF-8.

    Undecodable bytes are replaced rather than raising. Returns ``""`` when no
    STDOUT artifact carries content.
    """
    with_content = (
        item
        for item in execution.artifacts
        if item.artifact_type is _STDOUT and item.content is not None
    )
    first = next(with_content, None)
    if first is None:
        return ""
    try:
        return first.content.decode("utf-8", errors="replace")
    except Exception:  # pragma: no cover
        return ""
47
+
48
+
49
def _build_usage(execution: Any) -> dict:
    """Build the ``usage`` mapping of a Messages response from ``execution.token_usage``.

    Without token usage only the two basic counters are returned, both 0.
    Otherwise four counters are returned; missing or falsy values become 0.
    """
    if execution.token_usage is None:
        return {"input_tokens": 0, "output_tokens": 0}

    counters = execution.token_usage
    usage = {}
    usage["input_tokens"] = counters.input_tokens or 0
    usage["output_tokens"] = counters.output_tokens or 0
    # The cache counters are optional attributes on the usage object.
    usage["cache_read_input_tokens"] = getattr(counters, "cache_read_tokens", None) or 0
    usage["cache_creation_input_tokens"] = getattr(counters, "cache_write_tokens", None) or 0
    return usage
59
+
60
+
61
@router.post(
    "/v1/messages",
    responses={
        422: {"description": "Multi-turn request (only single-turn supported in 0.13.0)"},
        502: {"description": "Upstream Anthropic call failed"},
        503: {"description": "Anthropic adapter not available"},
    },
)
async def proxy_messages(body: MessagesRequest, request: Request) -> MessagesResponse:
    """Cache-aware proxy for POST https://api.anthropic.com/v1/messages.

    Only single-turn conversations (one user message) are supported in 0.13.0.
    Multi-turn requests (messages with more than one entry) return HTTP 422.
    """
    # Accept exactly one message, and it must be a role="user" message.
    user_messages = [m for m in body.messages if m.role == "user"]
    if len(user_messages) != 1 or len(body.messages) > 1:
        raise HTTPException(
            status_code=422,
            detail=(
                "The gateway currently supports single-turn requests only "
                "(one role=user message, no prior assistant turns). "
                "Multi-turn support is planned."
            ),
        )

    try:
        kind = resolve_execution_kind(_CLIENT)
    except Exception as exc:  # pragma: no cover
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    # NOTE(review): body.max_tokens is validated but not forwarded to the
    # command — confirm whether that is intended.
    command = RunMlExecutionCommand(
        execution_kind=kind,
        client=_CLIENT,
        model=body.model,
        prompt=user_messages[0].content,
        user_system_prompt=body.system,
        session_id=body.session_id,
    )

    wired = request.app.state.wired
    # The use case is a blocking call, so it runs on the loop's default
    # executor. get_running_loop() is the documented way to obtain the loop
    # from inside a coroutine; get_event_loop() has policy-dependent behaviour.
    loop = asyncio.get_running_loop()
    execution = await loop.run_in_executor(None, wired.run_ml.execute, command)

    if execution.execution_state is ExecutionState.FAILED:
        raise HTTPException(
            status_code=502,
            detail="upstream Anthropic call failed",
        )

    stdout = _extract_stdout(execution)
    # NOTE(review): this flag is true for any successful execution with
    # non-empty stdout; whether that always means "served from cache" depends
    # on ExecutionState semantics not visible here — confirm.
    cache_hit = execution.execution_state is ExecutionState.SUCCESS and bool(stdout)

    return MessagesResponse(
        id=f"msg_{secrets.token_hex(12)}",
        content=[ContentBlock(text=stdout)],
        model=body.model,
        usage=_build_usage(execution),
        x_cache_hit=cache_hit,
    )
@@ -0,0 +1,75 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Routes: /health, /ready, /info, /metrics."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import List
8
+
9
+ from fastapi import APIRouter, Request, Response
10
+ from fastapi.responses import JSONResponse, PlainTextResponse
11
+
12
+ from generic_ml_cache_core.adapter.out.api.api_registry import registered_api_names
13
+ from generic_ml_cache_core.adapter.out.client.registry import registered_names
14
+
15
+ from generic_ml_cache_daemon import __version__
16
+ from generic_ml_cache_daemon.metrics import is_prometheus_available
17
+ from generic_ml_cache_daemon.models.health import HealthResponse, InfoResponse, ReadyResponse
18
+
19
+ router = APIRouter()
20
+
21
+
22
@router.get("/health")
def get_health() -> HealthResponse:
    """Liveness: confirm the daemon process is alive."""
    # Nothing is checked here: being able to answer at all is the signal.
    alive = HealthResponse(status="ok")
    return alive
26
+
27
+
28
@router.get("/ready", response_model=ReadyResponse)
def get_ready(request: Request) -> Response:
    """Readiness: confirm the store is accessible and the daemon can serve requests."""
    wired = request.app.state.wired

    status_code = 200
    payload = ReadyResponse(status="ready")
    try:
        # Used purely as a probe; the counts themselves are discarded.
        wired.metrics.event_counts()
    except Exception:
        status_code = 503
        payload = ReadyResponse(status="not ready", detail="store not accessible")

    return JSONResponse(status_code=status_code, content=payload.model_dump())
40
+
41
+
42
@router.get("/info", response_model=InfoResponse)
def get_info(request: Request) -> InfoResponse:
    """Return daemon version, store path, active adapters, and bound session."""
    state = request.app.state
    store_path = str(state.store_root)
    bound_session = state.session_id

    # Client and API adapters are reported as one de-duplicated, sorted list.
    adapter_names = set(registered_names())
    adapter_names.update(registered_api_names())

    return InfoResponse(
        version=__version__,
        store_root=store_path,
        session_id=bound_session,
        adapters=sorted(adapter_names),
    )
54
+
55
+
56
@router.get("/metrics")
def get_metrics(request: Request) -> Response:
    """Prometheus metrics. Requires the [metrics] extra and enable_metrics=True."""

    def _refuse(status_code: int, detail: str) -> Response:
        # Both refusal paths share the same {"detail": ...} JSON shape.
        return JSONResponse(status_code=status_code, content={"detail": detail})

    if not request.app.state.enable_metrics:
        return _refuse(503, "metrics endpoint not enabled (start daemon with --metrics)")
    if not is_prometheus_available():  # pragma: no cover
        return _refuse(501, "prometheus-client extra not installed")

    # Imported only once we know the optional dependency is installed.
    import prometheus_client  # type: ignore[import-untyped]

    exposition = prometheus_client.generate_latest().decode("utf-8")
    return PlainTextResponse(
        content=exposition,
        media_type=prometheus_client.CONTENT_TYPE_LATEST,
    )
@@ -0,0 +1,91 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Routes: /jobs — submit detached background executions and stream/poll their status."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import json
9
+ from typing import Any, AsyncIterator, Dict, Optional
10
+
11
+ from fastapi import APIRouter, HTTPException, Request
12
+
13
+ from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
14
+
15
+ from generic_ml_cache_daemon.jobs import Job, JobState
16
+ from generic_ml_cache_daemon.models.job import JobResponse, JobSubmitBody
17
+ from generic_ml_cache_daemon.routes.run import _build_command, _extract_artifact
18
+
19
+ router = APIRouter(prefix="/jobs")
20
+
21
+ _STDOUT = ArtifactType.STDOUT
22
+ _STDERR = ArtifactType.STDERR
23
+ _SSE_POLL_INTERVAL = 0.1
24
+
25
+
26
def _job_to_response(job: Job) -> JobResponse:
    """Convert a registry ``Job`` into its HTTP representation.

    ``execution_key``, ``stdout`` and ``stderr`` stay ``None`` until the job
    has an execution attached.
    """
    key: Optional[str] = None
    out_text: Optional[str] = None
    err_text: Optional[str] = None

    if job.execution is not None:
        key = job.execution.call_identity.generate_key()
        out_text = _extract_artifact(job.execution, _STDOUT)
        err_text = _extract_artifact(job.execution, _STDERR)

    return JobResponse(
        job_id=job.job_id,
        state=job.state.value,
        execution_key=key,
        stdout=out_text,
        stderr=err_text,
        error=job.error,
    )
42
+
43
+
44
@router.post("", status_code=202)
def submit_job(body: JobSubmitBody, request: Request) -> JobResponse:
    """Submit an execution to run in the background. Returns immediately with
    a job_id in 'pending' state."""
    # JobSubmitBody has the same fields as RunBody, so the /run helper builds
    # the command (it raises HTTP 400 when the client cannot be resolved).
    command = _build_command(body)  # type: ignore[arg-type]
    state = request.app.state
    execute = state.wired.run_ml.execute
    queued = state.job_registry.submit(execute, command)
    return _job_to_response(queued)
53
+
54
+
55
@router.get("")
def list_jobs(request: Request) -> Dict[str, Any]:
    """Return all known job IDs."""
    known_ids = request.app.state.job_registry.list_ids()
    return {"job_ids": known_ids}
60
+
61
+
62
@router.get("/{job_id}", responses={404: {"description": "Job not found"}})
def get_job(job_id: str, request: Request) -> JobResponse:
    """Return the current status of a job."""
    found = request.app.state.job_registry.get(job_id)
    if found is not None:
        return _job_to_response(found)
    raise HTTPException(status_code=404, detail=f"job {job_id!r} not found")
70
+
71
+
72
@router.get("/{job_id}/stream", responses={404: {"description": "Job not found"}})
async def stream_job(job_id: str, request: Request) -> Any:
    """SSE stream for a job. Emits a 'status' event every 100ms until the job
    completes, then a final 'complete' or 'error' event."""
    from sse_starlette.sse import EventSourceResponse

    job = request.app.state.job_registry.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"job {job_id!r} not found")

    terminal_states = (JobState.DONE, JobState.ERROR)

    async def generator() -> AsyncIterator[Dict[str, str]]:
        # Poll the job (it is updated by a worker thread) until it is terminal.
        while job.state not in terminal_states:  # pragma: no cover
            progress = {"type": "status", "state": job.state.value}
            yield {"data": json.dumps(progress)}
            await asyncio.sleep(_SSE_POLL_INTERVAL)

        snapshot = _job_to_response(job)
        if job.state is JobState.DONE:
            final_type = "complete"
        else:
            final_type = "error"
        final_event = {"type": final_type}
        final_event.update(snapshot.model_dump())
        yield {"data": json.dumps(final_event)}

    return EventSourceResponse(generator())
@@ -0,0 +1,114 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Route: POST /run — synchronous execution or SSE stream, content-negotiated."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import json
9
+ from typing import Any, AsyncIterator, Dict, Optional
10
+
11
+ from fastapi import APIRouter, HTTPException, Request
12
+ from fastapi.responses import JSONResponse
13
+ from sse_starlette.sse import EventSourceResponse
14
+
15
+ from generic_ml_cache_core.adapter.inbound.composition import resolve_execution_kind
16
+ from generic_ml_cache_core.application.domain.model.execution.artifact import ArtifactType
17
+ from generic_ml_cache_core.application.domain.model.execution.execution_state import (
18
+ ExecutionState,
19
+ )
20
+ from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
21
+ from generic_ml_cache_core.application.port.inbound.run_ml_execution_command import (
22
+ RunMlExecutionCommand,
23
+ )
24
+
25
+ from generic_ml_cache_daemon.models.run import RunBody, RunResponse
26
+
27
+ router = APIRouter()
28
+
29
+ _STDOUT = ArtifactType.STDOUT
30
+ _STDERR = ArtifactType.STDERR
31
+
32
+
33
def _build_command(body: RunBody) -> RunMlExecutionCommand:
    """Translate a request body into a ``RunMlExecutionCommand``.

    Raises:
        HTTPException: status 400, carrying the resolver's message, when
            ``resolve_execution_kind`` rejects ``body.client``.
    """
    try:
        resolved_kind = resolve_execution_kind(body.client)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    # Every body field maps one-to-one onto a command argument.
    fields = {
        "execution_kind": resolved_kind,
        "client": body.client,
        "model": body.model,
        "effort": body.effort,
        "prompt": body.prompt,
        "context": body.context,
        "tags": body.tags,
        "session_id": body.session_id,
    }
    return RunMlExecutionCommand(**fields)
48
+
49
+
50
def _extract_artifact(execution: MlExecution, artifact_type: ArtifactType) -> Optional[str]:
    """Return the text of the first ``artifact_type`` artifact that has content.

    Content is decoded as UTF-8 with undecodable bytes replaced. Returns
    ``None`` when no artifact of that type carries content.
    """
    matching = (
        item
        for item in execution.artifacts
        if item.artifact_type is artifact_type and item.content is not None
    )
    found = next(matching, None)
    if found is None:
        return None
    try:
        return found.content.decode("utf-8", errors="replace")
    except Exception:  # pragma: no cover
        return None
58
+
59
+
60
def _was_cache_hit(execution: MlExecution) -> bool:
    """Return True when the execution succeeded and has a STDOUT artifact."""
    # NOTE(review): despite the name, the check only looks at SUCCESS plus the
    # presence of a STDOUT artifact — confirm this really identifies cache hits.
    if execution.execution_state is not ExecutionState.SUCCESS:
        return False
    for artifact in execution.artifacts:
        if artifact.artifact_type is _STDOUT:
            return True
    return False
64
+
65
+
66
def _to_response(execution: MlExecution, cache_hit: bool) -> RunResponse:
    """Build the ``RunResponse`` for a finished execution.

    The key comes from the execution's call identity; stdout and stderr are
    the decoded artifact contents, or ``None`` when absent.
    """
    return RunResponse(
        execution_key=execution.call_identity.generate_key(),
        state=execution.execution_state.value,
        cache_hit=cache_hit,
        stdout=_extract_artifact(execution, _STDOUT),
        stderr=_extract_artifact(execution, _STDERR),
    )
75
+
76
+
77
def _to_dict(response: RunResponse) -> Dict[str, Any]:
    """Return the response as a plain dict via ``model_dump()``."""
    as_mapping: Dict[str, Any] = response.model_dump()
    return as_mapping
79
+
80
+
81
async def _run_in_thread(wired: Any, command: RunMlExecutionCommand) -> MlExecution:
    """Run the blocking ``wired.run_ml.execute(command)`` off the event loop.

    The call is dispatched to the loop's default executor and its result is
    returned once it completes.
    """
    # get_running_loop() is the documented way to obtain the loop from inside
    # a coroutine; get_event_loop() has policy-dependent behaviour.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, wired.run_ml.execute, command)
84
+
85
+
86
async def _sse_generator(
    wired: Any, command: RunMlExecutionCommand
) -> AsyncIterator[Dict[str, str]]:
    """Yield the two SSE events of a streamed run: ``accepted`` then ``complete``.

    Each event is a dict whose ``data`` entry is a JSON string. The
    ``complete`` event also carries the ``RunResponse`` fields.
    """
    # NOTE(review): if the execution raises, the stream ends after "accepted"
    # with no terminal event — confirm clients can cope with that.

    def _frame(payload: Dict[str, Any]) -> Dict[str, str]:
        return {"data": json.dumps(payload)}

    yield _frame({"type": "accepted"})

    execution = await _run_in_thread(wired, command)
    result = _to_response(execution, _was_cache_hit(execution))

    final_payload: Dict[str, Any] = {"type": "complete"}
    final_payload.update(_to_dict(result))
    yield _frame(final_payload)
94
+
95
+
96
@router.post("/run", responses={400: {"description": "Unknown or unsupported client"}})
async def run(body: RunBody, request: Request) -> Any:
    """Execute an ML call synchronously (JSON) or as a server-sent event stream (SSE).

    Content negotiation:
    - ``Accept: text/event-stream`` → SSE: an ``accepted`` event followed by a
      ``complete`` event when the execution finishes.
    - Any other ``Accept`` → JSON: blocks until the execution completes.
    """
    command = _build_command(body)
    wired = request.app.state.wired

    # Substring test on the raw Accept header decides the response style.
    accept_header = request.headers.get("accept", "")
    wants_stream = "text/event-stream" in accept_header
    if wants_stream:
        return EventSourceResponse(_sse_generator(wired, command))

    execution = await _run_in_thread(wired, command)
    result = _to_response(execution, _was_cache_hit(execution))
    return JSONResponse(content=_to_dict(result))
@@ -0,0 +1,122 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Routes: /sessions — CRUD, stats, spec, and tags."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import secrets
8
+
9
+ from fastapi import APIRouter, HTTPException, Request
10
+
11
+ from generic_ml_cache_core.application.domain.model.session.session_spec import SessionSpec
12
+
13
+ from generic_ml_cache_daemon.models.session import (
14
+ SessionCreateBody,
15
+ SessionListResponse,
16
+ SessionResponse,
17
+ SessionStatsResponse,
18
+ SpecBody,
19
+ TagBody,
20
+ )
21
+
22
+ router = APIRouter(prefix="/sessions")
23
+
24
+ _HIT = "hit"
25
+ _MISS = "miss"
26
+
27
+
28
def _spec_to_body(spec: SessionSpec | None) -> SpecBody | None:
    """Map a domain ``SessionSpec`` onto its API body, passing ``None`` through."""
    return (
        None
        if spec is None
        else SpecBody(client=spec.client, model=spec.model, effort=spec.effort)
    )
32
+
33
+
34
def _session_response(metrics, session_id: str) -> SessionResponse:
    """Build the response for ``session_id`` from its current tags and spec.

    Both values are read back from ``metrics``: tags first, then the spec.
    """
    current_tags = metrics.session_tags(session_id)
    current_spec = _spec_to_body(metrics.session_spec(session_id))
    return SessionResponse(session_id=session_id, tags=current_tags, spec=current_spec)
40
+
41
+
42
@router.get("")
def list_sessions(request: Request) -> SessionListResponse:
    """List every session ID reported by the metrics component."""
    known_ids = request.app.state.wired.metrics.list_session_ids()
    return SessionListResponse(session_ids=known_ids)
47
+
48
+
49
@router.post("", status_code=201)
def create_session(body: SessionCreateBody, request: Request) -> SessionResponse:
    """Create a session with a fresh random ID and optional tags/spec.

    The ID is 8 random bytes rendered as hex. Each tag in the body is added
    to the session, and the spec is stored when one is supplied.
    """
    # NOTE(review): with no tags and no spec nothing is written to metrics
    # here — confirm whether such a session is retrievable afterwards.
    session_id = secrets.token_hex(8)
    metrics = request.app.state.wired.metrics

    for tag in body.tags:
        metrics.add_session_tag(session_id, tag)

    requested = body.spec
    if requested is not None:
        spec = SessionSpec(
            client=requested.client,
            model=requested.model,
            effort=requested.effort,
        )
        metrics.set_session_spec(session_id, spec)

    return _session_response(metrics, session_id)
62
+
63
+
64
@router.get("/{session_id}", responses={404: {"description": "Session not found"}})
def get_session(session_id: str, request: Request) -> SessionResponse:
    """Look up a session's tags and spec.

    Raises:
        HTTPException: 404 when the session has neither tags nor a spec.
    """
    metrics = request.app.state.wired.metrics
    tags = metrics.session_tags(session_id)
    spec = metrics.session_spec(session_id)

    # A session is considered known only if it has at least one tag or a spec.
    has_data = bool(tags) or spec is not None
    if has_data:
        return SessionResponse(session_id=session_id, tags=tags, spec=_spec_to_body(spec))
    raise HTTPException(status_code=404, detail=f"session {session_id!r} not found")
73
+
74
+
75
@router.get("/{session_id}/stats")
def get_session_stats(session_id: str, request: Request) -> SessionStatsResponse:
    """Report call count, hit count and hit rate for a session.

    Calls are the sum of recorded hits and misses. The hit rate is rounded
    to four decimals and is ``0.0`` when there were no calls.
    """
    metrics = request.app.state.wired.metrics
    event_counts = metrics.session_event_counts(session_id)

    hit_count = event_counts.get(_HIT, 0)
    call_count = hit_count + event_counts.get(_MISS, 0)

    if call_count > 0:
        rate = round(hit_count / call_count, 4)
    else:
        rate = 0.0  # avoid dividing by zero for sessions with no calls

    tags = metrics.session_tags(session_id)
    spec = _spec_to_body(metrics.session_spec(session_id))
    return SessionStatsResponse(
        session_id=session_id,
        tags=tags,
        spec=spec,
        calls=call_count,
        hits=hit_count,
        hit_rate=rate,
    )
92
+
93
+
94
@router.put("/{session_id}/spec", status_code=200)
def set_session_spec(session_id: str, body: SpecBody, request: Request) -> SessionResponse:
    """Store the given execution spec on the session and return the session."""
    metrics = request.app.state.wired.metrics
    new_spec = SessionSpec(client=body.client, model=body.model, effort=body.effort)
    metrics.set_session_spec(session_id, new_spec)
    return _session_response(metrics, session_id)
103
+
104
+
105
@router.delete("/{session_id}/spec", status_code=204)
def clear_session_spec(session_id: str, request: Request) -> None:
    """Clear the session's execution spec; responds 204 with no body."""
    metrics = request.app.state.wired.metrics
    metrics.clear_session_spec(session_id)
109
+
110
+
111
@router.post("/{session_id}/tags", status_code=201)
def add_session_tag(session_id: str, body: TagBody, request: Request) -> SessionResponse:
    """Attach ``body.tag`` to the session and return the updated session."""
    metrics = request.app.state.wired.metrics
    new_tag = body.tag
    metrics.add_session_tag(session_id, new_tag)
    return _session_response(metrics, session_id)
117
+
118
+
119
@router.delete("/{session_id}/tags/{tag}", status_code=204)
def remove_session_tag(session_id: str, tag: str, request: Request) -> None:
    """Detach ``tag`` from the session; responds 204 with no body."""
    metrics = request.app.state.wired.metrics
    metrics.remove_session_tag(session_id, tag)
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: generic-ml-cache-daemon
3
+ Version: 0.13.0
4
+ Summary: Local HTTP daemon for generic-ml-cache: REST API, gateway proxy, and session transport. A thin inbound driver over generic-ml-cache-core.
5
+ Project-URL: Homepage, https://github.com/danielslobozian/generic-ml-cache
6
+ Project-URL: Repository, https://github.com/danielslobozian/generic-ml-cache
7
+ Project-URL: Issues, https://github.com/danielslobozian/generic-ml-cache/issues
8
+ Project-URL: Changelog, https://github.com/danielslobozian/generic-ml-cache/blob/main/CHANGELOG.md
9
+ Author: Daniel Slobozian
10
+ License-Expression: Apache-2.0
11
+ Keywords: ai,cache,daemon,fastapi,gateway,http,llm,proxy
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Utilities
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: fastapi>=0.115
25
+ Requires-Dist: generic-ml-cache-core>=0.12.0
26
+ Requires-Dist: sse-starlette>=2.0
27
+ Requires-Dist: uvicorn>=0.30
28
+ Provides-Extra: dev
29
+ Requires-Dist: coverage>=7; extra == 'dev'
30
+ Requires-Dist: httpx>=0.27; extra == 'dev'
31
+ Requires-Dist: prometheus-client>=0.20; extra == 'dev'
32
+ Requires-Dist: pytest-cov; extra == 'dev'
33
+ Requires-Dist: pytest>=7; extra == 'dev'
34
+ Requires-Dist: ruff>=0.15; extra == 'dev'
35
+ Provides-Extra: metrics
36
+ Requires-Dist: prometheus-client>=0.20; extra == 'metrics'
@@ -0,0 +1,23 @@
1
+ generic_ml_cache_daemon/__init__.py,sha256=AXxNWYeI4Ago5Q0i6K_9XF7N7mhRGikxniniOL-olsE,109
2
+ generic_ml_cache_daemon/__main__.py,sha256=bK_4qHAvsSeI82iKWycVa7l_V66pfQ7UrxYrEWYZtII,819
3
+ generic_ml_cache_daemon/app.py,sha256=ao_O2G99wYqPXXH_y-x5HL3sDioORuzXDbM3iS6-Ess,2290
4
+ generic_ml_cache_daemon/jobs.py,sha256=PERCq9uk_aKsasUi4ulKY_CT7a2GujkMzKVPRLn7zRw,2504
5
+ generic_ml_cache_daemon/metrics.py,sha256=z5Go7NLznbZdMLIBr_ifTzUl6_qplQX8U5X0xRlabVk,507
6
+ generic_ml_cache_daemon/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ generic_ml_cache_daemon/models/__init__.py,sha256=Btv93JzW7tt7nDwSgwfnpysoCcAjHee0up8CJAOyKvQ,86
8
+ generic_ml_cache_daemon/models/execution.py,sha256=tQDhTwQadJ8T5ODJMF4xYZiMXRa9APlpBIhn44YzS44,1139
9
+ generic_ml_cache_daemon/models/gateway.py,sha256=hKxMZ5wK04CjtwNlnBQVhnNUqIgUG05BrwqQAw6fhrI,871
10
+ generic_ml_cache_daemon/models/health.py,sha256=abnyljTynlCkaTLJuY8UK01vzMLm7OVCmF5TYW93FLI,517
11
+ generic_ml_cache_daemon/models/job.py,sha256=LzTqr3TNImTEYcH4pQuQVrFjTJJMkV4QqPJ4CFbJcCA,659
12
+ generic_ml_cache_daemon/models/run.py,sha256=pq6tA9o7U8edFwdTsPiPUkFDuhcpK-qN6lF4iRObvuQ,575
13
+ generic_ml_cache_daemon/models/session.py,sha256=zrsuM5YTzYzbqJnUJOAUnBgSOkIumy430Erg-qS1n3M,799
14
+ generic_ml_cache_daemon/routes/__init__.py,sha256=Btv93JzW7tt7nDwSgwfnpysoCcAjHee0up8CJAOyKvQ,86
15
+ generic_ml_cache_daemon/routes/executions.py,sha256=Is8aK4lxyChdenzewJGowZI37c-vA1geK7TIpOc5DIw,3659
16
+ generic_ml_cache_daemon/routes/gateway.py,sha256=8b33uDkDsmKVF2VeqPkiml1FijkpqZ-FjaKHcr3kUto,4054
17
+ generic_ml_cache_daemon/routes/health.py,sha256=lnytyIPedyQig07k73YsMwqat2Z8Q-UdEECjld1T2Ck,2773
18
+ generic_ml_cache_daemon/routes/jobs.py,sha256=I_lSVDK9JxTS35o22W5czoIUXUes2KhV_cp3TG3Jf8g,3429
19
+ generic_ml_cache_daemon/routes/run.py,sha256=D2MdhGgBEanzhzgMUCtWyAyeDxw2sMzp9M6BnHT02Uw,4080
20
+ generic_ml_cache_daemon/routes/sessions.py,sha256=FfDi0TlPdQreY1ODzhzKqi4vqPdXwIyu1uRAV0Jlh7Q,4367
21
+ generic_ml_cache_daemon-0.13.0.dist-info/METADATA,sha256=--ru-jyhRlUpgENQJbkEwHwRYrjYGt_MTpxweBw8mQs,1720
22
+ generic_ml_cache_daemon-0.13.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
23
+ generic_ml_cache_daemon-0.13.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any