generic-ml-cache-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- generic_ml_cache_core/__init__.py +64 -0
- generic_ml_cache_core/adapter/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/composition.py +96 -0
- generic_ml_cache_core/adapter/out/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/stub_api_client_adapter.py +30 -0
- generic_ml_cache_core/adapter/out/client/__init__.py +28 -0
- generic_ml_cache_core/adapter/out/client/claude.py +214 -0
- generic_ml_cache_core/adapter/out/client/codex.py +171 -0
- generic_ml_cache_core/adapter/out/client/cursor.py +208 -0
- generic_ml_cache_core/adapter/out/client/discover.py +121 -0
- generic_ml_cache_core/adapter/out/client/isolation.py +396 -0
- generic_ml_cache_core/adapter/out/client/local_client_runner.py +54 -0
- generic_ml_cache_core/adapter/out/client/passthrough_client_runner.py +47 -0
- generic_ml_cache_core/adapter/out/client/prime_directive.py +53 -0
- generic_ml_cache_core/adapter/out/client/registry.py +34 -0
- generic_ml_cache_core/adapter/out/clock/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/clock/system_clock.py +16 -0
- generic_ml_cache_core/adapter/out/fingerprint/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/fingerprint/filesystem_file_fingerprint.py +30 -0
- generic_ml_cache_core/adapter/out/metrics/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/metrics/access_registry.py +147 -0
- generic_ml_cache_core/adapter/out/metrics/journal_metrics.py +45 -0
- generic_ml_cache_core/adapter/out/persistence/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/persistence/call_identity_serialization.py +100 -0
- generic_ml_cache_core/adapter/out/persistence/in_memory_execution_repository.py +69 -0
- generic_ml_cache_core/adapter/out/persistence/sqlite_execution_repository.py +398 -0
- generic_ml_cache_core/adapter/out/storage/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/storage/filesystem_blob_store.py +47 -0
- generic_ml_cache_core/application/__init__.py +1 -0
- generic_ml_cache_core/application/domain/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/client_status.py +17 -0
- generic_ml_cache_core/application/domain/model/execution/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/execution/artifact.py +78 -0
- generic_ml_cache_core/application/domain/model/execution/execution_failure.py +32 -0
- generic_ml_cache_core/application/domain/model/execution/execution_kind.py +26 -0
- generic_ml_cache_core/application/domain/model/execution/execution_state.py +21 -0
- generic_ml_cache_core/application/domain/model/execution/ml_execution.py +41 -0
- generic_ml_cache_core/application/domain/model/identity/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/identity/api_call_identity.py +36 -0
- generic_ml_cache_core/application/domain/model/identity/call_identity.py +25 -0
- generic_ml_cache_core/application/domain/model/identity/managed_call_identity.py +54 -0
- generic_ml_cache_core/application/domain/model/identity/passthrough_call_identity.py +35 -0
- generic_ml_cache_core/application/domain/model/model_info.py +20 -0
- generic_ml_cache_core/application/domain/model/model_listing.py +29 -0
- generic_ml_cache_core/application/domain/model/parsed_output.py +23 -0
- generic_ml_cache_core/application/domain/model/probe/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/probe/probe_report.py +26 -0
- generic_ml_cache_core/application/domain/model/probe/probe_status.py +13 -0
- generic_ml_cache_core/application/domain/model/run/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/run/cache_mode.py +21 -0
- generic_ml_cache_core/application/domain/model/run/client_run_request.py +35 -0
- generic_ml_cache_core/application/domain/model/run/client_run_result.py +65 -0
- generic_ml_cache_core/application/domain/model/run/message.py +20 -0
- generic_ml_cache_core/application/domain/model/usage/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/usage/token_usage.py +53 -0
- generic_ml_cache_core/application/domain/model/usage/usage.py +108 -0
- generic_ml_cache_core/application/domain/service/__init__.py +1 -0
- generic_ml_cache_core/application/domain/service/cacheability.py +19 -0
- generic_ml_cache_core/application/domain/service/message_fingerprinting.py +25 -0
- generic_ml_cache_core/application/port/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/probe_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/probe_use_case.py +19 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_command.py +40 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_command.py +48 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_use_case.py +25 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/out/__init__.py +1 -0
- generic_ml_cache_core/application/port/out/api_client_port.py +26 -0
- generic_ml_cache_core/application/port/out/base.py +272 -0
- generic_ml_cache_core/application/port/out/blob_store_port.py +37 -0
- generic_ml_cache_core/application/port/out/client_runner_port.py +26 -0
- generic_ml_cache_core/application/port/out/clock_port.py +22 -0
- generic_ml_cache_core/application/port/out/execution_repository_port.py +40 -0
- generic_ml_cache_core/application/port/out/file_fingerprint_port.py +25 -0
- generic_ml_cache_core/application/port/out/metrics_port.py +54 -0
- generic_ml_cache_core/application/port/out/passthrough_runner_port.py +25 -0
- generic_ml_cache_core/application/usecase/__init__.py +1 -0
- generic_ml_cache_core/application/usecase/cached_ml_execution_service.py +198 -0
- generic_ml_cache_core/application/usecase/call_identity_building.py +60 -0
- generic_ml_cache_core/application/usecase/journal_events.py +19 -0
- generic_ml_cache_core/application/usecase/probe_service.py +44 -0
- generic_ml_cache_core/application/usecase/run_api_execution_service.py +69 -0
- generic_ml_cache_core/application/usecase/run_managed_local_execution_service.py +84 -0
- generic_ml_cache_core/application/usecase/run_passthrough_execution_service.py +67 -0
- generic_ml_cache_core/common/__init__.py +1 -0
- generic_ml_cache_core/common/checksum.py +82 -0
- generic_ml_cache_core/common/errors.py +76 -0
- generic_ml_cache_core/stream.py +65 -0
- generic_ml_cache_core-0.2.0.dist-info/METADATA +104 -0
- generic_ml_cache_core-0.2.0.dist-info/RECORD +99 -0
- generic_ml_cache_core-0.2.0.dist-info/WHEEL +4 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/LICENSE +201 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/NOTICE +8 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ExecutionRepositoryPort."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExecutionRepositoryPort(ABC):
    """Outbound port over the structured store of execution records.

    Records cross this port *dehydrated*: they carry structure and artifact
    references but no content bytes, and the use case re-attaches the output
    from the blob store. The history is append-only, so one call identity
    (one key) collects an execution for every real client call made over time.
    """

    @abstractmethod
    def find_current(self, execution_key: str) -> Optional[MlExecution]:
        """Look up the servable execution for ``execution_key``.

        The servable execution is the success that is still authoritative:
        state SUCCESS, not superseded, and with its output persisted. Returns
        None when the key has no such execution.
        """

    @abstractmethod
    def find_all(self, execution_key: str) -> List[MlExecution]:
        """List the full history recorded for ``execution_key``.

        Executions come back in the order they were saved and include current,
        stale, and failed ones alike; the list is empty when nothing was
        recorded. Because the history is append-only, its length equals the
        number of real client calls made for this identity.
        """

    @abstractmethod
    def save(self, execution: MlExecution) -> None:
        """Append ``execution`` to the history.

        When the execution is a servable success, the implementation must
        atomically supersede the previous current execution for the same key.
        Supersession belongs here, where atomicity can be guaranteed, and is
        never the caller's job.
        """
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""FileFingerprintPort."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileFingerprintPort(ABC):
    """Outbound port that fingerprints a declared input file at the edge.

    An adapter reads the file, applies the fingerprinting rule imported from
    the core (common/checksum), and hands back ONLY the resulting checksum.
    The file's content therefore never travels into the use case or the
    domain: the engine can key on a file without ever holding its bytes.
    Keeping the rule in the core rather than in each adapter guarantees that
    two adapters cannot derive different keys for the same file.
    """

    @abstractmethod
    def fingerprint(self, path: str) -> str:
        """Compute the content fingerprint of the file located at ``path``.

        Raises if ``path`` is not a readable regular file.
        """
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""MetricsPort."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Dict, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MetricsPort(ABC):
    """Outbound port onto the call journal: events go in, projections come out.

    The port is non-load-bearing by contract. Observability must never break
    an execution, so ``record_event`` must never raise; implementations are
    expected to swallow their own errors inside the adapter.

    The journal events are the source of truth. ``hit_counts_by_key``,
    ``event_counts``, and ``last_access`` are projections computed over the
    journal rather than independently stored truths.
    """

    @abstractmethod
    def record_event(
        self,
        event: str,
        *,
        execution_key: Optional[str],
        client: str,
        model: str,
        effort: str,
    ) -> None:
        """Append a single event to the journal. Must never raise."""

    @abstractmethod
    def hit_counts_by_key(self) -> Dict[str, int]:
        """Project the journal into ``{execution_key: hit_count}`` over HIT events.

        When no data is available, an empty dict is the correct answer.
        """

    @abstractmethod
    def event_counts(self) -> Dict[str, int]:
        """Project the journal into ``{event_name: count}`` over all events.

        When no data is available, an empty dict is the correct answer.
        """

    @abstractmethod
    def last_access(self) -> Dict[str, float]:
        """Project the journal into ``{execution_key: epoch_seconds}``.

        Each value is the time of the latest event for that key; the mapping
        is used to order entries for LRU eviction. When no data is available,
        an empty dict is the correct answer.
        """
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""PassthroughRunnerPort."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.run.client_run_result import ClientRunResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PassthroughRunnerPort(ABC):
    """Outbound port that launches a client in passthrough (alias) mode.

    This is deliberately separate from the managed ClientRunnerPort. Here the
    arguments are opaque (gmlcache forwards the native tail verbatim), no
    isolated folder is set up, no files are captured, and the client runs in
    the place the caller invoked it. As a consequence the ClientRunResult that
    comes back carries stdout/stderr/exit but an empty file list.
    """

    @abstractmethod
    def run(self, client: str, native_args: List[str]) -> ClientRunResult:
        """Start ``client`` with ``native_args`` passed through verbatim.

        Returns the raw result of the launch. Raises on unrecoverable launch
        failure.
        """
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""CachedMlExecutionService: the shared record-or-replay orchestration."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from dataclasses import replace
|
|
9
|
+
from typing import List, Optional, Protocol, Tuple
|
|
10
|
+
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact, ArtifactType
|
|
12
|
+
from generic_ml_cache_core.application.domain.model.run.cache_mode import CacheMode
|
|
13
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
14
|
+
from generic_ml_cache_core.application.domain.model.run.client_run_result import ClientRunResult
|
|
15
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
16
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
17
|
+
from generic_ml_cache_core.application.port.out.blob_store_port import BlobStorePort
|
|
18
|
+
from generic_ml_cache_core.application.port.out.execution_repository_port import (
|
|
19
|
+
ExecutionRepositoryPort,
|
|
20
|
+
)
|
|
21
|
+
from generic_ml_cache_core.application.port.out.metrics_port import MetricsPort
|
|
22
|
+
from generic_ml_cache_core.application.usecase import journal_events
|
|
23
|
+
from generic_ml_cache_core.common.checksum import file_content_fingerprint
|
|
24
|
+
from generic_ml_cache_core.common.errors import ArtifactBlobMissing, CacheMiss
|
|
25
|
+
|
|
26
|
+
# Encoding used to turn the client's stdout/stderr text into bytes before
# those streams are fingerprinted and stored as artifacts.
_TEXT_ENCODING = "utf-8"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CacheableExecutionCommand(Protocol):
    """Structural contract the shared flow requires of any execution command.

    Only two things are needed here: the cache mode to resolve against and a
    persistence policy. Everything specific to a kind of execution is read
    through the service's hooks instead of through this protocol.
    """

    # Cache mode the shared flow resolves this command against.
    cache_mode: CacheMode

    def should_persist(self, succeeded: bool) -> bool:
        """Decide whether a run with the given success flag should be stored."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class CachedMlExecutionService(ABC):
    """Template for the record-or-replay flow common to all cached ML executions.

    The base class decides "what happens in what order": resolving the cache
    mode (offline/cache/refresh), storing artifacts under content-derived blob
    keys, hydrating artifacts on a hit, and journaling exactly which path was
    taken. A concrete kind plugs in only its differences through the hooks:
    how its identity is built, how its client is run, which kind it tags
    executions with, which journal fields it reports, and (optionally) whether
    a command is cacheable at all.
    """

    def __init__(
        self, blob_store: BlobStorePort, repository: ExecutionRepositoryPort, metrics: MetricsPort
    ) -> None:
        """Keep the three outbound ports the shared flow drives."""
        self._blob_store = blob_store
        self._repository = repository
        self._metrics = metrics

    def execute(self, command: CacheableExecutionCommand) -> MlExecution:
        """Resolve ``command`` to an execution by replaying or running it.

        Uncacheable commands are routed to their own path first. Otherwise
        OFFLINE serves only from the store, CACHE serves the current stored
        execution when one exists, and every remaining case (a CACHE miss or
        any other mode) performs a fresh run that is allowed to be stored.
        """
        identity = self._build_identity(command)
        key = identity.generate_key()

        if self._is_uncacheable(command):
            return self._run_uncacheable(command, identity, key)

        mode = command.cache_mode
        if mode is CacheMode.OFFLINE:
            return self._serve_offline(command, key)

        if mode is CacheMode.CACHE:
            stored = self._repository.find_current(key)
            if stored is not None:
                return self._serve_hit(command, key, stored)

        return self._run_fresh(command, identity, key, allow_store=True)

    # -- kind-specific hooks ----------------------------------------------

    @abstractmethod
    def _build_identity(self, command: CacheableExecutionCommand) -> CallIdentity:
        """Derive the call identity, and through it the key, of this command."""

    @abstractmethod
    def _run_client(self, command: CacheableExecutionCommand) -> ClientRunResult:
        """Perform the real client call for this command; return its raw result."""

    @abstractmethod
    def _execution_kind(self) -> ExecutionKind:
        """Name the kind stamped on every execution this service produces."""

    @abstractmethod
    def _journal_fields(self, command: CacheableExecutionCommand) -> Tuple[str, str, str]:
        """Give the (client, model, effort) triple journaled for this command.

        A kind that has no model or effort returns empty strings in their place.
        """

    def _is_uncacheable(self, command: CacheableExecutionCommand) -> bool:
        """Tell whether this command cannot be cached. Default: always cacheable."""
        return False

    # -- resolution paths -------------------------------------------------

    def _serve_offline(self, command: CacheableExecutionCommand, execution_key: str) -> MlExecution:
        """Serve strictly from the store; journal MISS and raise CacheMiss if absent."""
        stored = self._repository.find_current(execution_key)
        if stored is not None:
            return self._serve_hit(command, execution_key, stored)

        self._record_event(journal_events.MISS, execution_key, command)
        raise CacheMiss(f"offline miss: no stored execution for key {execution_key}")

    def _serve_hit(
        self, command: CacheableExecutionCommand, execution_key: str, current_execution: MlExecution
    ) -> MlExecution:
        """Hydrate the stored execution and then journal the HIT."""
        # Hydration runs first, so a missing blob raises before any HIT is journaled.
        replayed = self._hydrate(current_execution)
        self._record_event(journal_events.HIT, execution_key, command)
        return replayed

    def _run_uncacheable(
        self, command: CacheableExecutionCommand, call_identity: CallIdentity, execution_key: str
    ) -> MlExecution:
        """Run an uncacheable command fresh without storing it.

        In OFFLINE mode there is nothing that could be served, so a MISS is
        journaled and CacheMiss is raised instead of running.
        """
        if command.cache_mode is not CacheMode.OFFLINE:
            return self._run_fresh(command, call_identity, execution_key, allow_store=False)

        self._record_event(journal_events.MISS, execution_key, command)
        raise CacheMiss("offline: this call is not cacheable, so it cannot be served offline")

    def _run_fresh(
        self,
        command: CacheableExecutionCommand,
        call_identity: CallIdentity,
        execution_key: str,
        allow_store: bool,
    ) -> MlExecution:
        """Run the client and build the execution, storing it when permitted.

        The execution is stored only when ``allow_store`` is set and the
        command's persistence policy accepts the run's success flag. A stored
        run is journaled as RECORD, an unstored one as RUN.
        """
        run_result = self._run_client(command)
        # The policy is consulted only when storing is allowed at all.
        persist = allow_store and command.should_persist(run_result.succeeded)
        # Artifacts (and their blobs, when persisting) are produced before the
        # execution record is assembled.
        built_artifacts = self._build_artifacts(run_result, store=persist)
        fresh_execution = MlExecution(
            call_identity=call_identity,
            execution_state=run_result.outcome(),
            execution_kind=self._execution_kind(),
            output_persisted=persist,
            artifacts=built_artifacts,
            token_usage=run_result.token_usage,
            failure=run_result.failure(),
        )

        if persist:
            self._repository.save(fresh_execution)
        event_name = journal_events.RECORD if persist else journal_events.RUN
        self._record_event(event_name, execution_key, command)
        return fresh_execution

    # -- artifacts --------------------------------------------------------

    def _build_artifacts(self, client_run_result: ClientRunResult, store: bool) -> List[Artifact]:
        """Turn a run result into artifacts: stdout, stderr, then each output file."""
        collected: List[Artifact] = []
        collected.append(
            self._store_artifact(
                ArtifactType.STDOUT, None, client_run_result.stdout.encode(_TEXT_ENCODING), store
            )
        )
        collected.append(
            self._store_artifact(
                ArtifactType.STDERR, None, client_run_result.stderr.encode(_TEXT_ENCODING), store
            )
        )
        collected.extend(
            self._store_artifact(ArtifactType.OUTPUT_FILE, produced.name, produced.content, store)
            for produced in client_run_result.files
        )
        return collected

    def _store_artifact(
        self,
        artifact_type: ArtifactType,
        artifact_name: Optional[str],
        content_bytes: bytes,
        store: bool,
    ) -> Artifact:
        """Build one artifact keyed by its content fingerprint.

        The blob is written to the blob store only when ``store`` is set; the
        artifact itself is built from the content either way.
        """
        content_key = file_content_fingerprint(content_bytes)
        if store:
            self._blob_store.put(content_key, content_bytes)
        return Artifact.from_content(artifact_type, content_key, content_bytes, name=artifact_name)

    def _hydrate(self, execution: MlExecution) -> MlExecution:
        """Return a copy of ``execution`` whose artifacts carry their content."""
        filled_artifacts = []
        for stored_artifact in execution.artifacts:
            filled_artifacts.append(self._hydrate_artifact(stored_artifact))
        return replace(execution, artifacts=filled_artifacts)

    def _hydrate_artifact(self, artifact: Artifact) -> Artifact:
        """Return a copy of ``artifact`` with its bytes fetched from the blob store.

        Raises ArtifactBlobMissing when the blob store returns None for the key.
        """
        payload = self._blob_store.get(artifact.blob_key)
        if payload is not None:
            return replace(artifact, content=payload)

        raise ArtifactBlobMissing(
            f"blob {artifact.blob_key} for a {artifact.artifact_type.value} "
            "artifact is missing from the blob store"
        )

    # -- journal ----------------------------------------------------------

    def _record_event(
        self, event: str, execution_key: str, command: CacheableExecutionCommand
    ) -> None:
        """Journal ``event`` for the key, tagged with the command's journal fields."""
        client_name, model_name, effort_level = self._journal_fields(command)
        self._metrics.record_event(
            event,
            execution_key=execution_key,
            client=client_name,
            model=model_name,
            effort=effort_level,
        )
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Shared construction of a CallIdentity from a command's keyed inputs.
|
|
4
|
+
|
|
5
|
+
A probe and a run must derive byte-for-byte the same key, so the fingerprinting
|
|
6
|
+
and assembly live here, once, and both services call it. The Protocol is the
|
|
7
|
+
function's parameter type and stays beside it (one cohesive unit).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import List, Protocol
|
|
13
|
+
|
|
14
|
+
from generic_ml_cache_core.application.domain.model.identity.managed_call_identity import (
|
|
15
|
+
ManagedCallIdentity,
|
|
16
|
+
)
|
|
17
|
+
from generic_ml_cache_core.application.port.out.file_fingerprint_port import FileFingerprintPort
|
|
18
|
+
from generic_ml_cache_core.common.checksum import fingerprint_arguments, text_checksum
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class KeyedCallInputs(Protocol):
    """Structural contract: the raw fields that determine a command's key.

    Allow-paths and scan-trust are intentionally absent. They govern whether a
    call is cacheable, not what its key is (see
    ``domain/service/cacheability.py``).
    """

    # Passed through to the identity unchanged.
    client: str
    model: str
    effort: str
    # Text inputs: only their checksums reach the identity.
    context: str
    prompt: str
    # Paths of declared input files; each one is fingerprinted at the edge.
    input_file_paths: List[str]
    # Extra client arguments; fingerprinted as a whole when non-empty.
    client_args: List[str]
    # Grants; carried in the identity as a frozenset.
    grants: List[str]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build_call_identity(
    file_fingerprint: FileFingerprintPort, keyed_inputs: KeyedCallInputs
) -> ManagedCallIdentity:
    """Assemble the managed identity from fingerprints of the keyed inputs.

    Input files are fingerprinted at the edge through ``file_fingerprint`` and
    the context/prompt text is checksummed in place, so only checksums, never
    file content, enter the engine. The client arguments are fingerprinted
    only when there are any; otherwise that fingerprint is None.
    """
    # One fingerprint per declared input path, keyed by the path itself.
    fingerprints_by_path = {}
    for declared_path in keyed_inputs.input_file_paths:
        fingerprints_by_path[declared_path] = file_fingerprint.fingerprint(declared_path)

    # An empty argument list contributes no fingerprint at all.
    args_fingerprint = None
    if keyed_inputs.client_args:
        args_fingerprint = fingerprint_arguments(keyed_inputs.client_args)

    return ManagedCallIdentity(
        client=keyed_inputs.client,
        model=keyed_inputs.model,
        effort=keyed_inputs.effort,
        context_fingerprint=text_checksum(keyed_inputs.context),
        prompt_fingerprint=text_checksum(keyed_inputs.prompt),
        input_file_fingerprints=fingerprints_by_path,
        client_args_fingerprint=args_fingerprint,
        grants=frozenset(keyed_inputs.grants),
    )
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Journal event names recorded through the MetricsPort.
|
|
4
|
+
|
|
5
|
+
The shared vocabulary every use case logs against. Caching and metrics are
|
|
6
|
+
independent concerns: every resolution emits exactly one event, including the
|
|
7
|
+
ones that store nothing — so even a non-cached run is surfaced.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
#: a replay: the answer came from an execution that was already stored
HIT = "hit"
#: a real call was made just now and its execution was stored
RECORD = "record"
#: the cache was wanted but held nothing servable (an offline miss)
MISS = "miss"
#: a real call was made just now but not stored (uncacheable, or a non-persisted/failed run)
RUN = "run"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ProbeService."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from generic_ml_cache_core.application.domain.model.probe.probe_report import ProbeReport
|
|
8
|
+
from generic_ml_cache_core.application.domain.model.probe.probe_status import ProbeStatus
|
|
9
|
+
from generic_ml_cache_core.application.port.inbound.probe_command import ProbeCommand
|
|
10
|
+
from generic_ml_cache_core.application.port.inbound.probe_use_case import ProbeUseCase
|
|
11
|
+
from generic_ml_cache_core.application.port.out.execution_repository_port import (
|
|
12
|
+
ExecutionRepositoryPort,
|
|
13
|
+
)
|
|
14
|
+
from generic_ml_cache_core.application.port.out.file_fingerprint_port import FileFingerprintPort
|
|
15
|
+
from generic_ml_cache_core.application.usecase.call_identity_building import build_call_identity
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ProbeService(ProbeUseCase):
    """Predict how a call would resolve against the cache, with no side effects.

    The key comes from the same shared key-building function a run uses, so
    the key reported here is byte-for-byte the one a run would derive and the
    two can never disagree. A probe starts no client, writes no blob, and
    records no journal event.
    """

    def __init__(
        self, file_fingerprint: FileFingerprintPort, repository: ExecutionRepositoryPort
    ) -> None:
        """Keep the fingerprinting port and the repository used for lookups."""
        self._file_fingerprint = file_fingerprint
        self._repository = repository

    def execute(self, command: ProbeCommand) -> ProbeReport:
        """Report NON_CACHEABLE, HIT, or MISS for ``command`` along with its key.

        The key is derived even for an uncacheable command, so every report
        carries it. A HIT report also carries the stored execution exactly as
        the repository returned it.
        """
        identity = build_call_identity(self._file_fingerprint, command)
        key = identity.generate_key()

        if command.is_uncacheable:
            return ProbeReport(status=ProbeStatus.NON_CACHEABLE, execution_key=key)

        stored = self._repository.find_current(key)
        if stored is not None:
            return ProbeReport(status=ProbeStatus.HIT, execution_key=key, execution=stored)
        return ProbeReport(status=ProbeStatus.MISS, execution_key=key)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""RunApiExecutionService."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Tuple
|
|
8
|
+
|
|
9
|
+
from generic_ml_cache_core.application.domain.model.identity.api_call_identity import (
|
|
10
|
+
ApiCallIdentity,
|
|
11
|
+
)
|
|
12
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
13
|
+
from generic_ml_cache_core.application.domain.model.run.client_run_result import ClientRunResult
|
|
14
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
15
|
+
from generic_ml_cache_core.application.domain.service.message_fingerprinting import (
|
|
16
|
+
fingerprint_messages,
|
|
17
|
+
)
|
|
18
|
+
from generic_ml_cache_core.application.port.inbound.run_api_execution_command import (
|
|
19
|
+
RunApiExecutionCommand,
|
|
20
|
+
)
|
|
21
|
+
from generic_ml_cache_core.application.port.inbound.run_api_execution_use_case import (
|
|
22
|
+
RunApiExecutionUseCase,
|
|
23
|
+
)
|
|
24
|
+
from generic_ml_cache_core.application.port.out.api_client_port import ApiClientPort
|
|
25
|
+
from generic_ml_cache_core.application.port.out.blob_store_port import BlobStorePort
|
|
26
|
+
from generic_ml_cache_core.application.port.out.execution_repository_port import (
|
|
27
|
+
ExecutionRepositoryPort,
|
|
28
|
+
)
|
|
29
|
+
from generic_ml_cache_core.application.port.out.metrics_port import MetricsPort
|
|
30
|
+
from generic_ml_cache_core.application.usecase.cached_ml_execution_service import (
|
|
31
|
+
CachedMlExecutionService,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class RunApiExecutionService(CachedMlExecutionService, RunApiExecutionUseCase):
    """Record-or-replay for a call made directly to an ML provider's API.

    The class fulfils the inbound port by filling in the hooks of the shared
    cached-execution flow with the API specifics. The identity is made of the
    provider, the model, and a fingerprint of the message list; the call is
    made through the API client port (no subprocess, no files); and every
    execution is tagged API. An API call is always cacheable.
    """

    def __init__(
        self,
        api_client: ApiClientPort,
        blob_store: BlobStorePort,
        repository: ExecutionRepositoryPort,
        metrics: MetricsPort,
    ) -> None:
        """Wire the shared flow's ports and keep the API client for real calls."""
        super().__init__(blob_store=blob_store, repository=repository, metrics=metrics)
        self._api_client = api_client

    def _build_identity(self, command: RunApiExecutionCommand) -> CallIdentity:
        """Identify the call by provider, model, and the messages' fingerprint."""
        identity = ApiCallIdentity(
            provider=command.provider,
            model=command.model,
            messages_fingerprint=fingerprint_messages(command.messages),
        )
        return identity

    def _run_client(self, command: RunApiExecutionCommand) -> ClientRunResult:
        """Send the command's messages through the API client port."""
        raw_result = self._api_client.run(command.provider, command.model, command.messages)
        return raw_result

    def _execution_kind(self) -> ExecutionKind:
        """Executions produced by this service are tagged API."""
        return ExecutionKind.API

    def _journal_fields(self, command: RunApiExecutionCommand) -> Tuple[str, str, str]:
        """Journal the provider as the client; effort is left as an empty string."""
        # An API call has no effort concept, so that slot stays empty.
        return (command.provider, command.model, "")
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""RunManagedLocalExecutionService."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Tuple
|
|
8
|
+
|
|
9
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.run.client_run_request import ClientRunRequest
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.run.client_run_result import ClientRunResult
|
|
12
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
13
|
+
from generic_ml_cache_core.application.port.inbound.run_managed_local_execution_command import (
|
|
14
|
+
RunManagedLocalExecutionCommand,
|
|
15
|
+
)
|
|
16
|
+
from generic_ml_cache_core.application.port.inbound.run_managed_local_execution_use_case import (
|
|
17
|
+
RunManagedLocalExecutionUseCase,
|
|
18
|
+
)
|
|
19
|
+
from generic_ml_cache_core.application.port.out.blob_store_port import BlobStorePort
|
|
20
|
+
from generic_ml_cache_core.application.port.out.client_runner_port import ClientRunnerPort
|
|
21
|
+
from generic_ml_cache_core.application.port.out.execution_repository_port import (
|
|
22
|
+
ExecutionRepositoryPort,
|
|
23
|
+
)
|
|
24
|
+
from generic_ml_cache_core.application.port.out.file_fingerprint_port import FileFingerprintPort
|
|
25
|
+
from generic_ml_cache_core.application.port.out.metrics_port import MetricsPort
|
|
26
|
+
from generic_ml_cache_core.application.usecase.cached_ml_execution_service import (
|
|
27
|
+
CachedMlExecutionService,
|
|
28
|
+
)
|
|
29
|
+
from generic_ml_cache_core.application.usecase.call_identity_building import build_call_identity
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RunManagedLocalExecutionService(CachedMlExecutionService, RunManagedLocalExecutionUseCase):
    """Cached (record-or-replay) execution of a fully managed local ML client.

    Fulfils the ``RunManagedLocalExecutionUseCase`` inbound port by plugging
    the managed-local hooks into the flow inherited from
    ``CachedMlExecutionService``:

    * identity     -- built by ``build_call_identity`` from the command, using
      the file-fingerprint port to fingerprint inputs;
    * execution    -- the client is launched in an isolated folder via the
      client runner port;
    * tagging      -- every execution is reported as
      ``ExecutionKind.LOCAL_MANAGED``;
    * cacheability -- taken from the command's ``is_uncacheable`` flag
      (allow-path folders are non-cacheable unless scan-trust).
    """

    def __init__(
        self,
        file_fingerprint: FileFingerprintPort,
        client_runner: ClientRunnerPort,
        blob_store: BlobStorePort,
        repository: ExecutionRepositoryPort,
        metrics: MetricsPort,
    ) -> None:
        """Wire the service to its outbound ports.

        ``blob_store``, ``repository`` and ``metrics`` are handed to the
        shared base class; the fingerprint and runner ports are kept here.
        """
        super().__init__(blob_store, repository, metrics)
        self._file_fingerprint = file_fingerprint
        self._client_runner = client_runner

    def _build_identity(self, command: RunManagedLocalExecutionCommand) -> CallIdentity:
        """Return the call identity derived from ``command``."""
        fingerprinter = self._file_fingerprint
        return build_call_identity(fingerprinter, command)

    def _run_client(self, command: RunManagedLocalExecutionCommand) -> ClientRunResult:
        """Map ``command`` to a runner request and execute it via the runner port."""
        runner = self._client_runner
        request = self._build_client_run_request(command)
        return runner.run(request)

    def _execution_kind(self) -> ExecutionKind:
        """Return the kind used to tag executions produced by this service."""
        return ExecutionKind.LOCAL_MANAGED

    def _journal_fields(self, command: RunManagedLocalExecutionCommand) -> Tuple[str, str, str]:
        """Return the ``(client, model, effort)`` triple for the journal."""
        return (command.client, command.model, command.effort)

    def _is_uncacheable(self, command: RunManagedLocalExecutionCommand) -> bool:
        """Report whether this call must bypass the cache, as stated by the command."""
        return command.is_uncacheable

    @staticmethod
    def _build_client_run_request(command: RunManagedLocalExecutionCommand) -> ClientRunRequest:
        """Translate the inbound command into the outbound runner request."""
        # The one allowed self-less method (AGENTS §6): mapping the inbound
        # command onto the outbound-port DTO is the use case's own job.
        request = ClientRunRequest(
            client=command.client,
            model=command.model,
            effort=command.effort,
            context=command.context,
            prompt=command.prompt,
            user_system_prompt=command.user_system_prompt,
            input_file_paths=command.input_file_paths,
            allow_paths=command.allow_paths,
            client_args=command.client_args,
            # Passed on as a frozenset, whatever collection the command holds.
            grants=frozenset(command.grants),
        )
        return request
|