generic-ml-cache-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- generic_ml_cache_core/__init__.py +64 -0
- generic_ml_cache_core/adapter/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/composition.py +96 -0
- generic_ml_cache_core/adapter/out/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/stub_api_client_adapter.py +30 -0
- generic_ml_cache_core/adapter/out/client/__init__.py +28 -0
- generic_ml_cache_core/adapter/out/client/claude.py +214 -0
- generic_ml_cache_core/adapter/out/client/codex.py +171 -0
- generic_ml_cache_core/adapter/out/client/cursor.py +208 -0
- generic_ml_cache_core/adapter/out/client/discover.py +121 -0
- generic_ml_cache_core/adapter/out/client/isolation.py +396 -0
- generic_ml_cache_core/adapter/out/client/local_client_runner.py +54 -0
- generic_ml_cache_core/adapter/out/client/passthrough_client_runner.py +47 -0
- generic_ml_cache_core/adapter/out/client/prime_directive.py +53 -0
- generic_ml_cache_core/adapter/out/client/registry.py +34 -0
- generic_ml_cache_core/adapter/out/clock/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/clock/system_clock.py +16 -0
- generic_ml_cache_core/adapter/out/fingerprint/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/fingerprint/filesystem_file_fingerprint.py +30 -0
- generic_ml_cache_core/adapter/out/metrics/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/metrics/access_registry.py +147 -0
- generic_ml_cache_core/adapter/out/metrics/journal_metrics.py +45 -0
- generic_ml_cache_core/adapter/out/persistence/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/persistence/call_identity_serialization.py +100 -0
- generic_ml_cache_core/adapter/out/persistence/in_memory_execution_repository.py +69 -0
- generic_ml_cache_core/adapter/out/persistence/sqlite_execution_repository.py +398 -0
- generic_ml_cache_core/adapter/out/storage/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/storage/filesystem_blob_store.py +47 -0
- generic_ml_cache_core/application/__init__.py +1 -0
- generic_ml_cache_core/application/domain/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/client_status.py +17 -0
- generic_ml_cache_core/application/domain/model/execution/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/execution/artifact.py +78 -0
- generic_ml_cache_core/application/domain/model/execution/execution_failure.py +32 -0
- generic_ml_cache_core/application/domain/model/execution/execution_kind.py +26 -0
- generic_ml_cache_core/application/domain/model/execution/execution_state.py +21 -0
- generic_ml_cache_core/application/domain/model/execution/ml_execution.py +41 -0
- generic_ml_cache_core/application/domain/model/identity/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/identity/api_call_identity.py +36 -0
- generic_ml_cache_core/application/domain/model/identity/call_identity.py +25 -0
- generic_ml_cache_core/application/domain/model/identity/managed_call_identity.py +54 -0
- generic_ml_cache_core/application/domain/model/identity/passthrough_call_identity.py +35 -0
- generic_ml_cache_core/application/domain/model/model_info.py +20 -0
- generic_ml_cache_core/application/domain/model/model_listing.py +29 -0
- generic_ml_cache_core/application/domain/model/parsed_output.py +23 -0
- generic_ml_cache_core/application/domain/model/probe/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/probe/probe_report.py +26 -0
- generic_ml_cache_core/application/domain/model/probe/probe_status.py +13 -0
- generic_ml_cache_core/application/domain/model/run/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/run/cache_mode.py +21 -0
- generic_ml_cache_core/application/domain/model/run/client_run_request.py +35 -0
- generic_ml_cache_core/application/domain/model/run/client_run_result.py +65 -0
- generic_ml_cache_core/application/domain/model/run/message.py +20 -0
- generic_ml_cache_core/application/domain/model/usage/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/usage/token_usage.py +53 -0
- generic_ml_cache_core/application/domain/model/usage/usage.py +108 -0
- generic_ml_cache_core/application/domain/service/__init__.py +1 -0
- generic_ml_cache_core/application/domain/service/cacheability.py +19 -0
- generic_ml_cache_core/application/domain/service/message_fingerprinting.py +25 -0
- generic_ml_cache_core/application/port/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/probe_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/probe_use_case.py +19 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_command.py +40 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_command.py +48 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_use_case.py +25 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/out/__init__.py +1 -0
- generic_ml_cache_core/application/port/out/api_client_port.py +26 -0
- generic_ml_cache_core/application/port/out/base.py +272 -0
- generic_ml_cache_core/application/port/out/blob_store_port.py +37 -0
- generic_ml_cache_core/application/port/out/client_runner_port.py +26 -0
- generic_ml_cache_core/application/port/out/clock_port.py +22 -0
- generic_ml_cache_core/application/port/out/execution_repository_port.py +40 -0
- generic_ml_cache_core/application/port/out/file_fingerprint_port.py +25 -0
- generic_ml_cache_core/application/port/out/metrics_port.py +54 -0
- generic_ml_cache_core/application/port/out/passthrough_runner_port.py +25 -0
- generic_ml_cache_core/application/usecase/__init__.py +1 -0
- generic_ml_cache_core/application/usecase/cached_ml_execution_service.py +198 -0
- generic_ml_cache_core/application/usecase/call_identity_building.py +60 -0
- generic_ml_cache_core/application/usecase/journal_events.py +19 -0
- generic_ml_cache_core/application/usecase/probe_service.py +44 -0
- generic_ml_cache_core/application/usecase/run_api_execution_service.py +69 -0
- generic_ml_cache_core/application/usecase/run_managed_local_execution_service.py +84 -0
- generic_ml_cache_core/application/usecase/run_passthrough_execution_service.py +67 -0
- generic_ml_cache_core/common/__init__.py +1 -0
- generic_ml_cache_core/common/checksum.py +82 -0
- generic_ml_cache_core/common/errors.py +76 -0
- generic_ml_cache_core/stream.py +65 -0
- generic_ml_cache_core-0.2.0.dist-info/METADATA +104 -0
- generic_ml_cache_core-0.2.0.dist-info/RECORD +99 -0
- generic_ml_cache_core-0.2.0.dist-info/WHEEL +4 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/LICENSE +201 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/NOTICE +8 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ApiCallIdentity."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
11
|
+
from generic_ml_cache_core.common.checksum import checksum_input_data
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class ApiCallIdentity(CallIdentity):
    """Cache identity for a call made directly against a provider API.

    The key is built from the provider, the model, and a digest of the whole
    message list. Only that digest is held on this object: the raw messages can
    contain sensitive context, so they are neither keyed nor stored. The
    execution kind is one of the hashed entries, which keeps an API identity
    apart from a local managed or passthrough identity.
    """

    provider: str
    model: str
    messages_fingerprint: str

    def generate_key(self) -> str:
        """Return the checksum of the kind, provider, model and messages digest."""
        key_fields = {
            "kind": ExecutionKind.API.value,
            "provider": self.provider,
            "model": self.model,
            "messages": self.messages_fingerprint,
        }
        return checksum_input_data(key_fields)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""CallIdentity."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CallIdentity(ABC):
    """Abstract value object from which an execution's cache key is derived.

    Each execution kind supplies its own subclass because each kind is
    identified by different data: a managed call by fingerprints of its
    model/prompt/files, a passthrough call by its opaque native args, an API
    call by its provider and messages. The aggregate is addressed through
    ``generate_key``. Implementations are expected to include their kind in the
    hashed data so that identities of different kinds cannot share a key.
    """

    @abstractmethod
    def generate_key(self) -> str:
        """Return a stable hex digest that uniquely addresses this call.

        Implementations must be pure: they hash only fingerprints that are
        already in memory and perform no I/O.
        """
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ManagedCallIdentity."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Dict, FrozenSet, Optional
|
|
9
|
+
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
12
|
+
from generic_ml_cache_core.common.checksum import checksum_input_data
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class ManagedCallIdentity(CallIdentity):
    """Cache identity for a fully managed local call.

    Every field is already processed when this object is built: text inputs
    arrive as fingerprints, and each input file path is paired with the
    fingerprint of its content. This is not the user's raw request.

    ``allow_paths`` (permission grants to scan folders) are deliberately NOT a
    field here: they do not enter the key and travel separately to the client
    runner.
    """

    client: str
    model: str
    effort: str
    context_fingerprint: str
    prompt_fingerprint: str
    input_file_fingerprints: Dict[str, str] = field(default_factory=dict)
    client_args_fingerprint: Optional[str] = None
    grants: FrozenSet[str] = field(default_factory=frozenset)

    def generate_key(self) -> str:
        """Return the checksum of every entry that identifies this call.

        The mandatory entries are always hashed. File entries, the client-args
        entry and the grants entry are added only when the matching field is
        populated.
        """
        hashed_entries: Dict[str, str] = dict(
            kind=ExecutionKind.LOCAL_MANAGED.value,
            client=self.client,
            model=self.model,
            effort=self.effort,
            context=self.context_fingerprint,
            prompt=self.prompt_fingerprint,
        )
        # Path-sensitive on purpose: the path is part of the entry name and the
        # content fingerprint is its value. A rename is a real change (the
        # prompt may mention the file by name), so it must produce a new key --
        # a miss is preferred over a wrong hit.
        hashed_entries.update(
            (f"file:{path}", content_digest)
            for path, content_digest in sorted(self.input_file_fingerprints.items())
        )
        args_digest = self.client_args_fingerprint
        if args_digest is not None:
            # The digest is used both in the entry name and as its value.
            hashed_entries[f"args:{args_digest}"] = args_digest
        if self.grants:
            # Sorted so the joined string does not depend on set iteration order.
            hashed_entries["grants"] = ",".join(sorted(self.grants))
        return checksum_input_data(hashed_entries)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""PassthroughCallIdentity."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
11
|
+
from generic_ml_cache_core.common.checksum import checksum_input_data
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class PassthroughCallIdentity(CallIdentity):
    """Cache identity for a passthrough (alias) call.

    A passthrough is opaque: gmlcache does not model its inputs and only
    forwards the native argument tail to the client. The identity is therefore
    the client name plus a *fingerprint* of those native args. The raw args may
    contain secrets, so only their digest is keyed or stored. The execution
    kind is one of the hashed entries, which keeps a passthrough identity apart
    from a managed one.
    """

    client: str
    native_args_fingerprint: str

    def generate_key(self) -> str:
        """Return the checksum of the kind, client and native-args digest."""
        key_fields = {
            "kind": ExecutionKind.LOCAL_PASSTHROUGH.value,
            "client": self.client,
            "args": self.native_args_fingerprint,
        }
        return checksum_input_data(key_fields)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""ModelInfo."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class ModelInfo:
    """A single model that a client says it can use, exactly as relayed.

    ``id`` is the value a caller would pass as ``--model`` and ``name`` is the
    client's own human-readable label. ``default`` and ``current`` mirror any
    marker the client printed next to the model. The cache does not invent or
    validate any of these values.
    """

    id: str
    name: str
    default: bool = False
    current: bool = False
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""ModelListing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
from generic_ml_cache_core.application.port.out.base import ModelInfo
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ModelListing:
    """The result of asking one client which models it offers.

    There are three honest outcomes and never a guess:

    * the client is absent -> ``present=False`` (``supported`` carries no
      meaning and is left False);
    * the client is present but has no listing mechanism ->
      ``supported=False`` together with a ``reason``;
    * the client is present and listed its models -> ``supported=True`` and
      ``models`` populated (possibly empty when the client really reported
      none).

    ``models`` holds whatever the client relayed; the cache adds nothing.
    """

    name: str
    present: bool
    supported: bool
    models: Optional[List[ModelInfo]] = None
    reason: Optional[str] = None
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""ParsedOutput."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from generic_ml_cache_core.application.domain.model.usage.usage import Usage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class ParsedOutput:
    """The pieces an adapter pulled out of a client's structured output.

    ``text`` is the clean answer to show the caller on stdout: the client's own
    answer text, lifted out of its JSON wrapper. ``usage`` is the normalized
    envelope read from that same output. It is ``None`` when the client offered
    no usage, or when the output could not be parsed -- in that case ``text``
    falls back to the raw stdout so the core call still resolves.
    """

    text: str
    usage: Optional[Usage] = None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ProbeReport."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.probe.probe_status import ProbeStatus
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class ProbeReport:
    """The forecast a read-only probe produces for a call.

    ``execution_key`` is derived for every verdict, so a caller can display
    the key even on a miss. ``execution`` is the dehydrated current execution
    and is present only on a HIT; it carries the queryable metadata (artifacts,
    usage) without fetching any output bytes.
    """

    status: ProbeStatus
    execution_key: str
    execution: Optional[MlExecution] = None
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""ProbeStatus."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProbeStatus(enum.Enum):
    """Verdict returned by a read-only cache probe (see :func:`probe`)."""

    # A stored execution exists for this exact call.
    HIT = "hit"
    # The call is cacheable, but no execution has been recorded yet.
    MISS = "miss"
    # The call declares allow-path folders, so it is never cached.
    NON_CACHEABLE = "non-cacheable"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""CacheMode."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import enum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CacheMode(enum.Enum):
    """How an MlExecution is resolved against the cache.

    CACHE   -- the default: serve from cache on a hit; on a miss call the
               client and record the result.
    OFFLINE -- serve from cache only; a miss raises an error.
    REFRESH -- always call the client and overwrite any stored output.
    """

    CACHE = "cache"
    OFFLINE = "offline"
    REFRESH = "refresh"
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ClientRunRequest."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import FrozenSet, List, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class ClientRunRequest:
    """DTO the use case builds and hands to ClientRunnerPort.

    It carries only what the runner needs to launch the client. The command's
    gmlcache-specific policy fields (cache_mode, persist_output, scan_trust)
    are intentionally absent: they belong to the use case, not to the runner.

    ``input_file_paths`` are the declared files the client is granted read
    access to; their content has already been fingerprinted into the key.
    ``allow_paths`` are the permission-grant folder paths the client may scan.
    The runner opens the read-door for both and fingerprints neither, since
    fingerprinting has already happened.
    """

    client: str
    model: str
    effort: str
    context: str
    prompt: str
    input_file_paths: List[str] = field(default_factory=list)
    allow_paths: List[str] = field(default_factory=list)
    client_args: List[str] = field(default_factory=list)
    grants: FrozenSet[str] = field(default_factory=frozenset)
    user_system_prompt: Optional[str] = None
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ClientRunResult and GeneratedFile."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_failure import (
|
|
11
|
+
ExecutionFailure,
|
|
12
|
+
FailureReason,
|
|
13
|
+
)
|
|
14
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
|
|
15
|
+
from generic_ml_cache_core.application.domain.model.usage.token_usage import TokenUsage
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class GeneratedFile:
    """A file produced by the client, captured raw as a name plus its bytes.

    It deliberately has no checksum and no blob key: storing the file is the
    use case's responsibility, not the runner's.
    """

    name: str
    content: bytes
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class ClientRunResult:
    """Raw, transient result returned by the ClientRunnerPort.

    This type is part of the runner port's contract rather than an
    adapter-internal detail, yet nothing in it has been stored. The use case
    converts it into stored Artifacts (hashing each piece and putting it in the
    blob store) and assembles the MlExecution. The runner itself never hashes,
    never computes a key and never stores.

    The object also translates its own ``exit_code`` into a run outcome. That
    rule reads only this object's data, which is why it lives here and not in
    the use case.
    """

    exit_code: int
    stdout: str = ""
    stderr: str = ""
    files: List[GeneratedFile] = field(default_factory=list)
    #: Token accounting observed by the runner (from a structured client or an
    #: API), or None when nothing was reported. The shared flow carries it to
    #: MlExecution.token_usage; it is database-bound accounting and is not
    #: stored as output bytes.
    token_usage: Optional[TokenUsage] = None

    @property
    def succeeded(self) -> bool:
        """Whether the client exited with status zero."""
        return self.exit_code == 0

    def outcome(self) -> ExecutionState:
        """Map the exit status to SUCCESS (zero) or FAILED (anything else)."""
        if self.succeeded:
            return ExecutionState.SUCCESS
        return ExecutionState.FAILED

    def failure(self) -> Optional[ExecutionFailure]:
        """Describe a non-zero exit as an ExecutionFailure; None after success."""
        if not self.succeeded:
            status = self.exit_code
            return ExecutionFailure(
                reason=FailureReason.NONZERO_EXIT,
                message=f"client exited with status {status}",
                exit_code=status,
            )
        return None
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Message."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class Message:
    """A single entry of an API call's context: a role plus its content.

    The type is provider-agnostic. The caller assembles the complete message
    list and gmlcache forwards it to the provider. ``role`` stays a plain
    string because providers accept different sets of roles (system, user,
    assistant, tool, ...).
    """

    role: str
    content: str
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""TokenUsage."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
from generic_ml_cache_core.application.domain.model.usage.usage import float_or_none, int_or_none
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class TokenUsage:
    """Normalized token counts for one ML execution, with the raw block kept.

    This is accounting data: database-bound and separate from the output
    artifacts. Every count is ``Optional[int]`` -- either a value the client
    reported or ``None`` when it did not report that field at all. ``None``
    means unknown, never zero. ``cost_usd`` is the client's own advisory
    estimate and is never derived by gmlcache. ``raw`` keeps the client's
    verbatim usage structure.
    """

    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    cache_read_tokens: Optional[int] = None
    cache_write_tokens: Optional[int] = None
    reasoning_tokens: Optional[int] = None
    cost_usd: Optional[float] = None
    raw: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict keyed by field name.

        ``raw`` is returned as the same dict object held by this instance, not
        a copy.
        """
        return {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "cache_read_tokens": self.cache_read_tokens,
            "cache_write_tokens": self.cache_write_tokens,
            "reasoning_tokens": self.reasoning_tokens,
            "cost_usd": self.cost_usd,
            "raw": self.raw,
        }

    @classmethod
    def from_dict(cls, token_usage_dict: Dict[str, Any]) -> "TokenUsage":
        """Build a TokenUsage from a dict shaped like the output of ``to_dict``.

        Counts are coerced with ``int_or_none`` and the cost with
        ``float_or_none``; a missing key is read as ``None``. ``raw`` is
        shallow-copied so the new instance does not share the caller's dict.
        """
        # A stored ``"raw": None`` (or a missing key) is treated as "no raw
        # block"; passing None straight to dict() would raise TypeError.
        raw_block = token_usage_dict.get("raw") or {}
        return cls(
            input_tokens=int_or_none(token_usage_dict.get("input_tokens")),
            output_tokens=int_or_none(token_usage_dict.get("output_tokens")),
            cache_read_tokens=int_or_none(token_usage_dict.get("cache_read_tokens")),
            cache_write_tokens=int_or_none(token_usage_dict.get("cache_write_tokens")),
            reasoning_tokens=int_or_none(token_usage_dict.get("reasoning_tokens")),
            cost_usd=float_or_none(token_usage_dict.get("cost_usd")),
            raw=dict(raw_block),
        )
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""The usage envelope: what one recorded call consumed, in a common shape.
|
|
4
|
+
|
|
5
|
+
Two things live here:
|
|
6
|
+
|
|
7
|
+
* :class:`Usage` -- a **normalized** token/cost envelope with a small core every
|
|
8
|
+
client fills, plus an optional ring only some report. It keeps the client's
|
|
9
|
+
**raw** usage block verbatim alongside, so nothing the client reported is lost.
|
|
10
|
+
* :class:`ParsedOutput` (defined in ``application/domain/model/parsed_output.py``, not in this module) -- what an adapter pulls out of a client's structured
|
|
11
|
+
output: the clean answer text (what the caller sees on stdout) and the
|
|
12
|
+
:class:`Usage` it read from the same output.
|
|
13
|
+
|
|
14
|
+
Design rulings this encodes (do not relitigate):
|
|
15
|
+
|
|
16
|
+
* **Tokens are the spine, not dollars.** Every client reports tokens; only some
|
|
17
|
+
report a dollar figure, and even that figure is the *client's own local
|
|
18
|
+
estimate* (computed from a price table bundled into the client at build time),
|
|
19
|
+
not authoritative billing. So ``cost_usd`` is advisory: recorded when offered,
|
|
20
|
+
never derived by the cache, never authoritative.
|
|
21
|
+
* **Unknown is not zero.** A field the client did not report is ``None``
|
|
22
|
+
("unknown"), never ``0``. Codex reports no cache-write count -- that is unknown,
|
|
23
|
+
not "wrote nothing". This distinction is in the type from the start because we
|
|
24
|
+
cannot anticipate what any given client (or client version, or detached/parallel
|
|
25
|
+
run) chooses to report.
|
|
26
|
+
* **We record what the call reported; we do not reconstruct.** If a client
|
|
27
|
+
under-reports (e.g. subagents that billed outside the single invocation we
|
|
28
|
+
launched), that is the client's gap; we mark it unknown rather than invent a
|
|
29
|
+
total.
|
|
30
|
+
* **The model the call ran under lives on the stored execution** (its ``model`` field), so
|
|
31
|
+
a reader always shows usage *next to its model* -- a Haiku token is not an Opus
|
|
32
|
+
token. The full per-model / per-subagent breakdown a client may give (e.g.
|
|
33
|
+
Claude's ``modelUsage``) is preserved in :attr:`Usage.raw`, not flattened here.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from __future__ import annotations
|
|
37
|
+
|
|
38
|
+
from dataclasses import dataclass, field
|
|
39
|
+
from typing import Any, Dict, Optional
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def int_or_none(value: Any) -> Optional[int]:
    """Coerce a client-reported count to ``int``, or ``None`` if absent/unusable.

    Adapters use this when reading a client's JSON. A missing or non-numeric
    field becomes ``None`` ("unknown"), never ``0``, so a value the client did
    not report is never mistaken for a real zero. Booleans are rejected
    explicitly because ``bool`` is a subclass of ``int`` and would otherwise be
    read as 0 or 1.

    Args:
        value: The raw value taken from the client's output.

    Returns:
        ``int(value)`` when the conversion succeeds, otherwise ``None``.
    """
    if value is None or isinstance(value, bool):
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: non-numeric text or NaN;
        # OverflowError: an infinite float, which has no integer value.
        return None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def float_or_none(value: Any) -> Optional[float]:
    """Coerce a client-reported amount to ``float``, or ``None`` if absent/unusable.

    Booleans are rejected explicitly because ``bool`` is a subclass of ``int``
    and would otherwise be read as 0.0 or 1.0.

    Args:
        value: The raw value taken from the client's output.

    Returns:
        ``float(value)`` when the conversion succeeds, otherwise ``None``.
    """
    if value is None or isinstance(value, bool):
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: non-numeric text;
        # OverflowError: an integer too large to be represented as a float.
        return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
class Usage:
    """Normalized token counts for one recorded call, with the raw block kept.

    Each count is ``Optional[int]``: the value the client reported, or ``None``
    when the client did not report that count at all. ``cost_usd`` is the
    client's own estimate in US dollars when it offered one (advisory only),
    otherwise ``None``. ``raw`` is the client's verbatim usage structure, so a
    caller who needs a client-specific field that was not normalized can read
    it straight from the stored execution.
    """

    #: Prompt/input tokens consumed by the call.
    input_tokens: Optional[int] = None
    #: Generated/output tokens. When a client folds reasoning into output
    #: (Claude), reasoning is counted here and ``reasoning_tokens`` is unknown.
    output_tokens: Optional[int] = None
    #: Input tokens served from the client's prompt cache (a reduced-rate read).
    cache_read_tokens: Optional[int] = None
    #: Input tokens spent writing new prompt-cache entries. Unknown for clients
    #: that report no cache-write count (e.g. Codex).
    cache_write_tokens: Optional[int] = None
    #: Reasoning tokens reported *separately* from output (e.g. Codex). Unknown
    #: when the client folds reasoning into output (Claude) or omits it (Cursor).
    reasoning_tokens: Optional[int] = None
    #: The client's own dollar estimate for the call, when it reports one (only
    #: Claude does, today). Advisory, not authoritative billing; never derived.
    cost_usd: Optional[float] = None
    #: The client's verbatim usage structure (lossless), so unanticipated
    #: client-specific fields remain reachable. Its shape is per-client.
    raw: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict keyed by field name.

        ``raw`` is returned as the same dict object held by this instance, not
        a copy.
        """
        exported_names = (
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_write_tokens",
            "reasoning_tokens",
            "cost_usd",
            "raw",
        )
        return {name: getattr(self, name) for name in exported_names}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Cacheability rule: a pure domain rule shared by every front door."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Sequence
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def is_call_uncacheable(allow_paths: Sequence[str], scan_trust: bool) -> bool:
    """Tell whether declared allow-path folders make a call non-cacheable.

    The contents of allow-path folders are unbounded and cannot be
    fingerprinted, so the cache has no way to notice when they change; a call
    that declares them is therefore non-cacheable. ``scan_trust`` is the
    caller's explicit override, asserting that the folders are stable. This
    function is the single source of the rule, so a probe and a run cannot
    disagree about whether a given call is cacheable.

    Returns:
        True only when ``allow_paths`` is non-empty and ``scan_trust`` is
        falsy.
    """
    if not allow_paths:
        # No folders declared: nothing unbounded to worry about.
        return False
    return not scan_trust
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Fingerprinting of an API message list — a pure domain rule."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Sequence
|
|
8
|
+
|
|
9
|
+
from generic_ml_cache_core.application.domain.model.run.message import Message
|
|
10
|
+
from generic_ml_cache_core.common.checksum import checksum_input_data
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def fingerprint_messages(messages: Sequence[Message]) -> str:
    """Reduce an ordered message list to the digest that enters the key.

    Messages may contain the user's full (sensitive) context, so only their
    digest is ever keyed or stored, never the raw content. Each message
    contributes a ``"<index>:role"`` and a ``"<index>:content"`` entry; the
    position is part of the entry name, which makes order significant.
    Identical message lists produce identical fingerprints.
    """
    positional_entries: Dict[str, str] = {
        f"{position}:{part}": getattr(message, part)
        for position, message in enumerate(messages)
        for part in ("role", "content")
    }
    return checksum_input_data(positional_entries)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|