generic-ml-cache-core 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. generic_ml_cache_core/__init__.py +64 -0
  2. generic_ml_cache_core/adapter/__init__.py +1 -0
  3. generic_ml_cache_core/adapter/inbound/__init__.py +1 -0
  4. generic_ml_cache_core/adapter/inbound/composition.py +96 -0
  5. generic_ml_cache_core/adapter/out/__init__.py +1 -0
  6. generic_ml_cache_core/adapter/out/api/__init__.py +1 -0
  7. generic_ml_cache_core/adapter/out/api/stub_api_client_adapter.py +30 -0
  8. generic_ml_cache_core/adapter/out/client/__init__.py +28 -0
  9. generic_ml_cache_core/adapter/out/client/claude.py +214 -0
  10. generic_ml_cache_core/adapter/out/client/codex.py +171 -0
  11. generic_ml_cache_core/adapter/out/client/cursor.py +208 -0
  12. generic_ml_cache_core/adapter/out/client/discover.py +121 -0
  13. generic_ml_cache_core/adapter/out/client/isolation.py +396 -0
  14. generic_ml_cache_core/adapter/out/client/local_client_runner.py +54 -0
  15. generic_ml_cache_core/adapter/out/client/passthrough_client_runner.py +47 -0
  16. generic_ml_cache_core/adapter/out/client/prime_directive.py +53 -0
  17. generic_ml_cache_core/adapter/out/client/registry.py +34 -0
  18. generic_ml_cache_core/adapter/out/clock/__init__.py +1 -0
  19. generic_ml_cache_core/adapter/out/clock/system_clock.py +16 -0
  20. generic_ml_cache_core/adapter/out/fingerprint/__init__.py +1 -0
  21. generic_ml_cache_core/adapter/out/fingerprint/filesystem_file_fingerprint.py +30 -0
  22. generic_ml_cache_core/adapter/out/metrics/__init__.py +1 -0
  23. generic_ml_cache_core/adapter/out/metrics/access_registry.py +147 -0
  24. generic_ml_cache_core/adapter/out/metrics/journal_metrics.py +45 -0
  25. generic_ml_cache_core/adapter/out/persistence/__init__.py +1 -0
  26. generic_ml_cache_core/adapter/out/persistence/call_identity_serialization.py +100 -0
  27. generic_ml_cache_core/adapter/out/persistence/in_memory_execution_repository.py +69 -0
  28. generic_ml_cache_core/adapter/out/persistence/sqlite_execution_repository.py +398 -0
  29. generic_ml_cache_core/adapter/out/storage/__init__.py +1 -0
  30. generic_ml_cache_core/adapter/out/storage/filesystem_blob_store.py +47 -0
  31. generic_ml_cache_core/application/__init__.py +1 -0
  32. generic_ml_cache_core/application/domain/__init__.py +1 -0
  33. generic_ml_cache_core/application/domain/model/__init__.py +1 -0
  34. generic_ml_cache_core/application/domain/model/client_status.py +17 -0
  35. generic_ml_cache_core/application/domain/model/execution/__init__.py +1 -0
  36. generic_ml_cache_core/application/domain/model/execution/artifact.py +78 -0
  37. generic_ml_cache_core/application/domain/model/execution/execution_failure.py +32 -0
  38. generic_ml_cache_core/application/domain/model/execution/execution_kind.py +26 -0
  39. generic_ml_cache_core/application/domain/model/execution/execution_state.py +21 -0
  40. generic_ml_cache_core/application/domain/model/execution/ml_execution.py +41 -0
  41. generic_ml_cache_core/application/domain/model/identity/__init__.py +1 -0
  42. generic_ml_cache_core/application/domain/model/identity/api_call_identity.py +36 -0
  43. generic_ml_cache_core/application/domain/model/identity/call_identity.py +25 -0
  44. generic_ml_cache_core/application/domain/model/identity/managed_call_identity.py +54 -0
  45. generic_ml_cache_core/application/domain/model/identity/passthrough_call_identity.py +35 -0
  46. generic_ml_cache_core/application/domain/model/model_info.py +20 -0
  47. generic_ml_cache_core/application/domain/model/model_listing.py +29 -0
  48. generic_ml_cache_core/application/domain/model/parsed_output.py +23 -0
  49. generic_ml_cache_core/application/domain/model/probe/__init__.py +1 -0
  50. generic_ml_cache_core/application/domain/model/probe/probe_report.py +26 -0
  51. generic_ml_cache_core/application/domain/model/probe/probe_status.py +13 -0
  52. generic_ml_cache_core/application/domain/model/run/__init__.py +1 -0
  53. generic_ml_cache_core/application/domain/model/run/cache_mode.py +21 -0
  54. generic_ml_cache_core/application/domain/model/run/client_run_request.py +35 -0
  55. generic_ml_cache_core/application/domain/model/run/client_run_result.py +65 -0
  56. generic_ml_cache_core/application/domain/model/run/message.py +20 -0
  57. generic_ml_cache_core/application/domain/model/usage/__init__.py +1 -0
  58. generic_ml_cache_core/application/domain/model/usage/token_usage.py +53 -0
  59. generic_ml_cache_core/application/domain/model/usage/usage.py +108 -0
  60. generic_ml_cache_core/application/domain/service/__init__.py +1 -0
  61. generic_ml_cache_core/application/domain/service/cacheability.py +19 -0
  62. generic_ml_cache_core/application/domain/service/message_fingerprinting.py +25 -0
  63. generic_ml_cache_core/application/port/__init__.py +1 -0
  64. generic_ml_cache_core/application/port/inbound/__init__.py +1 -0
  65. generic_ml_cache_core/application/port/inbound/probe_command.py +35 -0
  66. generic_ml_cache_core/application/port/inbound/probe_use_case.py +19 -0
  67. generic_ml_cache_core/application/port/inbound/run_api_execution_command.py +40 -0
  68. generic_ml_cache_core/application/port/inbound/run_api_execution_use_case.py +20 -0
  69. generic_ml_cache_core/application/port/inbound/run_managed_local_execution_command.py +48 -0
  70. generic_ml_cache_core/application/port/inbound/run_managed_local_execution_use_case.py +25 -0
  71. generic_ml_cache_core/application/port/inbound/run_passthrough_execution_command.py +35 -0
  72. generic_ml_cache_core/application/port/inbound/run_passthrough_execution_use_case.py +20 -0
  73. generic_ml_cache_core/application/port/out/__init__.py +1 -0
  74. generic_ml_cache_core/application/port/out/api_client_port.py +26 -0
  75. generic_ml_cache_core/application/port/out/base.py +272 -0
  76. generic_ml_cache_core/application/port/out/blob_store_port.py +37 -0
  77. generic_ml_cache_core/application/port/out/client_runner_port.py +26 -0
  78. generic_ml_cache_core/application/port/out/clock_port.py +22 -0
  79. generic_ml_cache_core/application/port/out/execution_repository_port.py +40 -0
  80. generic_ml_cache_core/application/port/out/file_fingerprint_port.py +25 -0
  81. generic_ml_cache_core/application/port/out/metrics_port.py +54 -0
  82. generic_ml_cache_core/application/port/out/passthrough_runner_port.py +25 -0
  83. generic_ml_cache_core/application/usecase/__init__.py +1 -0
  84. generic_ml_cache_core/application/usecase/cached_ml_execution_service.py +198 -0
  85. generic_ml_cache_core/application/usecase/call_identity_building.py +60 -0
  86. generic_ml_cache_core/application/usecase/journal_events.py +19 -0
  87. generic_ml_cache_core/application/usecase/probe_service.py +44 -0
  88. generic_ml_cache_core/application/usecase/run_api_execution_service.py +69 -0
  89. generic_ml_cache_core/application/usecase/run_managed_local_execution_service.py +84 -0
  90. generic_ml_cache_core/application/usecase/run_passthrough_execution_service.py +67 -0
  91. generic_ml_cache_core/common/__init__.py +1 -0
  92. generic_ml_cache_core/common/checksum.py +82 -0
  93. generic_ml_cache_core/common/errors.py +76 -0
  94. generic_ml_cache_core/stream.py +65 -0
  95. generic_ml_cache_core-0.2.0.dist-info/METADATA +104 -0
  96. generic_ml_cache_core-0.2.0.dist-info/RECORD +99 -0
  97. generic_ml_cache_core-0.2.0.dist-info/WHEEL +4 -0
  98. generic_ml_cache_core-0.2.0.dist-info/licenses/LICENSE +201 -0
  99. generic_ml_cache_core-0.2.0.dist-info/licenses/NOTICE +8 -0
@@ -0,0 +1,36 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """ApiCallIdentity."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+
9
+ from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
10
+ from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
11
+ from generic_ml_cache_core.common.checksum import checksum_input_data
12
+
13
+
14
@dataclass(frozen=True)
class ApiCallIdentity(CallIdentity):
    """Identity of a direct API call.

    The call is addressed by its provider, its model and a fingerprint of the
    complete message list. Raw messages may hold sensitive context, so only
    their digest is keyed or stored -- never the messages themselves. The
    execution kind is part of the hashed data, which keeps an API call from
    colliding with a local managed or passthrough call.
    """

    provider: str
    model: str
    messages_fingerprint: str

    def generate_key(self) -> str:
        """Return the checksum over kind, provider, model and messages digest."""
        key_fields = {
            "kind": ExecutionKind.API.value,
            "provider": self.provider,
            "model": self.model,
            "messages": self.messages_fingerprint,
        }
        return checksum_input_data(key_fields)
@@ -0,0 +1,25 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """CallIdentity."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from abc import ABC, abstractmethod
8
+
9
+
10
class CallIdentity(ABC):
    """Value object from which an execution's cache key is derived.

    The type is polymorphic because every execution kind keys on different
    fields: a managed call on fingerprints of model/prompt/files, a
    passthrough call on its opaque native args, an API call on its provider
    and messages. The aggregate is addressed through ``generate_key``, and
    each implementation folds its own kind into that key so that identities
    of different kinds can never collide.
    """

    @abstractmethod
    def generate_key(self) -> str:
        """Return a stable hex digest that uniquely addresses this call.

        Implementations are expected to be pure: they hash only fingerprints
        that are already in memory and perform no I/O.
        """
@@ -0,0 +1,54 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """ManagedCallIdentity."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Dict, FrozenSet, Optional
9
+
10
+ from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
11
+ from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
12
+ from generic_ml_cache_core.common.checksum import checksum_input_data
13
+
14
+
15
@dataclass(frozen=True)
class ManagedCallIdentity(CallIdentity):
    """Identity of a fully managed local call.

    Only processed fields live here: when an instance is built, every text
    input has already been fingerprinted and every file path resolved to the
    fingerprint of its content. This is not the user's raw request.

    allow_paths (permission grants to scan folders) are deliberately NOT a
    field: they do not enter the key and travel separately to the client
    runner.
    """

    client: str
    model: str
    effort: str
    context_fingerprint: str
    prompt_fingerprint: str
    input_file_fingerprints: Dict[str, str] = field(default_factory=dict)
    client_args_fingerprint: Optional[str] = None
    grants: FrozenSet[str] = field(default_factory=frozenset)

    def generate_key(self) -> str:
        """Return the checksum over the kind and every keyed field.

        Optional parts (client args, grants) are added only when present.
        """
        fields: Dict[str, str] = {
            "kind": ExecutionKind.LOCAL_MANAGED.value,
            "client": self.client,
            "model": self.model,
            "effort": self.effort,
            "context": self.context_fingerprint,
            "prompt": self.prompt_fingerprint,
        }
        # Path-sensitive on purpose: the path is keyed next to the content
        # fingerprint. Renaming a file is a real change (the prompt may refer
        # to it by name) and must produce a new key -- soundness over
        # hit-rate, i.e. a miss is preferred to a wrong hit.
        fingerprints = self.input_file_fingerprints
        for path in sorted(fingerprints):
            fields[f"file:{path}"] = fingerprints[path]
        args_fingerprint = self.client_args_fingerprint
        if args_fingerprint is not None:
            # NOTE(review): the dictionary key embeds the fingerprint itself,
            # unlike PassthroughCallIdentity which uses a fixed "args" key.
            # Left as-is because changing it would change the produced keys.
            fields[f"args:{args_fingerprint}"] = args_fingerprint
        if self.grants:
            fields["grants"] = ",".join(sorted(self.grants))
        return checksum_input_data(fields)
@@ -0,0 +1,35 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """PassthroughCallIdentity."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+
9
+ from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
10
+ from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
11
+ from generic_ml_cache_core.common.checksum import checksum_input_data
12
+
13
+
14
@dataclass(frozen=True)
class PassthroughCallIdentity(CallIdentity):
    """Identity of a passthrough (alias) call.

    A passthrough is opaque to gmlcache: its inputs are not modelled, the
    native argument tail is simply forwarded to the client. The identity is
    therefore the client plus a *fingerprint* of those native args. The raw
    args may contain secrets, so only their digest is keyed or stored. The
    execution kind is part of the hashed data, which keeps a passthrough from
    colliding with a managed call.
    """

    client: str
    native_args_fingerprint: str

    def generate_key(self) -> str:
        """Return the checksum over kind, client and native-args digest."""
        key_fields = {
            "kind": ExecutionKind.LOCAL_PASSTHROUGH.value,
            "client": self.client,
            "args": self.native_args_fingerprint,
        }
        return checksum_input_data(key_fields)
@@ -0,0 +1,20 @@
1
+ """ModelInfo."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class ModelInfo:
    """A single model that a client reports as usable -- relayed, not derived.

    ``id`` is the string a caller would hand to ``--model`` and ``name`` is
    the client's own human-readable label. ``default`` and ``current`` mirror
    whatever marker the client printed. The cache neither invents nor
    validates any of these fields.
    """

    id: str
    name: str
    default: bool = False
    current: bool = False
@@ -0,0 +1,29 @@
1
+ """ModelListing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import List, Optional
7
+
8
+ from generic_ml_cache_core.application.port.out.base import ModelInfo
9
+
10
+
11
@dataclass
class ModelListing:
    """What discovery was able to learn about one client's available models.

    There are three honest outcomes and never a guess:

    * the client is absent -> ``present=False`` (``supported`` carries no
      meaning then and is left False);
    * the client is present but has no listing mechanism ->
      ``supported=False`` together with a ``reason``;
    * the client is present and listed -> ``supported=True`` with ``models``
      populated (possibly empty when the client genuinely reported none).

    ``models`` holds exactly what the client relayed -- the cache invents
    nothing.
    """

    name: str
    present: bool
    supported: bool
    models: Optional[List[ModelInfo]] = None
    reason: Optional[str] = None
@@ -0,0 +1,23 @@
1
+ """ParsedOutput."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Optional
7
+
8
+ from generic_ml_cache_core.application.domain.model.usage.usage import Usage
9
+
10
+
11
@dataclass(frozen=True)
class ParsedOutput:
    """Result of an adapter extracting data from a client's structured output.

    ``text`` is the clean answer the caller should see on stdout, i.e. the
    client's own answer text lifted out of its JSON wrapper. ``usage`` is the
    normalized envelope read from that same output; it is ``None`` when the
    client offered no usage, or when the output could not be parsed -- in
    which case the text falls back to the raw stdout so the core call still
    resolves.
    """

    text: str
    usage: Optional[Usage] = None
@@ -0,0 +1 @@
1
+ """Hexagonal layer package."""
@@ -0,0 +1,26 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """ProbeReport."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Optional
9
+
10
+ from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
11
+ from generic_ml_cache_core.application.domain.model.probe.probe_status import ProbeStatus
12
+
13
+
14
@dataclass(frozen=True)
class ProbeReport:
    """Forecast produced by a read-only probe of a call.

    ``execution_key`` is derived for every verdict, so a caller can display
    the key even on a miss. ``execution`` is the dehydrated current execution
    and is present only on a HIT; it carries the queryable metadata
    (artifacts, usage) without any output bytes having been fetched.
    """

    status: ProbeStatus
    execution_key: str
    execution: Optional[MlExecution] = None
@@ -0,0 +1,13 @@
1
+ """ProbeStatus."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import enum
6
+
7
+
8
class ProbeStatus(enum.Enum):
    """Verdict returned by a read-only cache probe (see :func:`probe`)."""

    # A stored execution exists for this exact call.
    HIT = "hit"
    # The call is cacheable, but no execution has been recorded yet.
    MISS = "miss"
    # The call declares allow-path folders, so it is never cached.
    NON_CACHEABLE = "non-cacheable"
@@ -0,0 +1 @@
1
+ """Hexagonal layer package."""
@@ -0,0 +1,21 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """CacheMode."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import enum
8
+
9
+
10
class CacheMode(enum.Enum):
    """Policy that decides how an MlExecution is resolved against the cache.

    CACHE   -- the default: a hit is served from the cache; on a miss the
               client is called and the result recorded.
    OFFLINE -- cache only: a miss raises an error.
    REFRESH -- the client is always called and any stored output is
               overwritten.
    """

    CACHE = "cache"
    OFFLINE = "offline"
    REFRESH = "refresh"
@@ -0,0 +1,35 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """ClientRunRequest."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import FrozenSet, List, Optional
9
+
10
+
11
@dataclass(frozen=True)
class ClientRunRequest:
    """DTO built by the use case and handed to ClientRunnerPort.

    It holds only what the client runner needs in order to launch the client.
    The command's gmlcache-specific policy fields (cache_mode, persist_output,
    scan_trust) are intentionally absent: they concern the use case, not the
    client runner.

    input_file_paths are the declared files the client is granted read access
    to (their content has already been fingerprinted into the key);
    allow_paths are the permission-grant folder paths the client may scan.
    The runner opens the read-door for both and fingerprints neither, because
    that has already happened.
    """

    client: str
    model: str
    effort: str
    context: str
    prompt: str
    input_file_paths: List[str] = field(default_factory=list)
    allow_paths: List[str] = field(default_factory=list)
    client_args: List[str] = field(default_factory=list)
    grants: FrozenSet[str] = field(default_factory=frozenset)
    user_system_prompt: Optional[str] = None
@@ -0,0 +1,65 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """ClientRunResult and GeneratedFile."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import List, Optional
9
+
10
+ from generic_ml_cache_core.application.domain.model.execution.execution_failure import (
11
+ ExecutionFailure,
12
+ FailureReason,
13
+ )
14
+ from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
15
+ from generic_ml_cache_core.application.domain.model.usage.token_usage import TokenUsage
16
+
17
+
18
@dataclass(frozen=True)
class GeneratedFile:
    """A file produced by the client, captured raw as a name plus its bytes.

    It carries neither a checksum nor a blob key, because storing is the use
    case's responsibility rather than the runner's.
    """

    name: str
    content: bytes
27
+
28
+
29
@dataclass(frozen=True)
class ClientRunResult:
    """Raw, transient result handed back by the ClientRunnerPort.

    This is the contract surface of the runner port rather than an
    adapter-internal type, but nothing in it has been stored yet. The use
    case converts it into stored Artifacts (hashing each piece and putting it
    in the blob store) and assembles the MlExecution. The runner itself never
    hashes, never computes a key and never stores.

    The object also translates its own ``exit_code`` into a run outcome. That
    rule reads nothing but this object's data, which is why it lives here and
    not in the use case.
    """

    exit_code: int
    stdout: str = ""
    stderr: str = ""
    files: List[GeneratedFile] = field(default_factory=list)
    #: Token accounting observed by the runner (a structured client or an
    #: API), or None when nothing was reported. The shared flow carries it to
    #: MlExecution.token_usage; it is database-bound accounting and is not
    #: stored as output bytes.
    token_usage: Optional[TokenUsage] = None

    @property
    def succeeded(self) -> bool:
        """True exactly when the client exited with status 0."""
        return self.exit_code == 0

    def outcome(self) -> ExecutionState:
        """Map the exit status to SUCCESS or FAILED."""
        if self.succeeded:
            return ExecutionState.SUCCESS
        return ExecutionState.FAILED

    def failure(self) -> Optional[ExecutionFailure]:
        """Return a NONZERO_EXIT failure for a failed run, else ``None``."""
        if not self.succeeded:
            return ExecutionFailure(
                reason=FailureReason.NONZERO_EXIT,
                message=f"client exited with status {self.exit_code}",
                exit_code=self.exit_code,
            )
        return None
@@ -0,0 +1,20 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Message."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass(frozen=True)
class Message:
    """A single message of an API call's context: a role plus its content.

    The type is provider-agnostic: the caller assembles the full message list
    and gmlcache forwards it to the provider. ``role`` stays a plain string
    because providers differ in which roles they accept (system, user,
    assistant, tool, ...).
    """

    role: str
    content: str
@@ -0,0 +1 @@
1
+ """Hexagonal layer package."""
@@ -0,0 +1,53 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """TokenUsage."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, Optional
9
+
10
+ from generic_ml_cache_core.application.domain.model.usage.usage import float_or_none, int_or_none
11
+
12
+
13
@dataclass(frozen=True)
class TokenUsage:
    """Normalized token counts for one ML execution, with the raw block kept.

    Accounting data: database-bound, separate from the output artifacts.
    Every count is Optional[int]: a value the client reported, or None when it
    did not report that field at all. None means unknown, never zero.
    cost_usd is the client's own advisory estimate; never derived by gmlcache.
    raw preserves the client's verbatim usage structure losslessly.
    """

    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    cache_read_tokens: Optional[int] = None
    cache_write_tokens: Optional[int] = None
    reasoning_tokens: Optional[int] = None
    cost_usd: Optional[float] = None
    raw: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary.

        ``raw`` is returned by reference, not copied.
        """
        return {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "cache_read_tokens": self.cache_read_tokens,
            "cache_write_tokens": self.cache_write_tokens,
            "reasoning_tokens": self.reasoning_tokens,
            "cost_usd": self.cost_usd,
            "raw": self.raw,
        }

    @classmethod
    def from_dict(cls, token_usage_dict: Dict[str, Any]) -> "TokenUsage":
        """Build a TokenUsage from a dictionary shaped like ``to_dict``'s output.

        Missing or unusable counts become ``None`` via ``int_or_none`` /
        ``float_or_none``. ``raw`` is shallow-copied; a missing key or an
        explicit ``None`` both yield an empty dictionary.
        """
        # An explicit null for "raw" must not crash: ``dict(None)`` raises
        # TypeError, so it is treated the same as a missing key.
        raw_block = token_usage_dict.get("raw")
        if raw_block is None:
            raw_block = {}
        return cls(
            input_tokens=int_or_none(token_usage_dict.get("input_tokens")),
            output_tokens=int_or_none(token_usage_dict.get("output_tokens")),
            cache_read_tokens=int_or_none(token_usage_dict.get("cache_read_tokens")),
            cache_write_tokens=int_or_none(token_usage_dict.get("cache_write_tokens")),
            reasoning_tokens=int_or_none(token_usage_dict.get("reasoning_tokens")),
            cost_usd=float_or_none(token_usage_dict.get("cost_usd")),
            raw=dict(raw_block),
        )
@@ -0,0 +1,108 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """The usage envelope: what one recorded call consumed, in a common shape.
4
+
5
+ Two things live here:
6
+
7
+ * :class:`Usage` -- a **normalized** token/cost envelope with a small core every
8
+ client fills, plus an optional ring only some report. It keeps the client's
9
+ **raw** usage block verbatim alongside, so nothing the client reported is lost.
10
+ * :class:`ParsedOutput` -- what an adapter pulls out of a client's structured
11
+ output: the clean answer text (what the caller sees on stdout) and the
12
+ :class:`Usage` it read from the same output.
13
+
14
+ Design rulings this encodes (do not relitigate):
15
+
16
+ * **Tokens are the spine, not dollars.** Every client reports tokens; only some
17
+ report a dollar figure, and even that figure is the *client's own local
18
+ estimate* (computed from a price table bundled into the client at build time),
19
+ not authoritative billing. So ``cost_usd`` is advisory: recorded when offered,
20
+ never derived by the cache, never authoritative.
21
+ * **Unknown is not zero.** A field the client did not report is ``None``
22
+ ("unknown"), never ``0``. Codex reports no cache-write count -- that is unknown,
23
+ not "wrote nothing". This distinction is in the type from the start because we
24
+ cannot anticipate what any given client (or client version, or detached/parallel
25
+ run) chooses to report.
26
+ * **We record what the call reported; we do not reconstruct.** If a client
27
+ under-reports (e.g. subagents that billed outside the single invocation we
28
+ launched), that is the client's gap; we mark it unknown rather than invent a
29
+ total.
30
+ * **The model the call ran under lives on the stored execution** (its ``model`` field), so
31
+ a reader always shows usage *next to its model* -- a Haiku token is not an Opus
32
+ token. The full per-model / per-subagent breakdown a client may give (e.g.
33
+ Claude's ``modelUsage``) is preserved in :attr:`Usage.raw`, not flattened here.
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ from dataclasses import dataclass, field
39
+ from typing import Any, Dict, Optional
40
+
41
+
42
def int_or_none(value: Any) -> Optional[int]:
    """Coerce a client-reported count to ``int``, or ``None`` if absent/unusable.

    Used by adapters reading a client's JSON: a missing or non-numeric field
    becomes ``None`` ("unknown"), never ``0``, so a value the client did not
    report is never mistaken for a real zero. Booleans are rejected as well,
    since ``int(True)`` would otherwise yield a misleading ``1``.
    """
    if value is None or isinstance(value, bool):
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) -- JSON decoders can produce
        # Infinity, and that is just as unusable as a non-numeric string.
        return None
55
+
56
+
57
def float_or_none(value: Any) -> Optional[float]:
    """Coerce a client-reported amount to ``float``, or ``None`` if absent/unusable.

    ``None`` and booleans are treated as "not reported"; anything ``float()``
    cannot convert also yields ``None`` instead of raising.
    """
    if value is None or isinstance(value, bool):
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large to be represented as a float.
        return None
65
+
66
+
67
@dataclass(frozen=True)
class Usage:
    """Normalized token counts for one recorded call, raw block included.

    Each count is ``Optional[int]``: either a value the client reported or
    ``None`` when it did not report that count at all. ``cost_usd`` is the
    client's own US-dollar estimate when it offered one (advisory only -- see
    the module docs), otherwise ``None``. ``raw`` is the client's verbatim
    usage structure, so a caller wanting a client-specific field that was not
    normalized can read it straight from the stored execution.
    """

    #: Prompt/input tokens consumed by the call.
    input_tokens: Optional[int] = None
    #: Generated/output tokens. Clients that fold reasoning into output
    #: (Claude) include it here, and ``reasoning_tokens`` is then unknown.
    output_tokens: Optional[int] = None
    #: Input tokens served from the client's prompt cache (reduced-rate read).
    cache_read_tokens: Optional[int] = None
    #: Input tokens spent on writing new prompt-cache entries. Unknown for
    #: clients that report no cache-write count (e.g. Codex).
    cache_write_tokens: Optional[int] = None
    #: Reasoning tokens reported *separately* from output (e.g. Codex).
    #: Unknown when the client folds reasoning into output (Claude) or omits
    #: it (Cursor).
    reasoning_tokens: Optional[int] = None
    #: The client's own dollar estimate for the call, when it reports one
    #: (only Claude does, today). Advisory, not authoritative billing; never
    #: derived.
    cost_usd: Optional[float] = None
    #: The client's verbatim usage structure (lossless), keeping
    #: unanticipated client-specific fields reachable. Shape is per-client.
    raw: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary (``raw`` by reference)."""
        ordered_names = (
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_write_tokens",
            "reasoning_tokens",
            "cost_usd",
            "raw",
        )
        return {name: getattr(self, name) for name in ordered_names}
@@ -0,0 +1 @@
1
+ """Hexagonal layer package."""
@@ -0,0 +1,19 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Cacheability rule: a pure domain rule shared by every front door."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Sequence
8
+
9
+
10
def is_call_uncacheable(allow_paths: Sequence[str], scan_trust: bool) -> bool:
    """Tell whether declared allow-path folders make a call non-cacheable.

    Folder contents are unbounded and cannot be fingerprinted, so the cache
    has no way of noticing when they change; a call declaring such folders is
    therefore non-cacheable. ``scan_trust`` is the caller's explicit override,
    asserting that the folders are stable. The rule is defined only here, so
    a probe and a run can never disagree on whether a call is cacheable.
    """
    if not allow_paths:
        # Nothing unbounded was declared: the rule does not apply.
        return False
    return not scan_trust
@@ -0,0 +1,25 @@
1
+ # SPDX-FileCopyrightText: 2026 Daniel Slobozian
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Fingerprinting of an API message list — a pure domain rule."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Dict, Sequence
8
+
9
+ from generic_ml_cache_core.application.domain.model.run.message import Message
10
+ from generic_ml_cache_core.common.checksum import checksum_input_data
11
+
12
+
13
def fingerprint_messages(messages: Sequence[Message]) -> str:
    """Reduce an ordered message list to the fingerprint used in the key.

    Messages may hold the user's full (sensitive) context, so only their
    digest is ever keyed or stored -- never the raw content. Order matters
    and is captured by the positional keys; identical message lists produce
    identical fingerprints.
    """
    positional: Dict[str, str] = {}
    for position, entry in enumerate(messages):
        positional.update(
            {
                f"{position}:role": entry.role,
                f"{position}:content": entry.content,
            }
        )
    return checksum_input_data(positional)
@@ -0,0 +1 @@
1
+ """Hexagonal layer package."""
@@ -0,0 +1 @@
1
+ """Hexagonal layer package."""