generic-ml-cache-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- generic_ml_cache_core/__init__.py +64 -0
- generic_ml_cache_core/adapter/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/__init__.py +1 -0
- generic_ml_cache_core/adapter/inbound/composition.py +96 -0
- generic_ml_cache_core/adapter/out/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/api/stub_api_client_adapter.py +30 -0
- generic_ml_cache_core/adapter/out/client/__init__.py +28 -0
- generic_ml_cache_core/adapter/out/client/claude.py +214 -0
- generic_ml_cache_core/adapter/out/client/codex.py +171 -0
- generic_ml_cache_core/adapter/out/client/cursor.py +208 -0
- generic_ml_cache_core/adapter/out/client/discover.py +121 -0
- generic_ml_cache_core/adapter/out/client/isolation.py +396 -0
- generic_ml_cache_core/adapter/out/client/local_client_runner.py +54 -0
- generic_ml_cache_core/adapter/out/client/passthrough_client_runner.py +47 -0
- generic_ml_cache_core/adapter/out/client/prime_directive.py +53 -0
- generic_ml_cache_core/adapter/out/client/registry.py +34 -0
- generic_ml_cache_core/adapter/out/clock/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/clock/system_clock.py +16 -0
- generic_ml_cache_core/adapter/out/fingerprint/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/fingerprint/filesystem_file_fingerprint.py +30 -0
- generic_ml_cache_core/adapter/out/metrics/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/metrics/access_registry.py +147 -0
- generic_ml_cache_core/adapter/out/metrics/journal_metrics.py +45 -0
- generic_ml_cache_core/adapter/out/persistence/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/persistence/call_identity_serialization.py +100 -0
- generic_ml_cache_core/adapter/out/persistence/in_memory_execution_repository.py +69 -0
- generic_ml_cache_core/adapter/out/persistence/sqlite_execution_repository.py +398 -0
- generic_ml_cache_core/adapter/out/storage/__init__.py +1 -0
- generic_ml_cache_core/adapter/out/storage/filesystem_blob_store.py +47 -0
- generic_ml_cache_core/application/__init__.py +1 -0
- generic_ml_cache_core/application/domain/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/client_status.py +17 -0
- generic_ml_cache_core/application/domain/model/execution/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/execution/artifact.py +78 -0
- generic_ml_cache_core/application/domain/model/execution/execution_failure.py +32 -0
- generic_ml_cache_core/application/domain/model/execution/execution_kind.py +26 -0
- generic_ml_cache_core/application/domain/model/execution/execution_state.py +21 -0
- generic_ml_cache_core/application/domain/model/execution/ml_execution.py +41 -0
- generic_ml_cache_core/application/domain/model/identity/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/identity/api_call_identity.py +36 -0
- generic_ml_cache_core/application/domain/model/identity/call_identity.py +25 -0
- generic_ml_cache_core/application/domain/model/identity/managed_call_identity.py +54 -0
- generic_ml_cache_core/application/domain/model/identity/passthrough_call_identity.py +35 -0
- generic_ml_cache_core/application/domain/model/model_info.py +20 -0
- generic_ml_cache_core/application/domain/model/model_listing.py +29 -0
- generic_ml_cache_core/application/domain/model/parsed_output.py +23 -0
- generic_ml_cache_core/application/domain/model/probe/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/probe/probe_report.py +26 -0
- generic_ml_cache_core/application/domain/model/probe/probe_status.py +13 -0
- generic_ml_cache_core/application/domain/model/run/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/run/cache_mode.py +21 -0
- generic_ml_cache_core/application/domain/model/run/client_run_request.py +35 -0
- generic_ml_cache_core/application/domain/model/run/client_run_result.py +65 -0
- generic_ml_cache_core/application/domain/model/run/message.py +20 -0
- generic_ml_cache_core/application/domain/model/usage/__init__.py +1 -0
- generic_ml_cache_core/application/domain/model/usage/token_usage.py +53 -0
- generic_ml_cache_core/application/domain/model/usage/usage.py +108 -0
- generic_ml_cache_core/application/domain/service/__init__.py +1 -0
- generic_ml_cache_core/application/domain/service/cacheability.py +19 -0
- generic_ml_cache_core/application/domain/service/message_fingerprinting.py +25 -0
- generic_ml_cache_core/application/port/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/__init__.py +1 -0
- generic_ml_cache_core/application/port/inbound/probe_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/probe_use_case.py +19 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_command.py +40 -0
- generic_ml_cache_core/application/port/inbound/run_api_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_command.py +48 -0
- generic_ml_cache_core/application/port/inbound/run_managed_local_execution_use_case.py +25 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_command.py +35 -0
- generic_ml_cache_core/application/port/inbound/run_passthrough_execution_use_case.py +20 -0
- generic_ml_cache_core/application/port/out/__init__.py +1 -0
- generic_ml_cache_core/application/port/out/api_client_port.py +26 -0
- generic_ml_cache_core/application/port/out/base.py +272 -0
- generic_ml_cache_core/application/port/out/blob_store_port.py +37 -0
- generic_ml_cache_core/application/port/out/client_runner_port.py +26 -0
- generic_ml_cache_core/application/port/out/clock_port.py +22 -0
- generic_ml_cache_core/application/port/out/execution_repository_port.py +40 -0
- generic_ml_cache_core/application/port/out/file_fingerprint_port.py +25 -0
- generic_ml_cache_core/application/port/out/metrics_port.py +54 -0
- generic_ml_cache_core/application/port/out/passthrough_runner_port.py +25 -0
- generic_ml_cache_core/application/usecase/__init__.py +1 -0
- generic_ml_cache_core/application/usecase/cached_ml_execution_service.py +198 -0
- generic_ml_cache_core/application/usecase/call_identity_building.py +60 -0
- generic_ml_cache_core/application/usecase/journal_events.py +19 -0
- generic_ml_cache_core/application/usecase/probe_service.py +44 -0
- generic_ml_cache_core/application/usecase/run_api_execution_service.py +69 -0
- generic_ml_cache_core/application/usecase/run_managed_local_execution_service.py +84 -0
- generic_ml_cache_core/application/usecase/run_passthrough_execution_service.py +67 -0
- generic_ml_cache_core/common/__init__.py +1 -0
- generic_ml_cache_core/common/checksum.py +82 -0
- generic_ml_cache_core/common/errors.py +76 -0
- generic_ml_cache_core/stream.py +65 -0
- generic_ml_cache_core-0.2.0.dist-info/METADATA +104 -0
- generic_ml_cache_core-0.2.0.dist-info/RECORD +99 -0
- generic_ml_cache_core-0.2.0.dist-info/WHEEL +4 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/LICENSE +201 -0
- generic_ml_cache_core-0.2.0.dist-info/licenses/NOTICE +8 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""SqliteExecutionRepository: the durable, append-only execution store."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import sqlite3
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
|
|
14
|
+
from generic_ml_cache_core.adapter.out.persistence.call_identity_serialization import (
|
|
15
|
+
SerializedIdentity,
|
|
16
|
+
deserialize_identity,
|
|
17
|
+
serialize_identity,
|
|
18
|
+
)
|
|
19
|
+
from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact, ArtifactType
|
|
20
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
21
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_failure import (
|
|
22
|
+
ExecutionFailure,
|
|
23
|
+
FailureReason,
|
|
24
|
+
)
|
|
25
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
26
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
|
|
27
|
+
from generic_ml_cache_core.application.domain.model.execution.ml_execution import MlExecution
|
|
28
|
+
from generic_ml_cache_core.application.domain.model.usage.token_usage import TokenUsage
|
|
29
|
+
from generic_ml_cache_core.application.port.out.clock_port import ClockPort
|
|
30
|
+
from generic_ml_cache_core.application.port.out.execution_repository_port import (
|
|
31
|
+
ExecutionRepositoryPort,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
_DB_NAME = "executions.sqlite3"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class ExecutionSummary:
    """One immutable row of the execution report.

    The same four fields are reported for every execution, whatever kind of
    call identity it belongs to.
    """

    # Key shared by the execution and its call identity.
    execution_key: str
    # The execution's kind, as stored (a plain string, not the enum).
    kind: str
    # Client name, denormalized from the call identity.
    client: str
    # Model name, denormalized from the call identity.
    model: str
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
_SCHEMA = """
|
|
48
|
+
CREATE TABLE IF NOT EXISTS call_identities (
|
|
49
|
+
execution_key TEXT PRIMARY KEY,
|
|
50
|
+
kind TEXT NOT NULL,
|
|
51
|
+
client TEXT NOT NULL,
|
|
52
|
+
model TEXT NOT NULL,
|
|
53
|
+
effort TEXT NOT NULL,
|
|
54
|
+
identity_json TEXT NOT NULL
|
|
55
|
+
);
|
|
56
|
+
CREATE TABLE IF NOT EXISTS executions (
|
|
57
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
58
|
+
execution_key TEXT NOT NULL,
|
|
59
|
+
kind TEXT NOT NULL,
|
|
60
|
+
state TEXT NOT NULL,
|
|
61
|
+
output_persisted INTEGER NOT NULL,
|
|
62
|
+
superseded_at TEXT,
|
|
63
|
+
failure_reason TEXT,
|
|
64
|
+
failure_message TEXT,
|
|
65
|
+
failure_exit_code INTEGER,
|
|
66
|
+
created_at TEXT NOT NULL
|
|
67
|
+
);
|
|
68
|
+
CREATE INDEX IF NOT EXISTS idx_executions_key ON executions(execution_key);
|
|
69
|
+
CREATE TABLE IF NOT EXISTS artifacts (
|
|
70
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
71
|
+
execution_id INTEGER NOT NULL,
|
|
72
|
+
artifact_type TEXT NOT NULL,
|
|
73
|
+
name TEXT,
|
|
74
|
+
encoding TEXT NOT NULL,
|
|
75
|
+
blob_key TEXT NOT NULL,
|
|
76
|
+
size_bytes INTEGER NOT NULL
|
|
77
|
+
);
|
|
78
|
+
CREATE INDEX IF NOT EXISTS idx_artifacts_execution ON artifacts(execution_id);
|
|
79
|
+
CREATE TABLE IF NOT EXISTS token_usage (
|
|
80
|
+
execution_id INTEGER PRIMARY KEY,
|
|
81
|
+
input_tokens INTEGER,
|
|
82
|
+
output_tokens INTEGER,
|
|
83
|
+
cache_read_tokens INTEGER,
|
|
84
|
+
cache_write_tokens INTEGER,
|
|
85
|
+
reasoning_tokens INTEGER,
|
|
86
|
+
cost_usd REAL,
|
|
87
|
+
raw_json TEXT NOT NULL
|
|
88
|
+
);
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class SqliteExecutionRepository(ExecutionRepositoryPort):
    """A durable, append-only execution store over SQLite.

    The hybrid identity persistence (domain-model §3): the queryable fields are
    real columns; the divergent identity fields ride in a JSON column. Executions
    are append-only — many per key — and a servable success atomically supersedes
    the prior current one inside a single transaction. The store holds structure
    only; artifact bytes live in the blob store, so reconstructed artifacts are
    dehydrated (content is None). The clock is injected and stamps supersession.

    Every public method opens its own connection and closes it before
    returning; no connection is kept on the instance.
    """

    def __init__(self, path: Path, clock: ClockPort) -> None:
        """Remember the database location and clock, then create the schema.

        Args:
            path: Location of the SQLite database file. Missing parent
                directories are created when a connection is opened.
            clock: Source of the timestamps written to ``created_at`` and
                ``superseded_at``.
        """
        self._path = Path(path)
        self._clock = clock
        self._ensure_schema()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection, creating the parent directory if needed."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        return sqlite3.connect(self._path)

    def _ensure_schema(self) -> None:
        """Apply the schema script (all statements are IF NOT EXISTS)."""
        connection = self._connect()
        try:
            connection.executescript(_SCHEMA)
            connection.commit()
        finally:
            connection.close()

    # -- reads ------------------------------------------------------------

    def find_current(self, execution_key: str) -> Optional[MlExecution]:
        """Return the current execution for ``execution_key``, or None.

        "Current" means a SUCCESS row whose output was persisted and which has
        not been superseded. If several such rows exist the newest (highest
        id) wins.
        """
        matches = self._select_executions(
            "execution_key = ? AND state = ? AND output_persisted = 1 "
            "AND superseded_at IS NULL ORDER BY id DESC LIMIT 1",
            (execution_key, ExecutionState.SUCCESS.value),
        )
        return matches[0] if matches else None

    def find_all(self, execution_key: str) -> List[MlExecution]:
        """Return every execution recorded for ``execution_key``, oldest first."""
        return self._select_executions("execution_key = ? ORDER BY id", (execution_key,))

    def _select_executions(self, criteria: str, parameters: tuple) -> List[MlExecution]:
        """Load the executions matched by ``criteria`` using one connection.

        Args:
            criteria: The SQL text following ``WHERE`` (including any
                ORDER BY / LIMIT). It is interpolated into the statement, so it
                must be SQL written in this module, never caller input.
            parameters: Values bound to the ``?`` placeholders in ``criteria``.

        Returns:
            The reconstructed executions, in the order the query yields them.
        """
        connection = self._connect()
        try:
            rows = connection.execute(
                f"SELECT {_EXECUTION_COLUMNS} FROM executions WHERE {criteria}",
                parameters,
            ).fetchall()
            # The connection must stay open here: each row triggers follow-up
            # queries for its identity, artifacts and token usage.
            return [self._load_execution(connection, row) for row in rows]
        finally:
            connection.close()

    # -- reporting (concrete; beyond the use-case port) -------------------

    def current_execution_summaries(self) -> List["ExecutionSummary"]:
        """A uniform reporting view of the current (servable) executions: key,
        kind, and the denormalized client/model — across all identity kinds."""
        connection = self._connect()
        try:
            rows = connection.execute(
                "SELECT e.execution_key, e.kind, i.client, i.model FROM executions e "
                "JOIN call_identities i ON i.execution_key = e.execution_key "
                "WHERE e.state = ? AND e.output_persisted = 1 AND e.superseded_at IS NULL "
                "ORDER BY e.id",
                (ExecutionState.SUCCESS.value,),
            ).fetchall()
            return [
                ExecutionSummary(execution_key=key, kind=kind, client=client, model=model)
                for (key, kind, client, model) in rows
            ]
        finally:
            connection.close()

    def find_current_by_key_prefix(self, key_prefix: str) -> List[MlExecution]:
        """The current executions whose key starts with ``key_prefix`` (so a short
        key from ``list`` is enough to ``inspect``).

        The prefix is matched literally: ``%``, ``_`` and ``\\`` in it are
        escaped so they cannot act as LIKE wildcards. Letter case is compared
        the way SQLite's LIKE compares it.
        """
        pattern = self._escape_like(key_prefix) + "%"
        return self._select_executions(
            "execution_key LIKE ? ESCAPE '\\' AND state = ? AND output_persisted = 1 "
            "AND superseded_at IS NULL ORDER BY id",
            (pattern, ExecutionState.SUCCESS.value),
        )

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE metacharacters in ``text`` for use with ``ESCAPE '\\'``."""
        # The backslash must be doubled first, otherwise the escapes added for
        # "%" and "_" would themselves be escaped.
        return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    # -- write ------------------------------------------------------------

    def save(self, execution: MlExecution) -> None:
        """Append ``execution`` and, if it is servable, retire the prior current one.

        The identity upsert, the supersession update and all inserts are
        committed together by the single ``commit()``. If any step raises, the
        connection is closed without committing, so nothing from this call is
        kept.
        """
        execution_key = execution.call_identity.generate_key()
        stamped_at = self._clock.now()
        connection = self._connect()
        try:
            self._upsert_identity(connection, execution_key, execution.call_identity)
            if self._is_servable(execution):
                self._supersede_prior_current(connection, execution_key, stamped_at)
            execution_id = self._insert_execution(connection, execution_key, execution, stamped_at)
            self._insert_artifacts(connection, execution_id, execution.artifacts)
            self._insert_token_usage(connection, execution_id, execution.token_usage)
            connection.commit()
        finally:
            connection.close()

    @staticmethod
    def _upsert_identity(
        connection: sqlite3.Connection, execution_key: str, identity: CallIdentity
    ) -> None:
        """Store the identity for ``execution_key`` unless one is already stored."""
        serialized = serialize_identity(identity)
        # INSERT OR IGNORE: the first identity written for a key is kept as is.
        connection.execute(
            "INSERT OR IGNORE INTO call_identities "
            "(execution_key, kind, client, model, effort, identity_json) VALUES (?, ?, ?, ?, ?, ?)",
            (
                execution_key,
                serialized.kind,
                serialized.client,
                serialized.model,
                serialized.effort,
                serialized.identity_json,
            ),
        )

    @staticmethod
    def _supersede_prior_current(
        connection: sqlite3.Connection, execution_key: str, stamped_at: datetime
    ) -> None:
        """Stamp ``superseded_at`` on every row that is currently servable for the key."""
        connection.execute(
            "UPDATE executions SET superseded_at = ? WHERE execution_key = ? "
            "AND state = ? AND output_persisted = 1 AND superseded_at IS NULL",
            (stamped_at.isoformat(), execution_key, ExecutionState.SUCCESS.value),
        )

    @staticmethod
    def _insert_execution(
        connection: sqlite3.Connection,
        execution_key: str,
        execution: MlExecution,
        stamped_at: datetime,
    ) -> int:
        """Insert the execution row and return its generated id.

        ``stamped_at`` becomes ``created_at``. The failure columns are NULL
        when the execution carries no failure.
        """
        failure = execution.failure
        cursor = connection.execute(
            "INSERT INTO executions (execution_key, kind, state, output_persisted, superseded_at, "
            "failure_reason, failure_message, failure_exit_code, created_at) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (
                execution_key,
                execution.execution_kind.value,
                execution.execution_state.value,
                1 if execution.output_persisted else 0,
                execution.superseded_at.isoformat() if execution.superseded_at else None,
                failure.reason.value if failure else None,
                failure.message if failure else None,
                failure.exit_code if failure else None,
                stamped_at.isoformat(),
            ),
        )
        return int(cursor.lastrowid)

    @staticmethod
    def _insert_artifacts(
        connection: sqlite3.Connection, execution_id: int, artifacts: List[Artifact]
    ) -> None:
        """Insert one reference row per artifact; the bytes are not stored here."""
        connection.executemany(
            "INSERT INTO artifacts (execution_id, artifact_type, name, encoding, blob_key, "
            "size_bytes) VALUES (?, ?, ?, ?, ?, ?)",
            [
                (
                    execution_id,
                    artifact.artifact_type.value,
                    artifact.name,
                    artifact.encoding,
                    artifact.blob_key,
                    artifact.size_bytes,
                )
                for artifact in artifacts
            ],
        )

    @staticmethod
    def _insert_token_usage(
        connection: sqlite3.Connection, execution_id: int, token_usage: Optional[TokenUsage]
    ) -> None:
        """Insert the usage row for the execution; does nothing when there is no usage."""
        if token_usage is None:
            return
        connection.execute(
            "INSERT INTO token_usage (execution_id, input_tokens, output_tokens, cache_read_tokens, "
            "cache_write_tokens, reasoning_tokens, cost_usd, raw_json) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (
                execution_id,
                token_usage.input_tokens,
                token_usage.output_tokens,
                token_usage.cache_read_tokens,
                token_usage.cache_write_tokens,
                token_usage.reasoning_tokens,
                token_usage.cost_usd,
                json.dumps(token_usage.raw),
            ),
        )

    # -- reconstruction ---------------------------------------------------

    def _load_execution(self, connection: sqlite3.Connection, row: tuple) -> MlExecution:
        """Rebuild an MlExecution from one ``executions`` row.

        ``row`` must hold the columns of ``_EXECUTION_COLUMNS`` in that order.
        The identity, artifacts and token usage are fetched with follow-up
        queries on the same connection.
        """
        (
            execution_id,
            execution_key,
            kind,
            state,
            output_persisted,
            superseded_at,
            failure_reason,
            failure_message,
            failure_exit_code,
        ) = row
        return MlExecution(
            call_identity=self._load_identity(connection, execution_key),
            execution_state=ExecutionState(state),
            execution_kind=ExecutionKind(kind),
            output_persisted=bool(output_persisted),
            artifacts=self._load_artifacts(connection, execution_id),
            token_usage=self._load_token_usage(connection, execution_id),
            # A failure is rebuilt only when a reason was stored.
            failure=(
                ExecutionFailure(
                    reason=FailureReason(failure_reason),
                    message=failure_message,
                    exit_code=failure_exit_code,
                )
                if failure_reason is not None
                else None
            ),
            superseded_at=datetime.fromisoformat(superseded_at) if superseded_at else None,
        )

    @staticmethod
    def _load_identity(connection: sqlite3.Connection, execution_key: str) -> CallIdentity:
        """Fetch and deserialize the call identity stored for ``execution_key``.

        Raises:
            LookupError: No identity row exists for the key (the execution row
                is orphaned).
        """
        row = connection.execute(
            "SELECT kind, client, model, effort, identity_json FROM call_identities "
            "WHERE execution_key = ?",
            (execution_key,),
        ).fetchone()
        if row is None:
            raise LookupError(f"no call identity stored for execution key {execution_key!r}")
        kind, client, model, effort, identity_json = row
        return deserialize_identity(
            SerializedIdentity(
                kind=kind, client=client, model=model, effort=effort, identity_json=identity_json
            )
        )

    @staticmethod
    def _load_artifacts(connection: sqlite3.Connection, execution_id: int) -> List[Artifact]:
        """Return the execution's artifacts in insertion order, dehydrated."""
        rows = connection.execute(
            "SELECT artifact_type, name, encoding, blob_key, size_bytes FROM artifacts "
            "WHERE execution_id = ? ORDER BY id",
            (execution_id,),
        ).fetchall()
        return [
            Artifact(
                artifact_type=ArtifactType(artifact_type),
                blob_key=blob_key,
                size_bytes=size_bytes,
                name=name,
                encoding=encoding,
                content=None,  # bytes are not stored in this database
            )
            for (artifact_type, name, encoding, blob_key, size_bytes) in rows
        ]

    @staticmethod
    def _load_token_usage(
        connection: sqlite3.Connection, execution_id: int
    ) -> Optional[TokenUsage]:
        """Return the stored token usage for the execution, or None if there is none."""
        row = connection.execute(
            "SELECT input_tokens, output_tokens, cache_read_tokens, cache_write_tokens, "
            "reasoning_tokens, cost_usd, raw_json FROM token_usage WHERE execution_id = ?",
            (execution_id,),
        ).fetchone()
        if row is None:
            return None
        (
            input_tokens,
            output_tokens,
            cache_read_tokens,
            cache_write_tokens,
            reasoning_tokens,
            cost_usd,
            raw_json,
        ) = row
        return TokenUsage(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            reasoning_tokens=reasoning_tokens,
            cost_usd=cost_usd,
            raw=json.loads(raw_json),
        )

    @staticmethod
    def _is_servable(execution: MlExecution) -> bool:
        """True when the execution succeeded, has persisted output and is not superseded."""
        return (
            execution.execution_state is ExecutionState.SUCCESS
            and execution.output_persisted
            and execution.superseded_at is None
        )
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
_EXECUTION_COLUMNS = (
|
|
396
|
+
"id, execution_key, kind, state, output_persisted, superseded_at, "
|
|
397
|
+
"failure_reason, failure_message, failure_exit_code"
|
|
398
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""FilesystemBlobStore: opaque artifact bytes addressed by content key on disk."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from generic_ml_cache_core.application.port.out.blob_store_port import BlobStorePort
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FilesystemBlobStore(BlobStorePort):
    """A directory of content-addressed blobs, one file per key.

    Dumb by construction: it stores and returns opaque bytes by key and never
    parses, computes a key, or interprets content. Writes are atomic (a unique
    temp file in the same directory, then ``os.replace``), so a crash mid-write
    never leaves a half-written blob.
    """

    def __init__(self, root: Path) -> None:
        """Remember the blob directory; it is created lazily on the first ``put``."""
        self._root = Path(root)

    def _path_for(self, key: str) -> Path:
        """Return the file path that holds the blob for ``key``."""
        return self._root / key

    def get(self, key: str) -> Optional[bytes]:
        """Return the bytes stored under ``key``, or None when there is no such blob."""
        # Read first and handle absence afterwards: checking existence and then
        # reading would fail if the blob is removed between the two steps.
        try:
            return self._path_for(key).read_bytes()
        except (FileNotFoundError, NotADirectoryError):
            return None

    def put(self, key: str, output: bytes) -> None:
        """Store ``output`` under ``key``, replacing any existing blob atomically.

        The bytes are written to a temp file next to the target and then moved
        into place with ``os.replace``. On any failure the temp file is removed
        and the error is re-raised.
        """
        import uuid  # local import: only needed to name the temp file

        self._root.mkdir(parents=True, exist_ok=True)
        path = self._path_for(key)
        # The pid alone is shared by all threads of a process; the random token
        # keeps concurrent writers of the same key on separate temp files.
        temp_path = path.with_name(f"{path.name}.{os.getpid()}.{uuid.uuid4().hex}.tmp")
        try:
            temp_path.write_bytes(output)
            os.replace(temp_path, path)
        except BaseException:
            # BaseException on purpose: clean up on KeyboardInterrupt too.
            temp_path.unlink(missing_ok=True)
            raise

    def remove(self, key: str) -> None:
        """Delete the blob for ``key``; a missing blob is not an error."""
        self._path_for(key).unlink(missing_ok=True)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""ClientStatus."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ClientStatus:
    """Discovery result for a single client. Informational only."""

    # Name of the client this status describes.
    name: str
    # Whether the client was found.
    present: bool
    # Resolved path of the executable; set when the client is present.
    executable: Optional[str] = None
    # First line printed by `--version`; best-effort, may stay None.
    version: Optional[str] = None
    # Explanation of why the client is absent or its version is unknown.
    detail: Optional[str] = None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""Artifact and ArtifactType."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import enum
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
_UTF8 = "utf-8"
|
|
12
|
+
_BINARY = "binary"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ArtifactType(enum.Enum):
    """Which kind of generated output an Artifact carries.

    A RAW_USAGE member is reserved for a later step, when the raw client usage
    block becomes an artifact of its own; for now raw usage still travels on
    TokenUsage.
    """

    STDOUT = "stdout"
    STDERR = "stderr"
    OUTPUT_FILE = "output_file"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True)
class Artifact:
    """A single generated document belonging to an execution's output.

    An artifact always refers to stored bytes through ``blob_key`` (the content
    checksum that addresses them in the blob store). ``content`` holds those
    bytes only while the artifact is hydrated; a dehydrated artifact keeps just
    the reference. Computing the key and storing the bytes is the use case's
    job, never the client runner's — this class only records the outcome.
    """

    artifact_type: ArtifactType
    blob_key: str
    size_bytes: int
    name: Optional[str] = None
    encoding: str = _UTF8
    content: Optional[bytes] = None

    @classmethod
    def from_content(
        cls,
        artifact_type: ArtifactType,
        blob_key: str,
        content: bytes,
        name: Optional[str] = None,
    ) -> "Artifact":
        """Assemble a hydrated artifact, deriving size and encoding from ``content``.

        ``blob_key`` is taken as given: the caller has already computed it and
        stored the bytes.
        """
        detected_encoding = cls._encoding_for(content)
        byte_count = len(content)
        return cls(
            artifact_type=artifact_type,
            blob_key=blob_key,
            size_bytes=byte_count,
            name=name,
            encoding=detected_encoding,
            content=content,
        )

    @staticmethod
    def _encoding_for(content: bytes) -> str:
        """Return the UTF-8 label if ``content`` decodes as UTF-8, else the binary label."""
        try:
            content.decode(_UTF8)
        except UnicodeDecodeError:
            return _BINARY
        return _UTF8

    @property
    def is_hydrated(self) -> bool:
        """Whether the artifact's bytes are currently held in memory."""
        bytes_missing = self.content is None
        return not bytes_missing
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ExecutionFailure and FailureReason."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import enum
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FailureReason(enum.Enum):
    """The cause of a failed run.

    Deliberately minimal for now; more members (TIMEOUT, NETWORK,
    CLIENT_ERROR, …) are expected as features land.
    """

    NONZERO_EXIT = "nonzero_exit"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class ExecutionFailure:
    """Why a run failed, as interpreted by the cache — set only for FAILED executions.

    This is distinct from stderr, which is captured output and stored as an
    Artifact. The shape covers both local and API executions: ``exit_code``
    carries the local client's exit code when that is the cause, and stays
    ``None`` for an API failure that has no exit code.
    """

    # The category of the failure.
    reason: FailureReason
    # Human-readable description of the failure.
    message: str
    # Exit code of the local client, when there is one.
    exit_code: Optional[int] = None
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ExecutionKind."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import enum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ExecutionKind(enum.Enum):
    """How gmlcache handles an MlExecution.

    LOCAL_MANAGED
        The full execution model: gmlcache launches the client in an isolated
        temporary folder, manages grants, captures generated files, and
        computes fingerprints.
    LOCAL_PASSTHROUGH
        A thin wrapper: raw native arguments go verbatim to the client, which
        runs in the caller's folder. There is no isolation, grant management
        or file capture, but stdout/stderr/exit can still be cached.
    API
        gmlcache calls an ML provider API directly; no local client
        executable and no filesystem isolation are involved.
    """

    LOCAL_MANAGED = "local_managed"
    LOCAL_PASSTHROUGH = "local_passthrough"
    API = "api"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""ExecutionState."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import enum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ExecutionState(enum.Enum):
    """Where an MlExecution is in its lifecycle.

    An execution starts IN_PROGRESS and ends as either SUCCESS or FAILED.

    There is no PASSTHROUGH state: passthrough is an ExecutionKind, and a
    passthrough execution goes through the same lifecycle as any other.
    """

    IN_PROGRESS = "in_progress"
    SUCCESS = "success"
    FAILED = "failed"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Daniel Slobozian
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
"""MlExecution."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
from generic_ml_cache_core.application.domain.model.execution.artifact import Artifact
|
|
12
|
+
from generic_ml_cache_core.application.domain.model.identity.call_identity import CallIdentity
|
|
13
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_failure import (
|
|
14
|
+
ExecutionFailure,
|
|
15
|
+
)
|
|
16
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_kind import ExecutionKind
|
|
17
|
+
from generic_ml_cache_core.application.domain.model.execution.execution_state import ExecutionState
|
|
18
|
+
from generic_ml_cache_core.application.domain.model.usage.token_usage import TokenUsage
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class MlExecution:
    """Aggregate root pairing a request to run an ML client with its outcome.

    ``execution_state`` tracks the run lifecycle (IN_PROGRESS, then SUCCESS or
    FAILED). Output is modelled purely as ``artifacts`` — stdout, stderr and
    output files — with no separate output object and no top-level exit code.
    ``failure`` explains a FAILED run and is absent otherwise.
    ``superseded_at`` is the cache-currency axis: None means the execution is
    current, a timestamp means it is stale. Executions are append-only per call
    identity, and their artifacts may be dehydrated (references only) or
    hydrated (bytes materialised).
    """

    # What was asked for.
    call_identity: CallIdentity
    # Lifecycle position of the run.
    execution_state: ExecutionState
    # How gmlcache handled the run.
    execution_kind: ExecutionKind
    # Whether the output was persisted.
    output_persisted: bool
    # Generated documents; each instance gets its own list.
    artifacts: List[Artifact] = field(default_factory=list)
    # Token accounting, when available.
    token_usage: Optional[TokenUsage] = None
    # Cause of failure; present only when FAILED.
    failure: Optional[ExecutionFailure] = None
    # None while current; set once a newer execution replaces this one.
    superseded_at: Optional[datetime] = None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Hexagonal layer package."""
|