expops-0.1.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expops-0.1.3.dist-info/METADATA +826 -0
- expops-0.1.3.dist-info/RECORD +86 -0
- expops-0.1.3.dist-info/WHEEL +5 -0
- expops-0.1.3.dist-info/entry_points.txt +3 -0
- expops-0.1.3.dist-info/licenses/LICENSE +674 -0
- expops-0.1.3.dist-info/top_level.txt +1 -0
- mlops/__init__.py +0 -0
- mlops/__main__.py +11 -0
- mlops/_version.py +34 -0
- mlops/adapters/__init__.py +12 -0
- mlops/adapters/base.py +86 -0
- mlops/adapters/config_schema.py +89 -0
- mlops/adapters/custom/__init__.py +3 -0
- mlops/adapters/custom/custom_adapter.py +447 -0
- mlops/adapters/plugin_manager.py +113 -0
- mlops/adapters/sklearn/__init__.py +3 -0
- mlops/adapters/sklearn/adapter.py +94 -0
- mlops/cluster/__init__.py +3 -0
- mlops/cluster/controller.py +496 -0
- mlops/cluster/process_runner.py +91 -0
- mlops/cluster/providers.py +258 -0
- mlops/core/__init__.py +95 -0
- mlops/core/custom_model_base.py +38 -0
- mlops/core/dask_networkx_executor.py +1265 -0
- mlops/core/executor_worker.py +1239 -0
- mlops/core/experiment_tracker.py +81 -0
- mlops/core/graph_types.py +64 -0
- mlops/core/networkx_parser.py +135 -0
- mlops/core/payload_spill.py +278 -0
- mlops/core/pipeline_utils.py +162 -0
- mlops/core/process_hashing.py +216 -0
- mlops/core/step_state_manager.py +1298 -0
- mlops/core/step_system.py +956 -0
- mlops/core/workspace.py +99 -0
- mlops/environment/__init__.py +10 -0
- mlops/environment/base.py +43 -0
- mlops/environment/conda_manager.py +307 -0
- mlops/environment/factory.py +70 -0
- mlops/environment/pyenv_manager.py +146 -0
- mlops/environment/setup_env.py +31 -0
- mlops/environment/system_manager.py +66 -0
- mlops/environment/utils.py +105 -0
- mlops/environment/venv_manager.py +134 -0
- mlops/main.py +527 -0
- mlops/managers/project_manager.py +400 -0
- mlops/managers/reproducibility_manager.py +575 -0
- mlops/platform.py +996 -0
- mlops/reporting/__init__.py +16 -0
- mlops/reporting/context.py +187 -0
- mlops/reporting/entrypoint.py +292 -0
- mlops/reporting/kv_utils.py +77 -0
- mlops/reporting/registry.py +50 -0
- mlops/runtime/__init__.py +9 -0
- mlops/runtime/context.py +34 -0
- mlops/runtime/env_export.py +113 -0
- mlops/storage/__init__.py +12 -0
- mlops/storage/adapters/__init__.py +9 -0
- mlops/storage/adapters/gcp_kv_store.py +778 -0
- mlops/storage/adapters/gcs_object_store.py +96 -0
- mlops/storage/adapters/memory_store.py +240 -0
- mlops/storage/adapters/redis_store.py +438 -0
- mlops/storage/factory.py +199 -0
- mlops/storage/interfaces/__init__.py +6 -0
- mlops/storage/interfaces/kv_store.py +118 -0
- mlops/storage/path_utils.py +38 -0
- mlops/templates/premier-league/charts/plot_metrics.js +70 -0
- mlops/templates/premier-league/charts/plot_metrics.py +145 -0
- mlops/templates/premier-league/charts/requirements.txt +6 -0
- mlops/templates/premier-league/configs/cluster_config.yaml +13 -0
- mlops/templates/premier-league/configs/project_config.yaml +207 -0
- mlops/templates/premier-league/data/England CSV.csv +12154 -0
- mlops/templates/premier-league/models/premier_league_model.py +638 -0
- mlops/templates/premier-league/requirements.txt +8 -0
- mlops/templates/sklearn-basic/README.md +22 -0
- mlops/templates/sklearn-basic/charts/plot_metrics.py +85 -0
- mlops/templates/sklearn-basic/charts/requirements.txt +3 -0
- mlops/templates/sklearn-basic/configs/project_config.yaml +64 -0
- mlops/templates/sklearn-basic/data/train.csv +14 -0
- mlops/templates/sklearn-basic/models/model.py +62 -0
- mlops/templates/sklearn-basic/requirements.txt +10 -0
- mlops/web/__init__.py +3 -0
- mlops/web/server.py +585 -0
- mlops/web/ui/index.html +52 -0
- mlops/web/ui/mlops-charts.js +357 -0
- mlops/web/ui/script.js +1244 -0
- mlops/web/ui/styles.css +248 -0
@@ -0,0 +1,438 @@
from __future__ import annotations

from typing import Any, Dict, Optional
import json
import os
import time
import numbers

try:
    import redis  # type: ignore
except Exception:  # pragma: no cover - optional dependency
    redis = None  # type: ignore


from ..interfaces.kv_store import KeyValueEventStore
from ..path_utils import encode_probe_path


class RedisStore(KeyValueEventStore):
    """Lightweight Redis wrapper for pipeline state, cache lookup, metrics, and events.

    Key design:
    - Namespaced keys per project: prefix = f"mlops:projects:{project_id}"
    - Step cache index key (exact-hash match):
      f"{prefix}:steps:{process}:{step}:idx:{input_hash}:{config_hash}:{function_hash or 'none'}" -> JSON(record)
    - Process cache index key (exact-hash match):
      f"{prefix}:process:{process}:idx:{input_hash}:{config_hash}:{function_hash or 'none'}" -> JSON(record)
    - Pipeline execution status:
      f"{prefix}:runs:{run_id}:status" -> "running|completed|failed"
      f"{prefix}:runs:{run_id}:timestamps" -> JSON({start,end})
    - Metrics (optional):
      f"{prefix}:runs:{run_id}:metrics" -> JSON(flat metrics)
    - Events channel:
      channel = f"{prefix}:events"
    """

    def __init__(
        self,
        project_id: str,
        host: Optional[str] = None,
        port: Optional[int] = None,
        db: Optional[int] = None,
        password: Optional[str] = None,
        namespace_prefix: str = "mlops:projects",
        connection_timeout: float = 1.0,
    ) -> None:
        if redis is None:
            raise RuntimeError("redis-py not installed. Please add 'redis' to dependencies.")

        self.project_id = project_id
        self.prefix = f"{namespace_prefix}:{project_id}"
        self.channel = f"{self.prefix}:events"

        self.host = host or os.getenv("MLOPS_REDIS_HOST", "127.0.0.1")
        self.port = int(port or os.getenv("MLOPS_REDIS_PORT", "6379"))
        self.db = int(db or os.getenv("MLOPS_REDIS_DB", "0"))
        self.password = password or os.getenv("MLOPS_REDIS_PASSWORD", None)

        self.client = redis.Redis(
            host=self.host,
            port=self.port,
            db=self.db,
            password=self.password,
            socket_connect_timeout=connection_timeout,
        )

        # Eagerly validate connection; fail fast to allow fallback
        self.client.ping()

    @staticmethod
    def required_env(config: Optional[Dict[str, Any]] = None) -> Dict[str, str]:
        """Redis requires no SDK envs, but allow optional host/port/db/password env passthrough."""
        envs: Dict[str, str] = {}
        try:
            for key in ("MLOPS_REDIS_HOST", "MLOPS_REDIS_PORT", "MLOPS_REDIS_DB", "MLOPS_REDIS_PASSWORD"):
                val = os.environ.get(key)
                if val:
                    envs[key] = val
        except Exception:
            pass
        return envs

    # -------------------- Helpers --------------------
    def _json_set(self, key: str, value: Dict[str, Any], ttl_seconds: Optional[int] = None) -> None:
        payload = json.dumps(value, default=str)
        self.client.set(key, payload)
        if ttl_seconds and ttl_seconds > 0:
            self.client.expire(key, ttl_seconds)

    def _json_get(self, key: str) -> Optional[Dict[str, Any]]:
        data = self.client.get(key)
        if not data:
            return None
        try:
            if isinstance(data, (bytes, bytearray)):
                data = data.decode("utf-8")
            return json.loads(data)
        except Exception:
            return None

    # -------------------- Cache indices --------------------
    def set_step_cache_record(
        self,
        process_name: str,
        step_name: str,
        input_hash: str,
        config_hash: str,
        function_hash: Optional[str],
        record: Dict[str, Any],
        ttl_seconds: Optional[int] = None,
    ) -> None:
        fhash = function_hash or "none"
        key = f"{self.prefix}:steps:{process_name}:{step_name}:idx:{input_hash}:{config_hash}:{fhash}"
        self._json_set(key, record, ttl_seconds)

    def get_step_cache_path(
        self,
        process_name: str,
        step_name: str,
        input_hash: Optional[str],
        config_hash: Optional[str],
        function_hash: Optional[str],
    ) -> Optional[str]:
        # Only strict-hash lookups are supported in Redis backend to keep operations O(1)
        if not input_hash or not config_hash:
            return None
        fhash = function_hash or "none"
        key = f"{self.prefix}:steps:{process_name}:{step_name}:idx:{input_hash}:{config_hash}:{fhash}"
        rec = self._json_get(key)
        if not rec:
            return None
        if rec.get("status") in ("completed", "cached") and rec.get("cache_path"):
            return rec["cache_path"]
        return None

    def get_step_cache_record(
        self,
        process_name: str,
        step_name: str,
        input_hash: Optional[str],
        config_hash: Optional[str],
        function_hash: Optional[str],
    ) -> Optional[Dict[str, Any]]:
        if not input_hash or not config_hash:
            return None
        fhash = function_hash or "none"
        key = f"{self.prefix}:steps:{process_name}:{step_name}:idx:{input_hash}:{config_hash}:{fhash}"
        return self._json_get(key)

    def set_process_cache_record(
        self,
        process_name: str,
        input_hash: str,
        config_hash: str,
        function_hash: Optional[str],
        record: Dict[str, Any],
        ttl_seconds: Optional[int] = None,
    ) -> None:
        fhash = function_hash or "none"
        key = f"{self.prefix}:process:{process_name}:idx:{input_hash}:{config_hash}:{fhash}"
        self._json_set(key, record, ttl_seconds)

    def get_process_cache_path(
        self,
        process_name: str,
        input_hash: Optional[str],
        config_hash: Optional[str],
        function_hash: Optional[str],
    ) -> Optional[str]:
        if not input_hash or not config_hash:
            return None
        fhash = function_hash or "none"
        key = f"{self.prefix}:process:{process_name}:idx:{input_hash}:{config_hash}:{fhash}"
        rec = self._json_get(key)
        if not rec:
            return None
        if rec.get("status") in ("completed", "cached") and rec.get("cache_path"):
            return rec["cache_path"]
        return None

    def get_process_cache_record(
        self,
        process_name: str,
        input_hash: Optional[str],
        config_hash: Optional[str],
        function_hash: Optional[str],
    ) -> Optional[Dict[str, Any]]:
        if not input_hash or not config_hash:
            return None
        fhash = function_hash or "none"
        key = f"{self.prefix}:process:{process_name}:idx:{input_hash}:{config_hash}:{fhash}"
        return self._json_get(key)

    def get_process_cache_paths_batch(
        self,
        lookups: list[tuple[str, Optional[str], Optional[str], Optional[str]]],
    ) -> dict[str, Optional[str]]:
        """Batched fetch of process cache paths using MGET where possible.

        Input tuples: (process_name, input_hash, config_hash, function_hash)
        Returns mapping from composite key "process_name|ih|ch|fh" to cache_path (or None).
        """
        # Build keys and index map
        keys: list[str] = []
        composite_keys: list[str] = []
        for process_name, ih, ch, fh in lookups or []:
            if not ih or not ch:
                # Maintain position with a placeholder; will map to None
                composite_keys.append(f"{process_name}|{ih}|{ch}|{fh or 'none'}")
                keys.append("")
                continue
            fhash = fh or "none"
            redis_key = f"{self.prefix}:process:{process_name}:idx:{ih}:{ch}:{fhash}"
            keys.append(redis_key)
            composite_keys.append(f"{process_name}|{ih}|{ch}|{fhash}")

        result: dict[str, Optional[str]] = {}
        if not keys:
            return result
        # Pipeline for efficiency
        pipe = self.client.pipeline(transaction=False)
        for k in keys:
            if k:
                pipe.get(k)
            else:
                # Push a None placeholder to keep ordering
                pipe.execute_command("ECHO", "")
        raw_vals = pipe.execute()

        for comp, raw in zip(composite_keys, raw_vals):
            try:
                # Placeholder case from ECHO
                if isinstance(raw, (bytes, bytearray)):
                    data = raw.decode()
                    # Empty string indicates placeholder => None
                    if data == "":
                        result[comp] = None
                        continue
                elif raw is None:
                    result[comp] = None
                    continue
                # Parse JSON
                val = raw
                if isinstance(val, (bytes, bytearray)):
                    val = val.decode("utf-8")
                rec = json.loads(val) if isinstance(val, str) else None
                if rec and isinstance(rec, dict) and rec.get("status") in ("completed", "cached") and rec.get("cache_path"):
                    result[comp] = rec.get("cache_path")
                else:
                    result[comp] = None
            except Exception:
                result[comp] = None
        return result

    # -------------------- Pipeline status --------------------
    def mark_pipeline_started(self, run_id: str) -> None:
        self.client.set(f"{self.prefix}:runs:{run_id}:status", "running")
        self._json_set(
            f"{self.prefix}:runs:{run_id}:timestamps",
            {"start": time.time(), "end": None},
        )
        self.publish_event({"type": "pipeline.started", "run_id": run_id, "status": "running"})

    def mark_pipeline_completed(self, run_id: str, success: bool) -> None:
        status = "completed" if success else "failed"
        self.client.set(f"{self.prefix}:runs:{run_id}:status", status)
        self._json_set(
            f"{self.prefix}:runs:{run_id}:timestamps",
            {"start": None, "end": time.time()},
        )
        self.publish_event({"type": "pipeline.completed", "run_id": run_id, "status": status})

    def get_run_status(self, run_id: str) -> Optional[str]:
        try:
            val = self.client.get(f"{self.prefix}:runs:{run_id}:status")
            if val is None:
                return None
            if isinstance(val, (bytes, bytearray)):
                val = val.decode()
            return str(val).lower() if isinstance(val, str) else None
        except Exception:
            return None

    # -------------------- Events --------------------
    def publish_event(self, event: Dict[str, Any]) -> None:
        try:
            self.client.publish(self.channel, json.dumps(event, default=str))
        except Exception:
            # Best-effort publishing; do not raise
            pass

    def record_run_step(self, run_id: str, process_name: str, step_name: str, record: Dict[str, Any]) -> None:
        key = f"{self.prefix}:runs:{run_id}:steps:{process_name}:{step_name}"
        self._json_set(key, record)

    def list_run_steps(self, run_id: str) -> Dict[str, Dict[str, Any]]:
        pattern = f"{self.prefix}:runs:{run_id}:steps:*"
        result: Dict[str, Dict[str, Any]] = {}
        for key in self.client.scan_iter(match=pattern):
            data = self._json_get(key.decode() if isinstance(key, bytes) else key)
            if not data:
                continue
            # key format: ...:steps:{process}:{step}
            parts = (key.decode() if isinstance(key, bytes) else key).split(":")
            process = parts[-2]
            step = parts[-1]
            result[f"{process}.{step}"] = data
        return result

    # -------------------- Stats --------------------
    def increment_stat(self, run_id: str, name: str, amount: int = 1) -> None:
        self.client.hincrby(f"{self.prefix}:runs:{run_id}:stats", name, amount)

    def get_pipeline_stats(self, run_id: str) -> Dict[str, Any]:
        stats = self.client.hgetall(f"{self.prefix}:runs:{run_id}:stats")
        parsed = {(k.decode() if isinstance(k, bytes) else k): int(v) for k, v in stats.items()} if stats else {}
        return parsed

    # -------------------- Charts index --------------------
    def record_run_chart_artifacts(self, run_id: str, chart_name: str, artifacts: list[dict[str, Any]]) -> None:
        key = f"{self.prefix}:runs:{run_id}:charts:{chart_name}"
        self._json_set(key, {"items": artifacts})

    def list_run_charts(self, run_id: str) -> Dict[str, Any]:
        try:
            import logging as _logging
            _logging.getLogger(__name__).info(f"[RedisStore] list_run_charts(run_id={run_id})")
        except Exception:
            pass
        pattern = f"{self.prefix}:runs:{run_id}:charts:*"
        result: Dict[str, Any] = {}
        for key in self.client.scan_iter(match=pattern):
            data = self._json_get(key.decode() if isinstance(key, bytes) else key)
            if not data:
                continue
            name = (key.decode() if isinstance(key, bytes) else key).split(":")[-1]
            items = data.get("items", [])
            ctype = None
            try:
                if isinstance(items, list) and items and isinstance(items[0], dict):
                    ctype = items[0].get("chart_type")
            except Exception:
                ctype = None
            result[name] = {"type": (ctype or "static"), "items": items}
        try:
            import logging as _logging
            _logging.getLogger(__name__).info(f"[RedisStore] list_run_charts -> {list(result.keys())}")
        except Exception:
            pass
        return result

    def copy_run_chart_artifacts(self, from_run_id: str, to_run_id: str, chart_name: str) -> bool:
        """Copy chart artifacts from one run to another.

        Args:
            from_run_id: Source run ID
            to_run_id: Destination run ID
            chart_name: Name of the chart to copy

        Returns:
            True if copy was successful, False otherwise
        """
        try:
            # Read chart artifacts from source run
            from_key = f"{self.prefix}:runs:{from_run_id}:charts:{chart_name}"
            from_data = self._json_get(from_key)

            if not from_data:
                try:
                    import logging as _logging
                    _logging.getLogger(__name__).info(f"[RedisStore] copy_run_chart_artifacts: chart {chart_name} not found in run {from_run_id}")
                except Exception:
                    pass
                return False

            # Write to destination run
            to_key = f"{self.prefix}:runs:{to_run_id}:charts:{chart_name}"
            self._json_set(to_key, from_data)

            try:
                import logging as _logging
                _logging.getLogger(__name__).info(f"[RedisStore] copy_run_chart_artifacts: copied chart {chart_name} from {from_run_id} to {to_run_id}")
            except Exception:
                pass

            return True

        except Exception as e:
            try:
                import logging as _logging
                _logging.getLogger(__name__).warning(f"[RedisStore] copy_run_chart_artifacts failed: {e}")
            except Exception:
                pass
            return False

    # -------------------- Probe metrics --------------------
    def save_probe_metrics_by_path(self, run_id: str, probe_path: str, metrics: Dict[str, Any]) -> None:
        enc = encode_probe_path(probe_path)
        self._json_set(f"{self.prefix}:metric:{run_id}:probe_path:{enc}", metrics)
        try:
            self.publish_event({"type": "probe_metrics.updated", "run_id": run_id, "probe_path": probe_path, "metrics": metrics})
        except Exception:
            pass

    def get_probe_metrics_by_path(self, run_id: str, probe_path: str) -> Dict[str, Any]:
        enc = encode_probe_path(probe_path)
        return self._json_get(f"{self.prefix}:metric:{run_id}:probe_path:{enc}") or {}

    # -------------------- Run listing (for UI) --------------------
    def list_runs(self, limit: int = 100) -> list[str]:
        """List recent run IDs by scanning keys for this project namespace.

        Note: Redis has no server-side ordering; we approximate by timestamps key presence.
        """
        try:
            pattern = f"{self.prefix}:runs:*:timestamps"
            run_ids: list[str] = []
            for key in self.client.scan_iter(match=pattern):
                k = key.decode() if isinstance(key, (bytes, bytearray)) else str(key)
                # key format: {prefix}:runs:{run_id}:timestamps
                parts = k.split(":")
                if len(parts) >= 5:
                    run_ids.append(parts[-2])
            # Deduplicate and cap
            seen = set()
            uniq = []
            for rid in run_ids:
                if rid not in seen:
                    seen.add(rid)
                    uniq.append(rid)
            return uniq[:limit]
        except Exception:
            return []
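A minimal usage sketch of the RedisStore adapter above (mlops/storage/adapters/redis_store.py), assuming redis-py is installed and a Redis server is reachable at the default 127.0.0.1:6379; the project id, run id, hash strings, and cache path are purely illustrative:

from mlops.storage.adapters.redis_store import RedisStore

# Hypothetical identifiers for illustration only.
store = RedisStore(project_id="demo-project")

store.mark_pipeline_started("run-001")

# Index a completed step result so later runs with identical hashes can reuse it.
store.set_step_cache_record(
    process_name="train",
    step_name="fit_model",
    input_hash="in123",
    config_hash="cfg456",
    function_hash=None,
    record={"status": "completed", "cache_path": "/tmp/cache/fit_model.pkl"},
)

# Exact-hash lookup returns the cached artifact path, or None on a miss.
path = store.get_step_cache_path("train", "fit_model", "in123", "cfg456", None)

store.mark_pipeline_completed("run-001", success=True)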
mlops/storage/factory.py  ADDED
@@ -0,0 +1,199 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, Mapping, Optional


def _as_int(val: Any) -> Optional[int]:
    if val is None or val == "":
        return None
    try:
        return int(val)
    except Exception:
        return None


def _norm_backend_type(value: Any) -> str:
    try:
        s = str(value or "").strip().lower()
    except Exception:
        return ""
    aliases = {
        "mem": "memory",
        "inmem": "memory",
        "in-memory": "memory",
        "inmemory": "memory",
        "firestore": "gcp",
    }
    return aliases.get(s, s)


def _resolve_relative_path(
    raw: Any,
    *,
    workspace_root: Optional[Path] = None,
    project_root: Optional[Path] = None,
) -> Optional[Path]:
    if not raw:
        return None
    try:
        p = Path(str(raw))
    except Exception:
        return None
    if p.is_absolute():
        return p
    candidates: list[Path] = []
    if project_root is not None:
        candidates.append(project_root / p)
    if workspace_root is not None:
        candidates.append(workspace_root / p)
    candidates.append(Path.cwd() / p)
    for c in candidates:
        try:
            if c.exists():
                return c.resolve()
        except Exception:
            continue
    # Fall back to the most likely base for debugging purposes.
    return (candidates[0] if candidates else p)


def _maybe_apply_gcp_env(backend_cfg: dict[str, Any], *, workspace_root: Optional[Path], project_root: Optional[Path]) -> None:
    """Best-effort: export GCP env vars from backend config if present.

    This mirrors existing behavior across the codebase and ensures that Firestore/GCS
    SDKs can locate credentials when chart subprocesses or web server runs separately.
    """
    try:
        import os

        creds_rel = backend_cfg.get("credentials_json")
        if creds_rel and not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
            p = _resolve_relative_path(creds_rel, workspace_root=workspace_root, project_root=project_root)
            if p is not None:
                os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", str(p))

        gcp_project = backend_cfg.get("gcp_project") or os.environ.get("GOOGLE_CLOUD_PROJECT")
        if gcp_project and not os.environ.get("GOOGLE_CLOUD_PROJECT"):
            os.environ.setdefault("GOOGLE_CLOUD_PROJECT", str(gcp_project))

        emulator_host = backend_cfg.get("emulator_host")
        if emulator_host and not os.environ.get("FIRESTORE_EMULATOR_HOST"):
            os.environ.setdefault("FIRESTORE_EMULATOR_HOST", str(emulator_host))
    except Exception:
        return


def create_kv_store(
    project_id: str,
    backend_cfg: Optional[dict[str, Any]] = None,
    *,
    env: Optional[Mapping[str, str]] = None,
    workspace_root: Optional[Path] = None,
    project_root: Optional[Path] = None,
) -> Any:
    """Create a KV store instance (Redis, GCP Firestore/PubSub, or in-memory).

    Precedence:
    - `MLOPS_KV_BACKEND` env override (if set)
    - `backend_cfg['type']` from config
    - safe fallback: in-memory
    """
    from mlops.storage.adapters.memory_store import InMemoryStore

    cfg = backend_cfg if isinstance(backend_cfg, dict) else {}

    def _env_get(key: str) -> Optional[str]:
        try:
            if env is not None:
                v = env.get(key)
                return str(v) if v is not None else None
        except Exception:
            pass
        try:
            import os

            return os.environ.get(key)
        except Exception:
            return None

    backend_type = _norm_backend_type(_env_get("MLOPS_KV_BACKEND") or cfg.get("type") or "")
    if not backend_type:
        backend_type = "memory"

    if backend_type == "memory":
        return InMemoryStore(project_id)

    if backend_type == "redis":
        try:
            from mlops.storage.adapters.redis_store import RedisStore

            host = _env_get("MLOPS_REDIS_HOST") or cfg.get("host")
            port = _as_int(_env_get("MLOPS_REDIS_PORT") or cfg.get("port"))
            db = _as_int(_env_get("MLOPS_REDIS_DB") or cfg.get("db"))
            password = _env_get("MLOPS_REDIS_PASSWORD") or cfg.get("password")
            return RedisStore(project_id=project_id, host=host, port=port, db=db, password=password)
        except Exception:
            return InMemoryStore(project_id)

    if backend_type == "gcp":
        try:
            from mlops.storage.adapters.gcp_kv_store import GCPStore

            _maybe_apply_gcp_env(cfg, workspace_root=workspace_root, project_root=project_root)
            gcp_project = cfg.get("gcp_project") or _env_get("GOOGLE_CLOUD_PROJECT")
            emulator_host = cfg.get("emulator_host") or _env_get("FIRESTORE_EMULATOR_HOST")
            topic_name = cfg.get("topic_name")
            return GCPStore(project_id=project_id, gcp_project=gcp_project, topic_name=topic_name, emulator_host=emulator_host)
        except Exception:
            return InMemoryStore(project_id)

    # Unknown type: fall back safely.
    return InMemoryStore(project_id)


def create_object_store(
    cache_cfg: Optional[dict[str, Any]] = None,
    *,
    env: Optional[Mapping[str, str]] = None,
) -> Any:
    """Create an object store instance for cache artifacts (currently GCS only)."""
    cfg = cache_cfg if isinstance(cache_cfg, dict) else {}
    store_cfg = cfg.get("object_store") if isinstance(cfg.get("object_store"), dict) else {}
    store_type = _norm_backend_type(store_cfg.get("type") or "")

    def _env_get(key: str) -> Optional[str]:
        try:
            if env is not None:
                v = env.get(key)
                return str(v) if v is not None else None
        except Exception:
            pass
        try:
            import os

            return os.environ.get(key)
        except Exception:
            return None

    if store_type == "gcs":
        bucket = store_cfg.get("bucket") or _env_get("MLOPS_GCS_BUCKET")
        prefix = store_cfg.get("prefix") or _env_get("MLOPS_GCS_PREFIX")
        if not bucket:
            return None
        try:
            from mlops.storage.adapters.gcs_object_store import GCSObjectStore

            return GCSObjectStore(bucket=str(bucket), prefix=str(prefix) if prefix else None)
        except Exception:
            return None

    return None


__all__ = [
    "create_kv_store",
    "create_object_store",
]
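A brief sketch of how the factory above might be driven from project configuration. The dictionary keys (`type`, `host`, `port`, and the nested `object_store` block with `bucket`/`prefix`) mirror the lookups in the code; the project id and bucket name are illustrative, and if no Redis server or GCS client is available the calls fall back to the in-memory store and None respectively:

from mlops.storage.factory import create_kv_store, create_object_store

# Hypothetical configuration for illustration only.
kv = create_kv_store(
    "demo-project",
    backend_cfg={"type": "redis", "host": "127.0.0.1", "port": 6379},
)

obj = create_object_store(
    cache_cfg={"object_store": {"type": "gcs", "bucket": "demo-bucket", "prefix": "cache"}},
)

# Environment overrides win over config: e.g. setting MLOPS_KV_BACKEND=memory forces the in-memory store.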